Index: stable/9/sys/amd64/amd64/fpu.c =================================================================== --- stable/9/sys/amd64/amd64/fpu.c (revision 273911) +++ stable/9/sys/amd64/amd64/fpu.c (revision 273912) @@ -1,1018 +1,1018 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Floating point support. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void ldmxcsr(u_int csr); void stmxcsr(u_int *csr); void xrstor(char *addr, uint64_t mask); void xsave(char *addr, uint64_t mask); #endif /* __GNUCLIKE_ASM && !lint */ #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() CTASSERT(sizeof(struct savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the 
pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, - NULL, 1, "Floating point instructions executed in hardware"); + SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static uma_zone_t fpu_save_area_zone; static struct savefpu *fpu_initialstate; struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; void fpusave(void *addr) { if (use_xsave) xsave((char *)addr, xsave_mask); else fxsave((char *)addr); } void fpurestore(void *addr) { if (use_xsave) xrstor((char *)addr, xsave_mask); else fxrstor((char *)addr); } void fpususpend(void *addr) { u_long cr0; cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void fpuinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; if ((cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { /* * Patch the XSAVE instruction in the cpu_switch code * to XSAVEOPT. We assume that XSAVE encoding used * REX byte, and set the bit 4 of the r/m byte. */ ctx_switch_xsave[3] |= 0x10; } } /* * Calculate the fpu save area size. 
*/ static void fpuinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(struct savefpu); } /* * Initialize the floating point unit. */ void fpuinit(void) { register_t saveintr; u_int mxcsr; u_short control; if (IS_BSP()) fpuinit_bsp1(); if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (IS_BSP()) fpuinit_bsp2(); /* * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); fninit(); control = __INITIAL_FPUCW__; fldcw(control); mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void fpuinitstate(void *arg __unused) { register_t saveintr; int cp[4], i, max_ext_n; fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); saveintr = intr_disable(); stop_emulating(); fpusave(fpu_initialstate); if (fpu_initialstate->sv_env.en_mxcsr_mask) cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM registers. The * fpusave call dumped the garbage contained in the registers * after reset to the initial state saved. Clear XMM * registers file image to make the startup program state and * signal handler XMM register content predictable. */ bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc)); /* * Create a table describing the layout of the CPU Extended * Save Area. 
*/ if (use_xsave) { max_ext_n = flsl(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); start_emulating(); intr_restore(saveintr); } SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL); /* * Free coprocessor (if we have it). */ void fpuexit(struct thread *td) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, 0); } critical_exit(); } int fpuformat() { return (_MC_FPFMT_XMM); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". 
* * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, 
/* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | 
STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - 
INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int fputrap_x87(void) { struct savefpu *pcb_save; u_short control, status; critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. 
*/ if (PCPU_GET(fpcurthread) != curthread) { pcb_save = curpcb->pcb_save; control = pcb_save->sv_env.en_cw; status = pcb_save->sv_env.en_sw; } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int fputrap_sse(void) { u_int mxcsr; critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curpcb->pcb_save->sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; void fpudna(void) { critical_enter(); if (PCPU_GET(fpcurthread) == curthread) { printf("fpudna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); critical_exit(); return; } if (PCPU_GET(fpcurthread) != NULL) { printf("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("fpudna"); } stop_emulating(); /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); fpu_clean_state(); if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * fpu_initialstate, to ignite the XSAVEOPT * tracking engine. 
*/ bcopy(fpu_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); fpurestore(curpcb->pcb_save); if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(curpcb->pcb_initial_fpucw); if (PCB_USER_FPU(curpcb)) set_pcb_flags(curpcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(curpcb, PCB_FPUINITDONE); } else fpurestore(curpcb->pcb_save); critical_exit(); } void fpudrop() { struct thread *td; td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE); start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int fpugetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i, owned; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); get_pcb_user_save_pcb(pcb)->sv_env.en_cw = pcb->pcb_initial_fpucw; fpuuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { fpusave(get_pcb_user_save_pcb(pcb)); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } critical_exit(); if (use_xsave) { /* * Handle partially saved state. 
*/ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + offsetof(struct xstate_hdr, xstate_bv)); max_ext_n = flsl(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)fpu_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } return (owned); } void fpuuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(pcb, PCB_FPUINITDONE); } int fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(struct savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } /* * Set the state of the FPU. 
*/ int fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = fpusetxstate(td, xfpustate, xfpustate_size); if (error != 0) { critical_exit(); return (error); } bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurestore(get_pcb_user_save_td(td)); critical_exit(); set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); } else { critical_exit(); error = fpusetxstate(td, xfpustate, xfpustate_size); if (error != 0) return (error); bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpuuserinited(td); } return (0); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } /* * This really sucks. We want the acpi version only, but it requires * the isa_if.h file in order to get the definitions. */ #include "opt_isa.h" #ifdef DEV_ISA #include /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
*/ static struct isa_pnp_id fpupnp_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int fpupnp_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); if (result <= 0) device_quiet(dev); return (result); } static int fpupnp_attach(device_t dev) { return (0); } static device_method_t fpupnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fpupnp_probe), DEVMETHOD(device_attach, fpupnp_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t fpupnp_driver = { "fpupnp", fpupnp_methods, 1, /* no softc */ }; static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_FPUINITDONE 0x01 struct fpu_kern_ctx { struct savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { /* XXXKIB clear the memory ? 
*/ free(ctx, M_FPUKERN_CTX); } static struct savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((struct savefpu *)p); } int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = 0; if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_FPUINITDONE; fpuexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); set_pcb_flags(pcb, PCB_KERNFPU); clear_pcb_flags(pcb, PCB_FPUINITDONE); return (0); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) fpudrop(); critical_exit(); pcb->pcb_save = ctx->prev; if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { set_pcb_flags(pcb, PCB_FPUINITDONE); clear_pcb_flags(pcb, PCB_KERNFPU); } else clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU); } else { if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) set_pcb_flags(pcb, PCB_FPUINITDONE); else clear_pcb_flags(pcb, PCB_FPUINITDONE); KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); set_pcb_flags(curpcb, PCB_KERNFPU); return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNFPU) != 0); } /* * FPU save area alloc/free/init utility routines */ struct savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, 0)); } void fpu_save_area_free(struct 
savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(struct savefpu *fsa) { bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size); } Index: stable/9/sys/amd64/include/xen =================================================================== --- stable/9/sys/amd64/include/xen (revision 273911) +++ stable/9/sys/amd64/include/xen (revision 273912) Property changes on: stable/9/sys/amd64/include/xen ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/amd64/include/xen:r263710,273377-273378,273423,273455 Index: stable/9/sys/arm/arm/busdma_machdep.c =================================================================== --- stable/9/sys/arm/arm/busdma_machdep.c (revision 273911) +++ stable/9/sys/arm/arm/busdma_machdep.c (revision 273912) @@ -1,1516 +1,1516 @@ /*- * Copyright (c) 2012 Ian Lepore * Copyright (c) 2004 Olivier Houchard * Copyright (c) 2002 Peter Grehan * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred */ #include __FBSDID("$FreeBSD$"); /* * ARM bus dma support routines. * * XXX Things to investigate / fix some day... * - What is the earliest that this API can be called? Could there be any * fallout from changing the SYSINIT() order from SI_SUB_VM to SI_SUB_KMEM? * - The manpage mentions the BUS_DMA_NOWAIT flag only in the context of the * bus_dmamap_load() function. This code has historically (and still does) * honor it in bus_dmamem_alloc(). If we got rid of that we could lose some * error checking because some resource management calls would become WAITOK * and thus "cannot fail." * - The decisions made by _bus_dma_can_bounce() should be made once, at tag * creation time, and the result stored in the tag. * - It should be possible to take some shortcuts when mapping a buffer we know * came from the uma(9) allocators based on what we know about such buffers * (aligned, contiguous, etc). * - The allocation of bounce pages could probably be cleaned up, then we could * retire arm_remap_nocache(). 
*/ #define _ARM32_BUS_DMA_PRIVATE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 64 #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_size_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; /* * DMA range for this tag. If the page doesn't fall within * one of these ranges, an error is returned. The caller * may then decide what to do with the transfer. If the * range pointer is NULL, it is ignored. */ struct arm32_dma_range *ranges; int _nranges; struct bounce_zone *bounce_zone; /* * Most tags need one or two segments, and can use the local tagsegs * array. For tags with a larger limit, we'll allocate a bigger array * on first use. 
*/ bus_dma_segment_t *segments; bus_dma_segment_t tagsegs[2]; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ vm_offset_t vaddr_nocache; /* kva of bounce buffer uncached */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ bus_addr_t dataaddr; /* client physical address */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; struct sync_list { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ bus_size_t datacount; /* client data count */ }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); #define DMAMAP_COHERENT 0x8 #define DMAMAP_CACHE_ALIGNED 0x10 struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; int flags; STAILQ_ENTRY(bus_dmamap) links; bus_dmamap_callback_t *callback; void *callback_arg; int sync_count; struct sync_list *slist; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static struct mtx busdma_mtx; MTX_SYSINIT(busdma_mtx, &busdma_mtx, "busdma lock", MTX_DEF); static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static 
int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); /* Default tag, as most drivers provide no parent tag. */ bus_dma_tag_t arm_root_dma_tag; /* * ---------------------------------------------------------------------------- * Begin block of code useful to transplant to other implementations. */ static uma_zone_t dmamap_zone; /* Cache of struct bus_dmamap items */ static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ /* * This is the ctor function passed to uma_zcreate() for the pool of dma maps. * It'll need platform-specific changes if this code is copied. */ static int dmamap_ctor(void *mem, int size, void *arg, int flags) { bus_dmamap_t map; bus_dma_tag_t dmat; map = (bus_dmamap_t)mem; dmat = (bus_dma_tag_t)arg; dmat->map_count++; map->dmat = dmat; map->flags = 0; STAILQ_INIT(&map->bpages); return (0); } /* * This is the dtor function passed to uma_zcreate() for the pool of dma maps. * It may need platform-specific changes if this code is copied . */ static void dmamap_dtor(void *mem, int size, void *arg) { bus_dmamap_t map; map = (bus_dmamap_t)mem; map->dmat->map_count--; } static void busdma_init(void *dummy) { /* Create a cache of maps for bus_dmamap_create(). */ dmamap_zone = uma_zcreate("dma maps", sizeof(struct bus_dmamap), dmamap_ctor, dmamap_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); /* Create a cache of buffers in standard (cacheable) memory. 
*/ standard_allocator = busdma_bufalloc_create("buffer", arm_dcache_align, /* minimum_alignment */ NULL, /* uma_alloc func */ NULL, /* uma_free func */ 0); /* uma_zcreate_flags */ /* * Create a cache of buffers in uncacheable memory, to implement the * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag. */ coherent_allocator = busdma_bufalloc_create("coherent", arm_dcache_align, /* minimum_alignment */ busdma_bufalloc_alloc_uncacheable, busdma_bufalloc_free_uncacheable, 0); /* uma_zcreate_flags */ } /* * This init historically used SI_SUB_VM, but now the init code requires * malloc(9) using M_DEVBUF memory, which is set up later than SI_SUB_VM, by * SI_SUB_KMEM and SI_ORDER_SECOND, so we'll go right after that by using * SI_SUB_KMEM and SI_ORDER_THIRD. */ SYSINIT(busdma, SI_SUB_KMEM, SI_ORDER_THIRD, busdma_init, NULL); /* * End block of code useful to transplant to other implementations. * ---------------------------------------------------------------------------- */ /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || ((paddr & (dmat->alignment - 1)) != 0)) && (dmat->filter == NULL || (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * This routine checks the exclusion zone constraints from a tag against the * physical RAM available on the machine. 
If a tag specifies an exclusion zone * but there's no RAM in that zone, then we avoid allocating resources to bounce * a request, and we can use any memory allocator (as opposed to needing * kmem_alloc_contig() just because it can allocate pages in an address range). * * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the * same value on 32-bit architectures) as their lowaddr constraint, and we can't * possibly have RAM at an address higher than the highest address we can * express, so we take a fast out. */ static __inline int _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; if (lowaddr >= BUS_SPACE_MAXADDR) return (0); for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr > phys_avail[i])) return (1); } return (0); } static __inline struct arm32_dma_range * _bus_dma_inrange(struct arm32_dma_range *ranges, int nranges, bus_addr_t curaddr) { struct arm32_dma_range *dr; int i; for (i = 0, dr = ranges; i < nranges; i++, dr++) { if (curaddr >= dr->dr_sysbase && round_page(curaddr) <= (dr->dr_sysbase + dr->dr_len)) return (dr); } return (NULL); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. 
* XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { #ifdef INVARIANTS panic("driver error: busdma dflt_lock called"); #else printf("DRIVER_ERROR: busdma dflt_lock called\n"); #endif } /* * Allocate a device specific dma_tag. */ #define SEG_NB 1024 int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_size_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Return a NULL tag on failure */ *dmat = NULL; if (!parent) parent = arm_root_dma_tag; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment ? alignment : 1; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; newtag->ranges = bus_dma_get_range(); newtag->_nranges = bus_dma_get_range_nb(); if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* * If all the segments we need fit into the local tagsegs array, set the * pointer now. Otherwise NULL the pointer and an array of segments * will be allocated later, on first use. 
We don't pre-allocate now * because some tags exist just to pass contraints to children in the * device hierarchy, and they tend to use BUS_SPACE_UNRESTRICTED and we * sure don't want to try to allocate an array for that. */ if (newtag->nsegments <= nitems(newtag->tagsegs)) newtag->segments = newtag->tagsegs; else newtag->segments = NULL; /* * Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = min(parent->lowaddr, newtag->lowaddr); newtag->highaddr = max(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = min(parent->boundary, newtag->boundary); if ((newtag->filter != NULL) || ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) || newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? 
newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { #ifdef KTR bus_dma_tag_t dmat_copy = dmat; #endif if (dmat != NULL) { if (dmat->map_count != 0) return (EBUSY); while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { if (dmat->segments != NULL && dmat->segments != dmat->tagsegs) free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } CTR2(KTR_BUSDMA, "%s tag %p", __func__, dmat_copy); return (0); } #include /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct sync_list *slist; bus_dmamap_t map; int error = 0; slist = malloc(sizeof(*slist) * dmat->nsegments, M_DEVBUF, M_NOWAIT); if (slist == NULL) return (ENOMEM); map = uma_zalloc_arg(dmamap_zone, dmat, M_NOWAIT); *mapp = map; if (map == NULL) { free(slist, M_DEVBUF); return (ENOMEM); } /* * If the tag's segments haven't been allocated yet we need to do it * now, because we can't sleep for resources at map load time. */ if (dmat->segments == NULL) { dmat->segments = malloc(dmat->nsegments * sizeof(*dmat->segments), M_DEVBUF, M_NOWAIT); if (dmat->segments == NULL) { free(slist, M_DEVBUF); uma_zfree(dmamap_zone, map); *mapp = NULL; return (ENOMEM); } } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. 
*/ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { free(slist, M_DEVBUF); uma_zfree(dmamap_zone, map); *mapp = NULL; return (error); } } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ maxpages = MAX_BPAGES; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } else { error = 0; } } bz->map_count++; } map->sync_count = 0; map->slist = slist; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } free(map->slist, M_DEVBUF); uma_zfree(dmamap_zone, map); if (dmat->bounce_zone) dmat->bounce_zone->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into bus device * space based on the constraints listed in the dma tag. Returns a pointer to * the allocated memory, and a pointer to an associated bus_dmamap. 
/*
 * Allocate a piece of memory that can be efficiently mapped into bus device
 * space based on the constraints listed in the dma tag.  Returns a pointer to
 * the allocated memory, and a pointer to an associated bus_dmamap.
 *
 * 'flags' may carry BUS_DMA_NOWAIT (do not sleep for the tag's segment array,
 * the map, or the buffer), BUS_DMA_COHERENT (use the uncacheable allocator)
 * and BUS_DMA_ZERO (zero the buffer).
 *
 * Returns 0 on success.  On any failure ENOMEM is returned and both *vaddrp
 * and *mapp are NULL.
 */
int
bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddrp, int flags,
    bus_dmamap_t *mapp)
{
	struct sync_list *slist;
	void *vaddr;
	struct busdma_bufzone *bufzone;
	busdma_bufalloc_t ba;
	bus_dmamap_t map;
	int mflags;
	vm_memattr_t memattr;

	/* Make every failure path hand back NULL results. */
	*vaddrp = NULL;
	*mapp = NULL;

	if (flags & BUS_DMA_NOWAIT)
		mflags = M_NOWAIT;
	else
		mflags = M_WAITOK;

	/*
	 * If the tag's segments haven't been allocated yet we need to do it
	 * now, because we can't sleep for resources at map load time.
	 */
	if (dmat->segments == NULL) {
		dmat->segments = malloc(dmat->nsegments *
		    sizeof(*dmat->segments), M_DEVBUF, mflags);
		/* An M_NOWAIT allocation can fail; don't continue without it. */
		if (dmat->segments == NULL)
			return (ENOMEM);
	}

	slist = malloc(sizeof(*slist) * dmat->nsegments, M_DEVBUF, M_NOWAIT);
	if (slist == NULL)
		return (ENOMEM);
	map = uma_zalloc_arg(dmamap_zone, dmat, mflags);
	if (map == NULL) {
		free(slist, M_DEVBUF);
		return (ENOMEM);
	}
	if (flags & BUS_DMA_COHERENT) {
		memattr = VM_MEMATTR_UNCACHEABLE;
		ba = coherent_allocator;
		map->flags |= DMAMAP_COHERENT;
	} else {
		memattr = VM_MEMATTR_DEFAULT;
		ba = standard_allocator;
	}
	/* All buffers we allocate are cache-aligned. */
	map->flags |= DMAMAP_CACHE_ALIGNED;

	if (flags & BUS_DMA_ZERO)
		mflags |= M_ZERO;

	/*
	 * Try to find a bufzone in the allocator that holds a cache of buffers
	 * of the right size for this request.  If the buffer is too big to be
	 * held in the allocator cache, this returns NULL.
	 */
	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);

	/*
	 * Allocate the buffer from the uma(9) allocator if...
	 *  - It's small enough to be in the allocator (bufzone not NULL).
	 *  - The alignment constraint isn't larger than the allocation size
	 *    (the allocator aligns buffers to their size boundaries).
	 *  - There's no need to handle lowaddr/highaddr exclusion zones.
	 * else allocate non-contiguous pages if...
	 *  - The page count that could get allocated doesn't exceed nsegments.
	 *  - The alignment constraint isn't larger than a page boundary.
	 *  - There are no boundary-crossing constraints.
	 * else allocate a block of contiguous pages because one or more of the
	 * constraints is something that only the contig allocator can fulfill.
	 */
	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
	    !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) {
		vaddr = uma_zalloc(bufzone->umazone, mflags);
	} else if (dmat->nsegments >= btoc(dmat->maxsize) &&
	    dmat->alignment <= PAGE_SIZE && dmat->boundary == 0) {
		vaddr = (void *)kmem_alloc_attr(kernel_map, dmat->maxsize,
		    mflags, 0, dmat->lowaddr, memattr);
	} else {
		vaddr = (void *)kmem_alloc_contig(kernel_map, dmat->maxsize,
		    mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary,
		    memattr);
	}

	if (vaddr == NULL) {
		free(slist, M_DEVBUF);
		uma_zfree(dmamap_zone, map);
		map = NULL;
	} else {
		map->slist = slist;
		map->sync_count = 0;
	}
	*vaddrp = vaddr;
	*mapp = map;

	return (vaddr == NULL ? ENOMEM : 0);
}

/*
 * Free a piece of memory that was allocated via bus_dmamem_alloc, along with
 * its associated map.
 *
 * The buffer goes back to whichever allocator bus_dmamem_alloc() would have
 * picked for this tag: the matching bufzone when one exists and the tag's
 * alignment and exclusion constraints allow it, kmem_free() otherwise.
 */
void
bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
{
	struct busdma_bufzone *bufzone;
	busdma_bufalloc_t ba;

	if (map->flags & DMAMAP_COHERENT)
		ba = coherent_allocator;
	else
		ba = standard_allocator;

	/*
	 * Release the sync list before the map: the list pointer lives inside
	 * the map, so it must be read while the map is still ours.
	 */
	free(map->slist, M_DEVBUF);
	uma_zfree(dmamap_zone, map);
	/* Be careful not to access map from here on. */

	bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);

	if (bufzone != NULL && dmat->alignment <= bufzone->size &&
	    !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr))
		uma_zfree(bufzone->umazone, vaddr);
	else
		kmem_free(kernel_map, (vm_offset_t)vaddr, dmat->maxsize);
}
BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } if (dmat->ranges) { struct arm32_dma_range *dr; dr = _bus_dma_inrange(dmat->ranges, dmat->_nranges, curaddr); if (dr == NULL) return (0); /* * In a valid DMA range. Translate the physical * memory address to an address in the DMA window. */ curaddr = (curaddr - dr->dr_sysbase) + dr->dr_busbase; } seg = *segp; /* * Insert chunk into a segment, coalescing with * the previous segment if possible. */ if (seg >= 0 && curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) { segs[seg].ds_len += sgsize; } else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. 
*/ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { _bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; struct sync_list *sl; vm_offset_t vaddr = (vm_offset_t)buf; int error = 0; if (segs == NULL) segs = dmat->segments; if ((flags & BUS_DMA_LOAD_MBUF) != 0) map->flags |= DMAMAP_CACHE_ALIGNED; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, " "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); while (buflen > 0) { /* * Get the physical address for this segment. 
*/ if (__predict_true(pmap == kernel_pmap)) { curaddr = pmap_kextract(vaddr); } else { curaddr = pmap_extract(pmap, vaddr); map->flags &= ~DMAMAP_COHERENT; } /* * Compute the segment size, and adjust counts. */ sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { sl = &map->slist[map->sync_count - 1]; if (map->sync_count == 0 || vaddr != sl->vaddr + sl->datacount) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = vaddr; sl->datacount = sgsize; sl->busaddr = curaddr; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { _bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } void __bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { KASSERT(dmat != NULL, ("dmatag is NULL")); KASSERT(map != NULL, ("dmamap is NULL")); map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. 
/*
 * Perform the cache maintenance requested by 'op' on the virtual range
 * [buf, buf + len), on both the data cache and the L2 cache.
 *
 *  - PREWRITE without PREREAD: write the range back.
 *  - PREREAD: invalidate the range, or write back and invalidate it when
 *    PREWRITE is also set or the range is not cache line aligned.
 *  - POSTREAD: invalidate the range.  When the range is not cache line
 *    aligned and the caller did not vouch for it via 'bufaligned', the bytes
 *    sharing the first and last cache lines with the buffer are saved before
 *    the invalidate and put back afterwards, with interrupts disabled for
 *    the duration.
 *
 * 'bufaligned' is the caller's promise that the buffer owns its cache lines
 * (true for mbufs and for buffers from bus_dmamem_alloc()), which lets the
 * POSTREAD save/restore be skipped.  A PREREAD writeback is still done for
 * such buffers when the range looks unaligned.
 */
static void
bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op,
    int bufaligned)
{
	char _tmp_cl[arm_dcache_align], _tmp_clend[arm_dcache_align];
	vm_offset_t bufend, headlen, tailoff;
	register_t s;
	int partial, preserve;

	if ((op & BUS_DMASYNC_PREWRITE) && !(op & BUS_DMASYNC_PREREAD)) {
		cpu_dcache_wb_range(buf, len);
		cpu_l2cache_wb_range(buf, len);
	}

	/* Non-zero when either end of the range is off a cache line edge. */
	partial = (((vm_offset_t)buf) | len) & arm_dcache_align_mask;

	if (op & BUS_DMASYNC_PREREAD) {
		if ((op & BUS_DMASYNC_PREWRITE) || partial) {
			cpu_dcache_wbinv_range(buf, len);
			cpu_l2cache_wbinv_range(buf, len);
		} else {
			cpu_dcache_inv_range(buf, len);
			cpu_l2cache_inv_range(buf, len);
		}
	}

	if (!(op & BUS_DMASYNC_POSTREAD))
		return;

	bufend = buf + len;
	/* Bytes in front of buf within its cache line. */
	headlen = buf & arm_dcache_align_mask;
	/* Offset of the buffer's end within its cache line. */
	tailoff = bufend & arm_dcache_align_mask;
	preserve = partial && !bufaligned;

	if (preserve) {
		s = intr_disable();
		if (headlen)
			memcpy(_tmp_cl,
			    (void *)(buf & ~arm_dcache_align_mask), headlen);
		if (tailoff)
			memcpy(_tmp_clend, (void *)bufend,
			    arm_dcache_align - tailoff);
	}

	cpu_dcache_inv_range(buf, len);
	cpu_l2cache_inv_range(buf, len);

	if (preserve) {
		if (headlen)
			memcpy((void *)(buf & ~arm_dcache_align_mask),
			    _tmp_cl, headlen);
		if (tailoff)
			memcpy((void *)bufend, _tmp_clend,
			    arm_dcache_align - tailoff);
		intr_restore(s);
	}
}
bpage->vaddr_nocache : bpage->vaddr), (void *)bpage->datavaddr, bpage->datacount); else physcopyin((void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->dataaddr, bpage->datacount); dmat->bounce_zone->total_bounced++; } } } void _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct sync_list *sl, *end; int bufaligned; if (op == BUS_DMASYNC_POSTWRITE) return; if (map->flags & DMAMAP_COHERENT) goto drain; if (STAILQ_FIRST(&map->bpages)) _bus_dmamap_sync_bp(dmat, map, op); CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags); bufaligned = (map->flags & DMAMAP_CACHE_ALIGNED); if (map->sync_count) { end = &map->slist[map->sync_count]; for (sl = &map->slist[0]; sl != end; sl++) bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op, bufaligned); } drain: cpu_drain_writebuf(); } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, 
"zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_ADD_ULONG(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "alignment", CTLFLAG_RD, &bz->alignment, 0, ""); + "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; 
bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); bpage->vaddr_nocache = (vm_offset_t)arm_remap_nocache( (void *)bpage->vaddr, PAGE_SIZE); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; 
mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= vaddr & PAGE_MASK; bpage->busaddr |= vaddr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->dataaddr = addr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: stable/9/sys/boot/forth =================================================================== --- stable/9/sys/boot/forth (revision 273911) +++ stable/9/sys/boot/forth (revision 273912) Property changes on: stable/9/sys/boot/forth 
___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot/forth:r263710,273377-273378,273423,273455 Index: stable/9/sys/boot/i386/efi =================================================================== --- stable/9/sys/boot/i386/efi (revision 273911) +++ stable/9/sys/boot/i386/efi (revision 273912) Property changes on: stable/9/sys/boot/i386/efi ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot/i386/efi:r263710,273377-273378,273423,273455 Index: stable/9/sys/boot/i386/gptboot =================================================================== --- stable/9/sys/boot/i386/gptboot (revision 273911) +++ stable/9/sys/boot/i386/gptboot (revision 273912) Property changes on: stable/9/sys/boot/i386/gptboot ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot/i386/gptboot:r263710,273377-273378,273423,273455 Index: stable/9/sys/boot/ia64/efi =================================================================== --- stable/9/sys/boot/ia64/efi (revision 273911) +++ stable/9/sys/boot/ia64/efi (revision 273912) Property changes on: stable/9/sys/boot/ia64/efi ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot/ia64/efi:r263710,273377-273378,273423,273455 Index: stable/9/sys/boot/ia64/ski =================================================================== --- stable/9/sys/boot/ia64/ski (revision 273911) +++ stable/9/sys/boot/ia64/ski (revision 273912) Property changes on: stable/9/sys/boot/ia64/ski ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot/ia64/ski:r263710,273377-273378,273423,273455 Index: stable/9/sys/boot/powerpc/boot1.chrp =================================================================== --- 
stable/9/sys/boot/powerpc/boot1.chrp (revision 273911) +++ stable/9/sys/boot/powerpc/boot1.chrp (revision 273912) Property changes on: stable/9/sys/boot/powerpc/boot1.chrp ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot/powerpc/boot1.chrp:r263710,273377-273378,273423,273455 Index: stable/9/sys/boot/powerpc/ofw =================================================================== --- stable/9/sys/boot/powerpc/ofw (revision 273911) +++ stable/9/sys/boot/powerpc/ofw (revision 273912) Property changes on: stable/9/sys/boot/powerpc/ofw ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot/powerpc/ofw:r263710,273377-273378,273423,273455 Index: stable/9/sys/boot =================================================================== --- stable/9/sys/boot (revision 273911) +++ stable/9/sys/boot (revision 273912) Property changes on: stable/9/sys/boot ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/boot:r263710,273377-273378,273423,273455 Index: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c =================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c (revision 273911) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c (revision 273912) @@ -1,2180 +1,2180 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. 
* * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Portions Copyright 2007 Jeremy Teo */ /* Portions Copyright 2011 Martin Matuska */ #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* _KERNEL */ #include #include #include #include #include #include #include #include #include #include "zfs_prop.h" #include "zfs_comutil.h" /* Used by fstat(1). */ -SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, 0, sizeof(znode_t), - "sizeof(znode_t)"); +SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, + SYSCTL_NULL_INT_PTR, sizeof(znode_t), "sizeof(znode_t)"); /* * Define ZNODE_STATS to turn on statistic gathering. By default, it is only * turned on when DEBUG is also defined. */ #ifdef DEBUG #define ZNODE_STATS #endif /* DEBUG */ #ifdef ZNODE_STATS #define ZNODE_STAT_ADD(stat) ((stat)++) #else #define ZNODE_STAT_ADD(stat) /* nothing */ #endif /* ZNODE_STATS */ /* * Functions needed for userland (ie: libzpool) are not put under * #ifdef_KERNEL; the rest of the functions have dependencies * (such as VFS logic) that will not compile easily in userland. */ #ifdef _KERNEL /* * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to * be freed before it can be safely accessed. 
*/
krwlock_t zfsvfs_lock;

/* kmem cache for znode_t; created in zfs_znode_init(). */
static kmem_cache_t *znode_cache = NULL;

/*
 * Eviction callback that is never expected to run: it panics, printing
 * user_ptr as the znode being evicted.
 */
/*ARGSUSED*/
static void
znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
{
	/*
	 * We should never drop all dbuf refs without first clearing
	 * the eviction callback.
	 */
	panic("evicting znode %p\n", user_ptr);
}

extern struct vop_vector zfs_vnodeops;
extern struct vop_vector zfs_fifoops;
extern struct vop_vector zfs_shareops;

/*
 * kmem cache constructor for znode_t (passed to kmem_cache_create() in
 * zfs_znode_init()).  Marks z_zfsvfs invalid, initializes the list link,
 * the per-znode locks and the range-lock AVL tree, and clears the
 * dirlock, cached-ACL and vnode pointers.  Always returns 0; arg and
 * kmflags are not used.
 */
static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
{
	znode_t *zp = buf;

	POINTER_INVALIDATE(&zp->z_zfsvfs);

	list_link_init(&zp->z_link_node);

	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&zp->z_range_avl, zfs_range_compare,
	    sizeof (rl_t), offsetof(rl_t, r_node));

	zp->z_dirlocks = NULL;
	zp->z_acl_cached = NULL;
	zp->z_vnode = NULL;
	zp->z_moved = 0;
	return (0);
}

/*
 * kmem cache destructor for znode_t.  Asserts that the znode is detached
 * (invalid z_zfsvfs, no vnode, not on a list, no dirlocks, no cached ACL)
 * and destroys the locks and AVL tree created by the constructor.
 */
/*ARGSUSED*/
static void
zfs_znode_cache_destructor(void *buf, void *arg)
{
	znode_t *zp = buf;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	ASSERT(ZTOV(zp) == NULL);
	vn_free(ZTOV(zp));
	ASSERT(!list_link_active(&zp->z_link_node));
	mutex_destroy(&zp->z_lock);
	rw_destroy(&zp->z_parent_lock);
	rw_destroy(&zp->z_name_lock);
	mutex_destroy(&zp->z_acl_lock);
	avl_destroy(&zp->z_range_avl);
	mutex_destroy(&zp->z_range_lock);

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(zp->z_acl_cached == NULL);
}

#ifdef ZNODE_STATS
/* One counter per early-exit path of zfs_znode_move(). */
static struct {
	uint64_t zms_zfsvfs_invalid;
	uint64_t zms_zfsvfs_recheck1;
	uint64_t zms_zfsvfs_unmounted;
	uint64_t zms_zfsvfs_recheck2;
	uint64_t zms_obj_held;
	uint64_t zms_vnode_locked;
	uint64_t zms_not_only_dnlc;
} znode_move_stats;
#endif	/* ZNODE_STATS */

#ifdef sun
/*
 * Transfer the in-core state of ozp to nzp: swap the vnodes, copy the
 * scalar fields and SA handle, drop ozp's cached ACL, and finally
 * invalidate ozp.  Called from zfs_znode_move() with its locks held.
 */
static void
zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
{
	vnode_t *vp;

	/* Copy fields. */
	nzp->z_zfsvfs = ozp->z_zfsvfs;

	/* Swap vnodes. */
	vp = nzp->z_vnode;
	nzp->z_vnode = ozp->z_vnode;
	ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
	ZTOV(ozp)->v_data = ozp;
	ZTOV(nzp)->v_data = nzp;

	nzp->z_id = ozp->z_id;
	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
	ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
	nzp->z_unlinked = ozp->z_unlinked;
	nzp->z_atime_dirty = ozp->z_atime_dirty;
	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
	nzp->z_blksz = ozp->z_blksz;
	nzp->z_seq = ozp->z_seq;
	nzp->z_mapcnt = ozp->z_mapcnt;
	nzp->z_gen = ozp->z_gen;
	nzp->z_sync_cnt = ozp->z_sync_cnt;
	nzp->z_is_sa = ozp->z_is_sa;
	nzp->z_sa_hdl = ozp->z_sa_hdl;
	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
	nzp->z_links = ozp->z_links;
	nzp->z_size = ozp->z_size;
	nzp->z_pflags = ozp->z_pflags;
	nzp->z_uid = ozp->z_uid;
	nzp->z_gid = ozp->z_gid;
	nzp->z_mode = ozp->z_mode;

	/*
	 * Since this is just an idle znode and kmem is already dealing with
	 * memory pressure, release any cached ACL.
	 */
	if (ozp->z_acl_cached) {
		zfs_acl_free(ozp->z_acl_cached);
		ozp->z_acl_cached = NULL;
	}

	/* The SA handle's user pointer must now refer to the new znode. */
	sa_set_userp(nzp->z_sa_hdl, nzp);

	/*
	 * Invalidate the original znode by clearing fields that provide a
	 * pointer back to the znode. Set the low bit of the vfs pointer to
	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
	 * subsequent callback.
	 */
	ozp->z_sa_hdl = NULL;
	POINTER_INVALIDATE(&ozp->z_zfsvfs);

	/*
	 * Mark the znode.
	 */
	nzp->z_moved = 1;
	ozp->z_moved = (uint8_t)-1;
}

/*
 * kmem move callback for the znode cache (installed by
 * kmem_cache_set_move() in zfs_znode_init()).
 *
 * Returns KMEM_CBRC_DONT_KNOW when the znode or its file system cannot be
 * validated, KMEM_CBRC_LATER when the object hold or vnode lock cannot be
 * taken or the vnode is referenced by more than the DNLC, and
 * KMEM_CBRC_YES after the contents of buf have been moved to newbuf.
 */
/*ARGSUSED*/
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	znode_t *ozp = buf, *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;

	/*
	 * The znode is on the file system's list of known znodes if the vfs
	 * pointer is valid. We set the low bit of the vfs pointer when freeing
	 * the znode to invalidate it, and the memory patterns written by kmem
	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
	 * created znode sets the vfs pointer last of all to indicate that the
	 * znode is known and in a valid state to be moved by this function.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Close a small window in which it's possible that the filesystem could
	 * be unmounted and freed, and zfsvfs, though valid in the previous
	 * statement, could point to unrelated memory by the time we try to
	 * prevent the filesystem from being unmounted.
	 */
	rw_enter(&zfsvfs_lock, RW_WRITER);
	if (zfsvfs != ozp->z_zfsvfs) {
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * If the znode is still valid, then so is the file system. We know that
	 * no valid file system can be freed while we hold zfsvfs_lock, so we
	 * can safely ensure that the filesystem is not and will not be
	 * unmounted. The next statement is equivalent to ZFS_ENTER().
	 */
	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
	if (zfsvfs->z_unmounted) {
		ZFS_EXIT(zfsvfs);
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}
	rw_exit(&zfsvfs_lock);

	mutex_enter(&zfsvfs->z_znodes_lock);
	/*
	 * Recheck the vfs pointer in case the znode was removed just before
	 * acquiring the lock.
	 */
	if (zfsvfs != ozp->z_zfsvfs) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * At this point we know that as long as we hold z_znodes_lock, the
	 * znode cannot be freed and fields within the znode can be safely
	 * accessed. Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		return (KMEM_CBRC_LATER);
	}

	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		return (KMEM_CBRC_LATER);
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
		mutex_exit(&vp->v_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		return (KMEM_CBRC_LATER);
	}

	/*
	 * The znode is known and in a valid state to move. We're holding the
	 * locks needed to execute the critical section.
	 */
	zfs_znode_move_impl(ozp, nzp);
	mutex_exit(&vp->v_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	return (KMEM_CBRC_YES);
}
#endif /* sun */

/*
 * Module initialization for the znode layer: set up zfsvfs_lock and
 * create the znode kmem cache with its constructor, destructor and move
 * callback.
 */
void
zfs_znode_init(void)
{
	/*
	 * Initialize zcache
	 */
	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
	ASSERT(znode_cache == NULL);
	znode_cache = kmem_cache_create("zfs_znode_cache",
	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
	kmem_cache_set_move(znode_cache, zfs_znode_move);
}

/*
 * Undo zfs_znode_init(): destroy the znode cache (if it was created) and
 * zfsvfs_lock.  On sun builds the vfs/vnode op tables are removed first.
 */
void
zfs_znode_fini(void)
{
#ifdef sun
	/*
	 * Cleanup vfs & vnode ops
	 */
	zfs_remove_op_tables();
#endif	/* sun */

	/*
	 * Cleanup zcache
	 */
	if (znode_cache)
		kmem_cache_destroy(znode_cache);
	znode_cache = NULL;
	rw_destroy(&zfsvfs_lock);
}

#ifdef sun
struct vnodeops *zfs_dvnodeops;
struct vnodeops *zfs_fvnodeops;
struct vnodeops *zfs_symvnodeops;
struct vnodeops *zfs_xdvnodeops;
struct vnodeops *zfs_evnodeops;
struct vnodeops *zfs_sharevnodeops;

/*
 * Unregister the ZFS vfs ops and free every vnodeops vector listed above.
 */
void
zfs_remove_op_tables()
{
	/*
	 * Remove vfs ops
	 */
	ASSERT(zfsfstype);
	(void) vfs_freevfsops_by_type(zfsfstype);
	zfsfstype = 0;

	/*
	 *
Remove vnode ops */
	if (zfs_dvnodeops)
		vn_freevnodeops(zfs_dvnodeops);
	if (zfs_fvnodeops)
		vn_freevnodeops(zfs_fvnodeops);
	if (zfs_symvnodeops)
		vn_freevnodeops(zfs_symvnodeops);
	if (zfs_xdvnodeops)
		vn_freevnodeops(zfs_xdvnodeops);
	if (zfs_evnodeops)
		vn_freevnodeops(zfs_evnodeops);
	if (zfs_sharevnodeops)
		vn_freevnodeops(zfs_sharevnodeops);

	/* Clear the pointers so zfs_create_op_tables() rebuilds them. */
	zfs_dvnodeops = NULL;
	zfs_fvnodeops = NULL;
	zfs_symvnodeops = NULL;
	zfs_xdvnodeops = NULL;
	zfs_evnodeops = NULL;
	zfs_sharevnodeops = NULL;
}

extern const fs_operation_def_t zfs_dvnodeops_template[];
extern const fs_operation_def_t zfs_fvnodeops_template[];
extern const fs_operation_def_t zfs_xdvnodeops_template[];
extern const fs_operation_def_t zfs_symvnodeops_template[];
extern const fs_operation_def_t zfs_evnodeops_template[];
extern const fs_operation_def_t zfs_sharevnodeops_template[];

/*
 * Build the six ZFS vnodeops vectors from their templates.
 *
 * Returns 0 on success (or if the vectors already exist), otherwise the
 * first error reported by vn_make_ops().  Vectors created before a
 * failure are not released here.
 */
int
zfs_create_op_tables()
{
	int error;

	/*
	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
	 * due to a failure to remove the 2nd modlinkage (zfs_modldrv).
	 * In this case we just return as the ops vectors are already set up.
	 */
	if (zfs_dvnodeops)
		return (0);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
	    &zfs_dvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
	    &zfs_fvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
	    &zfs_symvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
	    &zfs_xdvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
	    &zfs_evnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
	    &zfs_sharevnodeops);

	return (error);
}
#endif	/* sun */

/*
 * Create the shares directory object for zfsvfs inside transaction tx.
 *
 * A scratch znode is taken from znode_cache and handed to zfs_mknode()
 * with IS_ROOT_NODE, so it serves as its own parent.  The new object id
 * is stored in the master node under ZFS_SHARES_DIR and in
 * zfsvfs->z_shares_dir, after which the scratch znode and its SA handle
 * are released.  Returns the result of zap_add().
 */
int
zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	zfs_acl_ids_t acl_ids;
	vattr_t vattr;
	znode_t *sharezp;
	znode_t *zp;
	int error;

	/* Directory, mode 0555, owned by the kernel credential. */
	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0555;
	vattr.va_uid = crgetuid(kcred);
	vattr.va_gid = crgetgid(kcred);

	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
	sharezp->z_moved = 0;
	sharezp->z_unlinked = 0;
	sharezp->z_atime_dirty = 0;
	sharezp->z_zfsvfs = zfsvfs;
	sharezp->z_is_sa = zfsvfs->z_use_sa;

	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
	    kcred, NULL, &acl_ids));
	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, sharezp);
	/* Invalidate again before the scratch znode goes back to the cache. */
	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
	zfsvfs->z_shares_dir = sharezp->z_id;

	zfs_acl_ids_free(&acl_ids);
	sa_handle_destroy(sharezp->z_sa_hdl);
	kmem_cache_free(znode_cache, sharezp);

	return (error);
}

/*
 * define a couple of values we need available
 * for both 64 and 32 bit environments.
 */
#ifndef NBITSMINOR64
#define	NBITSMINOR64	32
#endif
#ifndef MAXMAJ64
#define	MAXMAJ64	0xffffffffUL
#endif
#ifndef MAXMIN64
#define	MAXMIN64	0xffffffffUL
#endif

/*
 * Create special expldev for ZFS private use.
* Can't use standard expldev since it doesn't do
 * what we want.  The standard expldev() takes a
 * dev32_t in LP64 and expands it to a long dev_t.
 * We need an interface that takes a dev32_t in ILP32
 * and expands it to a long dev_t.
 *
 * The result holds major(dev) above bit NBITSMINOR64 and minor(dev)
 * below it.
 */
static uint64_t
zfs_expldev(dev_t dev)
{
	return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev));
}

/*
 * Special cmpldev for ZFS private use.
 * Can't use standard cmpldev since it takes
 * a long dev_t and compresses it to dev32_t in
 * LP64.  We need to do a compaction of a long dev_t
 * to a dev32_t in ILP32.
 *
 * Inverse of zfs_expldev(): the major number is taken from the bits above
 * NBITSMINOR64 and the minor number from the bits selected by MAXMIN64.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
	return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64)));
}

/*
 * Attach an SA handle to zp.
 *
 * When sa_hdl is NULL a shared handle is obtained from db; otherwise the
 * supplied handle is adopted and its user pointer is set to zp.  Also
 * records whether the object uses the SA bonus type and flags the vnode
 * with VROOT when zp is the file system root.  The object mutex for
 * zp->z_id must be held by the caller (asserted).
 */
static void
zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
{
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));

	mutex_enter(&zp->z_lock);

	ASSERT(zp->z_sa_hdl == NULL);
	ASSERT(zp->z_acl_cached == NULL);
	if (sa_hdl == NULL) {
		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
		    SA_HDL_SHARED, &zp->z_sa_hdl));
	} else {
		zp->z_sa_hdl = sa_hdl;
		sa_set_userp(sa_hdl, zp);
	}

	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;

	/*
	 * Slap on VROOT if we are the root znode
	 */
	if (zp->z_id == zfsvfs->z_root)
		ZTOV(zp)->v_flag |= VROOT;

	mutex_exit(&zp->z_lock);
	vn_exists(ZTOV(zp));
}

/*
 * Destroy the SA handle of zp and clear the pointer.  The caller must
 * hold the object mutex, or the znode must be unlinked, or the teardown
 * inactive lock must be write-held (asserted).
 */
void
zfs_znode_dmu_fini(znode_t *zp)
{
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
	    zp->z_unlinked ||
	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));

	sa_handle_destroy(zp->z_sa_hdl);
	zp->z_sa_hdl = NULL;
}

/*
 * Dispose of a vnode that will not be used: detach its private data,
 * switch it to dead_vnodeops, then vgone() and vput() it.
 */
static void
zfs_vnode_forget(vnode_t *vp)
{

	/* copied from insmntque_stddtr */
	vp->v_data = NULL;
	vp->v_op = &dead_vnodeops;
	vgone(vp);
	vput(vp);
}

/*
 * Construct a new znode/vnode and initialize.
*
 * This does not do a call to dmu_set_user() that is
 * up to the caller to do, in case you don't want to
 * return the znode
 *
 * Returns NULL if no vnode can be obtained, if the bulk attribute lookup
 * fails, or if the stored generation number is 0.  On success the vnode
 * is returned locked (LK_EXCLUSIVE) and a VFS hold has been taken.
 */
static znode_t *
zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
    dmu_object_type_t obj_type, sa_handle_t *hdl)
{
	znode_t	*zp;
	vnode_t *vp;
	uint64_t mode;
	uint64_t parent;
	sa_bulk_attr_t bulk[9];
	int count = 0;
	int error;

	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);

	/* The caller must have reserved a vnode beforehand. */
	KASSERT(curthread->td_vp_reserv > 0,
	    ("zfs_znode_alloc: getnewvnode without any vnodes reserved"));
	error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp);
	if (error != 0) {
		kmem_cache_free(znode_cache, zp);
		return (NULL);
	}
	zp->z_vnode = vp;
	vp->v_data = zp;

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	zp->z_moved = 0;

	/*
	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
	 * the zfs_znode_move() callback.
	 */
	zp->z_sa_hdl = NULL;
	zp->z_unlinked = 0;
	zp->z_atime_dirty = 0;
	zp->z_mapcnt = 0;
	zp->z_id = db->db_object;
	zp->z_blksz = blksz;
	zp->z_seq = 0x7A4653;
	zp->z_sync_cnt = 0;

	vp = ZTOV(zp);

	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);

	/* Fetch the attributes cached in the znode in one bulk lookup. */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, 8);

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
		/* Only destroy the handle if it was created here. */
		if (hdl == NULL)
			sa_handle_destroy(zp->z_sa_hdl);
		zfs_vnode_forget(vp);
		zp->z_vnode = NULL;
		kmem_cache_free(znode_cache, zp);
		return (NULL);
	}

	zp->z_mode = mode;

	vp->v_type = IFTOVT((mode_t)mode);

	switch (vp->v_type) {
	case VDIR:
		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
		break;
#ifdef sun
	case VBLK:
	case VCHR:
		{
			uint64_t rdev;
			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
			    &rdev, sizeof (rdev)) == 0);

			vp->v_rdev = zfs_cmpldev(rdev);
		}
		break;
#endif	/* sun */
	case VFIFO:
#ifdef sun
	case VSOCK:
	case VDOOR:
#endif	/* sun */
		vp->v_op = &zfs_fifoops;
		break;
	case VREG:
		/* Regular files directly under the shares directory. */
		if (parent == zfsvfs->z_shares_dir) {
			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
			vp->v_op = &zfs_shareops;
		}
		break;
#ifdef sun
	case VLNK:
		vn_setops(vp, zfs_symvnodeops);
		break;
	default:
		vn_setops(vp, zfs_evnodeops);
		break;
#endif	/* sun */
	}

	mutex_enter(&zfsvfs->z_znodes_lock);
	list_insert_tail(&zfsvfs->z_all_znodes, zp);
	membar_producer();
	/*
	 * Everything else must be valid before assigning z_zfsvfs makes the
	 * znode eligible for zfs_znode_move().
	 */
	zp->z_zfsvfs = zfsvfs;
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * Acquire vnode lock before making it available to the world.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	VN_LOCK_AREC(vp);
	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

	VFS_HOLD(zfsvfs->z_vfs);
	return (zp);
}

/*
 * Zero-filled placeholders referenced by zfs_mknode() when it lays out a
 * DMU_OT_ZNODE (old format) object: xattr, pad and ACL slots.
 */
static uint64_t empty_xattr;
static uint64_t pad[4];
static zfs_acl_phys_t acl_phys;
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		acl_ids	- mode, owner/group fuids and initial ACL
 *			  (z_mode, z_fuid, z_fgid, z_aclp) for the new
 *			  znode.
*
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;
	uint64_t	rdev = 0;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		err;
	int		bonuslen;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	sa_attrs[ZPL_END];
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/*
	 * During replay the object id, timestamp and generation are taken
	 * from vap; otherwise they are generated here.
	 */
	if (zfsvfs->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		if (zfsvfs->z_replay) {
			VERIFY0(zap_create_claim_norm(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx));
		} else {
			obj = zap_create_norm(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
		}
	} else {
		if (zfsvfs->z_replay) {
			VERIFY0(dmu_object_claim(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx));
		} else {
			obj = dmu_object_alloc(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
		}
	}

	/* Held until the end of the function (ZFS_OBJ_HOLD_EXIT below). */
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zfsvfs->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (vap->va_type == VDIR) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		rdev = zfs_expldev(vap->va_rdev);
	}

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	/* Creation and change times are always "now"; atime/mtime may be given. */
	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file
	 *
	 * order for DMU_OT_ZNODE is critical since it needs to be constructed
	 * in the old znode_phys_t format.  Don't change this ordering
	 */

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
		    &empty_xattr, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
		    NULL, &rdev, 8);

	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
		ASSERT(*zpp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;

	if (vap->va_mask & AT_XVATTR)
		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);

	/* Old-format objects and pre-FUID ACLs store the ACL separately. */
	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
	}
	if (!(flag & IS_ROOT_NODE)) {
		vnode_t *vp;

		/* VV_FORCEINSMQ is set only for the duration of insmntque(). */
		vp = ZTOV(*zpp);
		vp->v_vflag |= VV_FORCEINSMQ;
		err = insmntque(vp, zfsvfs->z_vfs);
		vp->v_vflag &= ~VV_FORCEINSMQ;
		KASSERT(err == 0, ("insmntque() failed: error %d", err));
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
}

/*
 * Update in-core attributes.  It is assumed the caller will be doing an
 * sa_bulk_update to push the changes out.
*/ void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) { xoptattr_t *xoap; xoap = xva_getxoptattr(xvap); ASSERT(xoap); if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { uint64_t times[2]; ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), ×, sizeof (times), tx); XVA_SET_RTN(xvap, XAT_CREATETIME); } if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_READONLY); } if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_HIDDEN); } if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_SYSTEM); } if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_ARCHIVE); } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_OPAQUE); } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, xoap->xoa_av_quarantined, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_AV_MODIFIED); } if 
(XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
		/* The scanstamp is stored through its own SA helper. */
		zfs_sa_set_scanstamp(zp, xvap, tx);
		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
	}
	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_REPARSE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_OFFLINE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
		    zp->z_pflags, tx);
		XVA_SET_RTN(xvap, XAT_SPARSE);
	}
}

/*
 * Look up the znode for object obj_num and return it through *zpp.
 *
 * If the bonus buffer already has an SA handle attached, the existing znode
 * is returned with a hold taken on its vnode; the lookup is retried when
 * that vnode turns out to be doomed and not exclusively locked by this
 * thread.  Otherwise a new znode is allocated with zfs_znode_alloc() and its
 * vnode is inserted into the mount queue.
 *
 * Returns 0 on success, the sa_buf_hold()/insmntque() error, EINVAL when the
 * bonus buffer is neither a DMU_OT_SA nor a large-enough DMU_OT_ZNODE, or
 * ENOENT when the znode is unlinked or could not be allocated.  *zpp is set
 * to NULL at the start of every attempt.
 */
int
zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	vnode_t		*vp;
	sa_handle_t	*hdl;
	struct thread	*td;
	int locked;
	int err;

	/* td is assigned here but not otherwise used in this function. */
	td = curthread;
	/* Reserve a vnode up front; every exit path drops the reservation. */
	getnewvnode_reserve(1);
again:
	*zpp = NULL;
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		getnewvnode_drop_reserve();
		return (err);
	}

	/* Reject bonus buffers that cannot hold znode data. */
	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
#ifdef __FreeBSD__
		getnewvnode_drop_reserve();
#endif
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp  = sa_get_userdata(hdl);


		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */

		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			vp = ZTOV(zp);
			*zpp = zp;
			err = 0;
		}
		sa_buf_rele(db, NULL);

		/* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */
		if (err == 0)
			VN_HOLD(vp);

		mutex_exit(&zp->z_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);

		if (err == 0) {
			/* Sample the lock state before taking the interlock. */
			locked = VOP_ISLOCKED(vp);
			VI_LOCK(vp);
			if ((vp->v_iflag & VI_DOOMED) != 0 &&
			    locked != LK_EXCLUSIVE) {
				/*
				 * The vnode is doomed and this thread doesn't
				 * hold the exclusive lock on it, so the vnode
				 * must be being reclaimed by another thread.
				 * Otherwise the doomed vnode is being reclaimed
				 * by this thread and zfs_zget is called from
				 * ZIL internals.
				 */
				VI_UNLOCK(vp);
				VN_RELE(vp);
				goto again;
			}
			VI_UNLOCK(vp);
		}
		getnewvnode_drop_reserve();
		return (err);
	}

	/*
	 * Not found create new znode/vnode
	 * but only if file exists.
	 *
	 * There is a small window where zfs_vget() could
	 * find this object while a file create is still in
	 * progress.  This is checked for in zfs_znode_alloc()
	 *
	 * if zfs_znode_alloc() fails it will drop the hold on the
	 * bonus buffer.
	 */
	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
	    doi.doi_bonus_type, NULL);
	/* err is still 0 here from the successful sa_buf_hold() above. */
	if (zp == NULL) {
		err = SET_ERROR(ENOENT);
	} else {
		*zpp = zp;
	}
	if (err == 0) {
		vnode_t *vp = ZTOV(zp);

		err = insmntque(vp, zfsvfs->z_vfs);
		if (err == 0) {
			vp->v_hash = obj_num;
			VOP_UNLOCK(vp, 0);
		} else {
			/* Undo the allocation; the caller gets no znode. */
			zp->z_vnode = NULL;
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			*zpp = NULL;
		}
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
	getnewvnode_drop_reserve();
	return (err);
}

/*
 * Re-attach an existing znode to its on-disk object and reload the cached
 * attribute values.  The caller must pass a znode whose z_sa_hdl is NULL
 * (asserted below).  Any cached ACL is discarded first.
 *
 * Returns 0 on success, the sa_buf_hold() error, EINVAL for an unusable
 * bonus buffer, or EIO when the attributes cannot be read or no longer
 * describe the same file.
 */
int
zfs_rezget(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_object_info_t doi;
	dmu_buf_t *db;
	vnode_t *vp;
	uint64_t obj_num = zp->z_id;
	uint64_t mode, size;
	sa_bulk_attr_t bulk[8];
	int err;
	int count = 0;
	uint64_t gen;

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	mutex_enter(&zp->z_acl_lock);
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	mutex_exit(&zp->z_acl_lock);
	ASSERT(zp->z_sa_hdl == NULL);
	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
(doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (SET_ERROR(EINVAL)); } zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); size = zp->z_size; /* reload cached values */ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &gen, sizeof (gen)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, &zp->z_size, sizeof (zp->z_size)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &zp->z_links, sizeof (zp->z_links)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &zp->z_atime, sizeof (zp->z_atime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &zp->z_uid, sizeof (zp->z_uid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &zp->z_gid, sizeof (zp->z_gid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, sizeof (mode)); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (SET_ERROR(EIO)); } zp->z_mode = mode; if (gen != zp->z_gen) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (SET_ERROR(EIO)); } /* * XXXPJD: Not sure how is that possible, but under heavy * zfs recv -F load it happens that z_gen is the same, but * vnode type is different than znode type. This would mean * that for example regular file was replaced with directory * which has the same object number. 
*/ vp = ZTOV(zp); if (vp != NULL && vp->v_type != IFTOVT((mode_t)zp->z_mode)) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (EIO); } zp->z_unlinked = (zp->z_links == 0); zp->z_blksz = doi.doi_data_block_size; if (vp != NULL) { vn_pages_remove(vp, 0, 0); if (zp->z_size != size) vnode_pager_setsize(vp, zp->z_size); } ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (0); } void zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zfsvfs->z_os; uint64_t obj = zp->z_id; uint64_t acl_obj = zfs_external_acl(zp); ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); if (acl_obj) { VERIFY(!zp->z_is_sa); VERIFY(0 == dmu_object_free(os, acl_obj, tx)); } VERIFY(0 == dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); zfs_znode_free(zp); } void zfs_zinactive(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t z_id = zp->z_id; ASSERT(zp->z_sa_hdl); /* * Don't allow a zfs_zget() while were trying to release this znode */ ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); mutex_enter(&zp->z_lock); /* * If this was the last reference to a file with no links, * remove the file from the file system. */ if (zp->z_unlinked) { mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); zfs_rmnode(zp); return; } mutex_exit(&zp->z_lock); zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); zfs_znode_free(zp); } void zfs_znode_free(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; ASSERT(zp->z_sa_hdl == NULL); zp->z_vnode = NULL; mutex_enter(&zfsvfs->z_znodes_lock); POINTER_INVALIDATE(&zp->z_zfsvfs); list_remove(&zfsvfs->z_all_znodes, zp); mutex_exit(&zfsvfs->z_znodes_lock); if (zp->z_acl_cached) { zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; } kmem_cache_free(znode_cache, zp); VFS_RELE(zfsvfs->z_vfs); } void zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], uint64_t ctime[2], boolean_t have_tx) { timestruc_t now; gethrestime(&now); if (have_tx) { /* will sa_bulk_update happen really soon? 
*/
		zp->z_atime_dirty = 0;
		zp->z_seq++;
	} else {
		/* No transaction: remember that atime still needs writing. */
		zp->z_atime_dirty = 1;
	}

	if (flag & AT_ATIME) {
		ZFS_TIME_ENCODE(&now, zp->z_atime);
	}

	if (flag & AT_MTIME) {
		ZFS_TIME_ENCODE(&now, mtime);
		if (zp->z_zfsvfs->z_use_fuids) {
			zp->z_pflags |= (ZFS_ARCHIVE |
			    ZFS_AV_MODIFIED);
		}
	}

	if (flag & AT_CTIME) {
		ZFS_TIME_ENCODE(&now, ctime);
		if (zp->z_zfsvfs->z_use_fuids)
			zp->z_pflags |= ZFS_ARCHIVE;
	}
}

/*
 * Grow the block size for a file.
 *
 *	IN:	zp	- znode of file to free data in.
 *		size	- requested block size
 *		tx	- open transaction.
 *
 * NOTE: this function assumes that the znode is write locked.
 *
 * The call is a no-op when size does not exceed the current block size,
 * when the file already extends past one block, or when the DMU answers
 * ENOTSUP.  On success zp->z_blksz is refreshed from the DMU.
 */
void
zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
{
	int		error;
	u_longlong_t	dummy;

	if (size <= zp->z_blksz)
		return;
	/*
	 * If the file size is already greater than the current blocksize,
	 * we will not grow.  If there is more than one block in a file,
	 * the blocksize cannot change.
	 */
	if (zp->z_blksz && zp->z_size > zp->z_blksz)
		return;

	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
	    size, 0, tx);

	if (error == ENOTSUP)
		return;
	ASSERT0(error);

	/* What blocksize did we actually get? */
	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
}

#ifdef sun
/*
 * This is a dummy interface used when pvn_vplist_dirty() should *not*
 * be calling back into the fs for a putpage().  E.g.: when truncating
 * a file, the pages being "thrown away" don't need to be written out.
 */
/* ARGSUSED */
static int
zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
    int flags, cred_t *cr)
{
	/* Must never be reached; asserts in debug kernels. */
	ASSERT(0);
	return (0);
}
#endif	/* sun */

/*
 * Increase the file length
 *
 *	IN:	zp	- znode of file to free data in.
 *		end	- new end-of-file
 *
 *	RETURN:	0 on success, error code on failure
 */
static int
zfs_extend(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_tx_t *tx;
	rl_t *rl;
	uint64_t newblksz;
	int error;

	/*
	 * We will change zp_size, lock the whole file.
*/
	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end <= zp->z_size) {
		zfs_range_unlock(rl);
		return (0);
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	if (end > zp->z_blksz &&
	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
		/*
		 * We are growing the file past the current block size.
		 */
		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
			ASSERT(!ISP2(zp->z_blksz));
			newblksz = MIN(end, SPA_MAXBLOCKSIZE);
		} else {
			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
		}
		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
	} else {
		/* Zero means "leave the block size alone" below. */
		newblksz = 0;
	}

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_range_unlock(rl);
		return (error);
	}

	if (newblksz)
		zfs_grow_blocksize(zp, newblksz, tx);

	zp->z_size = end;

	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx));

	vnode_pager_setsize(ZTOV(zp), end);

	zfs_range_unlock(rl);

	dmu_tx_commit(tx);

	return (0);
}

/*
 * Free space in a file.
 *
 *	IN:	zp	- znode of file to free data in.
 *		off	- start of section to free.
 *		len	- length of section to free.
 *
 *	RETURN:	0 on success, error code on failure
 */
static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	rl_t *rl;
	int error;

	/*
	 * Lock the range being freed.
	 */
	rl = zfs_range_lock(zp, off, len, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (off >= zp->z_size) {
		zfs_range_unlock(rl);
		return (0);
	}

	/* Clamp the request so it never reaches past end-of-file. */
	if (off + len > zp->z_size)
		len = zp->z_size - off;

	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);

	if (error == 0) {
		/*
		 * In FreeBSD we cannot free block in the middle of a file,
		 * but only at the end of a file, so this code path should
		 * never happen.
		 */
		vnode_pager_setsize(ZTOV(zp), off);
	}

	zfs_range_unlock(rl);

	return (error);
}

/*
 * Truncate a file
 *
 *	IN:	zp	- znode of file to free data in.
 *		end	- new end-of-file.
*
 *	RETURN:	0 on success, error code on failure
 */
static int
zfs_trunc(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	vnode_t *vp = ZTOV(zp);
	dmu_tx_t *tx;
	rl_t *rl;
	int error;
	sa_bulk_attr_t bulk[2];
	int count = 0;

	/*
	 * We will change zp_size, lock the whole file.
	 */
	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end >= zp->z_size) {
		zfs_range_unlock(rl);
		return (0);
	}

	/* Free from the new end-of-file onward; the length argument is -1. */
	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,  -1);
	if (error) {
		zfs_range_unlock(rl);
		return (error);
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_range_unlock(rl);
		return (error);
	}

	zp->z_size = end;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
	    NULL, &zp->z_size, sizeof (zp->z_size));

	/* An empty file is no longer sparse; persist the cleared flag too. */
	if (end == 0) {
		zp->z_pflags &= ~ZFS_SPARSE;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &zp->z_pflags, 8);
	}
	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);

	dmu_tx_commit(tx);

	/*
	 * Clear any mapped pages in the truncated region.  This has to
	 * happen outside of the transaction to avoid the possibility of
	 * a deadlock with someone trying to push a page that we are
	 * about to invalidate.
	 */
	vnode_pager_setsize(vp, end);

	zfs_range_unlock(rl);

	return (0);
}

/*
 * Free space in a file
 *
 *	IN:	zp	- znode of file to free data in.
 *		off	- start of range
 *		len	- end of range (0 => EOF)
 *		flag	- current file open mode flags.
* log - TRUE if this action should be logged * * RETURN: 0 on success, error code on failure */ int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) { vnode_t *vp = ZTOV(zp); dmu_tx_t *tx; zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog = zfsvfs->z_log; uint64_t mode; uint64_t mtime[2], ctime[2]; sa_bulk_attr_t bulk[3]; int count = 0; int error; if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, sizeof (mode))) != 0) return (error); if (off > zp->z_size) { error = zfs_extend(zp, off+len); if (error == 0 && log) goto log; else return (error); } /* * Check for any locks in the region to be freed. */ if (MANDLOCK(vp, (mode_t)mode)) { uint64_t length = (len ? len : zp->z_size - off); if (error = chklock(vp, FWRITE, off, length, flag, NULL)) return (error); } if (len == 0) { error = zfs_trunc(zp, off); } else { if ((error = zfs_free_range(zp, off, len)) == 0 && off + len > zp->z_size) error = zfs_extend(zp, off+len); } if (error || !log) return (error); log: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); return (error); } SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ASSERT(error == 0); zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); dmu_tx_commit(tx); return (0); } void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { zfsvfs_t zfsvfs; uint64_t moid, obj, sa_obj, version; uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int error; int i; znode_t *rootzp = NULL; vattr_t vattr; znode_t *zp; zfs_acl_ids_t acl_ids; /* * First attempt to create master node. 
*/
	/*
	 * In an empty objset, there are no blocks to read and thus
	 * there can be no i/o errors (which we assert below).
	 */
	moid = MASTER_NODE_OBJ;
	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
	    DMU_OT_NONE, 0, tx);
	ASSERT(error == 0);

	/*
	 * Set starting attributes.
	 */
	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
	elem = NULL;
	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
		/* For the moment we expect all zpl props to be uint64_ts */
		uint64_t val;
		char *name;

		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
		VERIFY(nvpair_value_uint64(elem, &val) == 0);
		name = nvpair_name(elem);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
			/* A requested version may only lower the default. */
			if (val < version)
				version = val;
		} else {
			error = zap_update(os, moid, name, 8, 1, &val, tx);
		}
		ASSERT(error == 0);
		/* Remember the values needed to build z_norm further down. */
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
			norm = val;
		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
			sense = val;
	}
	ASSERT(version != 0);
	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);

	/*
	 * Create zap object used for SA attribute registration
	 */

	if (version >= ZPL_VERSION_SA) {
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);
		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT(error == 0);
	} else {
		sa_obj = 0;
	}
	/*
	 * Create a delete queue.
	 */
	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);

	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
	ASSERT(error == 0);

	/*
	 * Create root znode.  Create minimal znode/vnode/zfsvfs
	 * to allow zfs_mknode to work.
	 */
	VATTR_NULL(&vattr);
	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0755;
	vattr.va_uid = crgetuid(cr);
	vattr.va_gid = crgetgid(cr);

	bzero(&zfsvfs, sizeof (zfsvfs_t));

	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
	rootzp->z_moved = 0;
	rootzp->z_unlinked = 0;
	rootzp->z_atime_dirty = 0;
	rootzp->z_is_sa = USE_SA(version, os);

	zfsvfs.z_os = os;
	zfsvfs.z_parent = &zfsvfs;
	zfsvfs.z_version = version;
	zfsvfs.z_use_fuids = USE_FUIDS(version, os);
	zfsvfs.z_use_sa = USE_SA(version, os);
	zfsvfs.z_norm = norm;

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zfsvfs.z_attr_table);

	ASSERT(error == 0);

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
		zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER;

	mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs.z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

	rootzp->z_zfsvfs = &zfsvfs;
	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
	    cr, NULL, &acl_ids));
	/* With IS_ROOT_NODE the znode handed back must be rootzp itself. */
	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, rootzp);
	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
	ASSERT(error == 0);
	zfs_acl_ids_free(&acl_ids);
	/* Detach the temporary zfsvfs before the znode goes back to the cache. */
	POINTER_INVALIDATE(&rootzp->z_zfsvfs);

	sa_handle_destroy(rootzp->z_sa_hdl);
	kmem_cache_free(znode_cache, rootzp);

	/*
	 * Create shares directory
	 */

	error = zfs_create_share_dir(&zfsvfs, tx);

	ASSERT(error == 0);

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs.z_hold_mtx[i]);
}

#endif /* _KERNEL */

/*
 * Build the SA attribute table for objset osp.  The SA master object number
 * is read from the master node; a missing entry (ENOENT) is tolerated and
 * leaves sa_obj at 0.  Returns 0 or the zap_lookup()/sa_setup() error.
 */
static int
zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
{
	uint64_t sa_obj = 0;
	int error;

	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
	if (error != 0 && error != ENOENT)
		return (error);

	error = sa_setup(osp,
sa_obj, zfs_attr_table, ZPL_END, sa_table); return (error); } static int zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, dmu_buf_t **db, void *tag) { dmu_object_info_t doi; int error; if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) return (error); dmu_object_info_from_db(*db, &doi); if ((doi.doi_bonus_type != DMU_OT_SA && doi.doi_bonus_type != DMU_OT_ZNODE) || doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)) { sa_buf_rele(*db, tag); return (SET_ERROR(ENOTSUP)); } error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); if (error != 0) { sa_buf_rele(*db, tag); return (error); } return (0); } void zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) { sa_handle_destroy(hdl); sa_buf_rele(db, tag); } /* * Given an object number, return its parent object number and whether * or not the object is an extended attribute directory. */ static int zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp, int *is_xattrdir) { uint64_t parent; uint64_t pflags; uint64_t mode; uint64_t parent_mode; sa_bulk_attr_t bulk[3]; sa_handle_t *sa_hdl; dmu_buf_t *sa_db; int count = 0; int error; SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, &parent, sizeof (parent)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, &pflags, sizeof (pflags)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, &mode, sizeof (mode)); if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) return (error); /* * When a link is removed its parent pointer is not changed and will * be invalid. There are two cases where a link is removed but the * file stays around, when it goes to the delete queue and when there * are additional links. 
*/ error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); if (error != 0) return (error); error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); zfs_release_sa_handle(sa_hdl, sa_db, FTAG); if (error != 0) return (error); *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); /* * Extended attributes can be applied to files, directories, etc. * Otherwise the parent must be a directory. */ if (!*is_xattrdir && !S_ISDIR(parent_mode)) return (SET_ERROR(EINVAL)); *pobjp = parent; return (0); } /* * Given an object number, return some zpl level statistics */ static int zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, zfs_stat_t *sb) { sa_bulk_attr_t bulk[4]; int count = 0; SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, &sb->zs_mode, sizeof (sb->zs_mode)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, &sb->zs_gen, sizeof (sb->zs_gen)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, &sb->zs_links, sizeof (sb->zs_links)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, &sb->zs_ctime, sizeof (sb->zs_ctime)); return (sa_bulk_lookup(hdl, bulk, count)); } static int zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, sa_attr_type_t *sa_table, char *buf, int len) { sa_handle_t *sa_hdl; sa_handle_t *prevhdl = NULL; dmu_buf_t *prevdb = NULL; dmu_buf_t *sa_db = NULL; char *path = buf + len - 1; int error; *path = '\0'; sa_hdl = hdl; for (;;) { uint64_t pobj; char component[MAXNAMELEN + 2]; size_t complen; int is_xattrdir; if (prevdb) zfs_release_sa_handle(prevhdl, prevdb, FTAG); if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, &is_xattrdir)) != 0) break; if (pobj == obj) { if (path[0] != '/') *--path = '/'; break; } component[0] = '/'; if (is_xattrdir) { (void) sprintf(component + 1, ""); } else { error = zap_value_search(osp, pobj, obj, ZFS_DIRENT_OBJ(-1ULL), component + 1); if (error != 0) break; } complen = strlen(component); path -= complen; ASSERT(path >= buf); 
bcopy(component, path, complen);
		obj = pobj;

		/* Keep ancestor handles for release; never the caller's hdl. */
		if (sa_hdl != hdl) {
			prevhdl = sa_hdl;
			prevdb = sa_db;
		}
		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
		if (error != 0) {
			/* Fall back so the cleanup below releases the last one. */
			sa_hdl = prevhdl;
			sa_db = prevdb;
			break;
		}
	}

	if (sa_hdl != NULL && sa_hdl != hdl) {
		ASSERT(sa_db != NULL);
		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
	}

	/* Slide the assembled path (including its NUL) to the front of buf. */
	if (error == 0)
		(void) memmove(buf, path, buf + len - path);

	return (error);
}

/*
 * Write the path of object obj in objset osp into buf (len bytes).
 * Returns 0 or the first error from attribute-table setup, handle
 * acquisition or the path walk.
 */
int
zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
{
	sa_attr_type_t *sa_table;
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	error = zfs_sa_setup(osp, &sa_table);
	if (error != 0)
		return (error);

	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
	if (error != 0)
		return (error);

	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);

	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}

/*
 * Fill *sb with the statistics of object obj and write its path into buf
 * (len bytes).  buf is terminated as an empty string at its last byte
 * before anything else is attempted.
 */
int
zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
    char *buf, int len)
{
	char *path = buf + len - 1;
	sa_attr_type_t *sa_table;
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	*path = '\0';

	error = zfs_sa_setup(osp, &sa_table);
	if (error != 0)
		return (error);

	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
	if (error != 0)
		return (error);

	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
	if (error != 0) {
		zfs_release_sa_handle(hdl, db, FTAG);
		return (error);
	}

	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);

	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}
Index: stable/9/sys/cddl/contrib/opensolaris
===================================================================
--- stable/9/sys/cddl/contrib/opensolaris (revision 273911)
+++ stable/9/sys/cddl/contrib/opensolaris (revision 273912)
Property changes on: stable/9/sys/cddl/contrib/opensolaris
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /head/sys/cddl/contrib/opensolaris:r263710,273377-273378,273423,273455
Index: stable/9/sys/cddl/dev/dtrace/dtrace_sysctl.c
=================================================================== --- stable/9/sys/cddl/dev/dtrace/dtrace_sysctl.c (revision 273911) +++ stable/9/sys/cddl/dev/dtrace/dtrace_sysctl.c (revision 273912) @@ -1,94 +1,94 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ * */ SYSCTL_NODE(_debug, OID_AUTO, dtrace, CTLFLAG_RD, 0, "DTrace debug parameters"); int dtrace_debug = 0; TUNABLE_INT("debug.dtrace.debug", &dtrace_debug); SYSCTL_INT(_debug_dtrace, OID_AUTO, debug, CTLFLAG_RW, &dtrace_debug, 0, ""); /* Report registered DTrace providers. */ static int sysctl_dtrace_providers(SYSCTL_HANDLER_ARGS) { char *p_name = NULL; dtrace_provider_t *prov = dtrace_provider; int error = 0; size_t len = 0; mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); /* Compute the length of the space-separated provider name string. */ while (prov != NULL) { len += strlen(prov->dtpv_name) + 1; prov = prov->dtpv_next; } if ((p_name = kmem_alloc(len, KM_SLEEP)) == NULL) error = ENOMEM; else { /* Start with an empty string. */ *p_name = '\0'; /* Point to the first provider again. */ prov = dtrace_provider; /* Loop through the providers, appending the names. 
*/ while (prov != NULL) { if (prov != dtrace_provider) (void) strlcat(p_name, " ", len); (void) strlcat(p_name, prov->dtpv_name, len); prov = prov->dtpv_next; } } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); if (p_name != NULL) { error = sysctl_handle_string(oidp, p_name, len, req); kmem_free(p_name, 0); } return (error); } SYSCTL_PROC(_debug_dtrace, OID_AUTO, providers, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_dtrace_providers, "A", "available DTrace providers"); SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RD, 0, "DTrace parameters"); -SYSCTL_LONG(_kern_dtrace, OID_AUTO, dof_maxsize, CTLFLAG_RW, +SYSCTL_QUAD(_kern_dtrace, OID_AUTO, dof_maxsize, CTLFLAG_RW, &dtrace_dof_maxsize, 0, "largest allowed DOF table"); -SYSCTL_LONG(_kern_dtrace, OID_AUTO, helper_actions_max, CTLFLAG_RW, +SYSCTL_QUAD(_kern_dtrace, OID_AUTO, helper_actions_max, CTLFLAG_RW, &dtrace_helper_actions_max, 0, "maximum number of allowed helper actions"); SYSCTL_INT(_kern_dtrace, OID_AUTO, memstr_max, CTLFLAG_RW, &dtrace_memstr_max, 0, "largest allowed argument to memstr(), 0 indicates no limit"); Index: stable/9/sys/compat/ndis/kern_ndis.c =================================================================== --- stable/9/sys/compat/ndis/kern_ndis.c (revision 273911) +++ stable/9/sys/compat/ndis/kern_ndis.c (revision 273912) @@ -1,1437 +1,1446 @@ /*- * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NDIS_DUMMY_PATH "\\\\some\\bogus\\path" +#define NDIS_FLAG_RDONLY 1 static void ndis_status_func(ndis_handle, ndis_status, void *, uint32_t); static void ndis_statusdone_func(ndis_handle); static void ndis_setdone_func(ndis_handle, ndis_status); static void ndis_getdone_func(ndis_handle, ndis_status); static void ndis_resetdone_func(ndis_handle, ndis_status, uint8_t); static void ndis_sendrsrcavail_func(ndis_handle); static void ndis_intrsetup(kdpc *, device_object *, irp *, struct ndis_softc *); static void ndis_return(device_object *, void *); static image_patch_table kernndis_functbl[] = { IMPORT_SFUNC(ndis_status_func, 4), IMPORT_SFUNC(ndis_statusdone_func, 1), IMPORT_SFUNC(ndis_setdone_func, 2), IMPORT_SFUNC(ndis_getdone_func, 2), IMPORT_SFUNC(ndis_resetdone_func, 3), IMPORT_SFUNC(ndis_sendrsrcavail_func, 1), IMPORT_SFUNC(ndis_intrsetup, 4), IMPORT_SFUNC(ndis_return, 1), { NULL, NULL, NULL } }; static struct nd_head ndis_devhead; /* * This allows us to export our symbols to other modules. * Note that we call ourselves 'ndisapi' to avoid a namespace * collision with if_ndis.ko, which internally calls itself * 'ndis.' * * Note: some of the subsystems depend on each other, so the * order in which they're started is important. The order of * importance is: * * HAL - spinlocks and IRQL manipulation * ntoskrnl - DPC and workitem threads, object waiting * windrv - driver/device registration * * The HAL should also be the last thing shut down, since * the ntoskrnl subsystem will use spinlocks right up until * the DPC and workitem threads are terminated. 
*/
/*
 * Module event handler for the ndisapi module.
 *
 * MOD_LOAD initialises the subsystems, wraps every entry of
 * kernndis_functbl with windrv_wrap() and initialises the device list.
 * MOD_SHUTDOWN tears everything down only when the device list is empty;
 * MOD_UNLOAD tears down unconditionally.  Any other command yields EINVAL.
 */
static int
ndis_modevent(module_t mod, int cmd, void *arg)
{
	int			error = 0;
	image_patch_table	*patch;

	switch (cmd) {
	case MOD_LOAD:
		/* Initialize subsystems */
		hal_libinit();
		ntoskrnl_libinit();
		windrv_libinit();
		ndis_libinit();
		usbd_libinit();

		/* The table is terminated by an entry with a NULL ipt_func. */
		patch = kernndis_functbl;
		while (patch->ipt_func != NULL) {
			windrv_wrap((funcptr)patch->ipt_func,
			    (funcptr *)&patch->ipt_wrap,
			    patch->ipt_argcnt, patch->ipt_ftype);
			patch++;
		}

		TAILQ_INIT(&ndis_devhead);
		break;
	case MOD_SHUTDOWN:
		if (TAILQ_FIRST(&ndis_devhead) == NULL) {
			/* Shut down subsystems */
			ndis_libfini();
			usbd_libfini();
			windrv_libfini();
			ntoskrnl_libfini();
			hal_libfini();

			patch = kernndis_functbl;
			while (patch->ipt_func != NULL) {
				windrv_unwrap(patch->ipt_wrap);
				patch++;
			}
		}
		break;
	case MOD_UNLOAD:
		/* Shut down subsystems */
		ndis_libfini();
		usbd_libfini();
		windrv_libfini();
		ntoskrnl_libfini();
		hal_libfini();

		patch = kernndis_functbl;
		while (patch->ipt_func != NULL) {
			windrv_unwrap(patch->ipt_wrap);
			patch++;
		}

		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
DEV_MODULE(ndisapi, ndis_modevent, NULL);
MODULE_VERSION(ndisapi, 1);

/* Callback with an intentionally empty body. */
static void
ndis_sendrsrcavail_func(adapter)
	ndis_handle		adapter;
{
}

/*
 * Status callback: prints the status code when the interface has
 * IFF_DEBUG set.  sbuf and slen are not used.
 */
static void
ndis_status_func(adapter, status, sbuf, slen)
	ndis_handle		adapter;
	ndis_status		status;
	void			*sbuf;
	uint32_t		slen;
{
	ndis_miniport_block	*block;
	struct ndis_softc	*sc;
	struct ifnet		*ifp;

	block = adapter;
	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
	ifp = sc->ifp;
	if (ifp->if_flags & IFF_DEBUG)
		device_printf(sc->ndis_dev, "status: %x\n", status);
}

/*
 * Status-complete callback: prints a message when the interface has
 * IFF_DEBUG set.
 */
static void
ndis_statusdone_func(adapter)
	ndis_handle		adapter;
{
	ndis_miniport_block	*block;
	struct ndis_softc	*sc;
	struct ifnet		*ifp;

	block = adapter;
	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
	ifp = sc->ifp;
	if (ifp->if_flags & IFF_DEBUG)
		device_printf(sc->ndis_dev, "status complete\n");
}

/*
 * Set-complete callback: records the completion status on the miniport
 * block and signals its set event.
 */
static void
ndis_setdone_func(adapter, status)
	ndis_handle		adapter;
	ndis_status		status;
{
	ndis_miniport_block	*block;
	block = adapter;
block->nmb_setstat = status; KeSetEvent(&block->nmb_setevent, IO_NO_INCREMENT, FALSE); } static void ndis_getdone_func(adapter, status) ndis_handle adapter; ndis_status status; { ndis_miniport_block *block; block = adapter; block->nmb_getstat = status; KeSetEvent(&block->nmb_getevent, IO_NO_INCREMENT, FALSE); } static void ndis_resetdone_func(ndis_handle adapter, ndis_status status, uint8_t addressingreset) { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; if (ifp->if_flags & IFF_DEBUG) device_printf(sc->ndis_dev, "reset done...\n"); KeSetEvent(&block->nmb_resetevent, IO_NO_INCREMENT, FALSE); } int ndis_create_sysctls(arg) void *arg; { struct ndis_softc *sc; ndis_cfg *vals; char buf[256]; struct sysctl_oid *oidp; struct sysctl_ctx_entry *e; if (arg == NULL) return (EINVAL); sc = arg; vals = sc->ndis_regvals; TAILQ_INIT(&sc->ndis_cfglist_head); /* Add the driver-specific registry keys. */ while(1) { if (vals->nc_cfgkey == NULL) break; if (vals->nc_idx != sc->ndis_devidx) { vals++; continue; } /* See if we already have a sysctl with this name */ oidp = NULL; TAILQ_FOREACH(e, device_get_sysctl_ctx(sc->ndis_dev), link) { oidp = e->entry; if (strcasecmp(oidp->oid_name, vals->nc_cfgkey) == 0) break; oidp = NULL; } if (oidp != NULL) { vals++; continue; } ndis_add_sysctl(sc, vals->nc_cfgkey, vals->nc_cfgdesc, vals->nc_val, CTLFLAG_RW); vals++; } /* Now add a couple of builtin keys. */ /* * Environment can be either Windows (0) or WindowsNT (1). * We qualify as the latter. */ ndis_add_sysctl(sc, "Environment", - "Windows environment", "1", CTLFLAG_RD); + "Windows environment", "1", NDIS_FLAG_RDONLY); /* NDIS version should be 5.1. 
*/ ndis_add_sysctl(sc, "NdisVersion", - "NDIS API Version", "0x00050001", CTLFLAG_RD); + "NDIS API Version", "0x00050001", NDIS_FLAG_RDONLY); /* * Some miniport drivers rely on the existence of the SlotNumber, * NetCfgInstanceId and DriverDesc keys. */ - ndis_add_sysctl(sc, "SlotNumber", "Slot Numer", "01", CTLFLAG_RD); + ndis_add_sysctl(sc, "SlotNumber", "Slot Numer", "01", NDIS_FLAG_RDONLY); ndis_add_sysctl(sc, "NetCfgInstanceId", "NetCfgInstanceId", - "{12345678-1234-5678-CAFE0-123456789ABC}", CTLFLAG_RD); + "{12345678-1234-5678-CAFE0-123456789ABC}", NDIS_FLAG_RDONLY); ndis_add_sysctl(sc, "DriverDesc", "Driver Description", - "NDIS Network Adapter", CTLFLAG_RD); + "NDIS Network Adapter", NDIS_FLAG_RDONLY); /* Bus type (PCI, PCMCIA, etc...) */ sprintf(buf, "%d", (int)sc->ndis_iftype); - ndis_add_sysctl(sc, "BusType", "Bus Type", buf, CTLFLAG_RD); + ndis_add_sysctl(sc, "BusType", "Bus Type", buf, NDIS_FLAG_RDONLY); if (sc->ndis_res_io != NULL) { sprintf(buf, "0x%lx", rman_get_start(sc->ndis_res_io)); ndis_add_sysctl(sc, "IOBaseAddress", - "Base I/O Address", buf, CTLFLAG_RD); + "Base I/O Address", buf, NDIS_FLAG_RDONLY); } if (sc->ndis_irq != NULL) { sprintf(buf, "%lu", rman_get_start(sc->ndis_irq)); ndis_add_sysctl(sc, "InterruptNumber", - "Interrupt Number", buf, CTLFLAG_RD); + "Interrupt Number", buf, NDIS_FLAG_RDONLY); } return (0); } int -ndis_add_sysctl(arg, key, desc, val, flag) +ndis_add_sysctl(arg, key, desc, val, flag_rdonly) void *arg; char *key; char *desc; char *val; - int flag; + int flag_rdonly; { struct ndis_softc *sc; struct ndis_cfglist *cfg; char descstr[256]; sc = arg; cfg = malloc(sizeof(struct ndis_cfglist), M_DEVBUF, M_NOWAIT|M_ZERO); if (cfg == NULL) { printf("failed for %s\n", key); return (ENOMEM); } cfg->ndis_cfg.nc_cfgkey = strdup(key, M_DEVBUF); if (desc == NULL) { snprintf(descstr, sizeof(descstr), "%s (dynamic)", key); cfg->ndis_cfg.nc_cfgdesc = strdup(descstr, M_DEVBUF); } else cfg->ndis_cfg.nc_cfgdesc = strdup(desc, M_DEVBUF); 
strcpy(cfg->ndis_cfg.nc_val, val); TAILQ_INSERT_TAIL(&sc->ndis_cfglist_head, cfg, link); - cfg->ndis_oid = - SYSCTL_ADD_STRING(device_get_sysctl_ctx(sc->ndis_dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(sc->ndis_dev)), - OID_AUTO, cfg->ndis_cfg.nc_cfgkey, flag, - cfg->ndis_cfg.nc_val, sizeof(cfg->ndis_cfg.nc_val), - cfg->ndis_cfg.nc_cfgdesc); - + if (flag_rdonly != 0) { + cfg->ndis_oid = + SYSCTL_ADD_STRING(device_get_sysctl_ctx(sc->ndis_dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(sc->ndis_dev)), + OID_AUTO, cfg->ndis_cfg.nc_cfgkey, CTLFLAG_RD, + cfg->ndis_cfg.nc_val, sizeof(cfg->ndis_cfg.nc_val), + cfg->ndis_cfg.nc_cfgdesc); + } else { + cfg->ndis_oid = + SYSCTL_ADD_STRING(device_get_sysctl_ctx(sc->ndis_dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(sc->ndis_dev)), + OID_AUTO, cfg->ndis_cfg.nc_cfgkey, CTLFLAG_RW, + cfg->ndis_cfg.nc_val, sizeof(cfg->ndis_cfg.nc_val), + cfg->ndis_cfg.nc_cfgdesc); + } return (0); } /* * Somewhere, somebody decided "hey, let's automatically create * a sysctl tree for each device instance as it's created -- it'll * make life so much easier!" Lies. Why must they turn the kernel * into a house of lies? 
*/ int ndis_flush_sysctls(arg) void *arg; { struct ndis_softc *sc; struct ndis_cfglist *cfg; struct sysctl_ctx_list *clist; sc = arg; clist = device_get_sysctl_ctx(sc->ndis_dev); while (!TAILQ_EMPTY(&sc->ndis_cfglist_head)) { cfg = TAILQ_FIRST(&sc->ndis_cfglist_head); TAILQ_REMOVE(&sc->ndis_cfglist_head, cfg, link); sysctl_ctx_entry_del(clist, cfg->ndis_oid); sysctl_remove_oid(cfg->ndis_oid, 1, 0); free(cfg->ndis_cfg.nc_cfgkey, M_DEVBUF); free(cfg->ndis_cfg.nc_cfgdesc, M_DEVBUF); free(cfg, M_DEVBUF); } return (0); } void * ndis_get_routine_address(functbl, name) struct image_patch_table *functbl; char *name; { int i; for (i = 0; functbl[i].ipt_name != NULL; i++) if (strcmp(name, functbl[i].ipt_name) == 0) return (functbl[i].ipt_wrap); return (NULL); } static void ndis_return(dobj, arg) device_object *dobj; void *arg; { ndis_miniport_block *block; ndis_miniport_characteristics *ch; ndis_return_handler returnfunc; ndis_handle adapter; ndis_packet *p; uint8_t irql; list_entry *l; block = arg; ch = IoGetDriverObjectExtension(dobj->do_drvobj, (void *)1); p = arg; adapter = block->nmb_miniportadapterctx; if (adapter == NULL) return; returnfunc = ch->nmc_return_packet_func; KeAcquireSpinLock(&block->nmb_returnlock, &irql); while (!IsListEmpty(&block->nmb_returnlist)) { l = RemoveHeadList((&block->nmb_returnlist)); p = CONTAINING_RECORD(l, ndis_packet, np_list); InitializeListHead((&p->np_list)); KeReleaseSpinLock(&block->nmb_returnlock, irql); MSCALL2(returnfunc, adapter, p); KeAcquireSpinLock(&block->nmb_returnlock, &irql); } KeReleaseSpinLock(&block->nmb_returnlock, irql); } void ndis_return_packet(buf, arg) void *buf; /* not used */ void *arg; { ndis_packet *p; ndis_miniport_block *block; if (arg == NULL) return; p = arg; /* Decrement refcount. */ p->np_refcnt--; /* Release packet when refcount hits zero, otherwise return. 
*/ if (p->np_refcnt) return; block = ((struct ndis_softc *)p->np_softc)->ndis_block; KeAcquireSpinLockAtDpcLevel(&block->nmb_returnlock); InitializeListHead((&p->np_list)); InsertHeadList((&block->nmb_returnlist), (&p->np_list)); KeReleaseSpinLockFromDpcLevel(&block->nmb_returnlock); IoQueueWorkItem(block->nmb_returnitem, (io_workitem_func)kernndis_functbl[7].ipt_wrap, WORKQUEUE_CRITICAL, block); } void ndis_free_bufs(b0) ndis_buffer *b0; { ndis_buffer *next; if (b0 == NULL) return; while(b0 != NULL) { next = b0->mdl_next; IoFreeMdl(b0); b0 = next; } } void ndis_free_packet(p) ndis_packet *p; { if (p == NULL) return; ndis_free_bufs(p->np_private.npp_head); NdisFreePacket(p); } int ndis_convert_res(arg) void *arg; { struct ndis_softc *sc; ndis_resource_list *rl = NULL; cm_partial_resource_desc *prd = NULL; ndis_miniport_block *block; device_t dev; struct resource_list *brl; struct resource_list_entry *brle; int error = 0; sc = arg; block = sc->ndis_block; dev = sc->ndis_dev; rl = malloc(sizeof(ndis_resource_list) + (sizeof(cm_partial_resource_desc) * (sc->ndis_rescnt - 1)), M_DEVBUF, M_NOWAIT|M_ZERO); if (rl == NULL) return (ENOMEM); rl->cprl_version = 5; rl->cprl_revision = 1; rl->cprl_count = sc->ndis_rescnt; prd = rl->cprl_partial_descs; brl = BUS_GET_RESOURCE_LIST(dev, dev); if (brl != NULL) { STAILQ_FOREACH(brle, brl, link) { switch (brle->type) { case SYS_RES_IOPORT: prd->cprd_type = CmResourceTypePort; prd->cprd_flags = CM_RESOURCE_PORT_IO; prd->cprd_sharedisp = CmResourceShareDeviceExclusive; prd->u.cprd_port.cprd_start.np_quad = brle->start; prd->u.cprd_port.cprd_len = brle->count; break; case SYS_RES_MEMORY: prd->cprd_type = CmResourceTypeMemory; prd->cprd_flags = CM_RESOURCE_MEMORY_READ_WRITE; prd->cprd_sharedisp = CmResourceShareDeviceExclusive; prd->u.cprd_mem.cprd_start.np_quad = brle->start; prd->u.cprd_mem.cprd_len = brle->count; break; case SYS_RES_IRQ: prd->cprd_type = CmResourceTypeInterrupt; prd->cprd_flags = 0; /* * Always mark interrupt 
resources as * shared, since in our implementation, * they will be. */ prd->cprd_sharedisp = CmResourceShareShared; prd->u.cprd_intr.cprd_level = brle->start; prd->u.cprd_intr.cprd_vector = brle->start; prd->u.cprd_intr.cprd_affinity = 0; break; default: break; } prd++; } } block->nmb_rlist = rl; return (error); } /* * Map an NDIS packet to an mbuf list. When an NDIS driver receives a * packet, it will hand it to us in the form of an ndis_packet, * which we need to convert to an mbuf that is then handed off * to the stack. Note: we configure the mbuf list so that it uses * the memory regions specified by the ndis_buffer structures in * the ndis_packet as external storage. In most cases, this will * point to a memory region allocated by the driver (either by * ndis_malloc_withtag() or ndis_alloc_sharedmem()). We expect * the driver to handle free()ing this region for is, so we set up * a dummy no-op free handler for it. */ int ndis_ptom(m0, p) struct mbuf **m0; ndis_packet *p; { struct mbuf *m = NULL, *prev = NULL; ndis_buffer *buf; ndis_packet_private *priv; uint32_t totlen = 0; struct ifnet *ifp; struct ether_header *eh; int diff; if (p == NULL || m0 == NULL) return (EINVAL); priv = &p->np_private; buf = priv->npp_head; p->np_refcnt = 0; for (buf = priv->npp_head; buf != NULL; buf = buf->mdl_next) { if (buf == priv->npp_head) #ifdef MT_HEADER MGETHDR(m, M_DONTWAIT, MT_HEADER); #else MGETHDR(m, M_DONTWAIT, MT_DATA); #endif else MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(*m0); *m0 = NULL; return (ENOBUFS); } m->m_len = MmGetMdlByteCount(buf); m->m_data = MmGetMdlVirtualAddress(buf); MEXTADD(m, m->m_data, m->m_len, ndis_return_packet, m->m_data, p, 0, EXT_NDIS); p->np_refcnt++; totlen += m->m_len; if (m->m_flags & M_PKTHDR) *m0 = m; else prev->m_next = m; prev = m; } /* * This is a hack to deal with the Marvell 8335 driver * which, when associated with an AP in WPA-PSK mode, * seems to overpad its frames by 8 bytes. 
I don't know * that the extra 8 bytes are for, and they're not there * in open mode, so for now clamp the frame size at 1514 * until I can figure out how to deal with this properly, * otherwise if_ethersubr() will spank us by discarding * the 'oversize' frames. */ eh = mtod((*m0), struct ether_header *); ifp = ((struct ndis_softc *)p->np_softc)->ifp; if (totlen > ETHER_MAX_FRAME(ifp, eh->ether_type, FALSE)) { diff = totlen - ETHER_MAX_FRAME(ifp, eh->ether_type, FALSE); totlen -= diff; m->m_len -= diff; } (*m0)->m_pkthdr.len = totlen; return (0); } /* * Create an NDIS packet from an mbuf chain. * This is used mainly when transmitting packets, where we need * to turn an mbuf off an interface's send queue and transform it * into an NDIS packet which will be fed into the NDIS driver's * send routine. * * NDIS packets consist of two parts: an ndis_packet structure, * which is vaguely analagous to the pkthdr portion of an mbuf, * and one or more ndis_buffer structures, which define the * actual memory segments in which the packet data resides. * We need to allocate one ndis_buffer for each mbuf in a chain, * plus one ndis_packet as the header. 
*/ int ndis_mtop(m0, p) struct mbuf *m0; ndis_packet **p; { struct mbuf *m; ndis_buffer *buf = NULL, *prev = NULL; ndis_packet_private *priv; if (p == NULL || *p == NULL || m0 == NULL) return (EINVAL); priv = &(*p)->np_private; priv->npp_totlen = m0->m_pkthdr.len; for (m = m0; m != NULL; m = m->m_next) { if (m->m_len == 0) continue; buf = IoAllocateMdl(m->m_data, m->m_len, FALSE, FALSE, NULL); if (buf == NULL) { ndis_free_packet(*p); *p = NULL; return (ENOMEM); } MmBuildMdlForNonPagedPool(buf); if (priv->npp_head == NULL) priv->npp_head = buf; else prev->mdl_next = buf; prev = buf; } priv->npp_tail = buf; return (0); } int ndis_get_supported_oids(arg, oids, oidcnt) void *arg; ndis_oid **oids; int *oidcnt; { int len, rval; ndis_oid *o; if (arg == NULL || oids == NULL || oidcnt == NULL) return (EINVAL); len = 0; ndis_get_info(arg, OID_GEN_SUPPORTED_LIST, NULL, &len); o = malloc(len, M_DEVBUF, M_NOWAIT); if (o == NULL) return (ENOMEM); rval = ndis_get_info(arg, OID_GEN_SUPPORTED_LIST, o, &len); if (rval) { free(o, M_DEVBUF); return (rval); } *oids = o; *oidcnt = len / 4; return (0); } int ndis_set_info(arg, oid, buf, buflen) void *arg; ndis_oid oid; void *buf; int *buflen; { struct ndis_softc *sc; ndis_status rval; ndis_handle adapter; ndis_setinfo_handler setfunc; uint32_t byteswritten = 0, bytesneeded = 0; uint8_t irql; uint64_t duetime; /* * According to the NDIS spec, MiniportQueryInformation() * and MiniportSetInformation() requests are handled serially: * once one request has been issued, we must wait for it to * finish before allowing another request to proceed. 
*/ sc = arg; KeResetEvent(&sc->ndis_block->nmb_setevent); KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); if (sc->ndis_block->nmb_pendingreq != NULL) { KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); panic("ndis_set_info() called while other request pending"); } else sc->ndis_block->nmb_pendingreq = (ndis_request *)sc; setfunc = sc->ndis_chars->nmc_setinfo_func; adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL || setfunc == NULL || sc->ndis_block->nmb_devicectx == NULL) { sc->ndis_block->nmb_pendingreq = NULL; KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (ENXIO); } rval = MSCALL6(setfunc, adapter, oid, buf, *buflen, &byteswritten, &bytesneeded); sc->ndis_block->nmb_pendingreq = NULL; KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); if (rval == NDIS_STATUS_PENDING) { /* Wait up to 5 seconds. */ duetime = (5 * 1000000) * -10; KeWaitForSingleObject(&sc->ndis_block->nmb_setevent, 0, 0, FALSE, &duetime); rval = sc->ndis_block->nmb_setstat; } if (byteswritten) *buflen = byteswritten; if (bytesneeded) *buflen = bytesneeded; if (rval == NDIS_STATUS_INVALID_LENGTH) return (ENOSPC); if (rval == NDIS_STATUS_INVALID_OID) return (EINVAL); if (rval == NDIS_STATUS_NOT_SUPPORTED || rval == NDIS_STATUS_NOT_ACCEPTED) return (ENOTSUP); if (rval != NDIS_STATUS_SUCCESS) return (ENODEV); return (0); } typedef void (*ndis_senddone_func)(ndis_handle, ndis_packet *, ndis_status); int ndis_send_packets(arg, packets, cnt) void *arg; ndis_packet **packets; int cnt; { struct ndis_softc *sc; ndis_handle adapter; ndis_sendmulti_handler sendfunc; ndis_senddone_func senddonefunc; int i; ndis_packet *p; uint8_t irql = 0; sc = arg; adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL) return (ENXIO); sendfunc = sc->ndis_chars->nmc_sendmulti_func; senddonefunc = sc->ndis_block->nmb_senddone_func; if (NDIS_SERIALIZED(sc->ndis_block)) KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); MSCALL3(sendfunc, adapter, packets, cnt); for (i = 0; 
i < cnt; i++) { p = packets[i]; /* * Either the driver already handed the packet to * ndis_txeof() due to a failure, or it wants to keep * it and release it asynchronously later. Skip to the * next one. */ if (p == NULL || p->np_oob.npo_status == NDIS_STATUS_PENDING) continue; MSCALL3(senddonefunc, sc->ndis_block, p, p->np_oob.npo_status); } if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (0); } int ndis_send_packet(arg, packet) void *arg; ndis_packet *packet; { struct ndis_softc *sc; ndis_handle adapter; ndis_status status; ndis_sendsingle_handler sendfunc; ndis_senddone_func senddonefunc; uint8_t irql = 0; sc = arg; adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL) return (ENXIO); sendfunc = sc->ndis_chars->nmc_sendsingle_func; senddonefunc = sc->ndis_block->nmb_senddone_func; if (NDIS_SERIALIZED(sc->ndis_block)) KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); status = MSCALL3(sendfunc, adapter, packet, packet->np_private.npp_flags); if (status == NDIS_STATUS_PENDING) { if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (0); } MSCALL3(senddonefunc, sc->ndis_block, packet, status); if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (0); } int ndis_init_dma(arg) void *arg; { struct ndis_softc *sc; int i, error; sc = arg; sc->ndis_tmaps = malloc(sizeof(bus_dmamap_t) * sc->ndis_maxpkts, M_DEVBUF, M_NOWAIT|M_ZERO); if (sc->ndis_tmaps == NULL) return (ENOMEM); for (i = 0; i < sc->ndis_maxpkts; i++) { error = bus_dmamap_create(sc->ndis_ttag, 0, &sc->ndis_tmaps[i]); if (error) { free(sc->ndis_tmaps, M_DEVBUF); return (ENODEV); } } return (0); } int ndis_destroy_dma(arg) void *arg; { struct ndis_softc *sc; struct mbuf *m; ndis_packet *p = NULL; int i; sc = arg; for (i = 0; i < sc->ndis_maxpkts; i++) { if (sc->ndis_txarray[i] != NULL) { p = sc->ndis_txarray[i]; m = (struct mbuf *)p->np_rsvd[1]; if (m != NULL) 
m_freem(m); ndis_free_packet(sc->ndis_txarray[i]); } bus_dmamap_destroy(sc->ndis_ttag, sc->ndis_tmaps[i]); } free(sc->ndis_tmaps, M_DEVBUF); bus_dma_tag_destroy(sc->ndis_ttag); return (0); } int ndis_reset_nic(arg) void *arg; { struct ndis_softc *sc; ndis_handle adapter; ndis_reset_handler resetfunc; uint8_t addressing_reset; int rval; uint8_t irql = 0; sc = arg; NDIS_LOCK(sc); adapter = sc->ndis_block->nmb_miniportadapterctx; resetfunc = sc->ndis_chars->nmc_reset_func; if (adapter == NULL || resetfunc == NULL || sc->ndis_block->nmb_devicectx == NULL) { NDIS_UNLOCK(sc); return (EIO); } NDIS_UNLOCK(sc); KeResetEvent(&sc->ndis_block->nmb_resetevent); if (NDIS_SERIALIZED(sc->ndis_block)) KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); rval = MSCALL2(resetfunc, &addressing_reset, adapter); if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); if (rval == NDIS_STATUS_PENDING) KeWaitForSingleObject(&sc->ndis_block->nmb_resetevent, 0, 0, FALSE, NULL); return (0); } int ndis_halt_nic(arg) void *arg; { struct ndis_softc *sc; ndis_handle adapter; ndis_halt_handler haltfunc; ndis_miniport_block *block; int empty = 0; uint8_t irql; sc = arg; block = sc->ndis_block; if (!cold) KeFlushQueuedDpcs(); /* * Wait for all packets to be returned. */ while (1) { KeAcquireSpinLock(&block->nmb_returnlock, &irql); empty = IsListEmpty(&block->nmb_returnlist); KeReleaseSpinLock(&block->nmb_returnlock, irql); if (empty) break; NdisMSleep(1000); } NDIS_LOCK(sc); adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL) { NDIS_UNLOCK(sc); return (EIO); } sc->ndis_block->nmb_devicectx = NULL; /* * The adapter context is only valid after the init * handler has been called, and is invalid once the * halt handler has been called. 
*/ haltfunc = sc->ndis_chars->nmc_halt_func; NDIS_UNLOCK(sc); MSCALL1(haltfunc, adapter); NDIS_LOCK(sc); sc->ndis_block->nmb_miniportadapterctx = NULL; NDIS_UNLOCK(sc); return (0); } int ndis_shutdown_nic(arg) void *arg; { struct ndis_softc *sc; ndis_handle adapter; ndis_shutdown_handler shutdownfunc; sc = arg; NDIS_LOCK(sc); adapter = sc->ndis_block->nmb_miniportadapterctx; shutdownfunc = sc->ndis_chars->nmc_shutdown_handler; NDIS_UNLOCK(sc); if (adapter == NULL || shutdownfunc == NULL) return (EIO); if (sc->ndis_chars->nmc_rsvd0 == NULL) MSCALL1(shutdownfunc, adapter); else MSCALL1(shutdownfunc, sc->ndis_chars->nmc_rsvd0); TAILQ_REMOVE(&ndis_devhead, sc->ndis_block, link); return (0); } int ndis_pnpevent_nic(arg, type) void *arg; int type; { device_t dev; struct ndis_softc *sc; ndis_handle adapter; ndis_pnpevent_handler pnpeventfunc; dev = arg; sc = device_get_softc(arg); NDIS_LOCK(sc); adapter = sc->ndis_block->nmb_miniportadapterctx; pnpeventfunc = sc->ndis_chars->nmc_pnpevent_handler; NDIS_UNLOCK(sc); if (adapter == NULL || pnpeventfunc == NULL) return (EIO); if (sc->ndis_chars->nmc_rsvd0 == NULL) MSCALL4(pnpeventfunc, adapter, type, NULL, 0); else MSCALL4(pnpeventfunc, sc->ndis_chars->nmc_rsvd0, type, NULL, 0); return (0); } int ndis_init_nic(arg) void *arg; { struct ndis_softc *sc; ndis_miniport_block *block; ndis_init_handler initfunc; ndis_status status, openstatus = 0; ndis_medium mediumarray[NdisMediumMax]; uint32_t chosenmedium, i; if (arg == NULL) return (EINVAL); sc = arg; NDIS_LOCK(sc); block = sc->ndis_block; initfunc = sc->ndis_chars->nmc_init_func; NDIS_UNLOCK(sc); sc->ndis_block->nmb_timerlist = NULL; for (i = 0; i < NdisMediumMax; i++) mediumarray[i] = i; status = MSCALL6(initfunc, &openstatus, &chosenmedium, mediumarray, NdisMediumMax, block, block); /* * If the init fails, blow away the other exported routines * we obtained from the driver so we can't call them later. * If the init failed, none of these will work. 
*/ if (status != NDIS_STATUS_SUCCESS) { NDIS_LOCK(sc); sc->ndis_block->nmb_miniportadapterctx = NULL; NDIS_UNLOCK(sc); return (ENXIO); } /* * This may look really goofy, but apparently it is possible * to halt a miniport too soon after it's been initialized. * After MiniportInitialize() finishes, pause for 1 second * to give the chip a chance to handle any short-lived timers * that were set in motion. If we call MiniportHalt() too soon, * some of the timers may not be cancelled, because the driver * expects them to fire before the halt is called. */ pause("ndwait", hz); NDIS_LOCK(sc); sc->ndis_block->nmb_devicectx = sc; NDIS_UNLOCK(sc); return (0); } static void ndis_intrsetup(dpc, dobj, ip, sc) kdpc *dpc; device_object *dobj; irp *ip; struct ndis_softc *sc; { ndis_miniport_interrupt *intr; intr = sc->ndis_block->nmb_interrupt; /* Sanity check. */ if (intr == NULL) return; KeAcquireSpinLockAtDpcLevel(&intr->ni_dpccountlock); KeResetEvent(&intr->ni_dpcevt); if (KeInsertQueueDpc(&intr->ni_dpc, NULL, NULL) == TRUE) intr->ni_dpccnt++; KeReleaseSpinLockFromDpcLevel(&intr->ni_dpccountlock); } int ndis_get_info(arg, oid, buf, buflen) void *arg; ndis_oid oid; void *buf; int *buflen; { struct ndis_softc *sc; ndis_status rval; ndis_handle adapter; ndis_queryinfo_handler queryfunc; uint32_t byteswritten = 0, bytesneeded = 0; uint8_t irql; uint64_t duetime; sc = arg; KeResetEvent(&sc->ndis_block->nmb_getevent); KeAcquireSpinLock(&sc->ndis_block->nmb_lock, &irql); if (sc->ndis_block->nmb_pendingreq != NULL) { KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); panic("ndis_get_info() called while other request pending"); } else sc->ndis_block->nmb_pendingreq = (ndis_request *)sc; queryfunc = sc->ndis_chars->nmc_queryinfo_func; adapter = sc->ndis_block->nmb_miniportadapterctx; if (adapter == NULL || queryfunc == NULL || sc->ndis_block->nmb_devicectx == NULL) { sc->ndis_block->nmb_pendingreq = NULL; KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); return (ENXIO); } rval = 
MSCALL6(queryfunc, adapter, oid, buf, *buflen, &byteswritten, &bytesneeded); sc->ndis_block->nmb_pendingreq = NULL; KeReleaseSpinLock(&sc->ndis_block->nmb_lock, irql); /* Wait for requests that block. */ if (rval == NDIS_STATUS_PENDING) { /* Wait up to 5 seconds. */ duetime = (5 * 1000000) * -10; KeWaitForSingleObject(&sc->ndis_block->nmb_getevent, 0, 0, FALSE, &duetime); rval = sc->ndis_block->nmb_getstat; } if (byteswritten) *buflen = byteswritten; if (bytesneeded) *buflen = bytesneeded; if (rval == NDIS_STATUS_INVALID_LENGTH || rval == NDIS_STATUS_BUFFER_TOO_SHORT) return (ENOSPC); if (rval == NDIS_STATUS_INVALID_OID) return (EINVAL); if (rval == NDIS_STATUS_NOT_SUPPORTED || rval == NDIS_STATUS_NOT_ACCEPTED) return (ENOTSUP); if (rval != NDIS_STATUS_SUCCESS) return (ENODEV); return (0); } uint32_t NdisAddDevice(drv, pdo) driver_object *drv; device_object *pdo; { device_object *fdo; ndis_miniport_block *block; struct ndis_softc *sc; uint32_t status; int error; sc = device_get_softc(pdo->do_devext); if (sc->ndis_iftype == PCMCIABus || sc->ndis_iftype == PCIBus) { error = bus_setup_intr(sc->ndis_dev, sc->ndis_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, ntoskrnl_intr, NULL, &sc->ndis_intrhand); if (error) return (NDIS_STATUS_FAILURE); } status = IoCreateDevice(drv, sizeof(ndis_miniport_block), NULL, FILE_DEVICE_UNKNOWN, 0, FALSE, &fdo); if (status != STATUS_SUCCESS) return (status); block = fdo->do_devext; block->nmb_filterdbs.nf_ethdb = block; block->nmb_deviceobj = fdo; block->nmb_physdeviceobj = pdo; block->nmb_nextdeviceobj = IoAttachDeviceToDeviceStack(fdo, pdo); KeInitializeSpinLock(&block->nmb_lock); KeInitializeSpinLock(&block->nmb_returnlock); KeInitializeEvent(&block->nmb_getevent, EVENT_TYPE_NOTIFY, TRUE); KeInitializeEvent(&block->nmb_setevent, EVENT_TYPE_NOTIFY, TRUE); KeInitializeEvent(&block->nmb_resetevent, EVENT_TYPE_NOTIFY, TRUE); InitializeListHead(&block->nmb_parmlist); InitializeListHead(&block->nmb_returnlist); block->nmb_returnitem = 
IoAllocateWorkItem(fdo); /* * Stash pointers to the miniport block and miniport * characteristics info in the if_ndis softc so the * UNIX wrapper driver can get to them later. */ sc->ndis_block = block; sc->ndis_chars = IoGetDriverObjectExtension(drv, (void *)1); /* * If the driver has a MiniportTransferData() function, * we should allocate a private RX packet pool. */ if (sc->ndis_chars->nmc_transferdata_func != NULL) { NdisAllocatePacketPool(&status, &block->nmb_rxpool, 32, PROTOCOL_RESERVED_SIZE_IN_PACKET); if (status != NDIS_STATUS_SUCCESS) { IoDetachDevice(block->nmb_nextdeviceobj); IoDeleteDevice(fdo); return (status); } InitializeListHead((&block->nmb_packetlist)); } /* Give interrupt handling priority over timers. */ IoInitializeDpcRequest(fdo, kernndis_functbl[6].ipt_wrap); KeSetImportanceDpc(&fdo->do_dpc, KDPC_IMPORTANCE_HIGH); /* Finish up BSD-specific setup. */ block->nmb_signature = (void *)0xcafebabe; block->nmb_status_func = kernndis_functbl[0].ipt_wrap; block->nmb_statusdone_func = kernndis_functbl[1].ipt_wrap; block->nmb_setdone_func = kernndis_functbl[2].ipt_wrap; block->nmb_querydone_func = kernndis_functbl[3].ipt_wrap; block->nmb_resetdone_func = kernndis_functbl[4].ipt_wrap; block->nmb_sendrsrc_func = kernndis_functbl[5].ipt_wrap; block->nmb_pendingreq = NULL; TAILQ_INSERT_TAIL(&ndis_devhead, block, link); return (STATUS_SUCCESS); } int ndis_unload_driver(arg) void *arg; { struct ndis_softc *sc; device_object *fdo; sc = arg; if (sc->ndis_intrhand) bus_teardown_intr(sc->ndis_dev, sc->ndis_irq, sc->ndis_intrhand); if (sc->ndis_block->nmb_rlist != NULL) free(sc->ndis_block->nmb_rlist, M_DEVBUF); ndis_flush_sysctls(sc); TAILQ_REMOVE(&ndis_devhead, sc->ndis_block, link); if (sc->ndis_chars->nmc_transferdata_func != NULL) NdisFreePacketPool(sc->ndis_block->nmb_rxpool); fdo = sc->ndis_block->nmb_deviceobj; IoFreeWorkItem(sc->ndis_block->nmb_returnitem); IoDetachDevice(sc->ndis_block->nmb_nextdeviceobj); IoDeleteDevice(fdo); return (0); } Index: 
stable/9/sys/conf =================================================================== --- stable/9/sys/conf (revision 273911) +++ stable/9/sys/conf (revision 273912) Property changes on: stable/9/sys/conf ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/conf:r263710,273377-273378,273423,273455 Index: stable/9/sys/contrib/dev/acpica =================================================================== --- stable/9/sys/contrib/dev/acpica (revision 273911) +++ stable/9/sys/contrib/dev/acpica (revision 273912) Property changes on: stable/9/sys/contrib/dev/acpica ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/dev/acpica:r263710,273377-273378,273423,273455 Index: stable/9/sys/contrib/dev/run =================================================================== --- stable/9/sys/contrib/dev/run (revision 273911) +++ stable/9/sys/contrib/dev/run (revision 273912) Property changes on: stable/9/sys/contrib/dev/run ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/dev/run:r263710,273377-273378,273423,273455 Index: stable/9/sys/contrib/octeon-sdk =================================================================== --- stable/9/sys/contrib/octeon-sdk (revision 273911) +++ stable/9/sys/contrib/octeon-sdk (revision 273912) Property changes on: stable/9/sys/contrib/octeon-sdk ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/octeon-sdk:r263710,273377-273378,273423,273455 Index: stable/9/sys/contrib/pf =================================================================== --- stable/9/sys/contrib/pf (revision 273911) +++ stable/9/sys/contrib/pf (revision 273912) Property changes on: stable/9/sys/contrib/pf 
___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/pf:r263710,273377-273378,273423,273455 Index: stable/9/sys/contrib/x86emu =================================================================== --- stable/9/sys/contrib/x86emu (revision 273911) +++ stable/9/sys/contrib/x86emu (revision 273912) Property changes on: stable/9/sys/contrib/x86emu ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/x86emu:r263710,273377-273378,273423,273455 Index: stable/9/sys/dev/acpi_support/acpi_asus.c =================================================================== --- stable/9/sys/dev/acpi_support/acpi_asus.c (revision 273911) +++ stable/9/sys/dev/acpi_support/acpi_asus.c (revision 273912) @@ -1,1280 +1,1285 @@ /*- * Copyright (c) 2004, 2005 Philip Paeps * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for extra ACPI-controlled gadgets (hotkeys, leds, etc) found on * recent Asus (and Medion) laptops. Inspired by the acpi4asus project which * implements these features in the Linux kernel. * * * * Currently should support most features, but could use some more testing. * Particularly the display-switching stuff is a bit hairy. If you have an * Asus laptop which doesn't appear to be supported, or strange things happen * when using this driver, please report to . 
*/ #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include /* Methods */ #define ACPI_ASUS_METHOD_BRN 1 #define ACPI_ASUS_METHOD_DISP 2 #define ACPI_ASUS_METHOD_LCD 3 #define ACPI_ASUS_METHOD_CAMERA 4 #define ACPI_ASUS_METHOD_CARDRD 5 #define ACPI_ASUS_METHOD_WLAN 6 #define _COMPONENT ACPI_OEM ACPI_MODULE_NAME("ASUS") struct acpi_asus_model { char *name; char *bled_set; char *dled_set; char *gled_set; char *mled_set; char *tled_set; char *wled_set; char *brn_get; char *brn_set; char *brn_up; char *brn_dn; char *lcd_get; char *lcd_set; char *disp_get; char *disp_set; char *cam_get; char *cam_set; char *crd_get; char *crd_set; char *wlan_get; char *wlan_set; void (*n_func)(ACPI_HANDLE, UINT32, void *); char *lcdd; void (*lcdd_n_func)(ACPI_HANDLE, UINT32, void *); }; struct acpi_asus_led { struct acpi_asus_softc *sc; struct cdev *cdev; int busy; int state; enum { ACPI_ASUS_LED_BLED, ACPI_ASUS_LED_DLED, ACPI_ASUS_LED_GLED, ACPI_ASUS_LED_MLED, ACPI_ASUS_LED_TLED, ACPI_ASUS_LED_WLED, } type; }; struct acpi_asus_softc { device_t dev; ACPI_HANDLE handle; ACPI_HANDLE lcdd_handle; struct acpi_asus_model *model; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; struct acpi_asus_led s_bled; struct acpi_asus_led s_dled; struct acpi_asus_led s_gled; struct acpi_asus_led s_mled; struct acpi_asus_led s_tled; struct acpi_asus_led s_wled; int s_brn; int s_disp; int s_lcd; int s_cam; int s_crd; int s_wlan; }; static void acpi_asus_lcdd_notify(ACPI_HANDLE h, UINT32 notify, void *context); /* * We can identify Asus laptops from the string they return * as a result of calling the ATK0100 'INIT' method. 
*/ static struct acpi_asus_model acpi_asus_models[] = { { .name = "xxN", .mled_set = "MLED", .wled_set = "WLED", .lcd_get = "\\BKLT", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\ADVG", .disp_set = "SDSP" }, { .name = "A1x", .mled_set = "MLED", .lcd_get = "\\BKLI", .lcd_set = "\\_SB.PCI0.ISA.EC0._Q10", .brn_up = "\\_SB.PCI0.ISA.EC0._Q0E", .brn_dn = "\\_SB.PCI0.ISA.EC0._Q0F" }, { .name = "A2x", .mled_set = "MLED", .wled_set = "WLED", .lcd_get = "\\BAOF", .lcd_set = "\\Q10", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\INFB", .disp_set = "SDSP" }, { .name = "A3E", .mled_set = "MLED", .wled_set = "WLED", .lcd_get = "\\_SB.PCI0.SBRG.EC0.RPIN(0x67)", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\_SB.PCI0.P0P2.VGA.GETD", .disp_set = "SDSP" }, { .name = "A3F", .mled_set = "MLED", .wled_set = "WLED", .bled_set = "BLED", .lcd_get = "\\_SB.PCI0.SBRG.EC0.RPIN(0x11)", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\SSTE", .disp_set = "SDSP" }, { .name = "A3N", .mled_set = "MLED", .bled_set = "BLED", .wled_set = "WLED", .lcd_get = "\\BKLT", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\_SB.PCI0.P0P3.VGA.GETD", .disp_set = "SDSP" }, { .name = "A4D", .mled_set = "MLED", .brn_up = "\\_SB_.PCI0.SBRG.EC0._Q0E", .brn_dn = "\\_SB_.PCI0.SBRG.EC0._Q0F", .brn_get = "GPLV", .brn_set = "SPLV", #ifdef notyet .disp_get = "\\_SB_.PCI0.SBRG.EC0._Q10", .disp_set = "\\_SB_.PCI0.SBRG.EC0._Q11" #endif }, { .name = "A6V", .bled_set = "BLED", .mled_set = "MLED", .wled_set = "WLED", .lcd_get = NULL, .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\_SB.PCI0.P0P3.VGA.GETD", .disp_set = "SDSP" }, { .name = "A8SR", .bled_set = "BLED", .mled_set = "MLED", .wled_set = "WLED", .lcd_get = NULL, .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .brn_get = "GPLV", .brn_set = "SPLV", 
.disp_get = "\\_SB.PCI0.P0P1.VGA.GETD", .disp_set = "SDSP", .lcdd = "\\_SB.PCI0.P0P1.VGA.LCDD", .lcdd_n_func = acpi_asus_lcdd_notify }, { .name = "D1x", .mled_set = "MLED", .lcd_get = "\\GP11", .lcd_set = "\\Q0D", .brn_up = "\\Q0C", .brn_dn = "\\Q0B", .disp_get = "\\INFB", .disp_set = "SDSP" }, { .name = "G2K", .bled_set = "BLED", .dled_set = "DLED", .gled_set = "GLED", .mled_set = "MLED", .tled_set = "TLED", .wled_set = "WLED", .brn_get = "GPLV", .brn_set = "SPLV", .lcd_get = "GBTL", .lcd_set = "SBTL", .disp_get = "\\_SB.PCI0.PCE2.VGA.GETD", .disp_set = "SDSP", }, { .name = "L2D", .mled_set = "MLED", .wled_set = "WLED", .brn_up = "\\Q0E", .brn_dn = "\\Q0F", .lcd_get = "\\SGP0", .lcd_set = "\\Q10" }, { .name = "L3C", .mled_set = "MLED", .wled_set = "WLED", .brn_get = "GPLV", .brn_set = "SPLV", .lcd_get = "\\GL32", .lcd_set = "\\_SB.PCI0.PX40.ECD0._Q10" }, { .name = "L3D", .mled_set = "MLED", .wled_set = "WLED", .brn_get = "GPLV", .brn_set = "SPLV", .lcd_get = "\\BKLG", .lcd_set = "\\Q10" }, { .name = "L3H", .mled_set = "MLED", .wled_set = "WLED", .brn_get = "GPLV", .brn_set = "SPLV", .lcd_get = "\\_SB.PCI0.PM.PBC", .lcd_set = "EHK", .disp_get = "\\_SB.INFB", .disp_set = "SDSP" }, { .name = "L4R", .mled_set = "MLED", .wled_set = "WLED", .brn_get = "GPLV", .brn_set = "SPLV", .lcd_get = "\\_SB.PCI0.SBSM.SEO4", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .disp_get = "\\_SB.PCI0.P0P1.VGA.GETD", .disp_set = "SDSP" }, { .name = "L5x", .mled_set = "MLED", .tled_set = "TLED", .lcd_get = "\\BAOF", .lcd_set = "\\Q0D", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\INFB", .disp_set = "SDSP" }, { .name = "L8L" /* Only has hotkeys, apparently */ }, { .name = "M1A", .mled_set = "MLED", .brn_up = "\\_SB.PCI0.PX40.EC0.Q0E", .brn_dn = "\\_SB.PCI0.PX40.EC0.Q0F", .lcd_get = "\\PNOF", .lcd_set = "\\_SB.PCI0.PX40.EC0.Q10" }, { .name = "M2E", .mled_set = "MLED", .wled_set = "WLED", .brn_get = "GPLV", .brn_set = "SPLV", .lcd_get = "\\GP06", .lcd_set = "\\Q10" }, { .name = "M6N", 
.mled_set = "MLED", .wled_set = "WLED", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .lcd_get = "\\_SB.BKLT", .brn_set = "SPLV", .brn_get = "GPLV", .disp_set = "SDSP", .disp_get = "\\SSTE" }, { .name = "M6R", .mled_set = "MLED", .wled_set = "WLED", .brn_get = "GPLV", .brn_set = "SPLV", .lcd_get = "\\_SB.PCI0.SBSM.SEO4", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .disp_get = "\\SSTE", .disp_set = "SDSP" }, { .name = "S1x", .mled_set = "MLED", .wled_set = "WLED", .lcd_get = "\\PNOF", .lcd_set = "\\_SB.PCI0.PX40.Q10", .brn_get = "GPLV", .brn_set = "SPLV" }, { .name = "S2x", .mled_set = "MLED", .lcd_get = "\\BKLI", .lcd_set = "\\_SB.PCI0.ISA.EC0._Q10", .brn_up = "\\_SB.PCI0.ISA.EC0._Q0B", .brn_dn = "\\_SB.PCI0.ISA.EC0._Q0A" }, { .name = "V6V", .bled_set = "BLED", .tled_set = "TLED", .wled_set = "WLED", .lcd_get = "\\BKLT", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\_SB.PCI0.P0P1.VGA.GETD", .disp_set = "SDSP" }, { .name = "W5A", .bled_set = "BLED", .lcd_get = "\\BKLT", .lcd_set = "\\_SB.PCI0.SBRG.EC0._Q10", .brn_get = "GPLV", .brn_set = "SPLV", .disp_get = "\\_SB.PCI0.P0P2.VGA.GETD", .disp_set = "SDSP" }, { .name = NULL } }; /* * Samsung P30/P35 laptops have an Asus ATK0100 gadget interface, * but they can't be probed quite the same way as Asus laptops. */ static struct acpi_asus_model acpi_samsung_models[] = { { .name = "P30", .wled_set = "WLED", .brn_up = "\\_SB.PCI0.LPCB.EC0._Q68", .brn_dn = "\\_SB.PCI0.LPCB.EC0._Q69", .lcd_get = "\\BKLT", .lcd_set = "\\_SB.PCI0.LPCB.EC0._Q0E" }, { .name = NULL } }; static void acpi_asus_eeepc_notify(ACPI_HANDLE h, UINT32 notify, void *context); /* * EeePC have an Asus ASUS010 gadget interface, * but they can't be probed quite the same way as Asus laptops. 
*/ static struct acpi_asus_model acpi_eeepc_models[] = { { .name = "EEE", .brn_get = "\\_SB.ATKD.PBLG", .brn_set = "\\_SB.ATKD.PBLS", .cam_get = "\\_SB.ATKD.CAMG", .cam_set = "\\_SB.ATKD.CAMS", .crd_set = "\\_SB.ATKD.CRDS", .crd_get = "\\_SB.ATKD.CRDG", .wlan_get = "\\_SB.ATKD.WLDG", .wlan_set = "\\_SB.ATKD.WLDS", .n_func = acpi_asus_eeepc_notify }, { .name = NULL } }; static struct { char *name; char *description; int method; - int flags; + int flag_anybody; } acpi_asus_sysctls[] = { { .name = "lcd_backlight", .method = ACPI_ASUS_METHOD_LCD, .description = "state of the lcd backlight", - .flags = CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY + .flag_anybody = 1 }, { .name = "lcd_brightness", .method = ACPI_ASUS_METHOD_BRN, .description = "brightness of the lcd panel", - .flags = CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY + .flag_anybody = 1 }, { .name = "video_output", .method = ACPI_ASUS_METHOD_DISP, .description = "display output state", - .flags = CTLTYPE_INT | CTLFLAG_RW }, { .name = "camera", .method = ACPI_ASUS_METHOD_CAMERA, .description = "internal camera state", - .flags = CTLTYPE_INT | CTLFLAG_RW }, { .name = "cardreader", .method = ACPI_ASUS_METHOD_CARDRD, .description = "internal card reader state", - .flags = CTLTYPE_INT | CTLFLAG_RW }, { .name = "wlan", .method = ACPI_ASUS_METHOD_WLAN, .description = "wireless lan state", - .flags = CTLTYPE_INT | CTLFLAG_RW }, { .name = NULL } }; ACPI_SERIAL_DECL(asus, "ACPI ASUS extras"); /* Function prototypes */ static int acpi_asus_probe(device_t dev); static int acpi_asus_attach(device_t dev); static int acpi_asus_detach(device_t dev); static void acpi_asus_led(struct acpi_asus_led *led, int state); static void acpi_asus_led_task(struct acpi_asus_led *led, int pending __unused); static int acpi_asus_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_asus_sysctl_init(struct acpi_asus_softc *sc, int method); static int acpi_asus_sysctl_get(struct acpi_asus_softc *sc, int method); static int acpi_asus_sysctl_set(struct 
acpi_asus_softc *sc, int method, int val); static void acpi_asus_notify(ACPI_HANDLE h, UINT32 notify, void *context); static device_method_t acpi_asus_methods[] = { DEVMETHOD(device_probe, acpi_asus_probe), DEVMETHOD(device_attach, acpi_asus_attach), DEVMETHOD(device_detach, acpi_asus_detach), { 0, 0 } }; static driver_t acpi_asus_driver = { "acpi_asus", acpi_asus_methods, sizeof(struct acpi_asus_softc) }; static devclass_t acpi_asus_devclass; DRIVER_MODULE(acpi_asus, acpi, acpi_asus_driver, acpi_asus_devclass, 0, 0); MODULE_DEPEND(acpi_asus, acpi, 1, 1, 1); static int acpi_asus_probe(device_t dev) { struct acpi_asus_model *model; struct acpi_asus_softc *sc; struct sbuf *sb; ACPI_BUFFER Buf; ACPI_OBJECT Arg, *Obj; ACPI_OBJECT_LIST Args; static char *asus_ids[] = { "ATK0100", "ASUS010", NULL }; char *rstr; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); if (acpi_disabled("asus")) return (ENXIO); rstr = ACPI_ID_PROBE(device_get_parent(dev), dev, asus_ids); if (rstr == NULL) { return (ENXIO); } sc = device_get_softc(dev); sc->dev = dev; sc->handle = acpi_get_handle(dev); Arg.Type = ACPI_TYPE_INTEGER; Arg.Integer.Value = 0; Args.Count = 1; Args.Pointer = &Arg; Buf.Pointer = NULL; Buf.Length = ACPI_ALLOCATE_BUFFER; AcpiEvaluateObject(sc->handle, "INIT", &Args, &Buf); Obj = Buf.Pointer; /* * The Samsung P30 returns a null-pointer from INIT, we * can identify it from the 'ODEM' string in the DSDT. */ if (Obj->String.Pointer == NULL) { ACPI_STATUS status; ACPI_TABLE_HEADER th; status = AcpiGetTableHeader(ACPI_SIG_DSDT, 0, &th); if (ACPI_FAILURE(status)) { device_printf(dev, "Unsupported (Samsung?) 
laptop\n"); AcpiOsFree(Buf.Pointer); return (ENXIO); } if (strncmp("ODEM", th.OemTableId, 4) == 0) { sc->model = &acpi_samsung_models[0]; device_set_desc(dev, "Samsung P30 Laptop Extras"); AcpiOsFree(Buf.Pointer); return (0); } /* EeePC */ if (strncmp("ASUS010", rstr, 7) == 0) { sc->model = &acpi_eeepc_models[0]; device_set_desc(dev, "ASUS EeePC"); AcpiOsFree(Buf.Pointer); return (0); } } sb = sbuf_new_auto(); if (sb == NULL) return (ENOMEM); /* * Asus laptops are simply identified by name, easy! */ for (model = acpi_asus_models; model->name != NULL; model++) { if (strncmp(Obj->String.Pointer, model->name, 3) == 0) { good: sbuf_printf(sb, "Asus %s Laptop Extras", Obj->String.Pointer); sbuf_finish(sb); sc->model = model; device_set_desc_copy(dev, sbuf_data(sb)); sbuf_delete(sb); AcpiOsFree(Buf.Pointer); return (0); } /* * Some models look exactly the same as other models, but have * their own ids. If we spot these, set them up with the same * details as the models they're like, possibly dealing with * small differences. * * XXX: there must be a prettier way to do this! 
*/ else if (strncmp(model->name, "xxN", 3) == 0 && (strncmp(Obj->String.Pointer, "M3N", 3) == 0 || strncmp(Obj->String.Pointer, "S1N", 3) == 0)) goto good; else if (strncmp(model->name, "A1x", 3) == 0 && strncmp(Obj->String.Pointer, "A1", 2) == 0) goto good; else if (strncmp(model->name, "A2x", 3) == 0 && strncmp(Obj->String.Pointer, "A2", 2) == 0) goto good; else if (strncmp(model->name, "A3F", 3) == 0 && strncmp(Obj->String.Pointer, "A6F", 3) == 0) goto good; else if (strncmp(model->name, "D1x", 3) == 0 && strncmp(Obj->String.Pointer, "D1", 2) == 0) goto good; else if (strncmp(model->name, "L3H", 3) == 0 && strncmp(Obj->String.Pointer, "L2E", 3) == 0) goto good; else if (strncmp(model->name, "L5x", 3) == 0 && strncmp(Obj->String.Pointer, "L5", 2) == 0) goto good; else if (strncmp(model->name, "M2E", 3) == 0 && (strncmp(Obj->String.Pointer, "M2", 2) == 0 || strncmp(Obj->String.Pointer, "L4E", 3) == 0)) goto good; else if (strncmp(model->name, "S1x", 3) == 0 && (strncmp(Obj->String.Pointer, "L8", 2) == 0 || strncmp(Obj->String.Pointer, "S1", 2) == 0)) goto good; else if (strncmp(model->name, "S2x", 3) == 0 && (strncmp(Obj->String.Pointer, "J1", 2) == 0 || strncmp(Obj->String.Pointer, "S2", 2) == 0)) goto good; /* L2B is like L3C but has no lcd_get method */ else if (strncmp(model->name, "L3C", 3) == 0 && strncmp(Obj->String.Pointer, "L2B", 3) == 0) { model->lcd_get = NULL; goto good; } /* A3G is like M6R but with a different lcd_get method */ else if (strncmp(model->name, "M6R", 3) == 0 && strncmp(Obj->String.Pointer, "A3G", 3) == 0) { model->lcd_get = "\\BLFG"; goto good; } /* M2N and W1N are like xxN with added WLED */ else if (strncmp(model->name, "xxN", 3) == 0 && (strncmp(Obj->String.Pointer, "M2N", 3) == 0 || strncmp(Obj->String.Pointer, "W1N", 3) == 0)) { model->wled_set = "WLED"; goto good; } /* M5N and S5N are like xxN without MLED */ else if (strncmp(model->name, "xxN", 3) == 0 && (strncmp(Obj->String.Pointer, "M5N", 3) == 0 || 
strncmp(Obj->String.Pointer, "S5N", 3) == 0)) { model->mled_set = NULL; goto good; } } sbuf_printf(sb, "Unsupported Asus laptop: %s\n", Obj->String.Pointer); sbuf_finish(sb); device_printf(dev, "%s", sbuf_data(sb)); sbuf_delete(sb); AcpiOsFree(Buf.Pointer); return (ENXIO); } static int acpi_asus_attach(device_t dev) { struct acpi_asus_softc *sc; struct acpi_softc *acpi_sc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = device_get_softc(dev); acpi_sc = acpi_device_get_parent_softc(dev); /* Build sysctl tree */ sysctl_ctx_init(&sc->sysctl_ctx); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_CHILDREN(acpi_sc->acpi_sysctl_tree), OID_AUTO, "asus", CTLFLAG_RD, 0, ""); /* Hook up nodes */ for (int i = 0; acpi_asus_sysctls[i].name != NULL; i++) { if (!acpi_asus_sysctl_init(sc, acpi_asus_sysctls[i].method)) continue; - SYSCTL_ADD_PROC(&sc->sysctl_ctx, - SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, - acpi_asus_sysctls[i].name, - acpi_asus_sysctls[i].flags, - sc, i, acpi_asus_sysctl, "I", - acpi_asus_sysctls[i].description); + if (acpi_asus_sysctls[i].flag_anybody != 0) { + SYSCTL_ADD_PROC(&sc->sysctl_ctx, + SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, + acpi_asus_sysctls[i].name, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, + sc, i, acpi_asus_sysctl, "I", + acpi_asus_sysctls[i].description); + } else { + SYSCTL_ADD_PROC(&sc->sysctl_ctx, + SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, + acpi_asus_sysctls[i].name, + CTLTYPE_INT | CTLFLAG_RW, + sc, i, acpi_asus_sysctl, "I", + acpi_asus_sysctls[i].description); + } } /* Attach leds */ if (sc->model->bled_set) { sc->s_bled.busy = 0; sc->s_bled.sc = sc; sc->s_bled.type = ACPI_ASUS_LED_BLED; sc->s_bled.cdev = led_create_state((led_t *)acpi_asus_led, &sc->s_bled, "bled", 1); } if (sc->model->dled_set) { sc->s_dled.busy = 0; sc->s_dled.sc = sc; sc->s_dled.type = ACPI_ASUS_LED_DLED; sc->s_dled.cdev = led_create((led_t *)acpi_asus_led, &sc->s_dled, "dled"); } if (sc->model->gled_set) { sc->s_gled.busy = 0; sc->s_gled.sc 
= sc; sc->s_gled.type = ACPI_ASUS_LED_GLED; sc->s_gled.cdev = led_create((led_t *)acpi_asus_led, &sc->s_gled, "gled"); } if (sc->model->mled_set) { sc->s_mled.busy = 0; sc->s_mled.sc = sc; sc->s_mled.type = ACPI_ASUS_LED_MLED; sc->s_mled.cdev = led_create((led_t *)acpi_asus_led, &sc->s_mled, "mled"); } if (sc->model->tled_set) { sc->s_tled.busy = 0; sc->s_tled.sc = sc; sc->s_tled.type = ACPI_ASUS_LED_TLED; sc->s_tled.cdev = led_create_state((led_t *)acpi_asus_led, &sc->s_tled, "tled", 1); } if (sc->model->wled_set) { sc->s_wled.busy = 0; sc->s_wled.sc = sc; sc->s_wled.type = ACPI_ASUS_LED_WLED; sc->s_wled.cdev = led_create_state((led_t *)acpi_asus_led, &sc->s_wled, "wled", 1); } /* Activate hotkeys */ AcpiEvaluateObject(sc->handle, "BSTS", NULL, NULL); /* Handle notifies */ if (sc->model->n_func == NULL) sc->model->n_func = acpi_asus_notify; AcpiInstallNotifyHandler(sc->handle, ACPI_SYSTEM_NOTIFY, sc->model->n_func, dev); /* Find and hook the 'LCDD' object */ if (sc->model->lcdd != NULL && sc->model->lcdd_n_func != NULL) { ACPI_STATUS res; sc->lcdd_handle = NULL; res = AcpiGetHandle((sc->model->lcdd[0] == '\\' ? 
NULL : sc->handle), sc->model->lcdd, &(sc->lcdd_handle)); if (ACPI_SUCCESS(res)) { AcpiInstallNotifyHandler((sc->lcdd_handle), ACPI_DEVICE_NOTIFY, sc->model->lcdd_n_func, dev); } else { printf("%s: unable to find LCD device '%s'\n", __func__, sc->model->lcdd); } } return (0); } static int acpi_asus_detach(device_t dev) { struct acpi_asus_softc *sc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = device_get_softc(dev); /* Turn the lights off */ if (sc->model->bled_set) led_destroy(sc->s_bled.cdev); if (sc->model->dled_set) led_destroy(sc->s_dled.cdev); if (sc->model->gled_set) led_destroy(sc->s_gled.cdev); if (sc->model->mled_set) led_destroy(sc->s_mled.cdev); if (sc->model->tled_set) led_destroy(sc->s_tled.cdev); if (sc->model->wled_set) led_destroy(sc->s_wled.cdev); /* Remove notify handler */ AcpiRemoveNotifyHandler(sc->handle, ACPI_SYSTEM_NOTIFY, acpi_asus_notify); if (sc->lcdd_handle) { KASSERT(sc->model->lcdd_n_func != NULL, ("model->lcdd_n_func is NULL, but lcdd_handle is non-zero")); AcpiRemoveNotifyHandler((sc->lcdd_handle), ACPI_DEVICE_NOTIFY, sc->model->lcdd_n_func); } /* Free sysctl tree */ sysctl_ctx_free(&sc->sysctl_ctx); return (0); } static void acpi_asus_led_task(struct acpi_asus_led *led, int pending __unused) { struct acpi_asus_softc *sc; char *method; int state; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = led->sc; switch (led->type) { case ACPI_ASUS_LED_BLED: method = sc->model->bled_set; state = led->state; break; case ACPI_ASUS_LED_DLED: method = sc->model->dled_set; state = led->state; break; case ACPI_ASUS_LED_GLED: method = sc->model->gled_set; state = led->state + 1; /* 1: off, 2: on */ break; case ACPI_ASUS_LED_MLED: method = sc->model->mled_set; state = !led->state; /* inverted */ break; case ACPI_ASUS_LED_TLED: method = sc->model->tled_set; state = led->state; break; case ACPI_ASUS_LED_WLED: method = sc->model->wled_set; state = led->state; break; default: printf("acpi_asus_led: invalid LED type %d\n", (int)led->type); 
return; } acpi_SetInteger(sc->handle, method, state); led->busy = 0; } static void acpi_asus_led(struct acpi_asus_led *led, int state) { ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); if (led->busy) return; led->busy = 1; led->state = state; AcpiOsExecute(OSL_NOTIFY_HANDLER, (void *)acpi_asus_led_task, led); } static int acpi_asus_sysctl(SYSCTL_HANDLER_ARGS) { struct acpi_asus_softc *sc; int arg; int error = 0; int function; int method; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = (struct acpi_asus_softc *)oidp->oid_arg1; function = oidp->oid_arg2; method = acpi_asus_sysctls[function].method; ACPI_SERIAL_BEGIN(asus); arg = acpi_asus_sysctl_get(sc, method); error = sysctl_handle_int(oidp, &arg, 0, req); /* Sanity check */ if (error != 0 || req->newptr == NULL) goto out; /* Update */ error = acpi_asus_sysctl_set(sc, method, arg); out: ACPI_SERIAL_END(asus); return (error); } static int acpi_asus_sysctl_get(struct acpi_asus_softc *sc, int method) { int val = 0; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(asus); switch (method) { case ACPI_ASUS_METHOD_BRN: val = sc->s_brn; break; case ACPI_ASUS_METHOD_DISP: val = sc->s_disp; break; case ACPI_ASUS_METHOD_LCD: val = sc->s_lcd; break; case ACPI_ASUS_METHOD_CAMERA: val = sc->s_cam; break; case ACPI_ASUS_METHOD_CARDRD: val = sc->s_crd; break; case ACPI_ASUS_METHOD_WLAN: val = sc->s_wlan; break; } return (val); } static int acpi_asus_sysctl_set(struct acpi_asus_softc *sc, int method, int arg) { ACPI_STATUS status = AE_OK; ACPI_OBJECT_LIST acpiargs; ACPI_OBJECT acpiarg[1]; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(asus); acpiargs.Count = 1; acpiargs.Pointer = acpiarg; acpiarg[0].Type = ACPI_TYPE_INTEGER; acpiarg[0].Integer.Value = arg; switch (method) { case ACPI_ASUS_METHOD_BRN: if (arg < 0 || arg > 15) return (EINVAL); if (sc->model->brn_set) status = acpi_SetInteger(sc->handle, sc->model->brn_set, arg); else { while (arg != 0) { status = 
AcpiEvaluateObject(sc->handle, (arg > 0) ? sc->model->brn_up : sc->model->brn_dn, NULL, NULL); (arg > 0) ? arg-- : arg++; } } if (ACPI_SUCCESS(status)) sc->s_brn = arg; break; case ACPI_ASUS_METHOD_DISP: if (arg < 0 || arg > 7) return (EINVAL); status = acpi_SetInteger(sc->handle, sc->model->disp_set, arg); if (ACPI_SUCCESS(status)) sc->s_disp = arg; break; case ACPI_ASUS_METHOD_LCD: if (arg < 0 || arg > 1) return (EINVAL); if (strncmp(sc->model->name, "L3H", 3) != 0) status = AcpiEvaluateObject(sc->handle, sc->model->lcd_set, NULL, NULL); else status = acpi_SetInteger(sc->handle, sc->model->lcd_set, 0x7); if (ACPI_SUCCESS(status)) sc->s_lcd = arg; break; case ACPI_ASUS_METHOD_CAMERA: if (arg < 0 || arg > 1) return (EINVAL); status = AcpiEvaluateObject(sc->handle, sc->model->cam_set, &acpiargs, NULL); if (ACPI_SUCCESS(status)) sc->s_cam = arg; break; case ACPI_ASUS_METHOD_CARDRD: if (arg < 0 || arg > 1) return (EINVAL); status = AcpiEvaluateObject(sc->handle, sc->model->crd_set, &acpiargs, NULL); if (ACPI_SUCCESS(status)) sc->s_crd = arg; break; case ACPI_ASUS_METHOD_WLAN: if (arg < 0 || arg > 1) return (EINVAL); status = AcpiEvaluateObject(sc->handle, sc->model->wlan_set, &acpiargs, NULL); if (ACPI_SUCCESS(status)) sc->s_wlan = arg; break; } return (0); } static int acpi_asus_sysctl_init(struct acpi_asus_softc *sc, int method) { ACPI_STATUS status; switch (method) { case ACPI_ASUS_METHOD_BRN: if (sc->model->brn_get) { /* GPLV/SPLV models */ status = acpi_GetInteger(sc->handle, sc->model->brn_get, &sc->s_brn); if (ACPI_SUCCESS(status)) return (TRUE); } else if (sc->model->brn_up) { /* Relative models */ status = AcpiEvaluateObject(sc->handle, sc->model->brn_up, NULL, NULL); if (ACPI_FAILURE(status)) return (FALSE); status = AcpiEvaluateObject(sc->handle, sc->model->brn_dn, NULL, NULL); if (ACPI_FAILURE(status)) return (FALSE); return (TRUE); } return (FALSE); case ACPI_ASUS_METHOD_DISP: if (sc->model->disp_get) { status = acpi_GetInteger(sc->handle, 
sc->model->disp_get, &sc->s_disp); if (ACPI_SUCCESS(status)) return (TRUE); } return (FALSE); case ACPI_ASUS_METHOD_LCD: if (sc->model->lcd_get) { if (strncmp(sc->model->name, "L3H", 3) == 0) { ACPI_BUFFER Buf; ACPI_OBJECT Arg[2], Obj; ACPI_OBJECT_LIST Args; /* L3H is a bit special */ Arg[0].Type = ACPI_TYPE_INTEGER; Arg[0].Integer.Value = 0x02; Arg[1].Type = ACPI_TYPE_INTEGER; Arg[1].Integer.Value = 0x03; Args.Count = 2; Args.Pointer = Arg; Buf.Length = sizeof(Obj); Buf.Pointer = &Obj; status = AcpiEvaluateObject(sc->handle, sc->model->lcd_get, &Args, &Buf); if (ACPI_SUCCESS(status) && Obj.Type == ACPI_TYPE_INTEGER) { sc->s_lcd = Obj.Integer.Value >> 8; return (TRUE); } } else { status = acpi_GetInteger(sc->handle, sc->model->lcd_get, &sc->s_lcd); if (ACPI_SUCCESS(status)) return (TRUE); } } return (FALSE); case ACPI_ASUS_METHOD_CAMERA: if (sc->model->cam_get) { status = acpi_GetInteger(sc->handle, sc->model->cam_get, &sc->s_cam); if (ACPI_SUCCESS(status)) return (TRUE); } return (FALSE); case ACPI_ASUS_METHOD_CARDRD: if (sc->model->crd_get) { status = acpi_GetInteger(sc->handle, sc->model->crd_get, &sc->s_crd); if (ACPI_SUCCESS(status)) return (TRUE); } return (FALSE); case ACPI_ASUS_METHOD_WLAN: if (sc->model->wlan_get) { status = acpi_GetInteger(sc->handle, sc->model->wlan_get, &sc->s_wlan); if (ACPI_SUCCESS(status)) return (TRUE); } return (FALSE); } return (FALSE); } static void acpi_asus_notify(ACPI_HANDLE h, UINT32 notify, void *context) { struct acpi_asus_softc *sc; struct acpi_softc *acpi_sc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = device_get_softc((device_t)context); acpi_sc = acpi_device_get_parent_softc(sc->dev); ACPI_SERIAL_BEGIN(asus); if ((notify & ~0x10) <= 15) { sc->s_brn = notify & ~0x10; ACPI_VPRINT(sc->dev, acpi_sc, "Brightness increased\n"); } else if ((notify & ~0x20) <= 15) { sc->s_brn = notify & ~0x20; ACPI_VPRINT(sc->dev, acpi_sc, "Brightness decreased\n"); } else if (notify == 0x33) { sc->s_lcd = 1; ACPI_VPRINT(sc->dev, 
acpi_sc, "LCD turned on\n"); } else if (notify == 0x34) { sc->s_lcd = 0; ACPI_VPRINT(sc->dev, acpi_sc, "LCD turned off\n"); } else if (notify == 0x86) { acpi_asus_sysctl_set(sc, ACPI_ASUS_METHOD_BRN, sc->s_brn-1); ACPI_VPRINT(sc->dev, acpi_sc, "Brightness decreased\n"); } else if (notify == 0x87) { acpi_asus_sysctl_set(sc, ACPI_ASUS_METHOD_BRN, sc->s_brn+1); ACPI_VPRINT(sc->dev, acpi_sc, "Brightness increased\n"); } else { /* Notify devd(8) */ acpi_UserNotify("ASUS", h, notify); } ACPI_SERIAL_END(asus); } static void acpi_asus_lcdd_notify(ACPI_HANDLE h, UINT32 notify, void *context) { struct acpi_asus_softc *sc; struct acpi_softc *acpi_sc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = device_get_softc((device_t)context); acpi_sc = acpi_device_get_parent_softc(sc->dev); ACPI_SERIAL_BEGIN(asus); switch (notify) { case 0x87: acpi_asus_sysctl_set(sc, ACPI_ASUS_METHOD_BRN, sc->s_brn-1); ACPI_VPRINT(sc->dev, acpi_sc, "Brightness decreased\n"); break; case 0x86: acpi_asus_sysctl_set(sc, ACPI_ASUS_METHOD_BRN, sc->s_brn+1); ACPI_VPRINT(sc->dev, acpi_sc, "Brightness increased\n"); break; } ACPI_SERIAL_END(asus); } static void acpi_asus_eeepc_notify(ACPI_HANDLE h, UINT32 notify, void *context) { struct acpi_asus_softc *sc; struct acpi_softc *acpi_sc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = device_get_softc((device_t)context); acpi_sc = acpi_device_get_parent_softc(sc->dev); ACPI_SERIAL_BEGIN(asus); if ((notify & ~0x20) <= 15) { sc->s_brn = notify & ~0x20; ACPI_VPRINT(sc->dev, acpi_sc, "Brightness increased/decreased\n"); } else { /* Notify devd(8) */ acpi_UserNotify("ASUS-Eee", h, notify); } ACPI_SERIAL_END(asus); } Index: stable/9/sys/dev/acpi_support/acpi_hp.c =================================================================== --- stable/9/sys/dev/acpi_support/acpi_hp.c (revision 273911) +++ stable/9/sys/dev/acpi_support/acpi_hp.c (revision 273912) @@ -1,1217 +1,1213 @@ /*- * Copyright (c) 2009 Michael Gmelin * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for extra ACPI-controlled features found on HP laptops * that use a WMI enabled BIOS (e.g. HP Compaq 8510p and 6510p). * Allows to control and read status of integrated hardware and read * BIOS settings through CMI. * Inspired by the hp-wmi driver, which implements a subset of these * features (hotkeys) on Linux. 
* * HP CMI whitepaper: * http://h20331.www2.hp.com/Hpsub/downloads/cmi_whitepaper.pdf * wmi-hp for Linux: * http://www.kernel.org * WMI and ACPI: * http://www.microsoft.com/whdc/system/pnppwr/wmi/wmi-acpi.mspx */ #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_wmi_if.h" #define _COMPONENT ACPI_OEM ACPI_MODULE_NAME("HP") #define ACPI_HP_WMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C" #define ACPI_HP_WMI_BIOS_GUID "5FB7F034-2C63-45E9-BE91-3D44E2C707E4" #define ACPI_HP_WMI_CMI_GUID "2D114B49-2DFB-4130-B8FE-4A3C09E75133" #define ACPI_HP_WMI_DISPLAY_COMMAND 0x1 #define ACPI_HP_WMI_HDDTEMP_COMMAND 0x2 #define ACPI_HP_WMI_ALS_COMMAND 0x3 #define ACPI_HP_WMI_DOCK_COMMAND 0x4 #define ACPI_HP_WMI_WIRELESS_COMMAND 0x5 #define ACPI_HP_METHOD_WLAN_ENABLED 1 #define ACPI_HP_METHOD_WLAN_RADIO 2 #define ACPI_HP_METHOD_WLAN_ON_AIR 3 #define ACPI_HP_METHOD_WLAN_ENABLE_IF_RADIO_ON 4 #define ACPI_HP_METHOD_WLAN_DISABLE_IF_RADIO_OFF 5 #define ACPI_HP_METHOD_BLUETOOTH_ENABLED 6 #define ACPI_HP_METHOD_BLUETOOTH_RADIO 7 #define ACPI_HP_METHOD_BLUETOOTH_ON_AIR 8 #define ACPI_HP_METHOD_BLUETOOTH_ENABLE_IF_RADIO_ON 9 #define ACPI_HP_METHOD_BLUETOOTH_DISABLE_IF_RADIO_OFF 10 #define ACPI_HP_METHOD_WWAN_ENABLED 11 #define ACPI_HP_METHOD_WWAN_RADIO 12 #define ACPI_HP_METHOD_WWAN_ON_AIR 13 #define ACPI_HP_METHOD_WWAN_ENABLE_IF_RADIO_ON 14 #define ACPI_HP_METHOD_WWAN_DISABLE_IF_RADIO_OFF 15 #define ACPI_HP_METHOD_ALS 16 #define ACPI_HP_METHOD_DISPLAY 17 #define ACPI_HP_METHOD_HDDTEMP 18 #define ACPI_HP_METHOD_DOCK 19 #define ACPI_HP_METHOD_CMI_DETAIL 20 #define ACPI_HP_METHOD_VERBOSE 21 #define HP_MASK_WWAN_ON_AIR 0x1000000 #define HP_MASK_BLUETOOTH_ON_AIR 0x10000 #define HP_MASK_WLAN_ON_AIR 0x100 #define HP_MASK_WWAN_RADIO 0x8000000 #define HP_MASK_BLUETOOTH_RADIO 0x80000 #define HP_MASK_WLAN_RADIO 0x800 #define HP_MASK_WWAN_ENABLED 0x2000000 #define HP_MASK_BLUETOOTH_ENABLED 0x20000 #define 
HP_MASK_WLAN_ENABLED 0x200 #define ACPI_HP_CMI_DETAIL_PATHS 0x01 #define ACPI_HP_CMI_DETAIL_ENUMS 0x02 #define ACPI_HP_CMI_DETAIL_FLAGS 0x04 #define ACPI_HP_CMI_DETAIL_SHOW_MAX_INSTANCE 0x08 struct acpi_hp_inst_seq_pair { UINT32 sequence; /* sequence number as suggested by cmi bios */ UINT8 instance; /* object instance on guid */ }; struct acpi_hp_softc { device_t dev; device_t wmi_dev; int has_notify; /* notification GUID found */ int has_cmi; /* CMI GUID found */ int cmi_detail; /* CMI detail level (set by sysctl) */ int verbose; /* add debug output */ int wlan_enable_if_radio_on; /* set by sysctl */ int wlan_disable_if_radio_off; /* set by sysctl */ int bluetooth_enable_if_radio_on; /* set by sysctl */ int bluetooth_disable_if_radio_off; /* set by sysctl */ int wwan_enable_if_radio_on; /* set by sysctl */ int wwan_disable_if_radio_off; /* set by sysctl */ int was_wlan_on_air; /* last known WLAN on air status */ int was_bluetooth_on_air; /* last known BT on air status */ int was_wwan_on_air; /* last known WWAN on air status */ struct sysctl_ctx_list *sysctl_ctx; struct sysctl_oid *sysctl_tree; struct cdev *hpcmi_dev_t; /* hpcmi device handle */ struct sbuf hpcmi_sbuf; /* /dev/hpcmi output sbuf */ pid_t hpcmi_open_pid; /* pid operating on /dev/hpcmi */ int hpcmi_bufptr; /* current pointer position in /dev/hpcmi output buffer */ int cmi_order_size; /* size of cmi_order list */ struct acpi_hp_inst_seq_pair cmi_order[128]; /* list of CMI instances ordered by BIOS suggested sequence */ }; static struct { char *name; int method; char *description; - int access; + int flag_rdonly; } acpi_hp_sysctls[] = { { .name = "wlan_enabled", .method = ACPI_HP_METHOD_WLAN_ENABLED, .description = "Enable/Disable WLAN (WiFi)", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "wlan_radio", .method = ACPI_HP_METHOD_WLAN_RADIO, .description = "WLAN radio status", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "wlan_on_air", .method = ACPI_HP_METHOD_WLAN_ON_AIR, 
.description = "WLAN radio ready to use (enabled and radio)", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "wlan_enable_if_radio_on", .method = ACPI_HP_METHOD_WLAN_ENABLE_IF_RADIO_ON, .description = "Enable WLAN if radio is turned on", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "wlan_disable_if_radio_off", .method = ACPI_HP_METHOD_WLAN_DISABLE_IF_RADIO_OFF, .description = "Disable WLAN if radio is turned off", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "bt_enabled", .method = ACPI_HP_METHOD_BLUETOOTH_ENABLED, .description = "Enable/Disable Bluetooth", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "bt_radio", .method = ACPI_HP_METHOD_BLUETOOTH_RADIO, .description = "Bluetooth radio status", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "bt_on_air", .method = ACPI_HP_METHOD_BLUETOOTH_ON_AIR, .description = "Bluetooth radio ready to use" " (enabled and radio)", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "bt_enable_if_radio_on", .method = ACPI_HP_METHOD_BLUETOOTH_ENABLE_IF_RADIO_ON, .description = "Enable bluetooth if radio is turned on", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "bt_disable_if_radio_off", .method = ACPI_HP_METHOD_BLUETOOTH_DISABLE_IF_RADIO_OFF, .description = "Disable bluetooth if radio is turned off", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "wwan_enabled", .method = ACPI_HP_METHOD_WWAN_ENABLED, .description = "Enable/Disable WWAN (UMTS)", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "wwan_radio", .method = ACPI_HP_METHOD_WWAN_RADIO, .description = "WWAN radio status", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "wwan_on_air", .method = ACPI_HP_METHOD_WWAN_ON_AIR, .description = "WWAN radio ready to use (enabled and radio)", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "wwan_enable_if_radio_on", .method = ACPI_HP_METHOD_WWAN_ENABLE_IF_RADIO_ON, .description = "Enable WWAN if radio is turned 
on", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "wwan_disable_if_radio_off", .method = ACPI_HP_METHOD_WWAN_DISABLE_IF_RADIO_OFF, .description = "Disable WWAN if radio is turned off", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "als_enabled", .method = ACPI_HP_METHOD_ALS, .description = "Enable/Disable ALS (Ambient light sensor)", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "display", .method = ACPI_HP_METHOD_DISPLAY, .description = "Display status", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "hdd_temperature", .method = ACPI_HP_METHOD_HDDTEMP, .description = "HDD temperature", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "is_docked", .method = ACPI_HP_METHOD_DOCK, .description = "Docking station status", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "cmi_detail", .method = ACPI_HP_METHOD_CMI_DETAIL, .description = "Details shown in CMI output " "(cat /dev/hpcmi)", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "verbose", .method = ACPI_HP_METHOD_VERBOSE, .description = "Verbosity level", - .access = CTLTYPE_INT | CTLFLAG_RW }, { NULL, 0, NULL, 0 } }; ACPI_SERIAL_DECL(hp, "HP ACPI-WMI Mapping"); static void acpi_hp_identify(driver_t *driver, device_t parent); static int acpi_hp_probe(device_t dev); static int acpi_hp_attach(device_t dev); static int acpi_hp_detach(device_t dev); static void acpi_hp_evaluate_auto_on_off(struct acpi_hp_softc* sc); static int acpi_hp_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_hp_sysctl_set(struct acpi_hp_softc *sc, int method, int arg, int oldarg); static int acpi_hp_sysctl_get(struct acpi_hp_softc *sc, int method); static int acpi_hp_exec_wmi_command(device_t wmi_dev, int command, int is_write, int val); static void acpi_hp_notify(ACPI_HANDLE h, UINT32 notify, void *context); static int acpi_hp_get_cmi_block(device_t wmi_dev, const char* guid, UINT8 instance, char* outbuf, size_t outsize, UINT32* sequence, int detail); static void 
acpi_hp_hex_decode(char* buffer); static d_open_t acpi_hp_hpcmi_open; static d_close_t acpi_hp_hpcmi_close; static d_read_t acpi_hp_hpcmi_read; /* handler /dev/hpcmi device */ static struct cdevsw hpcmi_cdevsw = { .d_version = D_VERSION, .d_open = acpi_hp_hpcmi_open, .d_close = acpi_hp_hpcmi_close, .d_read = acpi_hp_hpcmi_read, .d_name = "hpcmi", }; static device_method_t acpi_hp_methods[] = { DEVMETHOD(device_identify, acpi_hp_identify), DEVMETHOD(device_probe, acpi_hp_probe), DEVMETHOD(device_attach, acpi_hp_attach), DEVMETHOD(device_detach, acpi_hp_detach), {0, 0} }; static driver_t acpi_hp_driver = { "acpi_hp", acpi_hp_methods, sizeof(struct acpi_hp_softc), }; static devclass_t acpi_hp_devclass; DRIVER_MODULE(acpi_hp, acpi_wmi, acpi_hp_driver, acpi_hp_devclass, 0, 0); MODULE_DEPEND(acpi_hp, acpi_wmi, 1, 1, 1); MODULE_DEPEND(acpi_hp, acpi, 1, 1, 1); static void acpi_hp_evaluate_auto_on_off(struct acpi_hp_softc *sc) { int wireless; int new_wlan_status; int new_bluetooth_status; int new_wwan_status; wireless = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); new_wlan_status = -1; new_bluetooth_status = -1; new_wwan_status = -1; if (sc->verbose) device_printf(sc->wmi_dev, "Wireless status is %x\n", wireless); if (sc->wlan_disable_if_radio_off && !(wireless & HP_MASK_WLAN_RADIO) && (wireless & HP_MASK_WLAN_ENABLED)) { acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, 0x100); new_wlan_status = 0; } else if (sc->wlan_enable_if_radio_on && (wireless & HP_MASK_WLAN_RADIO) && !(wireless & HP_MASK_WLAN_ENABLED)) { acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, 0x101); new_wlan_status = 1; } if (sc->bluetooth_disable_if_radio_off && !(wireless & HP_MASK_BLUETOOTH_RADIO) && (wireless & HP_MASK_BLUETOOTH_ENABLED)) { acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, 0x200); new_bluetooth_status = 0; } else if (sc->bluetooth_enable_if_radio_on && (wireless & HP_MASK_BLUETOOTH_RADIO) && 
!(wireless & HP_MASK_BLUETOOTH_ENABLED)) { acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, 0x202); new_bluetooth_status = 1; } if (sc->wwan_disable_if_radio_off && !(wireless & HP_MASK_WWAN_RADIO) && (wireless & HP_MASK_WWAN_ENABLED)) { acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, 0x400); new_wwan_status = 0; } else if (sc->wwan_enable_if_radio_on && (wireless & HP_MASK_WWAN_RADIO) && !(wireless & HP_MASK_WWAN_ENABLED)) { acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, 0x404); new_wwan_status = 1; } if (new_wlan_status == -1) { new_wlan_status = (wireless & HP_MASK_WLAN_ON_AIR); if ((new_wlan_status?1:0) != sc->was_wlan_on_air) { sc->was_wlan_on_air = sc->was_wlan_on_air?0:1; if (sc->verbose) device_printf(sc->wmi_dev, "WLAN on air changed to %i " "(new_wlan_status is %i)\n", sc->was_wlan_on_air, new_wlan_status); acpi_UserNotify("HP", ACPI_ROOT_OBJECT, 0xc0+sc->was_wlan_on_air); } } if (new_bluetooth_status == -1) { new_bluetooth_status = (wireless & HP_MASK_BLUETOOTH_ON_AIR); if ((new_bluetooth_status?1:0) != sc->was_bluetooth_on_air) { sc->was_bluetooth_on_air = sc->was_bluetooth_on_air? 0:1; if (sc->verbose) device_printf(sc->wmi_dev, "BLUETOOTH on air changed" " to %i (new_bluetooth_status is %i)\n", sc->was_bluetooth_on_air, new_bluetooth_status); acpi_UserNotify("HP", ACPI_ROOT_OBJECT, 0xd0+sc->was_bluetooth_on_air); } } if (new_wwan_status == -1) { new_wwan_status = (wireless & HP_MASK_WWAN_ON_AIR); if ((new_wwan_status?1:0) != sc->was_wwan_on_air) { sc->was_wwan_on_air = sc->was_wwan_on_air?0:1; if (sc->verbose) device_printf(sc->wmi_dev, "WWAN on air changed to %i" " (new_wwan_status is %i)\n", sc->was_wwan_on_air, new_wwan_status); acpi_UserNotify("HP", ACPI_ROOT_OBJECT, 0xe0+sc->was_wwan_on_air); } } } static void acpi_hp_identify(driver_t *driver, device_t parent) { /* Don't do anything if driver is disabled. */ if (acpi_disabled("hp")) return; /* Add only a single device instance. 
*/ if (device_find_child(parent, "acpi_hp", -1) != NULL) return; if (BUS_ADD_CHILD(parent, 0, "acpi_hp", -1) == NULL) device_printf(parent, "add acpi_hp child failed\n"); } static int acpi_hp_probe(device_t dev) { device_set_desc(dev, "HP ACPI-WMI Mapping"); return (0); } static int acpi_hp_attach(device_t dev) { struct acpi_hp_softc *sc; int arg; ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); sc = device_get_softc(dev); sc->dev = dev; sc->has_notify = 0; sc->has_cmi = 0; sc->bluetooth_enable_if_radio_on = 0; sc->bluetooth_disable_if_radio_off = 0; sc->wlan_enable_if_radio_on = 0; sc->wlan_disable_if_radio_off = 0; sc->wlan_enable_if_radio_on = 0; sc->wlan_disable_if_radio_off = 0; sc->was_wlan_on_air = 0; sc->was_bluetooth_on_air = 0; sc->was_wwan_on_air = 0; sc->cmi_detail = 0; sc->cmi_order_size = -1; sc->verbose = 0; memset(sc->cmi_order, 0, sizeof(sc->cmi_order)); sc->wmi_dev = device_get_parent(dev); if (!ACPI_WMI_PROVIDES_GUID_STRING(sc->wmi_dev, ACPI_HP_WMI_BIOS_GUID)) { device_printf(dev, "WMI device does not provide the HP BIOS GUID\n"); return (EINVAL); } if (ACPI_WMI_PROVIDES_GUID_STRING(sc->wmi_dev, ACPI_HP_WMI_EVENT_GUID)) { device_printf(dev, "HP event GUID detected, installing event handler\n"); if (ACPI_WMI_INSTALL_EVENT_HANDLER(sc->wmi_dev, ACPI_HP_WMI_EVENT_GUID, acpi_hp_notify, dev)) { device_printf(dev, "Could not install notification handler!\n"); } else { sc->has_notify = 1; } } if ((sc->has_cmi = ACPI_WMI_PROVIDES_GUID_STRING(sc->wmi_dev, ACPI_HP_WMI_CMI_GUID) )) { device_printf(dev, "HP CMI GUID detected\n"); } if (sc->has_cmi) { sc->hpcmi_dev_t = make_dev(&hpcmi_cdevsw, 0, UID_ROOT, GID_WHEEL, 0644, "hpcmi"); sc->hpcmi_dev_t->si_drv1 = sc; sc->hpcmi_open_pid = 0; sc->hpcmi_bufptr = -1; } ACPI_SERIAL_BEGIN(hp); sc->sysctl_ctx = device_get_sysctl_ctx(dev); sc->sysctl_tree = device_get_sysctl_tree(dev); for (int i = 0; acpi_hp_sysctls[i].name != NULL; ++i) { arg = 0; if ((!sc->has_notify && (acpi_hp_sysctls[i].method == 
ACPI_HP_METHOD_WLAN_ENABLE_IF_RADIO_ON || acpi_hp_sysctls[i].method == ACPI_HP_METHOD_WLAN_DISABLE_IF_RADIO_OFF || acpi_hp_sysctls[i].method == ACPI_HP_METHOD_BLUETOOTH_ENABLE_IF_RADIO_ON || acpi_hp_sysctls[i].method == ACPI_HP_METHOD_BLUETOOTH_DISABLE_IF_RADIO_OFF || acpi_hp_sysctls[i].method == ACPI_HP_METHOD_WWAN_ENABLE_IF_RADIO_ON || acpi_hp_sysctls[i].method == ACPI_HP_METHOD_WWAN_DISABLE_IF_RADIO_OFF)) || (arg = acpi_hp_sysctl_get(sc, acpi_hp_sysctls[i].method)) < 0) { continue; } if (acpi_hp_sysctls[i].method == ACPI_HP_METHOD_WLAN_ON_AIR) { sc->was_wlan_on_air = arg; } else if (acpi_hp_sysctls[i].method == ACPI_HP_METHOD_BLUETOOTH_ON_AIR) { sc->was_bluetooth_on_air = arg; } else if (acpi_hp_sysctls[i].method == ACPI_HP_METHOD_WWAN_ON_AIR) { sc->was_wwan_on_air = arg; } - SYSCTL_ADD_PROC(sc->sysctl_ctx, - SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, - acpi_hp_sysctls[i].name, acpi_hp_sysctls[i].access, - sc, i, acpi_hp_sysctl, "I", - acpi_hp_sysctls[i].description); + if (acpi_hp_sysctls[i].flag_rdonly != 0) { + SYSCTL_ADD_PROC(sc->sysctl_ctx, + SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, + acpi_hp_sysctls[i].name, CTLTYPE_INT | CTLFLAG_RD, + sc, i, acpi_hp_sysctl, "I", + acpi_hp_sysctls[i].description); + } else { + SYSCTL_ADD_PROC(sc->sysctl_ctx, + SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, + acpi_hp_sysctls[i].name, CTLTYPE_INT | CTLFLAG_RW, + sc, i, acpi_hp_sysctl, "I", + acpi_hp_sysctls[i].description); + } } ACPI_SERIAL_END(hp); return (0); } static int acpi_hp_detach(device_t dev) { struct acpi_hp_softc *sc; ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); sc = device_get_softc(dev); if (sc->has_cmi && sc->hpcmi_open_pid != 0) return (EBUSY); if (sc->has_notify) ACPI_WMI_REMOVE_EVENT_HANDLER(dev, ACPI_HP_WMI_EVENT_GUID); if (sc->has_cmi) { if (sc->hpcmi_bufptr != -1) { sbuf_delete(&sc->hpcmi_sbuf); sc->hpcmi_bufptr = -1; } sc->hpcmi_open_pid = 0; destroy_dev(sc->hpcmi_dev_t); } return (0); } static int acpi_hp_sysctl(SYSCTL_HANDLER_ARGS) { struct 
acpi_hp_softc *sc; int arg; int oldarg; int error = 0; int function; int method; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = (struct acpi_hp_softc *)oidp->oid_arg1; function = oidp->oid_arg2; method = acpi_hp_sysctls[function].method; ACPI_SERIAL_BEGIN(hp); arg = acpi_hp_sysctl_get(sc, method); oldarg = arg; error = sysctl_handle_int(oidp, &arg, 0, req); if (!error && req->newptr != NULL) { error = acpi_hp_sysctl_set(sc, method, arg, oldarg); } ACPI_SERIAL_END(hp); return (error); } static int acpi_hp_sysctl_get(struct acpi_hp_softc *sc, int method) { int val = 0; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(hp); switch (method) { case ACPI_HP_METHOD_WLAN_ENABLED: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_WLAN_ENABLED) != 0); break; case ACPI_HP_METHOD_WLAN_RADIO: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_WLAN_RADIO) != 0); break; case ACPI_HP_METHOD_WLAN_ON_AIR: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_WLAN_ON_AIR) != 0); break; case ACPI_HP_METHOD_WLAN_ENABLE_IF_RADIO_ON: val = sc->wlan_enable_if_radio_on; break; case ACPI_HP_METHOD_WLAN_DISABLE_IF_RADIO_OFF: val = sc->wlan_disable_if_radio_off; break; case ACPI_HP_METHOD_BLUETOOTH_ENABLED: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_BLUETOOTH_ENABLED) != 0); break; case ACPI_HP_METHOD_BLUETOOTH_RADIO: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_BLUETOOTH_RADIO) != 0); break; case ACPI_HP_METHOD_BLUETOOTH_ON_AIR: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_BLUETOOTH_ON_AIR) != 0); break; case ACPI_HP_METHOD_BLUETOOTH_ENABLE_IF_RADIO_ON: val = sc->bluetooth_enable_if_radio_on; break; case 
ACPI_HP_METHOD_BLUETOOTH_DISABLE_IF_RADIO_OFF: val = sc->bluetooth_disable_if_radio_off; break; case ACPI_HP_METHOD_WWAN_ENABLED: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_WWAN_ENABLED) != 0); break; case ACPI_HP_METHOD_WWAN_RADIO: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_WWAN_RADIO) != 0); break; case ACPI_HP_METHOD_WWAN_ON_AIR: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 0, 0); val = ((val & HP_MASK_WWAN_ON_AIR) != 0); break; case ACPI_HP_METHOD_WWAN_ENABLE_IF_RADIO_ON: val = sc->wwan_enable_if_radio_on; break; case ACPI_HP_METHOD_WWAN_DISABLE_IF_RADIO_OFF: val = sc->wwan_disable_if_radio_off; break; case ACPI_HP_METHOD_ALS: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_ALS_COMMAND, 0, 0); break; case ACPI_HP_METHOD_DISPLAY: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_DISPLAY_COMMAND, 0, 0); break; case ACPI_HP_METHOD_HDDTEMP: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_HDDTEMP_COMMAND, 0, 0); break; case ACPI_HP_METHOD_DOCK: val = acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_DOCK_COMMAND, 0, 0); break; case ACPI_HP_METHOD_CMI_DETAIL: val = sc->cmi_detail; break; case ACPI_HP_METHOD_VERBOSE: val = sc->verbose; break; } return (val); } static int acpi_hp_sysctl_set(struct acpi_hp_softc *sc, int method, int arg, int oldarg) { ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(hp); if (method != ACPI_HP_METHOD_CMI_DETAIL && method != ACPI_HP_METHOD_VERBOSE) arg = arg?1:0; if (arg != oldarg) { switch (method) { case ACPI_HP_METHOD_WLAN_ENABLED: return (acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, arg?0x101:0x100)); case ACPI_HP_METHOD_WLAN_ENABLE_IF_RADIO_ON: sc->wlan_enable_if_radio_on = arg; acpi_hp_evaluate_auto_on_off(sc); break; case ACPI_HP_METHOD_WLAN_DISABLE_IF_RADIO_OFF: sc->wlan_disable_if_radio_off = arg; 
acpi_hp_evaluate_auto_on_off(sc); break; case ACPI_HP_METHOD_BLUETOOTH_ENABLED: return (acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, arg?0x202:0x200)); case ACPI_HP_METHOD_BLUETOOTH_ENABLE_IF_RADIO_ON: sc->bluetooth_enable_if_radio_on = arg; acpi_hp_evaluate_auto_on_off(sc); break; case ACPI_HP_METHOD_BLUETOOTH_DISABLE_IF_RADIO_OFF: sc->bluetooth_disable_if_radio_off = arg?1:0; acpi_hp_evaluate_auto_on_off(sc); break; case ACPI_HP_METHOD_WWAN_ENABLED: return (acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_WIRELESS_COMMAND, 1, arg?0x404:0x400)); case ACPI_HP_METHOD_WWAN_ENABLE_IF_RADIO_ON: sc->wwan_enable_if_radio_on = arg?1:0; acpi_hp_evaluate_auto_on_off(sc); break; case ACPI_HP_METHOD_WWAN_DISABLE_IF_RADIO_OFF: sc->wwan_disable_if_radio_off = arg?1:0; acpi_hp_evaluate_auto_on_off(sc); break; case ACPI_HP_METHOD_ALS: return (acpi_hp_exec_wmi_command(sc->wmi_dev, ACPI_HP_WMI_ALS_COMMAND, 1, arg?1:0)); case ACPI_HP_METHOD_CMI_DETAIL: sc->cmi_detail = arg; if ((arg & ACPI_HP_CMI_DETAIL_SHOW_MAX_INSTANCE) != (oldarg & ACPI_HP_CMI_DETAIL_SHOW_MAX_INSTANCE)) { sc->cmi_order_size = -1; } break; case ACPI_HP_METHOD_VERBOSE: sc->verbose = arg; break; } } return (0); } static __inline void acpi_hp_free_buffer(ACPI_BUFFER* buf) { if (buf && buf->Pointer) { AcpiOsFree(buf->Pointer); } } static void acpi_hp_notify(ACPI_HANDLE h, UINT32 notify, void *context) { device_t dev = context; ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, notify); struct acpi_hp_softc *sc = device_get_softc(dev); ACPI_BUFFER response = { ACPI_ALLOCATE_BUFFER, NULL }; ACPI_OBJECT *obj; ACPI_WMI_GET_EVENT_DATA(sc->wmi_dev, notify, &response); obj = (ACPI_OBJECT*) response.Pointer; if (obj && obj->Type == ACPI_TYPE_BUFFER && obj->Buffer.Length == 8) { if (*((UINT8 *) obj->Buffer.Pointer) == 0x5) { acpi_hp_evaluate_auto_on_off(sc); } } acpi_hp_free_buffer(&response); } static int acpi_hp_exec_wmi_command(device_t wmi_dev, int command, int is_write, int val) { UINT32 
params[5] = { 0x55434553, is_write?2:1, command, is_write?4:0, val}; UINT32* result; ACPI_OBJECT *obj; ACPI_BUFFER in = { sizeof(params), ¶ms }; ACPI_BUFFER out = { ACPI_ALLOCATE_BUFFER, NULL }; int retval; if (ACPI_FAILURE(ACPI_WMI_EVALUATE_CALL(wmi_dev, ACPI_HP_WMI_BIOS_GUID, 0, 0x3, &in, &out))) { acpi_hp_free_buffer(&out); return (-EINVAL); } obj = out.Pointer; if (!obj || obj->Type != ACPI_TYPE_BUFFER) { acpi_hp_free_buffer(&out); return (-EINVAL); } result = (UINT32*) obj->Buffer.Pointer; retval = result[2]; if (result[1] > 0) { retval = result[1]; } acpi_hp_free_buffer(&out); return (retval); } static __inline char* acpi_hp_get_string_from_object(ACPI_OBJECT* obj, char* dst, size_t size) { int length; dst[0] = 0; if (obj->Type == ACPI_TYPE_STRING) { length = obj->String.Length+1; if (length > size) { length = size - 1; } strlcpy(dst, obj->String.Pointer, length); acpi_hp_hex_decode(dst); } return (dst); } /* * Read BIOS Setting block in instance "instance". * The block returned is ACPI_TYPE_PACKAGE which should contain the following * elements: * Index Meaning * 0 Setting Name [string] * 1 Value (comma separated, asterisk marks the current value) [string] * 2 Path within the bios hierarchy [string] * 3 IsReadOnly [int] * 4 DisplayInUI [int] * 5 RequiresPhysicalPresence [int] * 6 Sequence for ordering within the bios settings (absolute) [int] * 7 Length of prerequisites array [int] * 8..8+[7] PrerequisiteN [string] * 9+[7] Current value (in case of enum) [string] / Array length [int] * 10+[7] Enum length [int] / Array values * 11+[7]ff Enum value at index x [string] */ static int acpi_hp_get_cmi_block(device_t wmi_dev, const char* guid, UINT8 instance, char* outbuf, size_t outsize, UINT32* sequence, int detail) { ACPI_OBJECT *obj; ACPI_BUFFER out = { ACPI_ALLOCATE_BUFFER, NULL }; int i; int outlen; int size = 255; int has_enums = 0; int valuebase = 0; char string_buffer[size]; int enumbase; outlen = 0; outbuf[0] = 0; if 
(ACPI_FAILURE(ACPI_WMI_GET_BLOCK(wmi_dev, guid, instance, &out))) { acpi_hp_free_buffer(&out); return (-EINVAL); } obj = out.Pointer; if (!obj || obj->Type != ACPI_TYPE_PACKAGE) { acpi_hp_free_buffer(&out); return (-EINVAL); } if (obj->Package.Count >= 8 && obj->Package.Elements[7].Type == ACPI_TYPE_INTEGER) { valuebase = 8 + obj->Package.Elements[7].Integer.Value; } /* check if this matches our expectations based on limited knowledge */ if (valuebase > 7 && obj->Package.Count > valuebase + 1 && obj->Package.Elements[0].Type == ACPI_TYPE_STRING && obj->Package.Elements[1].Type == ACPI_TYPE_STRING && obj->Package.Elements[2].Type == ACPI_TYPE_STRING && obj->Package.Elements[3].Type == ACPI_TYPE_INTEGER && obj->Package.Elements[4].Type == ACPI_TYPE_INTEGER && obj->Package.Elements[5].Type == ACPI_TYPE_INTEGER && obj->Package.Elements[6].Type == ACPI_TYPE_INTEGER && obj->Package.Elements[valuebase].Type == ACPI_TYPE_STRING && obj->Package.Elements[valuebase+1].Type == ACPI_TYPE_INTEGER && obj->Package.Count > valuebase + obj->Package.Elements[valuebase+1].Integer.Value ) { enumbase = valuebase + 1; if (detail & ACPI_HP_CMI_DETAIL_PATHS) { strlcat(outbuf, acpi_hp_get_string_from_object( &obj->Package.Elements[2], string_buffer, size), outsize); outlen += 48; while (strlen(outbuf) < outlen) strlcat(outbuf, " ", outsize); } strlcat(outbuf, acpi_hp_get_string_from_object( &obj->Package.Elements[0], string_buffer, size), outsize); outlen += 43; while (strlen(outbuf) < outlen) strlcat(outbuf, " ", outsize); strlcat(outbuf, acpi_hp_get_string_from_object( &obj->Package.Elements[valuebase], string_buffer, size), outsize); outlen += 21; while (strlen(outbuf) < outlen) strlcat(outbuf, " ", outsize); for (i = 0; i < strlen(outbuf); ++i) if (outbuf[i] == '\\') outbuf[i] = '/'; if (detail & ACPI_HP_CMI_DETAIL_ENUMS) { for (i = enumbase + 1; i < enumbase + 1 + obj->Package.Elements[enumbase].Integer.Value; ++i) { acpi_hp_get_string_from_object( &obj->Package.Elements[i], 
string_buffer, size); if (strlen(string_buffer) > 1 || (strlen(string_buffer) == 1 && string_buffer[0] != ' ')) { if (has_enums) strlcat(outbuf, "/", outsize); else strlcat(outbuf, " (", outsize); strlcat(outbuf, string_buffer, outsize); has_enums = 1; } } } if (has_enums) strlcat(outbuf, ")", outsize); if (detail & ACPI_HP_CMI_DETAIL_FLAGS) { strlcat(outbuf, obj->Package.Elements[3].Integer.Value? " [ReadOnly]":"", outsize); strlcat(outbuf, obj->Package.Elements[4].Integer.Value? "":" [NOUI]", outsize); strlcat(outbuf, obj->Package.Elements[5].Integer.Value? " [RPP]":"", outsize); } *sequence = (UINT32) obj->Package.Elements[6].Integer.Value; } acpi_hp_free_buffer(&out); return (0); } /* * Convert given two digit hex string (hexin) to an UINT8 referenced * by byteout. * Return != 0 if the was a problem (invalid input) */ static __inline int acpi_hp_hex_to_int(const UINT8 *hexin, UINT8 *byteout) { unsigned int hi; unsigned int lo; hi = hexin[0]; lo = hexin[1]; if ('0' <= hi && hi <= '9') hi -= '0'; else if ('A' <= hi && hi <= 'F') hi -= ('A' - 10); else if ('a' <= hi && hi <= 'f') hi -= ('a' - 10); else return (1); if ('0' <= lo && lo <= '9') lo -= '0'; else if ('A' <= lo && lo <= 'F') lo -= ('A' - 10); else if ('a' <= lo && lo <= 'f') lo -= ('a' - 10); else return (1); *byteout = (hi << 4) + lo; return (0); } static void acpi_hp_hex_decode(char* buffer) { int i; int length = strlen(buffer); UINT8 *uin; UINT8 uout; if (((int)length/2)*2 == length || length < 10) return; for (i = 0; i= '0' && buffer[i] <= '9') || (buffer[i] >= 'A' && buffer[i] <= 'F'))) return; } for (i = 0; isi_drv1 == NULL) return (EBADF); sc = dev->si_drv1; ACPI_SERIAL_BEGIN(hp); if (sc->hpcmi_open_pid != 0) { ret = EBUSY; } else { if (sbuf_new(&sc->hpcmi_sbuf, NULL, 4096, SBUF_AUTOEXTEND) == NULL) { ret = ENXIO; } else { sc->hpcmi_open_pid = td->td_proc->p_pid; sc->hpcmi_bufptr = 0; ret = 0; } } ACPI_SERIAL_END(hp); return (ret); } /* * close hpcmi device */ static int acpi_hp_hpcmi_close(struct 
cdev* dev, int flags, int mode, struct thread *td) { struct acpi_hp_softc *sc; int ret; if (dev == NULL || dev->si_drv1 == NULL) return (EBADF); sc = dev->si_drv1; ACPI_SERIAL_BEGIN(hp); if (sc->hpcmi_open_pid == 0) { ret = EBADF; } else { if (sc->hpcmi_bufptr != -1) { sbuf_delete(&sc->hpcmi_sbuf); sc->hpcmi_bufptr = -1; } sc->hpcmi_open_pid = 0; ret = 0; } ACPI_SERIAL_END(hp); return (ret); } /* * Read from hpcmi bios information */ static int acpi_hp_hpcmi_read(struct cdev *dev, struct uio *buf, int flag) { struct acpi_hp_softc *sc; int pos, i, l, ret; UINT8 instance; UINT8 maxInstance; UINT32 sequence; int linesize = 1025; char line[linesize]; if (dev == NULL || dev->si_drv1 == NULL) return (EBADF); sc = dev->si_drv1; ACPI_SERIAL_BEGIN(hp); if (sc->hpcmi_open_pid != buf->uio_td->td_proc->p_pid || sc->hpcmi_bufptr == -1) { ret = EBADF; } else { if (!sbuf_done(&sc->hpcmi_sbuf)) { if (sc->cmi_order_size < 0) { maxInstance = sc->has_cmi; if (!(sc->cmi_detail & ACPI_HP_CMI_DETAIL_SHOW_MAX_INSTANCE) && maxInstance > 0) { maxInstance--; } sc->cmi_order_size = 0; for (instance = 0; instance < maxInstance; ++instance) { if (acpi_hp_get_cmi_block(sc->wmi_dev, ACPI_HP_WMI_CMI_GUID, instance, line, linesize, &sequence, sc->cmi_detail)) { instance = maxInstance; } else { pos = sc->cmi_order_size; for (i=0; icmi_order_size && i<127; ++i) { if (sc->cmi_order[i].sequence > sequence) { pos = i; break; } } for (i=sc->cmi_order_size; i>pos; --i) { sc->cmi_order[i].sequence = sc->cmi_order[i-1].sequence; sc->cmi_order[i].instance = sc->cmi_order[i-1].instance; } sc->cmi_order[pos].sequence = sequence; sc->cmi_order[pos].instance = instance; sc->cmi_order_size++; } } } for (i=0; icmi_order_size; ++i) { if (!acpi_hp_get_cmi_block(sc->wmi_dev, ACPI_HP_WMI_CMI_GUID, sc->cmi_order[i].instance, line, linesize, &sequence, sc->cmi_detail)) { sbuf_printf(&sc->hpcmi_sbuf, "%s\n", line); } } sbuf_finish(&sc->hpcmi_sbuf); } if (sbuf_len(&sc->hpcmi_sbuf) <= 0) { sbuf_delete(&sc->hpcmi_sbuf); 
sc->hpcmi_bufptr = -1; sc->hpcmi_open_pid = 0; ret = ENOMEM; } else { l = min(buf->uio_resid, sbuf_len(&sc->hpcmi_sbuf) - sc->hpcmi_bufptr); ret = (l > 0)?uiomove(sbuf_data(&sc->hpcmi_sbuf) + sc->hpcmi_bufptr, l, buf) : 0; sc->hpcmi_bufptr += l; } } ACPI_SERIAL_END(hp); return (ret); } Index: stable/9/sys/dev/acpi_support/acpi_ibm.c =================================================================== --- stable/9/sys/dev/acpi_support/acpi_ibm.c (revision 273911) +++ stable/9/sys/dev/acpi_support/acpi_ibm.c (revision 273912) @@ -1,1260 +1,1254 @@ /*- * Copyright (c) 2004 Takanori Watanabe * Copyright (c) 2005 Markus Brueffer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); /* * Driver for extra ACPI-controlled gadgets found on IBM ThinkPad laptops. * Inspired by the ibm-acpi and tpb projects which implement these features * on Linux. * * acpi-ibm: * tpb: */ #include "opt_acpi.h" #include #include #include #include #include #include #include "acpi_if.h" #include #include #include #include #include #include #include #define _COMPONENT ACPI_OEM ACPI_MODULE_NAME("IBM") /* Internal methods */ #define ACPI_IBM_METHOD_EVENTS 1 #define ACPI_IBM_METHOD_EVENTMASK 2 #define ACPI_IBM_METHOD_HOTKEY 3 #define ACPI_IBM_METHOD_BRIGHTNESS 4 #define ACPI_IBM_METHOD_VOLUME 5 #define ACPI_IBM_METHOD_MUTE 6 #define ACPI_IBM_METHOD_THINKLIGHT 7 #define ACPI_IBM_METHOD_BLUETOOTH 8 #define ACPI_IBM_METHOD_WLAN 9 #define ACPI_IBM_METHOD_FANSPEED 10 #define ACPI_IBM_METHOD_FANLEVEL 11 #define ACPI_IBM_METHOD_FANSTATUS 12 #define ACPI_IBM_METHOD_THERMAL 13 #define ACPI_IBM_METHOD_HANDLEREVENTS 14 /* Hotkeys/Buttons */ #define IBM_RTC_HOTKEY1 0x64 #define IBM_RTC_MASK_HOME (1 << 0) #define IBM_RTC_MASK_SEARCH (1 << 1) #define IBM_RTC_MASK_MAIL (1 << 2) #define IBM_RTC_MASK_WLAN (1 << 5) #define IBM_RTC_HOTKEY2 0x65 #define IBM_RTC_MASK_THINKPAD (1 << 3) #define IBM_RTC_MASK_ZOOM (1 << 5) #define IBM_RTC_MASK_VIDEO (1 << 6) #define IBM_RTC_MASK_HIBERNATE (1 << 7) #define IBM_RTC_THINKLIGHT 0x66 #define IBM_RTC_MASK_THINKLIGHT (1 << 4) #define IBM_RTC_SCREENEXPAND 0x67 #define IBM_RTC_MASK_SCREENEXPAND (1 << 5) #define IBM_RTC_BRIGHTNESS 0x6c #define IBM_RTC_MASK_BRIGHTNESS (1 << 5) #define IBM_RTC_VOLUME 0x6e #define IBM_RTC_MASK_VOLUME (1 << 7) /* Embedded Controller registers */ #define IBM_EC_BRIGHTNESS 0x31 #define IBM_EC_MASK_BRI 0x7 #define IBM_EC_VOLUME 0x30 #define IBM_EC_MASK_VOL 0xf #define IBM_EC_MASK_MUTE (1 << 6) #define IBM_EC_FANSTATUS 0x2F #define IBM_EC_MASK_FANLEVEL 0x3f #define IBM_EC_MASK_FANDISENGAGED (1 << 6) #define IBM_EC_MASK_FANSTATUS (1 << 7) #define IBM_EC_FANSPEED 0x84 /* CMOS Commands */ #define 
IBM_CMOS_VOLUME_DOWN 0 #define IBM_CMOS_VOLUME_UP 1 #define IBM_CMOS_VOLUME_MUTE 2 #define IBM_CMOS_BRIGHTNESS_UP 4 #define IBM_CMOS_BRIGHTNESS_DOWN 5 /* ACPI methods */ #define IBM_NAME_KEYLIGHT "KBLT" #define IBM_NAME_WLAN_BT_GET "GBDC" #define IBM_NAME_WLAN_BT_SET "SBDC" #define IBM_NAME_MASK_BT (1 << 1) #define IBM_NAME_MASK_WLAN (1 << 2) #define IBM_NAME_THERMAL_GET "TMP7" #define IBM_NAME_THERMAL_UPDT "UPDT" #define IBM_NAME_EVENTS_STATUS_GET "DHKC" #define IBM_NAME_EVENTS_MASK_GET "DHKN" #define IBM_NAME_EVENTS_STATUS_SET "MHKC" #define IBM_NAME_EVENTS_MASK_SET "MHKM" #define IBM_NAME_EVENTS_GET "MHKP" #define IBM_NAME_EVENTS_AVAILMASK "MHKA" /* Event Code */ #define IBM_EVENT_LCD_BACKLIGHT 0x03 #define IBM_EVENT_SUSPEND_TO_RAM 0x04 #define IBM_EVENT_BLUETOOTH 0x05 #define IBM_EVENT_SCREEN_EXPAND 0x07 #define IBM_EVENT_SUSPEND_TO_DISK 0x0c #define IBM_EVENT_BRIGHTNESS_UP 0x10 #define IBM_EVENT_BRIGHTNESS_DOWN 0x11 #define IBM_EVENT_THINKLIGHT 0x12 #define IBM_EVENT_ZOOM 0x14 #define IBM_EVENT_VOLUME_UP 0x15 #define IBM_EVENT_VOLUME_DOWN 0x16 #define IBM_EVENT_MUTE 0x17 #define IBM_EVENT_ACCESS_IBM_BUTTON 0x18 #define ABS(x) (((x) < 0)? 
-(x) : (x)) struct acpi_ibm_softc { device_t dev; ACPI_HANDLE handle; /* Embedded controller */ device_t ec_dev; ACPI_HANDLE ec_handle; /* CMOS */ ACPI_HANDLE cmos_handle; /* Fan status */ ACPI_HANDLE fan_handle; int fan_levels; /* Keylight commands and states */ ACPI_HANDLE light_handle; int light_cmd_on; int light_cmd_off; int light_val; int light_get_supported; int light_set_supported; /* led(4) interface */ struct cdev *led_dev; int led_busy; int led_state; int wlan_bt_flags; int thermal_updt_supported; unsigned int events_availmask; unsigned int events_initialmask; int events_mask_supported; int events_enable; unsigned int handler_events; struct sysctl_ctx_list *sysctl_ctx; struct sysctl_oid *sysctl_tree; }; static struct { char *name; int method; char *description; - int access; + int flag_rdonly; } acpi_ibm_sysctls[] = { { .name = "events", .method = ACPI_IBM_METHOD_EVENTS, .description = "ACPI events enable", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "eventmask", .method = ACPI_IBM_METHOD_EVENTMASK, .description = "ACPI eventmask", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "hotkey", .method = ACPI_IBM_METHOD_HOTKEY, .description = "Key Status", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "lcd_brightness", .method = ACPI_IBM_METHOD_BRIGHTNESS, .description = "LCD Brightness", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "volume", .method = ACPI_IBM_METHOD_VOLUME, .description = "Volume", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "mute", .method = ACPI_IBM_METHOD_MUTE, .description = "Mute", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "thinklight", .method = ACPI_IBM_METHOD_THINKLIGHT, .description = "Thinklight enable", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "bluetooth", .method = ACPI_IBM_METHOD_BLUETOOTH, .description = "Bluetooth enable", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "wlan", .method = ACPI_IBM_METHOD_WLAN, .description = "WLAN enable", - .access = CTLTYPE_INT | 
CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "fan_speed", .method = ACPI_IBM_METHOD_FANSPEED, .description = "Fan speed", - .access = CTLTYPE_INT | CTLFLAG_RD + .flag_rdonly = 1 }, { .name = "fan_level", .method = ACPI_IBM_METHOD_FANLEVEL, .description = "Fan level", - .access = CTLTYPE_INT | CTLFLAG_RW }, { .name = "fan", .method = ACPI_IBM_METHOD_FANSTATUS, .description = "Fan enable", - .access = CTLTYPE_INT | CTLFLAG_RW }, { NULL, 0, NULL, 0 } }; ACPI_SERIAL_DECL(ibm, "ACPI IBM extras"); static int acpi_ibm_probe(device_t dev); static int acpi_ibm_attach(device_t dev); static int acpi_ibm_detach(device_t dev); static int acpi_ibm_resume(device_t dev); static void ibm_led(void *softc, int onoff); static void ibm_led_task(struct acpi_ibm_softc *sc, int pending __unused); static int acpi_ibm_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_ibm_sysctl_init(struct acpi_ibm_softc *sc, int method); static int acpi_ibm_sysctl_get(struct acpi_ibm_softc *sc, int method); static int acpi_ibm_sysctl_set(struct acpi_ibm_softc *sc, int method, int val); static int acpi_ibm_eventmask_set(struct acpi_ibm_softc *sc, int val); static int acpi_ibm_thermal_sysctl(SYSCTL_HANDLER_ARGS); static int acpi_ibm_handlerevents_sysctl(SYSCTL_HANDLER_ARGS); static void acpi_ibm_notify(ACPI_HANDLE h, UINT32 notify, void *context); static int acpi_ibm_brightness_set(struct acpi_ibm_softc *sc, int arg); static int acpi_ibm_bluetooth_set(struct acpi_ibm_softc *sc, int arg); static int acpi_ibm_thinklight_set(struct acpi_ibm_softc *sc, int arg); static int acpi_ibm_volume_set(struct acpi_ibm_softc *sc, int arg); static int acpi_ibm_mute_set(struct acpi_ibm_softc *sc, int arg); static device_method_t acpi_ibm_methods[] = { /* Device interface */ DEVMETHOD(device_probe, acpi_ibm_probe), DEVMETHOD(device_attach, acpi_ibm_attach), DEVMETHOD(device_detach, acpi_ibm_detach), DEVMETHOD(device_resume, acpi_ibm_resume), {0, 0} }; static driver_t acpi_ibm_driver = { "acpi_ibm", acpi_ibm_methods, sizeof(struct 
acpi_ibm_softc), }; static devclass_t acpi_ibm_devclass; DRIVER_MODULE(acpi_ibm, acpi, acpi_ibm_driver, acpi_ibm_devclass, 0, 0); MODULE_DEPEND(acpi_ibm, acpi, 1, 1, 1); static char *ibm_ids[] = {"IBM0068", "LEN0068", NULL}; static void ibm_led(void *softc, int onoff) { struct acpi_ibm_softc* sc = (struct acpi_ibm_softc*) softc; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); if (sc->led_busy) return; sc->led_busy = 1; sc->led_state = onoff; AcpiOsExecute(OSL_NOTIFY_HANDLER, (void *)ibm_led_task, sc); } static void ibm_led_task(struct acpi_ibm_softc *sc, int pending __unused) { ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_BEGIN(ibm); acpi_ibm_sysctl_set(sc, ACPI_IBM_METHOD_THINKLIGHT, sc->led_state); ACPI_SERIAL_END(ibm); sc->led_busy = 0; } static int acpi_ibm_probe(device_t dev) { if (acpi_disabled("ibm") || ACPI_ID_PROBE(device_get_parent(dev), dev, ibm_ids) == NULL || device_get_unit(dev) != 0) return (ENXIO); device_set_desc(dev, "IBM ThinkPad ACPI Extras"); return (0); } static int acpi_ibm_attach(device_t dev) { struct acpi_ibm_softc *sc; devclass_t ec_devclass; ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); sc = device_get_softc(dev); sc->dev = dev; sc->handle = acpi_get_handle(dev); /* Look for the first embedded controller */ if (!(ec_devclass = devclass_find ("acpi_ec"))) { if (bootverbose) device_printf(dev, "Couldn't find acpi_ec devclass\n"); return (EINVAL); } if (!(sc->ec_dev = devclass_get_device(ec_devclass, 0))) { if (bootverbose) device_printf(dev, "Couldn't find acpi_ec device\n"); return (EINVAL); } sc->ec_handle = acpi_get_handle(sc->ec_dev); /* Get the sysctl tree */ sc->sysctl_ctx = device_get_sysctl_ctx(dev); sc->sysctl_tree = device_get_sysctl_tree(dev); /* Look for event mask and hook up the nodes */ sc->events_mask_supported = ACPI_SUCCESS(acpi_GetInteger(sc->handle, IBM_NAME_EVENTS_MASK_GET, &sc->events_initialmask)); if (sc->events_mask_supported) { SYSCTL_ADD_UINT(sc->sysctl_ctx, 
SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "initialmask", CTLFLAG_RD, &sc->events_initialmask, 0, "Initial eventmask"); /* The availmask is the bitmask of supported events */ if (ACPI_FAILURE(acpi_GetInteger(sc->handle, IBM_NAME_EVENTS_AVAILMASK, &sc->events_availmask))) sc->events_availmask = 0xffffffff; SYSCTL_ADD_UINT(sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "availmask", CTLFLAG_RD, &sc->events_availmask, 0, "Mask of supported events"); } /* Hook up proc nodes */ for (int i = 0; acpi_ibm_sysctls[i].name != NULL; i++) { if (!acpi_ibm_sysctl_init(sc, acpi_ibm_sysctls[i].method)) continue; - SYSCTL_ADD_PROC(sc->sysctl_ctx, - SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, - acpi_ibm_sysctls[i].name, acpi_ibm_sysctls[i].access, - sc, i, acpi_ibm_sysctl, "I", - acpi_ibm_sysctls[i].description); + if (acpi_ibm_sysctls[i].flag_rdonly != 0) { + SYSCTL_ADD_PROC(sc->sysctl_ctx, + SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, + acpi_ibm_sysctls[i].name, CTLTYPE_INT | CTLFLAG_RD, + sc, i, acpi_ibm_sysctl, "I", + acpi_ibm_sysctls[i].description); + } else { + SYSCTL_ADD_PROC(sc->sysctl_ctx, + SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, + acpi_ibm_sysctls[i].name, CTLTYPE_INT | CTLFLAG_RW, + sc, i, acpi_ibm_sysctl, "I", + acpi_ibm_sysctls[i].description); + } } /* Hook up thermal node */ if (acpi_ibm_sysctl_init(sc, ACPI_IBM_METHOD_THERMAL)) { SYSCTL_ADD_PROC(sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "thermal", CTLTYPE_INT | CTLFLAG_RD, sc, 0, acpi_ibm_thermal_sysctl, "I", "Thermal zones"); } /* Hook up handlerevents node */ if (acpi_ibm_sysctl_init(sc, ACPI_IBM_METHOD_HANDLEREVENTS)) { SYSCTL_ADD_PROC(sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "handlerevents", CTLTYPE_STRING | CTLFLAG_RW, sc, 0, acpi_ibm_handlerevents_sysctl, "I", "devd(8) events handled by acpi_ibm"); } /* Handle notifies */ AcpiInstallNotifyHandler(sc->handle, ACPI_DEVICE_NOTIFY, acpi_ibm_notify, dev); /* Hook up light to led(4) */ if 
(sc->light_set_supported) sc->led_dev = led_create_state(ibm_led, sc, "thinklight", sc->light_val); return (0); } static int acpi_ibm_detach(device_t dev) { ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); struct acpi_ibm_softc *sc = device_get_softc(dev); /* Disable events and restore eventmask */ ACPI_SERIAL_BEGIN(ibm); acpi_ibm_sysctl_set(sc, ACPI_IBM_METHOD_EVENTS, 0); acpi_ibm_sysctl_set(sc, ACPI_IBM_METHOD_EVENTMASK, sc->events_initialmask); ACPI_SERIAL_END(ibm); AcpiRemoveNotifyHandler(sc->handle, ACPI_DEVICE_NOTIFY, acpi_ibm_notify); if (sc->led_dev != NULL) led_destroy(sc->led_dev); return (0); } static int acpi_ibm_resume(device_t dev) { struct acpi_ibm_softc *sc = device_get_softc(dev); ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); ACPI_SERIAL_BEGIN(ibm); for (int i = 0; acpi_ibm_sysctls[i].name != NULL; i++) { int val; - if ((acpi_ibm_sysctls[i].access & CTLFLAG_RD) == 0) { - continue; - } - val = acpi_ibm_sysctl_get(sc, i); - if ((acpi_ibm_sysctls[i].access & CTLFLAG_WR) == 0) { + if (acpi_ibm_sysctls[i].flag_rdonly != 0) continue; - } acpi_ibm_sysctl_set(sc, i, val); } ACPI_SERIAL_END(ibm); return (0); } static int acpi_ibm_eventmask_set(struct acpi_ibm_softc *sc, int val) { ACPI_OBJECT arg[2]; ACPI_OBJECT_LIST args; ACPI_STATUS status; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(ibm); args.Count = 2; args.Pointer = arg; arg[0].Type = ACPI_TYPE_INTEGER; arg[1].Type = ACPI_TYPE_INTEGER; for (int i = 0; i < 32; ++i) { arg[0].Integer.Value = i+1; arg[1].Integer.Value = (((1 << i) & val) != 0); status = AcpiEvaluateObject(sc->handle, IBM_NAME_EVENTS_MASK_SET, &args, NULL); if (ACPI_FAILURE(status)) return (status); } return (0); } static int acpi_ibm_sysctl(SYSCTL_HANDLER_ARGS) { struct acpi_ibm_softc *sc; int arg; int error = 0; int function; int method; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = (struct acpi_ibm_softc *)oidp->oid_arg1; function = oidp->oid_arg2; method = acpi_ibm_sysctls[function].method; 
ACPI_SERIAL_BEGIN(ibm); arg = acpi_ibm_sysctl_get(sc, method); error = sysctl_handle_int(oidp, &arg, 0, req); /* Sanity check */ if (error != 0 || req->newptr == NULL) goto out; /* Update */ error = acpi_ibm_sysctl_set(sc, method, arg); out: ACPI_SERIAL_END(ibm); return (error); } static int acpi_ibm_sysctl_get(struct acpi_ibm_softc *sc, int method) { UINT64 val_ec; int val = 0, key; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(ibm); switch (method) { case ACPI_IBM_METHOD_EVENTS: acpi_GetInteger(sc->handle, IBM_NAME_EVENTS_STATUS_GET, &val); break; case ACPI_IBM_METHOD_EVENTMASK: if (sc->events_mask_supported) acpi_GetInteger(sc->handle, IBM_NAME_EVENTS_MASK_GET, &val); break; case ACPI_IBM_METHOD_HOTKEY: /* * Construct the hotkey as a bitmask as illustrated below. * Note that whenever a key was pressed, the respecting bit * toggles and nothing else changes. * +--+--+-+-+-+-+-+-+-+-+-+-+ * |11|10|9|8|7|6|5|4|3|2|1|0| * +--+--+-+-+-+-+-+-+-+-+-+-+ * | | | | | | | | | | | | * | | | | | | | | | | | +- Home Button * | | | | | | | | | | +--- Search Button * | | | | | | | | | +----- Mail Button * | | | | | | | | +------- Thinkpad Button * | | | | | | | +--------- Zoom (Fn + Space) * | | | | | | +----------- WLAN Button * | | | | | +------------- Video Button * | | | | +--------------- Hibernate Button * | | | +----------------- Thinklight Button * | | +------------------- Screen expand (Fn + F8) * | +--------------------- Brightness * +------------------------ Volume/Mute */ key = rtcin(IBM_RTC_HOTKEY1); val = (IBM_RTC_MASK_HOME | IBM_RTC_MASK_SEARCH | IBM_RTC_MASK_MAIL | IBM_RTC_MASK_WLAN) & key; key = rtcin(IBM_RTC_HOTKEY2); val |= (IBM_RTC_MASK_THINKPAD | IBM_RTC_MASK_VIDEO | IBM_RTC_MASK_HIBERNATE) & key; val |= (IBM_RTC_MASK_ZOOM & key) >> 1; key = rtcin(IBM_RTC_THINKLIGHT); val |= (IBM_RTC_MASK_THINKLIGHT & key) << 4; key = rtcin(IBM_RTC_SCREENEXPAND); val |= (IBM_RTC_MASK_THINKLIGHT & key) << 4; key = rtcin(IBM_RTC_BRIGHTNESS); val |= 
(IBM_RTC_MASK_BRIGHTNESS & key) << 5; key = rtcin(IBM_RTC_VOLUME); val |= (IBM_RTC_MASK_VOLUME & key) << 4; break; case ACPI_IBM_METHOD_BRIGHTNESS: ACPI_EC_READ(sc->ec_dev, IBM_EC_BRIGHTNESS, &val_ec, 1); val = val_ec & IBM_EC_MASK_BRI; break; case ACPI_IBM_METHOD_VOLUME: ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1); val = val_ec & IBM_EC_MASK_VOL; break; case ACPI_IBM_METHOD_MUTE: ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1); val = ((val_ec & IBM_EC_MASK_MUTE) == IBM_EC_MASK_MUTE); break; case ACPI_IBM_METHOD_THINKLIGHT: if (sc->light_get_supported) acpi_GetInteger(sc->ec_handle, IBM_NAME_KEYLIGHT, &val); else val = sc->light_val; break; case ACPI_IBM_METHOD_BLUETOOTH: acpi_GetInteger(sc->handle, IBM_NAME_WLAN_BT_GET, &val); sc->wlan_bt_flags = val; val = ((val & IBM_NAME_MASK_BT) != 0); break; case ACPI_IBM_METHOD_WLAN: acpi_GetInteger(sc->handle, IBM_NAME_WLAN_BT_GET, &val); sc->wlan_bt_flags = val; val = ((val & IBM_NAME_MASK_WLAN) != 0); break; case ACPI_IBM_METHOD_FANSPEED: if (sc->fan_handle) { if(ACPI_FAILURE(acpi_GetInteger(sc->fan_handle, NULL, &val))) val = -1; } else { ACPI_EC_READ(sc->ec_dev, IBM_EC_FANSPEED, &val_ec, 2); val = val_ec; } break; case ACPI_IBM_METHOD_FANLEVEL: /* * The IBM_EC_FANSTATUS register works as follows: * Bit 0-5 indicate the level at which the fan operates. Only * values between 0 and 7 have an effect. 
Everything * above 7 is treated the same as level 7 * Bit 6 overrides the fan speed limit if set to 1 * Bit 7 indicates at which mode the fan operates: * manual (0) or automatic (1) */ if (!sc->fan_handle) { ACPI_EC_READ(sc->ec_dev, IBM_EC_FANSTATUS, &val_ec, 1); val = val_ec & IBM_EC_MASK_FANLEVEL; } break; case ACPI_IBM_METHOD_FANSTATUS: if (!sc->fan_handle) { ACPI_EC_READ(sc->ec_dev, IBM_EC_FANSTATUS, &val_ec, 1); val = (val_ec & IBM_EC_MASK_FANSTATUS) == IBM_EC_MASK_FANSTATUS; } else val = -1; break; } return (val); } static int acpi_ibm_sysctl_set(struct acpi_ibm_softc *sc, int method, int arg) { int val; UINT64 val_ec; ACPI_STATUS status; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(ibm); switch (method) { case ACPI_IBM_METHOD_EVENTS: if (arg < 0 || arg > 1) return (EINVAL); status = acpi_SetInteger(sc->handle, IBM_NAME_EVENTS_STATUS_SET, arg); if (ACPI_FAILURE(status)) return (status); if (sc->events_mask_supported) return acpi_ibm_eventmask_set(sc, sc->events_availmask); break; case ACPI_IBM_METHOD_EVENTMASK: if (sc->events_mask_supported) return acpi_ibm_eventmask_set(sc, arg); break; case ACPI_IBM_METHOD_BRIGHTNESS: return acpi_ibm_brightness_set(sc, arg); break; case ACPI_IBM_METHOD_VOLUME: return acpi_ibm_volume_set(sc, arg); break; case ACPI_IBM_METHOD_MUTE: return acpi_ibm_mute_set(sc, arg); break; case ACPI_IBM_METHOD_THINKLIGHT: return acpi_ibm_thinklight_set(sc, arg); break; case ACPI_IBM_METHOD_BLUETOOTH: return acpi_ibm_bluetooth_set(sc, arg); break; case ACPI_IBM_METHOD_FANLEVEL: if (arg < 0 || arg > 7) return (EINVAL); if (!sc->fan_handle) { /* Read the current fanstatus */ ACPI_EC_READ(sc->ec_dev, IBM_EC_FANSTATUS, &val_ec, 1); val = val_ec & (~IBM_EC_MASK_FANLEVEL); return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_FANSTATUS, val | arg, 1); } break; case ACPI_IBM_METHOD_FANSTATUS: if (arg < 0 || arg > 1) return (EINVAL); if (!sc->fan_handle) { /* Read the current fanstatus */ ACPI_EC_READ(sc->ec_dev, IBM_EC_FANSTATUS, &val_ec, 
1); return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_FANSTATUS, (arg == 1) ? (val_ec | IBM_EC_MASK_FANSTATUS) : (val_ec & (~IBM_EC_MASK_FANSTATUS)), 1); } break; } return (0); } static int acpi_ibm_sysctl_init(struct acpi_ibm_softc *sc, int method) { int dummy; ACPI_OBJECT_TYPE cmos_t; ACPI_HANDLE ledb_handle; switch (method) { case ACPI_IBM_METHOD_EVENTS: /* Events are disabled by default */ return (TRUE); case ACPI_IBM_METHOD_EVENTMASK: return (sc->events_mask_supported); case ACPI_IBM_METHOD_HOTKEY: case ACPI_IBM_METHOD_BRIGHTNESS: case ACPI_IBM_METHOD_VOLUME: case ACPI_IBM_METHOD_MUTE: /* EC is required here, which was aready checked before */ return (TRUE); case ACPI_IBM_METHOD_THINKLIGHT: sc->cmos_handle = NULL; sc->light_get_supported = ACPI_SUCCESS(acpi_GetInteger( sc->ec_handle, IBM_NAME_KEYLIGHT, &sc->light_val)); if ((ACPI_SUCCESS(AcpiGetHandle(sc->handle, "\\UCMS", &sc->light_handle)) || ACPI_SUCCESS(AcpiGetHandle(sc->handle, "\\CMOS", &sc->light_handle)) || ACPI_SUCCESS(AcpiGetHandle(sc->handle, "\\CMS", &sc->light_handle))) && ACPI_SUCCESS(AcpiGetType(sc->light_handle, &cmos_t)) && cmos_t == ACPI_TYPE_METHOD) { sc->light_cmd_on = 0x0c; sc->light_cmd_off = 0x0d; sc->cmos_handle = sc->light_handle; } else if (ACPI_SUCCESS(AcpiGetHandle(sc->handle, "\\LGHT", &sc->light_handle))) { sc->light_cmd_on = 1; sc->light_cmd_off = 0; } else sc->light_handle = NULL; sc->light_set_supported = (sc->light_handle && ACPI_FAILURE(AcpiGetHandle(sc->ec_handle, "LEDB", &ledb_handle))); if (sc->light_get_supported) return (TRUE); if (sc->light_set_supported) { sc->light_val = 0; return (TRUE); } return (FALSE); case ACPI_IBM_METHOD_BLUETOOTH: case ACPI_IBM_METHOD_WLAN: if (ACPI_SUCCESS(acpi_GetInteger(sc->handle, IBM_NAME_WLAN_BT_GET, &dummy))) return (TRUE); return (FALSE); case ACPI_IBM_METHOD_FANSPEED: /* * Some models report the fan speed in levels from 0-7 * Newer models report it contiguously */ sc->fan_levels = (ACPI_SUCCESS(AcpiGetHandle(sc->handle, "GFAN", 
&sc->fan_handle)) || ACPI_SUCCESS(AcpiGetHandle(sc->handle, "\\FSPD", &sc->fan_handle))); return (TRUE); case ACPI_IBM_METHOD_FANLEVEL: case ACPI_IBM_METHOD_FANSTATUS: /* * Fan status is only supported on those models, * which report fan RPM contiguously, not in levels */ if (sc->fan_levels) return (FALSE); return (TRUE); case ACPI_IBM_METHOD_THERMAL: if (ACPI_SUCCESS(acpi_GetInteger(sc->ec_handle, IBM_NAME_THERMAL_GET, &dummy))) { sc->thermal_updt_supported = ACPI_SUCCESS(acpi_GetInteger(sc->ec_handle, IBM_NAME_THERMAL_UPDT, &dummy)); return (TRUE); } return (FALSE); case ACPI_IBM_METHOD_HANDLEREVENTS: return (TRUE); } return (FALSE); } static int acpi_ibm_thermal_sysctl(SYSCTL_HANDLER_ARGS) { struct acpi_ibm_softc *sc; int error = 0; char temp_cmd[] = "TMP0"; int temp[8]; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = (struct acpi_ibm_softc *)oidp->oid_arg1; ACPI_SERIAL_BEGIN(ibm); for (int i = 0; i < 8; ++i) { temp_cmd[3] = '0' + i; /* * The TMPx methods seem to return +/- 128 or 0 * when the respecting sensor is not available */ if (ACPI_FAILURE(acpi_GetInteger(sc->ec_handle, temp_cmd, &temp[i])) || ABS(temp[i]) == 128 || temp[i] == 0) temp[i] = -1; else if (sc->thermal_updt_supported) /* Temperature is reported in tenth of Kelvin */ temp[i] = (temp[i] - 2732 + 5) / 10; } error = sysctl_handle_opaque(oidp, &temp, 8*sizeof(int), req); ACPI_SERIAL_END(ibm); return (error); } static int acpi_ibm_handlerevents_sysctl(SYSCTL_HANDLER_ARGS) { struct acpi_ibm_softc *sc; int error = 0; struct sbuf sb; char *cp, *ep; int l, val; unsigned int handler_events; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); sc = (struct acpi_ibm_softc *)oidp->oid_arg1; if (sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND) == NULL) return (ENOMEM); ACPI_SERIAL_BEGIN(ibm); /* Get old values if this is a get request. 
*/ if (req->newptr == NULL) { for (int i = 0; i < 8 * sizeof(sc->handler_events); i++) if (sc->handler_events & (1 << i)) sbuf_printf(&sb, "0x%02x ", i + 1); if (sbuf_len(&sb) == 0) sbuf_printf(&sb, "NONE"); } sbuf_trim(&sb); sbuf_finish(&sb); /* Copy out the old values to the user. */ error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb)); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) goto out; /* If the user is setting a string, parse it. */ handler_events = 0; cp = (char *)req->newptr; while (*cp) { if (isspace(*cp)) { cp++; continue; } ep = cp; while (*ep && !isspace(*ep)) ep++; l = ep - cp; if (l == 0) break; if (strncmp(cp, "NONE", 4) == 0) { cp = ep; continue; } if (l >= 3 && cp[0] == '0' && (cp[1] == 'X' || cp[1] == 'x')) val = strtoul(cp, &ep, 16); else val = strtoul(cp, &ep, 10); if (val == 0 || ep == cp || val >= 8 * sizeof(handler_events)) { cp[l] = '\0'; device_printf(sc->dev, "invalid event code: %s\n", cp); error = EINVAL; goto out; } handler_events |= 1 << (val - 1); cp = ep; } sc->handler_events = handler_events; out: ACPI_SERIAL_END(ibm); return (error); } static int acpi_ibm_brightness_set(struct acpi_ibm_softc *sc, int arg) { int val, step; UINT64 val_ec; ACPI_OBJECT Arg; ACPI_OBJECT_LIST Args; ACPI_STATUS status; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(ibm); if (arg < 0 || arg > 7) return (EINVAL); /* Read the current brightness */ status = ACPI_EC_READ(sc->ec_dev, IBM_EC_BRIGHTNESS, &val_ec, 1); if (ACPI_FAILURE(status)) return (status); if (sc->cmos_handle) { val = val_ec & IBM_EC_MASK_BRI; Args.Count = 1; Args.Pointer = &Arg; Arg.Type = ACPI_TYPE_INTEGER; Arg.Integer.Value = (arg > val) ? IBM_CMOS_BRIGHTNESS_UP : IBM_CMOS_BRIGHTNESS_DOWN; step = (arg > val) ? 
1 : -1; for (int i = val; i != arg; i += step) { status = AcpiEvaluateObject(sc->cmos_handle, NULL, &Args, NULL); if (ACPI_FAILURE(status)) { /* Record the last value */ if (i != val) { ACPI_EC_WRITE(sc->ec_dev, IBM_EC_BRIGHTNESS, i - step, 1); } return (status); } } } return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_BRIGHTNESS, arg, 1); } static int acpi_ibm_bluetooth_set(struct acpi_ibm_softc *sc, int arg) { int val; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(ibm); if (arg < 0 || arg > 1) return (EINVAL); val = (arg == 1) ? sc->wlan_bt_flags | IBM_NAME_MASK_BT : sc->wlan_bt_flags & (~IBM_NAME_MASK_BT); return acpi_SetInteger(sc->handle, IBM_NAME_WLAN_BT_SET, val); } static int acpi_ibm_thinklight_set(struct acpi_ibm_softc *sc, int arg) { ACPI_OBJECT Arg; ACPI_OBJECT_LIST Args; ACPI_STATUS status; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(ibm); if (arg < 0 || arg > 1) return (EINVAL); if (sc->light_set_supported) { Args.Count = 1; Args.Pointer = &Arg; Arg.Type = ACPI_TYPE_INTEGER; Arg.Integer.Value = arg ? sc->light_cmd_on : sc->light_cmd_off; status = AcpiEvaluateObject(sc->light_handle, NULL, &Args, NULL); if (ACPI_SUCCESS(status)) sc->light_val = arg; return (status); } return (0); } static int acpi_ibm_volume_set(struct acpi_ibm_softc *sc, int arg) { int val, step; UINT64 val_ec; ACPI_OBJECT Arg; ACPI_OBJECT_LIST Args; ACPI_STATUS status; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(ibm); if (arg < 0 || arg > 14) return (EINVAL); /* Read the current volume */ status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1); if (ACPI_FAILURE(status)) return (status); if (sc->cmos_handle) { val = val_ec & IBM_EC_MASK_VOL; Args.Count = 1; Args.Pointer = &Arg; Arg.Type = ACPI_TYPE_INTEGER; Arg.Integer.Value = (arg > val) ? IBM_CMOS_VOLUME_UP : IBM_CMOS_VOLUME_DOWN; step = (arg > val) ? 
1 : -1; for (int i = val; i != arg; i += step) { status = AcpiEvaluateObject(sc->cmos_handle, NULL, &Args, NULL); if (ACPI_FAILURE(status)) { /* Record the last value */ if (i != val) { val_ec = i - step + (val_ec & (~IBM_EC_MASK_VOL)); ACPI_EC_WRITE(sc->ec_dev, IBM_EC_VOLUME, val_ec, 1); } return (status); } } } val_ec = arg + (val_ec & (~IBM_EC_MASK_VOL)); return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_VOLUME, val_ec, 1); } static int acpi_ibm_mute_set(struct acpi_ibm_softc *sc, int arg) { UINT64 val_ec; ACPI_OBJECT Arg; ACPI_OBJECT_LIST Args; ACPI_STATUS status; ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__); ACPI_SERIAL_ASSERT(ibm); if (arg < 0 || arg > 1) return (EINVAL); status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1); if (ACPI_FAILURE(status)) return (status); if (sc->cmos_handle) { Args.Count = 1; Args.Pointer = &Arg; Arg.Type = ACPI_TYPE_INTEGER; Arg.Integer.Value = IBM_CMOS_VOLUME_MUTE; status = AcpiEvaluateObject(sc->cmos_handle, NULL, &Args, NULL); if (ACPI_FAILURE(status)) return (status); } val_ec = (arg == 1) ? val_ec | IBM_EC_MASK_MUTE : val_ec & (~IBM_EC_MASK_MUTE); return ACPI_EC_WRITE(sc->ec_dev, IBM_EC_VOLUME, val_ec, 1); } static void acpi_ibm_eventhandler(struct acpi_ibm_softc *sc, int arg) { int val; UINT64 val_ec; ACPI_STATUS status; ACPI_SERIAL_BEGIN(ibm); switch (arg) { case IBM_EVENT_SUSPEND_TO_RAM: power_pm_suspend(POWER_SLEEP_STATE_SUSPEND); break; case IBM_EVENT_BLUETOOTH: acpi_ibm_bluetooth_set(sc, (sc->wlan_bt_flags == 0)); break; case IBM_EVENT_BRIGHTNESS_UP: case IBM_EVENT_BRIGHTNESS_DOWN: /* Read the current brightness */ status = ACPI_EC_READ(sc->ec_dev, IBM_EC_BRIGHTNESS, &val_ec, 1); if (ACPI_FAILURE(status)) return; val = val_ec & IBM_EC_MASK_BRI; val = (arg == IBM_EVENT_BRIGHTNESS_UP) ? 
val + 1 : val - 1; acpi_ibm_brightness_set(sc, val); break; case IBM_EVENT_THINKLIGHT: acpi_ibm_thinklight_set(sc, (sc->light_val == 0)); break; case IBM_EVENT_VOLUME_UP: case IBM_EVENT_VOLUME_DOWN: /* Read the current volume */ status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1); if (ACPI_FAILURE(status)) return; val = val_ec & IBM_EC_MASK_VOL; val = (arg == IBM_EVENT_VOLUME_UP) ? val + 1 : val - 1; acpi_ibm_volume_set(sc, val); break; case IBM_EVENT_MUTE: /* Read the current value */ status = ACPI_EC_READ(sc->ec_dev, IBM_EC_VOLUME, &val_ec, 1); if (ACPI_FAILURE(status)) return; val = ((val_ec & IBM_EC_MASK_MUTE) == IBM_EC_MASK_MUTE); acpi_ibm_mute_set(sc, (val == 0)); break; default: break; } ACPI_SERIAL_END(ibm); } static void acpi_ibm_notify(ACPI_HANDLE h, UINT32 notify, void *context) { int event, arg, type; device_t dev = context; struct acpi_ibm_softc *sc = device_get_softc(dev); ACPI_FUNCTION_TRACE_U32((char *)(uintptr_t)__func__, notify); if (notify != 0x80) device_printf(dev, "Unknown notify\n"); for (;;) { acpi_GetInteger(acpi_get_handle(dev), IBM_NAME_EVENTS_GET, &event); if (event == 0) break; type = (event >> 12) & 0xf; arg = event & 0xfff; switch (type) { case 1: if (!(sc->events_availmask & (1 << (arg - 1)))) { device_printf(dev, "Unknown key %d\n", arg); break; } /* Execute event handler */ if (sc->handler_events & (1 << (arg - 1))) acpi_ibm_eventhandler(sc, (arg & 0xff)); /* Notify devd(8) */ acpi_UserNotify("IBM", h, (arg & 0xff)); break; default: break; } } } Index: stable/9/sys/dev/acpi_support/acpi_sony.c =================================================================== --- stable/9/sys/dev/acpi_support/acpi_sony.c (revision 273911) +++ stable/9/sys/dev/acpi_support/acpi_sony.c (revision 273912) @@ -1,182 +1,191 @@ /*- * Copyright (c) 2004 Takanori Watanabe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include #include #include #include #include "acpi_if.h" #include #include #include #define _COMPONENT ACPI_OEM ACPI_MODULE_NAME("Sony") #define ACPI_SONY_GET_PID "GPID" /* * SNY5001 * This is the ACPI handle for the "Sony Notebook Control" driver under * Windows. * It provides several methods within the ACPI namespace, including: * [GS]BRT [GS]PBR [GS]CTR [GS]PCR [GS]CMI [CDPW GCDP]? GWDP PWAK PWRN * * SNY6001 * This is the ACPI handle for the "Sony Programmable I/O" driver under * Windows. * It is not yet supported by this driver, but provides control over the * power to the bluetooth, built-in camera and HSDPA modem devices in some * laptops, and also allows some control of the fan speed. 
*/ struct acpi_sony_softc { int pid; }; static struct acpi_sony_name_list { char *nodename; char *getmethod; char *setmethod; char *comment; } acpi_sony_oids[] = { { "brightness", "GBRT", "SBRT", "Display Brightness"}, { "brightness_default", "GPBR", "SPBR", "Default Display Brightness"}, { "contrast", "GCTR", "SCTR", "Display Contrast"}, { "bass_gain", "GMGB", "SMGB", "Multimedia Bass Gain"}, { "pcr", "GPCR", "SPCR", "???"}, #if 0 { "cmi", "GCMI", "SCMI", "???"}, #endif { "wdp", "GWDP", NULL, "???"}, { "cdp", "GCDP", "CDPW", "CD Power"}, /*shares [\GL03]&0x8 flag*/ { "azp", "GAZP", "AZPW", "Audio Power"}, { "lnp", "GLNP", "LNPW", "LAN Power"}, { NULL, NULL, NULL } }; static int acpi_sony_probe(device_t dev); static int acpi_sony_attach(device_t dev); static int acpi_sony_detach(device_t dev); static int sysctl_acpi_sony_gen_handler(SYSCTL_HANDLER_ARGS); static device_method_t acpi_sony_methods[] = { /* Device interface */ DEVMETHOD(device_probe, acpi_sony_probe), DEVMETHOD(device_attach, acpi_sony_attach), DEVMETHOD(device_detach, acpi_sony_detach), {0, 0} }; static driver_t acpi_sony_driver = { "acpi_sony", acpi_sony_methods, sizeof(struct acpi_sony_softc), }; static devclass_t acpi_sony_devclass; DRIVER_MODULE(acpi_sony, acpi, acpi_sony_driver, acpi_sony_devclass, 0, 0); MODULE_DEPEND(acpi_sony, acpi, 1, 1, 1); static char *sny_id[] = {"SNY5001", NULL}; static int acpi_sony_probe(device_t dev) { int ret = ENXIO; if (ACPI_ID_PROBE(device_get_parent(dev), dev, sny_id)) { device_set_desc(dev, "Sony notebook controller"); ret = 0; } return (ret); } static int acpi_sony_attach(device_t dev) { struct acpi_sony_softc *sc; int i; sc = device_get_softc(dev); acpi_GetInteger(acpi_get_handle(dev), ACPI_SONY_GET_PID, &sc->pid); device_printf(dev, "PID %x\n", sc->pid); - for (i = 0 ; acpi_sony_oids[i].nodename != NULL; i++){ - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - i, acpi_sony_oids[i].nodename , CTLTYPE_INT | - 
((acpi_sony_oids[i].setmethod)? CTLFLAG_RW: CTLFLAG_RD), - dev, i, sysctl_acpi_sony_gen_handler, "I", - acpi_sony_oids[i].comment); + for (i = 0 ; acpi_sony_oids[i].nodename != NULL; i++) { + if (acpi_sony_oids[i].setmethod != NULL) { + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + i, acpi_sony_oids[i].nodename , + CTLTYPE_INT | CTLFLAG_RW, + dev, i, sysctl_acpi_sony_gen_handler, "I", + acpi_sony_oids[i].comment); + } else { + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + i, acpi_sony_oids[i].nodename , + CTLTYPE_INT | CTLFLAG_RD, + dev, i, sysctl_acpi_sony_gen_handler, "I", + acpi_sony_oids[i].comment); + } } return (0); } static int acpi_sony_detach(device_t dev) { return (0); } #if 0 static int acpi_sony_suspend(device_t dev) { struct acpi_sony_softc *sc = device_get_softc(dev); return (0); } static int acpi_sony_resume(device_t dev) { return (0); } #endif static int sysctl_acpi_sony_gen_handler(SYSCTL_HANDLER_ARGS) { device_t dev = arg1; int function = oidp->oid_arg2; int error = 0, val; acpi_GetInteger(acpi_get_handle(dev), acpi_sony_oids[function].getmethod, &val); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr || !acpi_sony_oids[function].setmethod) return (error); acpi_SetInteger(acpi_get_handle(dev), acpi_sony_oids[function].setmethod, val); return (0); } Index: stable/9/sys/dev/bxe/bxe.c =================================================================== --- stable/9/sys/dev/bxe/bxe.c (revision 273911) +++ stable/9/sys/dev/bxe/bxe.c (revision 273912) @@ -1,18824 +1,18824 @@ /*- * Copyright (c) 2007-2014 QLogic Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define BXE_DRIVER_VERSION "1.78.78" #include "bxe.h" #include "ecore_sp.h" #include "ecore_init.h" #include "ecore_init_ops.h" #include "57710_int_offsets.h" #include "57711_int_offsets.h" #include "57712_int_offsets.h" /* * CTLTYPE_U64 and sysctl_handle_64 were added in r217616. Define these * explicitly here for older kernels that don't include this changeset. */ #ifndef CTLTYPE_U64 #define CTLTYPE_U64 CTLTYPE_QUAD #define sysctl_handle_64 sysctl_handle_quad #endif /* * CSUM_TCP_IPV6 and CSUM_UDP_IPV6 were added in r236170. Define these * here as zero(0) for older kernels that don't include this changeset * thereby masking the functionality. */ #ifndef CSUM_TCP_IPV6 #define CSUM_TCP_IPV6 0 #define CSUM_UDP_IPV6 0 #endif /* * pci_find_cap was added in r219865. Re-define this at pci_find_extcap * for older kernels that don't include this changeset. 
*/
#if __FreeBSD_version < 900035
#define pci_find_cap pci_find_extcap
#endif

#define BXE_DEF_SB_ATT_IDX 0x0001
#define BXE_DEF_SB_IDX     0x0002

/*
 * FLR Support - bxe_pf_flr_clnup() is called during nic_load in the per
 * function HW initialization.
 *
 * FLR_POLL_CNT is the number of FLR_WAIT_INTERVAL sized steps that fit in
 * FLR_WAIT_USEC (10000 / 50 = 200).
 */
#define FLR_WAIT_USEC     10000 /* 10 msecs */
#define FLR_WAIT_INTERVAL 50    /* usecs */
#define FLR_POLL_CNT      (FLR_WAIT_USEC / FLR_WAIT_INTERVAL) /* 200 */

/*
 * NOTE(review): presumably a set of PBF buffer credit registers for
 * port/queue 'pN' - confirm against the code that fills this in.
 */
struct pbf_pN_buf_regs {
    int pN;
    uint32_t init_crd;
    uint32_t crd;
    uint32_t crd_freed;
};

/*
 * NOTE(review): presumably a set of PBF command line registers for
 * port/queue 'pN' - confirm against the code that fills this in.
 */
struct pbf_pN_cmd_regs {
    int pN;
    uint32_t lines_occup;
    uint32_t lines_freed;
};

/*
 * PCI Device ID Table used by bxe_probe().
 *
 * Every entry carries five initializers: BRCM_VENDORID, a CHIP_NUM_* id,
 * two PCI_ANY_ID wildcards and a description string (presumably vendor,
 * device, subvendor, subdevice, description - the layout of
 * struct bxe_device_type is not visible here). The table is terminated by
 * an all-zero entry with a NULL description. The VF parts and the 57840
 * 2x20GbE part are compiled out with #if 0.
 */
#define BXE_DEVDESC_MAX 64
static struct bxe_device_type bxe_devs[] = {
    {
        BRCM_VENDORID,
        CHIP_NUM_57710,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57710 10GbE"
    },
    {
        BRCM_VENDORID,
        CHIP_NUM_57711,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57711 10GbE"
    },
    {
        BRCM_VENDORID,
        CHIP_NUM_57711E,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57711E 10GbE"
    },
    {
        BRCM_VENDORID,
        CHIP_NUM_57712,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57712 10GbE"
    },
    {
        BRCM_VENDORID,
        CHIP_NUM_57712_MF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57712 MF 10GbE"
    },
#if 0
    {
        BRCM_VENDORID,
        CHIP_NUM_57712_VF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57712 VF 10GbE"
    },
#endif
    {
        BRCM_VENDORID,
        CHIP_NUM_57800,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57800 10GbE"
    },
    {
        BRCM_VENDORID,
        CHIP_NUM_57800_MF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57800 MF 10GbE"
    },
#if 0
    {
        BRCM_VENDORID,
        CHIP_NUM_57800_VF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57800 VF 10GbE"
    },
#endif
    {
        BRCM_VENDORID,
        CHIP_NUM_57810,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57810 10GbE"
    },
    {
        BRCM_VENDORID,
        CHIP_NUM_57810_MF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57810 MF 10GbE"
    },
#if 0
    {
        BRCM_VENDORID,
        CHIP_NUM_57810_VF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57810 VF 10GbE"
    },
#endif
    {
        BRCM_VENDORID,
        CHIP_NUM_57811,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57811 10GbE"
    },
    {
        BRCM_VENDORID,
        CHIP_NUM_57811_MF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57811 MF 10GbE"
    },
#if 0
    {
        BRCM_VENDORID,
        CHIP_NUM_57811_VF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57811 VF 10GbE"
    },
#endif
    {
        BRCM_VENDORID,
        CHIP_NUM_57840_4_10,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57840 4x10GbE"
    },
#if 0
    {
        BRCM_VENDORID,
        CHIP_NUM_57840_2_20,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57840 2x20GbE"
    },
#endif
    {
        BRCM_VENDORID,
        CHIP_NUM_57840_MF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57840 MF 10GbE"
    },
#if 0
    {
        BRCM_VENDORID,
        CHIP_NUM_57840_VF,
        PCI_ANY_ID, PCI_ANY_ID,
        "QLogic NetXtreme II BCM57840 VF 10GbE"
    },
#endif
    {
        0, 0, 0, 0, NULL
    }
};

/* malloc(9) type declared and defined here under the name "bxe_ilt" */
MALLOC_DECLARE(M_BXE_ILT);
MALLOC_DEFINE(M_BXE_ILT, "bxe_ilt", "bxe ILT pointer");

/*
 * FreeBSD device entry points.
 */
static int bxe_probe(device_t);
static int bxe_attach(device_t);
static int bxe_detach(device_t);
static int bxe_shutdown(device_t);

/*
 * FreeBSD KLD module/device interface event handler method.
*/
static device_method_t bxe_methods[] = {
    /* Device interface (device_if.h) */
    DEVMETHOD(device_probe,     bxe_probe),
    DEVMETHOD(device_attach,    bxe_attach),
    DEVMETHOD(device_detach,    bxe_detach),
    DEVMETHOD(device_shutdown,  bxe_shutdown),
#if 0
    /* suspend/resume handlers are compiled out */
    DEVMETHOD(device_suspend,   bxe_suspend),
    DEVMETHOD(device_resume,    bxe_resume),
#endif
    /* Bus interface (bus_if.h) */
    DEVMETHOD(bus_print_child,  bus_generic_print_child),
    DEVMETHOD(bus_driver_added, bus_generic_driver_added),
    KOBJMETHOD_END
};

/*
 * FreeBSD KLD Module data declaration
 */
static driver_t bxe_driver = {
    "bxe",                   /* module name */
    bxe_methods,             /* event handler */
    sizeof(struct bxe_softc) /* extra data */
};

/*
 * FreeBSD dev class is needed to manage dev instances and
 * to associate with a bus type
 */
static devclass_t bxe_devclass;

/* the module depends on "pci" and "ether" and attaches to the pci bus */
MODULE_DEPEND(bxe, pci, 1, 1, 1);
MODULE_DEPEND(bxe, ether, 1, 1, 1);
DRIVER_MODULE(bxe, pci, bxe_driver, bxe_devclass, 0, 0);

/* resources needed for unloading a previously loaded device */

#define BXE_PREV_WAIT_NEEDED 1
/* MTX_DEF mutex, initialized at boot/module load through MTX_SYSINIT */
struct mtx bxe_prev_mtx;
MTX_SYSINIT(bxe_prev_mtx, &bxe_prev_mtx, "bxe_prev_lock", MTX_DEF);
/*
 * One element of bxe_prev_list.
 * NOTE(review): presumably keyed by PCI bus/slot and chip path and guarded
 * by bxe_prev_mtx - confirm against the list users.
 */
struct bxe_prev_list_node {
    LIST_ENTRY(bxe_prev_list_node) node;
    uint8_t bus;
    uint8_t slot;
    uint8_t path;
    uint8_t aer; /* XXX automatic error recovery */
    uint8_t undi;
};
static LIST_HEAD(, bxe_prev_list_node) bxe_prev_list = LIST_HEAD_INITIALIZER(bxe_prev_list);

static int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */

/* Tunable device values...
*/

/*
 * Each knob below is a file-scope variable that is registered twice: as a
 * loader tunable (TUNABLE_*) and as a sysctl under the hw.bxe node
 * (SYSCTL_*), both using the same name.
 */
SYSCTL_NODE(_hw, OID_AUTO, bxe, CTLFLAG_RD, 0, "bxe driver parameters");

/* Debug */
unsigned long bxe_debug = 0;
TUNABLE_ULONG("hw.bxe.debug", &bxe_debug);
SYSCTL_ULONG(_hw_bxe, OID_AUTO, debug, (CTLFLAG_RDTUN),
             &bxe_debug, 0, "Debug logging mode");

/* Interrupt Mode: 0 (IRQ), 1 (MSI/IRQ), and 2 (MSI-X/MSI/IRQ) */
static int bxe_interrupt_mode = INTR_MODE_MSIX;
TUNABLE_INT("hw.bxe.interrupt_mode", &bxe_interrupt_mode);
SYSCTL_INT(_hw_bxe, OID_AUTO, interrupt_mode, CTLFLAG_RDTUN,
           &bxe_interrupt_mode, 0, "Interrupt (MSI-X/MSI/INTx) mode");

/* Number of Queues: 0 (Auto) or 1 to 16 (fixed queue number) */
static int bxe_queue_count = 4;
TUNABLE_INT("hw.bxe.queue_count", &bxe_queue_count);
SYSCTL_INT(_hw_bxe, OID_AUTO, queue_count, CTLFLAG_RDTUN,
           &bxe_queue_count, 0, "Multi-Queue queue count");

/* max number of buffers per queue (default RX_BD_USABLE) */
static int bxe_max_rx_bufs = 0;
TUNABLE_INT("hw.bxe.max_rx_bufs", &bxe_max_rx_bufs);
SYSCTL_INT(_hw_bxe, OID_AUTO, max_rx_bufs, CTLFLAG_RDTUN,
           &bxe_max_rx_bufs, 0, "Maximum Number of Rx Buffers Per Queue");

/* Host interrupt coalescing RX tick timer (usecs) */
static int bxe_hc_rx_ticks = 25;
TUNABLE_INT("hw.bxe.hc_rx_ticks", &bxe_hc_rx_ticks);
SYSCTL_INT(_hw_bxe, OID_AUTO, hc_rx_ticks, CTLFLAG_RDTUN,
           &bxe_hc_rx_ticks, 0, "Host Coalescing Rx ticks");

/* Host interrupt coalescing TX tick timer (usecs) */
static int bxe_hc_tx_ticks = 50;
TUNABLE_INT("hw.bxe.hc_tx_ticks", &bxe_hc_tx_ticks);
SYSCTL_INT(_hw_bxe, OID_AUTO, hc_tx_ticks, CTLFLAG_RDTUN,
           &bxe_hc_tx_ticks, 0, "Host Coalescing Tx ticks");

/*
 * Maximum number of Rx packets to process at a time
 *
 * NOTE(review): 0xffffffff does not fit in an int; the stored value is
 * implementation-defined (presumably -1 on the supported targets) - confirm
 * how the consumer of this value compares against it. This is also the only
 * knob here registered with CTLFLAG_TUN rather than CTLFLAG_RDTUN, together
 * with max_aggregation_size below.
 */
static int bxe_rx_budget = 0xffffffff;
TUNABLE_INT("hw.bxe.rx_budget", &bxe_rx_budget);
SYSCTL_INT(_hw_bxe, OID_AUTO, rx_budget, CTLFLAG_TUN,
           &bxe_rx_budget, 0, "Rx processing budget");

/* Maximum LRO aggregation size */
static int bxe_max_aggregation_size = 0;
TUNABLE_INT("hw.bxe.max_aggregation_size", &bxe_max_aggregation_size);
SYSCTL_INT(_hw_bxe, OID_AUTO,
max_aggregation_size, CTLFLAG_TUN, &bxe_max_aggregation_size, 0, "max aggregation size"); /* PCI MRRS: -1 (Auto), 0 (128B), 1 (256B), 2 (512B), 3 (1KB) */ static int bxe_mrrs = -1; TUNABLE_INT("hw.bxe.mrrs", &bxe_mrrs); SYSCTL_INT(_hw_bxe, OID_AUTO, mrrs, CTLFLAG_RDTUN, &bxe_mrrs, 0, "PCIe maximum read request size"); /* AutoGrEEEn: 0 (hardware default), 1 (force on), 2 (force off) */ static int bxe_autogreeen = 0; TUNABLE_INT("hw.bxe.autogreeen", &bxe_autogreeen); SYSCTL_INT(_hw_bxe, OID_AUTO, autogreeen, CTLFLAG_RDTUN, &bxe_autogreeen, 0, "AutoGrEEEn support"); /* 4-tuple RSS support for UDP: 0 (disabled), 1 (enabled) */ static int bxe_udp_rss = 0; TUNABLE_INT("hw.bxe.udp_rss", &bxe_udp_rss); SYSCTL_INT(_hw_bxe, OID_AUTO, udp_rss, CTLFLAG_RDTUN, &bxe_udp_rss, 0, "UDP RSS support"); #define STAT_NAME_LEN 32 /* no stat names below can be longer than this */ #define STATS_OFFSET32(stat_name) \ (offsetof(struct bxe_eth_stats, stat_name) / 4) #define Q_STATS_OFFSET32(stat_name) \ (offsetof(struct bxe_eth_q_stats, stat_name) / 4) static const struct { uint32_t offset; uint32_t size; uint32_t flags; #define STATS_FLAGS_PORT 1 #define STATS_FLAGS_FUNC 2 /* MF only cares about function stats */ #define STATS_FLAGS_BOTH (STATS_FLAGS_FUNC | STATS_FLAGS_PORT) char string[STAT_NAME_LEN]; } bxe_eth_stats_arr[] = { { STATS_OFFSET32(total_bytes_received_hi), 8, STATS_FLAGS_BOTH, "rx_bytes" }, { STATS_OFFSET32(error_bytes_received_hi), 8, STATS_FLAGS_BOTH, "rx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_bcast_packets" }, { STATS_OFFSET32(rx_stat_dot3statsfcserrors_hi), 8, STATS_FLAGS_PORT, "rx_crc_errors" }, { STATS_OFFSET32(rx_stat_dot3statsalignmenterrors_hi), 8, STATS_FLAGS_PORT, "rx_align_errors" }, { 
STATS_OFFSET32(rx_stat_etherstatsundersizepkts_hi), 8, STATS_FLAGS_PORT, "rx_undersize_packets" }, { STATS_OFFSET32(etherstatsoverrsizepkts_hi), 8, STATS_FLAGS_PORT, "rx_oversize_packets" }, { STATS_OFFSET32(rx_stat_etherstatsfragments_hi), 8, STATS_FLAGS_PORT, "rx_fragments" }, { STATS_OFFSET32(rx_stat_etherstatsjabbers_hi), 8, STATS_FLAGS_PORT, "rx_jabbers" }, { STATS_OFFSET32(no_buff_discard_hi), 8, STATS_FLAGS_BOTH, "rx_discards" }, { STATS_OFFSET32(mac_filter_discard), 4, STATS_FLAGS_PORT, "rx_filtered_packets" }, { STATS_OFFSET32(mf_tag_discard), 4, STATS_FLAGS_PORT, "rx_mf_tag_discard" }, { STATS_OFFSET32(pfc_frames_received_hi), 8, STATS_FLAGS_PORT, "pfc_frames_received" }, { STATS_OFFSET32(pfc_frames_sent_hi), 8, STATS_FLAGS_PORT, "pfc_frames_sent" }, { STATS_OFFSET32(brb_drop_hi), 8, STATS_FLAGS_PORT, "rx_brb_discard" }, { STATS_OFFSET32(brb_truncate_hi), 8, STATS_FLAGS_PORT, "rx_brb_truncate" }, { STATS_OFFSET32(pause_frames_received_hi), 8, STATS_FLAGS_PORT, "rx_pause_frames" }, { STATS_OFFSET32(rx_stat_maccontrolframesreceived_hi), 8, STATS_FLAGS_PORT, "rx_mac_ctrl_frames" }, { STATS_OFFSET32(nig_timer_max), 4, STATS_FLAGS_PORT, "rx_constant_pause_events" }, { STATS_OFFSET32(total_bytes_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bytes" }, { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), 8, STATS_FLAGS_PORT, "tx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bcast_packets" }, { STATS_OFFSET32(tx_stat_dot3statsinternalmactransmiterrors_hi), 8, STATS_FLAGS_PORT, "tx_mac_errors" }, { STATS_OFFSET32(rx_stat_dot3statscarriersenseerrors_hi), 8, STATS_FLAGS_PORT, "tx_carrier_errors" }, { STATS_OFFSET32(tx_stat_dot3statssinglecollisionframes_hi), 8, STATS_FLAGS_PORT, "tx_single_collisions" }, { 
STATS_OFFSET32(tx_stat_dot3statsmultiplecollisionframes_hi), 8, STATS_FLAGS_PORT, "tx_multi_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsdeferredtransmissions_hi), 8, STATS_FLAGS_PORT, "tx_deferred" }, { STATS_OFFSET32(tx_stat_dot3statsexcessivecollisions_hi), 8, STATS_FLAGS_PORT, "tx_excess_collisions" }, { STATS_OFFSET32(tx_stat_dot3statslatecollisions_hi), 8, STATS_FLAGS_PORT, "tx_late_collisions" }, { STATS_OFFSET32(tx_stat_etherstatscollisions_hi), 8, STATS_FLAGS_PORT, "tx_total_collisions" }, { STATS_OFFSET32(tx_stat_etherstatspkts64octets_hi), 8, STATS_FLAGS_PORT, "tx_64_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts65octetsto127octets_hi), 8, STATS_FLAGS_PORT, "tx_65_to_127_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts128octetsto255octets_hi), 8, STATS_FLAGS_PORT, "tx_128_to_255_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts256octetsto511octets_hi), 8, STATS_FLAGS_PORT, "tx_256_to_511_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts512octetsto1023octets_hi), 8, STATS_FLAGS_PORT, "tx_512_to_1023_byte_packets" }, { STATS_OFFSET32(etherstatspkts1024octetsto1522octets_hi), 8, STATS_FLAGS_PORT, "tx_1024_to_1522_byte_packets" }, { STATS_OFFSET32(etherstatspktsover1522octets_hi), 8, STATS_FLAGS_PORT, "tx_1523_to_9022_byte_packets" }, { STATS_OFFSET32(pause_frames_sent_hi), 8, STATS_FLAGS_PORT, "tx_pause_frames" }, { STATS_OFFSET32(total_tpa_aggregations_hi), 8, STATS_FLAGS_FUNC, "tpa_aggregations" }, { STATS_OFFSET32(total_tpa_aggregated_frames_hi), 8, STATS_FLAGS_FUNC, "tpa_aggregated_frames"}, { STATS_OFFSET32(total_tpa_bytes_hi), 8, STATS_FLAGS_FUNC, "tpa_bytes"}, #if 0 { STATS_OFFSET32(recoverable_error), 4, STATS_FLAGS_FUNC, "recoverable_errors" }, { STATS_OFFSET32(unrecoverable_error), 4, STATS_FLAGS_FUNC, "unrecoverable_errors" }, #endif { STATS_OFFSET32(eee_tx_lpi), 4, STATS_FLAGS_PORT, "eee_tx_lpi"}, { STATS_OFFSET32(rx_calls), 4, STATS_FLAGS_FUNC, "rx_calls"}, { STATS_OFFSET32(rx_pkts), 4, STATS_FLAGS_FUNC, 
"rx_pkts"}, { STATS_OFFSET32(rx_tpa_pkts), 4, STATS_FLAGS_FUNC, "rx_tpa_pkts"}, { STATS_OFFSET32(rx_soft_errors), 4, STATS_FLAGS_FUNC, "rx_soft_errors"}, { STATS_OFFSET32(rx_hw_csum_errors), 4, STATS_FLAGS_FUNC, "rx_hw_csum_errors"}, { STATS_OFFSET32(rx_ofld_frames_csum_ip), 4, STATS_FLAGS_FUNC, "rx_ofld_frames_csum_ip"}, { STATS_OFFSET32(rx_ofld_frames_csum_tcp_udp), 4, STATS_FLAGS_FUNC, "rx_ofld_frames_csum_tcp_udp"}, { STATS_OFFSET32(rx_budget_reached), 4, STATS_FLAGS_FUNC, "rx_budget_reached"}, { STATS_OFFSET32(tx_pkts), 4, STATS_FLAGS_FUNC, "tx_pkts"}, { STATS_OFFSET32(tx_soft_errors), 4, STATS_FLAGS_FUNC, "tx_soft_errors"}, { STATS_OFFSET32(tx_ofld_frames_csum_ip), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_ip"}, { STATS_OFFSET32(tx_ofld_frames_csum_tcp), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_tcp"}, { STATS_OFFSET32(tx_ofld_frames_csum_udp), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_udp"}, { STATS_OFFSET32(tx_ofld_frames_lso), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_lso"}, { STATS_OFFSET32(tx_ofld_frames_lso_hdr_splits), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_lso_hdr_splits"}, { STATS_OFFSET32(tx_encap_failures), 4, STATS_FLAGS_FUNC, "tx_encap_failures"}, { STATS_OFFSET32(tx_hw_queue_full), 4, STATS_FLAGS_FUNC, "tx_hw_queue_full"}, { STATS_OFFSET32(tx_hw_max_queue_depth), 4, STATS_FLAGS_FUNC, "tx_hw_max_queue_depth"}, { STATS_OFFSET32(tx_dma_mapping_failure), 4, STATS_FLAGS_FUNC, "tx_dma_mapping_failure"}, { STATS_OFFSET32(tx_max_drbr_queue_depth), 4, STATS_FLAGS_FUNC, "tx_max_drbr_queue_depth"}, { STATS_OFFSET32(tx_window_violation_std), 4, STATS_FLAGS_FUNC, "tx_window_violation_std"}, { STATS_OFFSET32(tx_window_violation_tso), 4, STATS_FLAGS_FUNC, "tx_window_violation_tso"}, #if 0 { STATS_OFFSET32(tx_unsupported_tso_request_ipv6), 4, STATS_FLAGS_FUNC, "tx_unsupported_tso_request_ipv6"}, { STATS_OFFSET32(tx_unsupported_tso_request_not_tcp), 4, STATS_FLAGS_FUNC, "tx_unsupported_tso_request_not_tcp"}, #endif { STATS_OFFSET32(tx_chain_lost_mbuf), 4, 
STATS_FLAGS_FUNC, "tx_chain_lost_mbuf"}, { STATS_OFFSET32(tx_frames_deferred), 4, STATS_FLAGS_FUNC, "tx_frames_deferred"}, { STATS_OFFSET32(tx_queue_xoff), 4, STATS_FLAGS_FUNC, "tx_queue_xoff"}, { STATS_OFFSET32(mbuf_defrag_attempts), 4, STATS_FLAGS_FUNC, "mbuf_defrag_attempts"}, { STATS_OFFSET32(mbuf_defrag_failures), 4, STATS_FLAGS_FUNC, "mbuf_defrag_failures"}, { STATS_OFFSET32(mbuf_rx_bd_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_bd_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_bd_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_bd_mapping_failed"}, { STATS_OFFSET32(mbuf_rx_tpa_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_tpa_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_tpa_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_tpa_mapping_failed"}, { STATS_OFFSET32(mbuf_rx_sge_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_sge_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_sge_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_sge_mapping_failed"}, { STATS_OFFSET32(mbuf_alloc_tx), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tx"}, { STATS_OFFSET32(mbuf_alloc_rx), 4, STATS_FLAGS_FUNC, "mbuf_alloc_rx"}, { STATS_OFFSET32(mbuf_alloc_sge), 4, STATS_FLAGS_FUNC, "mbuf_alloc_sge"}, { STATS_OFFSET32(mbuf_alloc_tpa), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tpa"} }; static const struct { uint32_t offset; uint32_t size; char string[STAT_NAME_LEN]; } bxe_eth_q_stats_arr[] = { { Q_STATS_OFFSET32(total_bytes_received_hi), 8, "rx_bytes" }, { Q_STATS_OFFSET32(total_unicast_packets_received_hi), 8, "rx_ucast_packets" }, { Q_STATS_OFFSET32(total_multicast_packets_received_hi), 8, "rx_mcast_packets" }, { Q_STATS_OFFSET32(total_broadcast_packets_received_hi), 8, "rx_bcast_packets" }, { Q_STATS_OFFSET32(no_buff_discard_hi), 8, "rx_discards" }, { Q_STATS_OFFSET32(total_bytes_transmitted_hi), 8, "tx_bytes" }, { Q_STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, "tx_ucast_packets" }, { Q_STATS_OFFSET32(total_multicast_packets_transmitted_hi), 8, "tx_mcast_packets" }, { 
Q_STATS_OFFSET32(total_broadcast_packets_transmitted_hi), 8, "tx_bcast_packets" }, { Q_STATS_OFFSET32(total_tpa_aggregations_hi), 8, "tpa_aggregations" }, { Q_STATS_OFFSET32(total_tpa_aggregated_frames_hi), 8, "tpa_aggregated_frames"}, { Q_STATS_OFFSET32(total_tpa_bytes_hi), 8, "tpa_bytes"}, { Q_STATS_OFFSET32(rx_calls), 4, "rx_calls"}, { Q_STATS_OFFSET32(rx_pkts), 4, "rx_pkts"}, { Q_STATS_OFFSET32(rx_tpa_pkts), 4, "rx_tpa_pkts"}, { Q_STATS_OFFSET32(rx_soft_errors), 4, "rx_soft_errors"}, { Q_STATS_OFFSET32(rx_hw_csum_errors), 4, "rx_hw_csum_errors"}, { Q_STATS_OFFSET32(rx_ofld_frames_csum_ip), 4, "rx_ofld_frames_csum_ip"}, { Q_STATS_OFFSET32(rx_ofld_frames_csum_tcp_udp), 4, "rx_ofld_frames_csum_tcp_udp"}, { Q_STATS_OFFSET32(rx_budget_reached), 4, "rx_budget_reached"}, { Q_STATS_OFFSET32(tx_pkts), 4, "tx_pkts"}, { Q_STATS_OFFSET32(tx_soft_errors), 4, "tx_soft_errors"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_ip), 4, "tx_ofld_frames_csum_ip"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_tcp), 4, "tx_ofld_frames_csum_tcp"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_udp), 4, "tx_ofld_frames_csum_udp"}, { Q_STATS_OFFSET32(tx_ofld_frames_lso), 4, "tx_ofld_frames_lso"}, { Q_STATS_OFFSET32(tx_ofld_frames_lso_hdr_splits), 4, "tx_ofld_frames_lso_hdr_splits"}, { Q_STATS_OFFSET32(tx_encap_failures), 4, "tx_encap_failures"}, { Q_STATS_OFFSET32(tx_hw_queue_full), 4, "tx_hw_queue_full"}, { Q_STATS_OFFSET32(tx_hw_max_queue_depth), 4, "tx_hw_max_queue_depth"}, { Q_STATS_OFFSET32(tx_dma_mapping_failure), 4, "tx_dma_mapping_failure"}, { Q_STATS_OFFSET32(tx_max_drbr_queue_depth), 4, "tx_max_drbr_queue_depth"}, { Q_STATS_OFFSET32(tx_window_violation_std), 4, "tx_window_violation_std"}, { Q_STATS_OFFSET32(tx_window_violation_tso), 4, "tx_window_violation_tso"}, #if 0 { Q_STATS_OFFSET32(tx_unsupported_tso_request_ipv6), 4, "tx_unsupported_tso_request_ipv6"}, { Q_STATS_OFFSET32(tx_unsupported_tso_request_not_tcp), 4, "tx_unsupported_tso_request_not_tcp"}, #endif { 
Q_STATS_OFFSET32(tx_chain_lost_mbuf), 4, "tx_chain_lost_mbuf"}, { Q_STATS_OFFSET32(tx_frames_deferred), 4, "tx_frames_deferred"}, { Q_STATS_OFFSET32(tx_queue_xoff), 4, "tx_queue_xoff"}, { Q_STATS_OFFSET32(mbuf_defrag_attempts), 4, "mbuf_defrag_attempts"}, { Q_STATS_OFFSET32(mbuf_defrag_failures), 4, "mbuf_defrag_failures"}, { Q_STATS_OFFSET32(mbuf_rx_bd_alloc_failed), 4, "mbuf_rx_bd_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_bd_mapping_failed), 4, "mbuf_rx_bd_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_rx_tpa_alloc_failed), 4, "mbuf_rx_tpa_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_tpa_mapping_failed), 4, "mbuf_rx_tpa_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_rx_sge_alloc_failed), 4, "mbuf_rx_sge_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_sge_mapping_failed), 4, "mbuf_rx_sge_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_alloc_tx), 4, "mbuf_alloc_tx"}, { Q_STATS_OFFSET32(mbuf_alloc_rx), 4, "mbuf_alloc_rx"}, { Q_STATS_OFFSET32(mbuf_alloc_sge), 4, "mbuf_alloc_sge"}, { Q_STATS_OFFSET32(mbuf_alloc_tpa), 4, "mbuf_alloc_tpa"} }; #define BXE_NUM_ETH_STATS ARRAY_SIZE(bxe_eth_stats_arr) #define BXE_NUM_ETH_Q_STATS ARRAY_SIZE(bxe_eth_q_stats_arr) static void bxe_cmng_fns_init(struct bxe_softc *sc, uint8_t read_cfg, uint8_t cmng_type); static int bxe_get_cmng_fns_mode(struct bxe_softc *sc); static void storm_memset_cmng(struct bxe_softc *sc, struct cmng_init *cmng, uint8_t port); static void bxe_set_reset_global(struct bxe_softc *sc); static void bxe_set_reset_in_progress(struct bxe_softc *sc); static uint8_t bxe_reset_is_done(struct bxe_softc *sc, int engine); static uint8_t bxe_clear_pf_load(struct bxe_softc *sc); static uint8_t bxe_chk_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print); static void bxe_int_disable(struct bxe_softc *sc); static int bxe_release_leader_lock(struct bxe_softc *sc); static void bxe_pf_disable(struct bxe_softc *sc); static void bxe_free_fp_buffers(struct bxe_softc *sc); static inline void bxe_update_rx_prod(struct bxe_softc *sc, struct 
bxe_fastpath *fp, uint16_t rx_bd_prod, uint16_t rx_cq_prod, uint16_t rx_sge_prod); static void bxe_link_report_locked(struct bxe_softc *sc); static void bxe_link_report(struct bxe_softc *sc); static void bxe_link_status_update(struct bxe_softc *sc); static void bxe_periodic_callout_func(void *xsc); static void bxe_periodic_start(struct bxe_softc *sc); static void bxe_periodic_stop(struct bxe_softc *sc); static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t prev_index, uint16_t index); static int bxe_alloc_rx_tpa_mbuf(struct bxe_fastpath *fp, int queue); static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index); static uint8_t bxe_txeof(struct bxe_softc *sc, struct bxe_fastpath *fp); static void bxe_task_fp(struct bxe_fastpath *fp); static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m, uint8_t contents); static int bxe_alloc_mem(struct bxe_softc *sc); static void bxe_free_mem(struct bxe_softc *sc); static int bxe_alloc_fw_stats_mem(struct bxe_softc *sc); static void bxe_free_fw_stats_mem(struct bxe_softc *sc); static int bxe_interrupt_attach(struct bxe_softc *sc); static void bxe_interrupt_detach(struct bxe_softc *sc); static void bxe_set_rx_mode(struct bxe_softc *sc); static int bxe_init_locked(struct bxe_softc *sc); static int bxe_stop_locked(struct bxe_softc *sc); static __noinline int bxe_nic_load(struct bxe_softc *sc, int load_mode); static __noinline int bxe_nic_unload(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link); static void bxe_handle_sp_tq(void *context, int pending); static void bxe_handle_rx_mode_tq(void *context, int pending); static void bxe_handle_fp_tq(void *context, int pending); /* calculate crc32 on a buffer (NOTE: crc32_length MUST be aligned to 8) */ uint32_t calc_crc32(uint8_t *crc32_packet, uint32_t crc32_length, uint32_t crc32_seed, uint8_t complement) { uint32_t byte = 0; uint32_t bit = 0; uint8_t msb = 0; uint32_t temp = 0; uint32_t shft = 0; uint8_t current_byte = 0; 
uint32_t crc32_result = crc32_seed; const uint32_t CRC32_POLY = 0x1edc6f41; if ((crc32_packet == NULL) || (crc32_length == 0) || ((crc32_length % 8) != 0)) { return (crc32_result); } for (byte = 0; byte < crc32_length; byte = byte + 1) { current_byte = crc32_packet[byte]; for (bit = 0; bit < 8; bit = bit + 1) { /* msb = crc32_result[31]; */ msb = (uint8_t)(crc32_result >> 31); crc32_result = crc32_result << 1; /* it (msb != current_byte[bit]) */ if (msb != (0x1 & (current_byte >> bit))) { crc32_result = crc32_result ^ CRC32_POLY; /* crc32_result[0] = 1 */ crc32_result |= 1; } } } /* Last step is to: * 1. "mirror" every bit * 2. swap the 4 bytes * 3. complement each bit */ /* Mirror */ temp = crc32_result; shft = sizeof(crc32_result) * 8 - 1; for (crc32_result >>= 1; crc32_result; crc32_result >>= 1) { temp <<= 1; temp |= crc32_result & 1; shft-- ; } /* temp[31-bit] = crc32_result[bit] */ temp <<= shft; /* Swap */ /* crc32_result = {temp[7:0], temp[15:8], temp[23:16], temp[31:24]} */ { uint32_t t0, t1, t2, t3; t0 = (0x000000ff & (temp >> 24)); t1 = (0x0000ff00 & (temp >> 8)); t2 = (0x00ff0000 & (temp << 8)); t3 = (0xff000000 & (temp << 24)); crc32_result = t0 | t1 | t2 | t3; } /* Complement */ if (complement) { crc32_result = ~crc32_result; } return (crc32_result); } int bxe_test_bit(int nr, volatile unsigned long *addr) { return ((atomic_load_acq_long(addr) & (1 << nr)) != 0); } void bxe_set_bit(unsigned int nr, volatile unsigned long *addr) { atomic_set_acq_long(addr, (1 << nr)); } void bxe_clear_bit(int nr, volatile unsigned long *addr) { atomic_clear_acq_long(addr, (1 << nr)); } int bxe_test_and_set_bit(int nr, volatile unsigned long *addr) { unsigned long x; nr = (1 << nr); do { x = *addr; } while (atomic_cmpset_acq_long(addr, x, x | nr) == 0); // if (x & nr) bit_was_set; else bit_was_not_set; return (x & nr); } int bxe_test_and_clear_bit(int nr, volatile unsigned long *addr) { unsigned long x; nr = (1 << nr); do { x = *addr; } while 
(atomic_cmpset_acq_long(addr, x, x & ~nr) == 0); // if (x & nr) bit_was_set; else bit_was_not_set; return (x & nr); } int bxe_cmpxchg(volatile int *addr, int old, int new) { int x; do { x = *addr; } while (atomic_cmpset_acq_int(addr, old, new) == 0); return (x); } /* * Get DMA memory from the OS. * * Validates that the OS has provided DMA buffers in response to a * bus_dmamap_load call and saves the physical address of those buffers. * When the callback is used the OS will return 0 for the mapping function * (bus_dmamap_load) so we use the value of map_arg->maxsegs to pass any * failures back to the caller. * * Returns: * Nothing. */ static void bxe_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct bxe_dma *dma = arg; if (error) { dma->paddr = 0; dma->nseg = 0; BLOGE(dma->sc, "Failed DMA alloc '%s' (%d)!\n", dma->msg, error); } else { dma->paddr = segs->ds_addr; dma->nseg = nseg; #if 0 BLOGD(dma->sc, DBG_LOAD, "DMA alloc '%s': vaddr=%p paddr=%p nseg=%d size=%lu\n", dma->msg, dma->vaddr, (void *)dma->paddr, dma->nseg, dma->size); #endif } } /* * Allocate a block of memory and map it for DMA. No partial completions * allowed and release any resources acquired if we can't acquire all * resources. 
* * Returns: * 0 = Success, !0 = Failure */ int bxe_dma_alloc(struct bxe_softc *sc, bus_size_t size, struct bxe_dma *dma, const char *msg) { int rc; if (dma->size > 0) { BLOGE(sc, "dma block '%s' already has size %lu\n", msg, (unsigned long)dma->size); return (1); } memset(dma, 0, sizeof(*dma)); /* sanity */ dma->sc = sc; dma->size = size; snprintf(dma->msg, sizeof(dma->msg), "%s", msg); rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ BCM_PAGE_SIZE, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ size, /* max map size */ 1, /* num discontinuous */ size, /* max seg size */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &dma->tag); /* returned dma tag */ if (rc != 0) { BLOGE(sc, "Failed to create dma tag for '%s' (%d)\n", msg, rc); memset(dma, 0, sizeof(*dma)); return (1); } rc = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, (BUS_DMA_NOWAIT | BUS_DMA_ZERO), &dma->map); if (rc != 0) { BLOGE(sc, "Failed to alloc dma mem for '%s' (%d)\n", msg, rc); bus_dma_tag_destroy(dma->tag); memset(dma, 0, sizeof(*dma)); return (1); } rc = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, bxe_dma_map_addr, /* BLOGD in here */ dma, BUS_DMA_NOWAIT); if (rc != 0) { BLOGE(sc, "Failed to load dma map for '%s' (%d)\n", msg, rc); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); bus_dma_tag_destroy(dma->tag); memset(dma, 0, sizeof(*dma)); return (1); } return (0); } void bxe_dma_free(struct bxe_softc *sc, struct bxe_dma *dma) { if (dma->size > 0) { #if 0 BLOGD(sc, DBG_LOAD, "DMA free '%s': vaddr=%p paddr=%p nseg=%d size=%lu\n", dma->msg, dma->vaddr, (void *)dma->paddr, dma->nseg, dma->size); #endif DBASSERT(sc, (dma->tag != NULL), ("dma tag is NULL")); bus_dmamap_sync(dma->tag, dma->map, (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); 
bus_dma_tag_destroy(dma->tag); } memset(dma, 0, sizeof(*dma)); } /* * These indirect read and write routines are only during init. * The locking is handled by the MCP. */ void bxe_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val) { pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, addr, 4); pci_write_config(sc->dev, PCICFG_GRC_DATA, val, 4); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); } uint32_t bxe_reg_rd_ind(struct bxe_softc *sc, uint32_t addr) { uint32_t val; pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, addr, 4); val = pci_read_config(sc->dev, PCICFG_GRC_DATA, 4); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); return (val); } #if 0 void bxe_dp_dmae(struct bxe_softc *sc, struct dmae_command *dmae, int msglvl) { uint32_t src_type = dmae->opcode & DMAE_COMMAND_SRC; switch (dmae->opcode & DMAE_COMMAND_DST) { case DMAE_CMD_DST_PCI: if (src_type == DMAE_CMD_SRC_PCI) DP(msglvl, "DMAE: opcode 0x%08x\n" "src [%x:%08x], len [%d*4], dst [%x:%08x]\n" "comp_addr [%x:%08x], comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo, dmae->len, dmae->dst_addr_hi, dmae->dst_addr_lo, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); else DP(msglvl, "DMAE: opcode 0x%08x\n" "src [%08x], len [%d*4], dst [%x:%08x]\n" "comp_addr [%x:%08x], comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_lo >> 2, dmae->len, dmae->dst_addr_hi, dmae->dst_addr_lo, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); break; case DMAE_CMD_DST_GRC: if (src_type == DMAE_CMD_SRC_PCI) DP(msglvl, "DMAE: opcode 0x%08x\n" "src [%x:%08x], len [%d*4], dst_addr [%08x]\n" "comp_addr [%x:%08x], comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo, dmae->len, dmae->dst_addr_lo >> 2, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); else DP(msglvl, "DMAE: opcode 0x%08x\n" "src [%08x], len [%d*4], dst [%08x]\n" "comp_addr [%x:%08x], comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_lo >> 2, dmae->len, dmae->dst_addr_lo >> 2, dmae->comp_addr_hi, 
dmae->comp_addr_lo, dmae->comp_val); break; default: if (src_type == DMAE_CMD_SRC_PCI) DP(msglvl, "DMAE: opcode 0x%08x\n" "src_addr [%x:%08x] len [%d * 4] dst_addr [none]\n" "comp_addr [%x:%08x] comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_hi, dmae->src_addr_lo, dmae->len, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); else DP(msglvl, "DMAE: opcode 0x%08x\n" "src_addr [%08x] len [%d * 4] dst_addr [none]\n" "comp_addr [%x:%08x] comp_val 0x%08x\n", dmae->opcode, dmae->src_addr_lo >> 2, dmae->len, dmae->comp_addr_hi, dmae->comp_addr_lo, dmae->comp_val); break; } } #endif static int bxe_acquire_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; int cnt; /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGE(sc, "resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE\n", resource); return (-1); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + (func * 8)); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + ((func - 6) * 8)); } /* validate the resource is not already taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { BLOGE(sc, "resource in use (status 0x%x bit 0x%x)\n", lock_status, resource_bit); return (-1); } /* try every 5ms for 5 seconds */ for (cnt = 0; cnt < 1000; cnt++) { REG_WR(sc, (hw_lock_control_reg + 4), resource_bit); lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { return (0); } DELAY(5000); } BLOGE(sc, "Resource lock timeout!\n"); return (-1); } static int bxe_release_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGE(sc, "resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE\n", resource); return (-1); 
} if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + (func * 8)); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + ((func - 6) * 8)); } /* validate the resource is currently taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (!(lock_status & resource_bit)) { BLOGE(sc, "resource not in use (status 0x%x bit 0x%x)\n", lock_status, resource_bit); return (-1); } REG_WR(sc, hw_lock_control_reg, resource_bit); return (0); } /* * Per pf misc lock must be acquired before the per port mcp lock. Otherwise, * had we done things the other way around, if two pfs from the same port * would attempt to access nvram at the same time, we could run into a * scenario such as: * pf A takes the port lock. * pf B succeeds in taking the same lock since they are from the same port. * pf A takes the per pf misc lock. Performs eeprom access. * pf A finishes. Unlocks the per pf misc lock. * Pf B takes the lock and proceeds to perform it's own access. * pf A unlocks the per port lock, while pf B is still working (!). 
* mcp takes the per port lock and corrupts pf B's access (and/or has it's own * access corrupted by pf B).* */ static int bxe_acquire_nvram_lock(struct bxe_softc *sc) { int port = SC_PORT(sc); int count, i; uint32_t val = 0; /* acquire HW lock: protect against other PFs in PF Direct Assignment */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_NVRAM); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* request access to nvram interface */ REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_SET1 << port)); for (i = 0; i < count*10; i++) { val = REG_RD(sc, MCP_REG_MCPR_NVM_SW_ARB); if (val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port)) { break; } DELAY(5); } if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { BLOGE(sc, "Cannot get access to nvram interface\n"); return (-1); } return (0); } static int bxe_release_nvram_lock(struct bxe_softc *sc) { int port = SC_PORT(sc); int count, i; uint32_t val = 0; /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* relinquish nvram interface */ REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << port)); for (i = 0; i < count*10; i++) { val = REG_RD(sc, MCP_REG_MCPR_NVM_SW_ARB); if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { break; } DELAY(5); } if (val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port)) { BLOGE(sc, "Cannot free access to nvram interface\n"); return (-1); } /* release HW lock: protect against other PFs in PF Direct Assignment */ bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_NVRAM); return (0); } static void bxe_enable_nvram_access(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE); /* enable both bits, even on read */ REG_WR(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE, (val | MCPR_NVM_ACCESS_ENABLE_EN | MCPR_NVM_ACCESS_ENABLE_WR_EN)); } static void bxe_disable_nvram_access(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE); /* 
disable both bits, even after read */ REG_WR(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE, (val & ~(MCPR_NVM_ACCESS_ENABLE_EN | MCPR_NVM_ACCESS_ENABLE_WR_EN))); } static int bxe_nvram_read_dword(struct bxe_softc *sc, uint32_t offset, uint32_t *ret_val, uint32_t cmd_flags) { int count, i, rc; uint32_t val; /* build the command word */ cmd_flags |= MCPR_NVM_COMMAND_DOIT; /* need to clear DONE bit separately */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, MCPR_NVM_COMMAND_DONE); /* address of the NVRAM to read from */ REG_WR(sc, MCP_REG_MCPR_NVM_ADDR, (offset & MCPR_NVM_ADDR_NVM_ADDR_VALUE)); /* issue a read command */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, cmd_flags); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* wait for completion */ *ret_val = 0; rc = -1; for (i = 0; i < count; i++) { DELAY(5); val = REG_RD(sc, MCP_REG_MCPR_NVM_COMMAND); if (val & MCPR_NVM_COMMAND_DONE) { val = REG_RD(sc, MCP_REG_MCPR_NVM_READ); /* we read nvram data in cpu order * but ethtool sees it as an array of bytes * converting to big-endian will do the work */ *ret_val = htobe32(val); rc = 0; break; } } if (rc == -1) { BLOGE(sc, "nvram read timeout expired\n"); } return (rc); } static int bxe_nvram_read(struct bxe_softc *sc, uint32_t offset, uint8_t *ret_buf, int buf_size) { uint32_t cmd_flags; uint32_t val; int rc; if ((offset & 0x03) || (buf_size & 0x03) || (buf_size == 0)) { BLOGE(sc, "Invalid parameter, offset 0x%x buf_size 0x%x\n", offset, buf_size); return (-1); } if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); /* read the first word(s) */ cmd_flags = MCPR_NVM_COMMAND_FIRST; while ((buf_size > sizeof(uint32_t)) && (rc == 0)) { rc 
= bxe_nvram_read_dword(sc, offset, &val, cmd_flags); memcpy(ret_buf, &val, 4); /* advance to the next dword */ offset += sizeof(uint32_t); ret_buf += sizeof(uint32_t); buf_size -= sizeof(uint32_t); cmd_flags = 0; } if (rc == 0) { cmd_flags |= MCPR_NVM_COMMAND_LAST; rc = bxe_nvram_read_dword(sc, offset, &val, cmd_flags); memcpy(ret_buf, &val, 4); } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } static int bxe_nvram_write_dword(struct bxe_softc *sc, uint32_t offset, uint32_t val, uint32_t cmd_flags) { int count, i, rc; /* build the command word */ cmd_flags |= (MCPR_NVM_COMMAND_DOIT | MCPR_NVM_COMMAND_WR); /* need to clear DONE bit separately */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, MCPR_NVM_COMMAND_DONE); /* write the data */ REG_WR(sc, MCP_REG_MCPR_NVM_WRITE, val); /* address of the NVRAM to write to */ REG_WR(sc, MCP_REG_MCPR_NVM_ADDR, (offset & MCPR_NVM_ADDR_NVM_ADDR_VALUE)); /* issue the write command */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, cmd_flags); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* wait for completion */ rc = -1; for (i = 0; i < count; i++) { DELAY(5); val = REG_RD(sc, MCP_REG_MCPR_NVM_COMMAND); if (val & MCPR_NVM_COMMAND_DONE) { rc = 0; break; } } if (rc == -1) { BLOGE(sc, "nvram write timeout expired\n"); } return (rc); } #define BYTE_OFFSET(offset) (8 * (offset & 0x03)) static int bxe_nvram_write1(struct bxe_softc *sc, uint32_t offset, uint8_t *data_buf, int buf_size) { uint32_t cmd_flags; uint32_t align_offset; uint32_t val; int rc; if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); cmd_flags = 
(MCPR_NVM_COMMAND_FIRST | MCPR_NVM_COMMAND_LAST); align_offset = (offset & ~0x03); rc = bxe_nvram_read_dword(sc, align_offset, &val, cmd_flags); if (rc == 0) { val &= ~(0xff << BYTE_OFFSET(offset)); val |= (*data_buf << BYTE_OFFSET(offset)); /* nvram data is returned as an array of bytes * convert it back to cpu order */ val = be32toh(val); rc = bxe_nvram_write_dword(sc, align_offset, val, cmd_flags); } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } static int bxe_nvram_write(struct bxe_softc *sc, uint32_t offset, uint8_t *data_buf, int buf_size) { uint32_t cmd_flags; uint32_t val; uint32_t written_so_far; int rc; if (buf_size == 1) { return (bxe_nvram_write1(sc, offset, data_buf, buf_size)); } if ((offset & 0x03) || (buf_size & 0x03) /* || (buf_size == 0) */) { BLOGE(sc, "Invalid parameter, offset 0x%x buf_size 0x%x\n", offset, buf_size); return (-1); } if (buf_size == 0) { return (0); /* nothing to do */ } if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); written_so_far = 0; cmd_flags = MCPR_NVM_COMMAND_FIRST; while ((written_so_far < buf_size) && (rc == 0)) { if (written_so_far == (buf_size - sizeof(uint32_t))) { cmd_flags |= MCPR_NVM_COMMAND_LAST; } else if (((offset + 4) % NVRAM_PAGE_SIZE) == 0) { cmd_flags |= MCPR_NVM_COMMAND_LAST; } else if ((offset % NVRAM_PAGE_SIZE) == 0) { cmd_flags |= MCPR_NVM_COMMAND_FIRST; } memcpy(&val, data_buf, 4); rc = bxe_nvram_write_dword(sc, offset, val, cmd_flags); /* advance to the next dword */ offset += sizeof(uint32_t); data_buf += sizeof(uint32_t); written_so_far += sizeof(uint32_t); cmd_flags = 0; } /* disable access to nvram interface */ 
bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } /* copy command into DMAE command memory and set DMAE command Go */ void bxe_post_dmae(struct bxe_softc *sc, struct dmae_command *dmae, int idx) { uint32_t cmd_offset; int i; cmd_offset = (DMAE_REG_CMD_MEM + (sizeof(struct dmae_command) * idx)); for (i = 0; i < ((sizeof(struct dmae_command) / 4)); i++) { REG_WR(sc, (cmd_offset + (i * 4)), *(((uint32_t *)dmae) + i)); } REG_WR(sc, dmae_reg_go_c[idx], 1); } uint32_t bxe_dmae_opcode_add_comp(uint32_t opcode, uint8_t comp_type) { return (opcode | ((comp_type << DMAE_COMMAND_C_DST_SHIFT) | DMAE_COMMAND_C_TYPE_ENABLE)); } uint32_t bxe_dmae_opcode_clr_src_reset(uint32_t opcode) { return (opcode & ~DMAE_COMMAND_SRC_RESET); } uint32_t bxe_dmae_opcode(struct bxe_softc *sc, uint8_t src_type, uint8_t dst_type, uint8_t with_comp, uint8_t comp_type) { uint32_t opcode = 0; opcode |= ((src_type << DMAE_COMMAND_SRC_SHIFT) | (dst_type << DMAE_COMMAND_DST_SHIFT)); opcode |= (DMAE_COMMAND_SRC_RESET | DMAE_COMMAND_DST_RESET); opcode |= (SC_PORT(sc) ? 
DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0); opcode |= ((SC_VN(sc) << DMAE_COMMAND_E1HVN_SHIFT) | (SC_VN(sc) << DMAE_COMMAND_DST_VN_SHIFT)); opcode |= (DMAE_COM_SET_ERR << DMAE_COMMAND_ERR_POLICY_SHIFT); #ifdef __BIG_ENDIAN opcode |= DMAE_CMD_ENDIANITY_B_DW_SWAP; #else opcode |= DMAE_CMD_ENDIANITY_DW_SWAP; #endif if (with_comp) { opcode = bxe_dmae_opcode_add_comp(opcode, comp_type); } return (opcode); } static void bxe_prep_dmae_with_comp(struct bxe_softc *sc, struct dmae_command *dmae, uint8_t src_type, uint8_t dst_type) { memset(dmae, 0, sizeof(struct dmae_command)); /* set the opcode */ dmae->opcode = bxe_dmae_opcode(sc, src_type, dst_type, TRUE, DMAE_COMP_PCI); /* fill in the completion parameters */ dmae->comp_addr_lo = U64_LO(BXE_SP_MAPPING(sc, wb_comp)); dmae->comp_addr_hi = U64_HI(BXE_SP_MAPPING(sc, wb_comp)); dmae->comp_val = DMAE_COMP_VAL; } /* issue a DMAE command over the init channel and wait for completion */ static int bxe_issue_dmae_with_comp(struct bxe_softc *sc, struct dmae_command *dmae) { uint32_t *wb_comp = BXE_SP(sc, wb_comp); int timeout = CHIP_REV_IS_SLOW(sc) ? 
400000 : 4000; BXE_DMAE_LOCK(sc); /* reset completion */ *wb_comp = 0; /* post the command on the channel used for initializations */ bxe_post_dmae(sc, dmae, INIT_DMAE_C(sc)); /* wait for completion */ DELAY(5); while ((*wb_comp & ~DMAE_PCI_ERR_FLAG) != DMAE_COMP_VAL) { if (!timeout || (sc->recovery_state != BXE_RECOVERY_DONE && sc->recovery_state != BXE_RECOVERY_NIC_LOADING)) { BLOGE(sc, "DMAE timeout!\n"); BXE_DMAE_UNLOCK(sc); return (DMAE_TIMEOUT); } timeout--; DELAY(50); } if (*wb_comp & DMAE_PCI_ERR_FLAG) { BLOGE(sc, "DMAE PCI error!\n"); BXE_DMAE_UNLOCK(sc); return (DMAE_PCI_ERROR); } BXE_DMAE_UNLOCK(sc); return (0); } void bxe_read_dmae(struct bxe_softc *sc, uint32_t src_addr, uint32_t len32) { struct dmae_command dmae; uint32_t *data; int i, rc; DBASSERT(sc, (len32 <= 4), ("DMAE read length is %d", len32)); if (!sc->dmae_ready) { data = BXE_SP(sc, wb_data[0]); for (i = 0; i < len32; i++) { data[i] = (CHIP_IS_E1(sc)) ? bxe_reg_rd_ind(sc, (src_addr + (i * 4))) : REG_RD(sc, (src_addr + (i * 4))); } return; } /* set opcode and fixed command fields */ bxe_prep_dmae_with_comp(sc, &dmae, DMAE_SRC_GRC, DMAE_DST_PCI); /* fill in addresses and len */ dmae.src_addr_lo = (src_addr >> 2); /* GRC addr has dword resolution */ dmae.src_addr_hi = 0; dmae.dst_addr_lo = U64_LO(BXE_SP_MAPPING(sc, wb_data)); dmae.dst_addr_hi = U64_HI(BXE_SP_MAPPING(sc, wb_data)); dmae.len = len32; /* issue the command and wait for completion */ if ((rc = bxe_issue_dmae_with_comp(sc, &dmae)) != 0) { bxe_panic(sc, ("DMAE failed (%d)\n", rc)); }; } void bxe_write_dmae(struct bxe_softc *sc, bus_addr_t dma_addr, uint32_t dst_addr, uint32_t len32) { struct dmae_command dmae; int rc; if (!sc->dmae_ready) { DBASSERT(sc, (len32 <= 4), ("DMAE not ready and length is %d", len32)); if (CHIP_IS_E1(sc)) { ecore_init_ind_wr(sc, dst_addr, BXE_SP(sc, wb_data[0]), len32); } else { ecore_init_str_wr(sc, dst_addr, BXE_SP(sc, wb_data[0]), len32); } return; } /* set opcode and fixed command fields */ 
bxe_prep_dmae_with_comp(sc, &dmae, DMAE_SRC_PCI, DMAE_DST_GRC); /* fill in addresses and len */ dmae.src_addr_lo = U64_LO(dma_addr); dmae.src_addr_hi = U64_HI(dma_addr); dmae.dst_addr_lo = (dst_addr >> 2); /* GRC addr has dword resolution */ dmae.dst_addr_hi = 0; dmae.len = len32; /* issue the command and wait for completion */ if ((rc = bxe_issue_dmae_with_comp(sc, &dmae)) != 0) { bxe_panic(sc, ("DMAE failed (%d)\n", rc)); } } void bxe_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { int dmae_wr_max = DMAE_LEN32_WR_MAX(sc); int offset = 0; while (len > dmae_wr_max) { bxe_write_dmae(sc, (phys_addr + offset), /* src DMA address */ (addr + offset), /* dst GRC address */ dmae_wr_max); offset += (dmae_wr_max * 4); len -= dmae_wr_max; } bxe_write_dmae(sc, (phys_addr + offset), /* src DMA address */ (addr + offset), /* dst GRC address */ len); } void bxe_set_ctx_validation(struct bxe_softc *sc, struct eth_context *cxt, uint32_t cid) { /* ustorm cxt validation */ cxt->ustorm_ag_context.cdu_usage = CDU_RSRVD_VALUE_TYPE_A(HW_CID(sc, cid), CDU_REGION_NUMBER_UCM_AG, ETH_CONNECTION_TYPE); /* xcontext validation */ cxt->xstorm_ag_context.cdu_reserved = CDU_RSRVD_VALUE_TYPE_A(HW_CID(sc, cid), CDU_REGION_NUMBER_XCM_AG, ETH_CONNECTION_TYPE); } static void bxe_storm_memset_hc_timeout(struct bxe_softc *sc, uint8_t port, uint8_t fw_sb_id, uint8_t sb_index, uint8_t ticks) { uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index)); REG_WR8(sc, addr, ticks); BLOGD(sc, DBG_LOAD, "port %d fw_sb_id %d sb_index %d ticks %d\n", port, fw_sb_id, sb_index, ticks); } static void bxe_storm_memset_hc_disable(struct bxe_softc *sc, uint8_t port, uint16_t fw_sb_id, uint8_t sb_index, uint8_t disable) { uint32_t enable_flag = (disable) ? 
0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT); uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index)); uint8_t flags; /* clear and set */ flags = REG_RD8(sc, addr); flags &= ~HC_INDEX_DATA_HC_ENABLED; flags |= enable_flag; REG_WR8(sc, addr, flags); BLOGD(sc, DBG_LOAD, "port %d fw_sb_id %d sb_index %d disable %d\n", port, fw_sb_id, sb_index, disable); } void bxe_update_coalesce_sb_index(struct bxe_softc *sc, uint8_t fw_sb_id, uint8_t sb_index, uint8_t disable, uint16_t usec) { int port = SC_PORT(sc); uint8_t ticks = (usec / 4); /* XXX ??? */ bxe_storm_memset_hc_timeout(sc, port, fw_sb_id, sb_index, ticks); disable = (disable) ? 1 : ((usec) ? 0 : 1); bxe_storm_memset_hc_disable(sc, port, fw_sb_id, sb_index, disable); } void elink_cb_udelay(struct bxe_softc *sc, uint32_t usecs) { DELAY(usecs); } uint32_t elink_cb_reg_read(struct bxe_softc *sc, uint32_t reg_addr) { return (REG_RD(sc, reg_addr)); } void elink_cb_reg_write(struct bxe_softc *sc, uint32_t reg_addr, uint32_t val) { REG_WR(sc, reg_addr, val); } void elink_cb_reg_wb_write(struct bxe_softc *sc, uint32_t offset, uint32_t *wb_write, uint16_t len) { REG_WR_DMAE(sc, offset, wb_write, len); } void elink_cb_reg_wb_read(struct bxe_softc *sc, uint32_t offset, uint32_t *wb_write, uint16_t len) { REG_RD_DMAE(sc, offset, wb_write, len); } uint8_t elink_cb_path_id(struct bxe_softc *sc) { return (SC_PATH(sc)); } void elink_cb_event_log(struct bxe_softc *sc, const elink_log_id_t elink_log_id, ...) 
{ /* XXX */ #if 0 //va_list ap; va_start(ap, elink_log_id); _XXX_(sc, lm_log_id, ap); va_end(ap); #endif BLOGI(sc, "ELINK EVENT LOG (%d)\n", elink_log_id); } static int bxe_set_spio(struct bxe_softc *sc, int spio, uint32_t mode) { uint32_t spio_reg; /* Only 2 SPIOs are configurable */ if ((spio != MISC_SPIO_SPIO4) && (spio != MISC_SPIO_SPIO5)) { BLOGE(sc, "Invalid SPIO 0x%x\n", spio); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_SPIO); /* read SPIO and mask except the float bits */ spio_reg = (REG_RD(sc, MISC_REG_SPIO) & MISC_SPIO_FLOAT); switch (mode) { case MISC_SPIO_OUTPUT_LOW: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> output low\n", spio); /* clear FLOAT and set CLR */ spio_reg &= ~(spio << MISC_SPIO_FLOAT_POS); spio_reg |= (spio << MISC_SPIO_CLR_POS); break; case MISC_SPIO_OUTPUT_HIGH: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> output high\n", spio); /* clear FLOAT and set SET */ spio_reg &= ~(spio << MISC_SPIO_FLOAT_POS); spio_reg |= (spio << MISC_SPIO_SET_POS); break; case MISC_SPIO_INPUT_HI_Z: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> input\n", spio); /* set FLOAT */ spio_reg |= (spio << MISC_SPIO_FLOAT_POS); break; default: break; } REG_WR(sc, MISC_REG_SPIO, spio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_SPIO); return (0); } static int bxe_gpio_read(struct bxe_softc *sc, int gpio_num, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d\n", gpio_num); return (-1); } /* read GPIO value */ gpio_reg = REG_RD(sc, MISC_REG_GPIO); /* get the requested pin value */ return ((gpio_reg & gpio_mask) == gpio_mask) ? 
1 : 0; } static int bxe_gpio_write(struct bxe_softc *sc, int gpio_num, uint32_t mode, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d\n", gpio_num); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO and mask except the float bits */ gpio_reg = (REG_RD(sc, MISC_REG_GPIO) & MISC_REGISTERS_GPIO_FLOAT); switch (mode) { case MISC_REGISTERS_GPIO_OUTPUT_LOW: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> output low\n", gpio_num, gpio_shift); /* clear FLOAT and set CLR */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_CLR_POS); break; case MISC_REGISTERS_GPIO_OUTPUT_HIGH: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> output high\n", gpio_num, gpio_shift); /* clear FLOAT and set SET */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_SET_POS); break; case MISC_REGISTERS_GPIO_INPUT_HI_Z: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> input\n", gpio_num, gpio_shift); /* set FLOAT */ gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); break; default: break; } REG_WR(sc, MISC_REG_GPIO, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } static int bxe_gpio_mult_write(struct bxe_softc *sc, uint8_t pins, uint32_t mode) { uint32_t gpio_reg; /* any port swapping should be handled by caller */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO and mask except the float bits */ gpio_reg = REG_RD(sc, MISC_REG_GPIO); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_CLR_POS); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_SET_POS); switch 
(mode) { case MISC_REGISTERS_GPIO_OUTPUT_LOW: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> output low\n", pins); /* set CLR */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_CLR_POS); break; case MISC_REGISTERS_GPIO_OUTPUT_HIGH: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> output high\n", pins); /* set SET */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_SET_POS); break; case MISC_REGISTERS_GPIO_INPUT_HI_Z: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> input\n", pins); /* set FLOAT */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_FLOAT_POS); break; default: BLOGE(sc, "Invalid GPIO mode assignment %d\n", mode); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (-1); } REG_WR(sc, MISC_REG_GPIO, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } static int bxe_gpio_int_write(struct bxe_softc *sc, int gpio_num, uint32_t mode, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? 
MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d\n", gpio_num); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO int */ gpio_reg = REG_RD(sc, MISC_REG_GPIO_INT); switch (mode) { case MISC_REGISTERS_GPIO_INT_OUTPUT_CLR: BLOGD(sc, DBG_PHY, "Clear GPIO INT %d (shift %d) -> output low\n", gpio_num, gpio_shift); /* clear SET and set CLR */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_INT_SET_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_INT_CLR_POS); break; case MISC_REGISTERS_GPIO_INT_OUTPUT_SET: BLOGD(sc, DBG_PHY, "Set GPIO INT %d (shift %d) -> output high\n", gpio_num, gpio_shift); /* clear CLR and set SET */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_INT_CLR_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_INT_SET_POS); break; default: break; } REG_WR(sc, MISC_REG_GPIO_INT, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } uint32_t elink_cb_gpio_read(struct bxe_softc *sc, uint16_t gpio_num, uint8_t port) { return (bxe_gpio_read(sc, gpio_num, port)); } uint8_t elink_cb_gpio_write(struct bxe_softc *sc, uint16_t gpio_num, uint8_t mode, /* 0=low 1=high */ uint8_t port) { return (bxe_gpio_write(sc, gpio_num, mode, port)); } uint8_t elink_cb_gpio_mult_write(struct bxe_softc *sc, uint8_t pins, uint8_t mode) /* 0=low 1=high */ { return (bxe_gpio_mult_write(sc, pins, mode)); } uint8_t elink_cb_gpio_int_write(struct bxe_softc *sc, uint16_t gpio_num, uint8_t mode, /* 0=low 1=high */ uint8_t port) { return (bxe_gpio_int_write(sc, gpio_num, mode, port)); } void elink_cb_notify_link_changed(struct bxe_softc *sc) { REG_WR(sc, (MISC_REG_AEU_GENERAL_ATTN_12 + (SC_FUNC(sc) * sizeof(uint32_t))), 1); } /* send the MCP a request, block until there is a reply */ uint32_t elink_cb_fw_command(struct bxe_softc *sc, uint32_t command, uint32_t param) { int mb_idx = SC_FW_MB_IDX(sc); uint32_t seq; uint32_t rc = 0; 
uint32_t cnt = 1; uint8_t delay = CHIP_REV_IS_SLOW(sc) ? 100 : 10; BXE_FWMB_LOCK(sc); seq = ++sc->fw_seq; SHMEM_WR(sc, func_mb[mb_idx].drv_mb_param, param); SHMEM_WR(sc, func_mb[mb_idx].drv_mb_header, (command | seq)); BLOGD(sc, DBG_PHY, "wrote command 0x%08x to FW MB param 0x%08x\n", (command | seq), param); /* Let the FW do it's magic. GIve it up to 5 seconds... */ do { DELAY(delay * 1000); rc = SHMEM_RD(sc, func_mb[mb_idx].fw_mb_header); } while ((seq != (rc & FW_MSG_SEQ_NUMBER_MASK)) && (cnt++ < 500)); BLOGD(sc, DBG_PHY, "[after %d ms] read 0x%x seq 0x%x from FW MB\n", cnt*delay, rc, seq); /* is this a reply to our command? */ if (seq == (rc & FW_MSG_SEQ_NUMBER_MASK)) { rc &= FW_MSG_CODE_MASK; } else { /* Ruh-roh! */ BLOGE(sc, "FW failed to respond!\n"); // XXX bxe_fw_dump(sc); rc = 0; } BXE_FWMB_UNLOCK(sc); return (rc); } static uint32_t bxe_fw_command(struct bxe_softc *sc, uint32_t command, uint32_t param) { return (elink_cb_fw_command(sc, command, param)); } static void __storm_memset_dma_mapping(struct bxe_softc *sc, uint32_t addr, bus_addr_t mapping) { REG_WR(sc, addr, U64_LO(mapping)); REG_WR(sc, (addr + 4), U64_HI(mapping)); } static void storm_memset_spq_addr(struct bxe_softc *sc, bus_addr_t mapping, uint16_t abs_fid) { uint32_t addr = (XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PAGE_BASE_OFFSET(abs_fid)); __storm_memset_dma_mapping(sc, addr, mapping); } static void storm_memset_vf_to_pf(struct bxe_softc *sc, uint16_t abs_fid, uint16_t pf_id) { REG_WR8(sc, (BAR_XSTRORM_INTMEM + XSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_USTRORM_INTMEM + USTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); } static void storm_memset_func_en(struct bxe_softc *sc, uint16_t abs_fid, uint8_t enable) { REG_WR8(sc, (BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_CSTRORM_INTMEM + 
CSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(abs_fid)), enable); } static void storm_memset_eq_data(struct bxe_softc *sc, struct event_ring_data *eq_data, uint16_t pfid) { uint32_t addr; size_t size; addr = (BAR_CSTRORM_INTMEM + CSTORM_EVENT_RING_DATA_OFFSET(pfid)); size = sizeof(struct event_ring_data); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)eq_data); } static void storm_memset_eq_prod(struct bxe_softc *sc, uint16_t eq_prod, uint16_t pfid) { uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_EVENT_RING_PROD_OFFSET(pfid)); REG_WR16(sc, addr, eq_prod); } /* * Post a slowpath command. * * A slowpath command is used to propogate a configuration change through * the controller in a controlled manner, allowing each STORM processor and * other H/W blocks to phase in the change. The commands sent on the * slowpath are referred to as ramrods. Depending on the ramrod used the * completion of the ramrod will occur in different ways. Here's a * breakdown of ramrods and how they complete: * * RAMROD_CMD_ID_ETH_PORT_SETUP * Used to setup the leading connection on a port. Completes on the * Receive Completion Queue (RCQ) of that port (typically fp[0]). * * RAMROD_CMD_ID_ETH_CLIENT_SETUP * Used to setup an additional connection on a port. Completes on the * RCQ of the multi-queue/RSS connection being initialized. * * RAMROD_CMD_ID_ETH_STAT_QUERY * Used to force the storm processors to update the statistics database * in host memory. This ramrod is send on the leading connection CID and * completes as an index increment of the CSTORM on the default status * block. * * RAMROD_CMD_ID_ETH_UPDATE * Used to update the state of the leading connection, usually to udpate * the RSS indirection table. Completes on the RCQ of the leading * connection. (Not currently used under FreeBSD until OS support becomes * available.) 
* * RAMROD_CMD_ID_ETH_HALT * Used when tearing down a connection prior to driver unload. Completes * on the RCQ of the multi-queue/RSS connection being torn down. Don't * use this on the leading connection. * * RAMROD_CMD_ID_ETH_SET_MAC * Sets the Unicast/Broadcast/Multicast used by the port. Completes on * the RCQ of the leading connection. * * RAMROD_CMD_ID_ETH_CFC_DEL * Used when tearing down a conneciton prior to driver unload. Completes * on the RCQ of the leading connection (since the current connection * has been completely removed from controller memory). * * RAMROD_CMD_ID_ETH_PORT_DEL * Used to tear down the leading connection prior to driver unload, * typically fp[0]. Completes as an index increment of the CSTORM on the * default status block. * * RAMROD_CMD_ID_ETH_FORWARD_SETUP * Used for connection offload. Completes on the RCQ of the multi-queue * RSS connection that is being offloaded. (Not currently used under * FreeBSD.) * * There can only be one command pending per function. * * Returns: * 0 = Success, !0 = Failure. */ /* must be called under the spq lock */ static inline struct eth_spe *bxe_sp_get_next(struct bxe_softc *sc) { struct eth_spe *next_spe = sc->spq_prod_bd; if (sc->spq_prod_bd == sc->spq_last_bd) { /* wrap back to the first eth_spq */ sc->spq_prod_bd = sc->spq; sc->spq_prod_idx = 0; } else { sc->spq_prod_bd++; sc->spq_prod_idx++; } return (next_spe); } /* must be called under the spq lock */ static inline void bxe_sp_prod_update(struct bxe_softc *sc) { int func = SC_FUNC(sc); /* * Make sure that BD data is updated before writing the producer. * BD data is written to the memory, the producer is read from the * memory, thus we need a full memory barrier to ensure the ordering. 
*/ mb(); REG_WR16(sc, (BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func)), sc->spq_prod_idx); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); } /** * bxe_is_contextless_ramrod - check if the current command ends on EQ * * @cmd: command to check * @cmd_type: command type */ static inline int bxe_is_contextless_ramrod(int cmd, int cmd_type) { if ((cmd_type == NONE_CONNECTION_TYPE) || (cmd == RAMROD_CMD_ID_ETH_FORWARD_SETUP) || (cmd == RAMROD_CMD_ID_ETH_CLASSIFICATION_RULES) || (cmd == RAMROD_CMD_ID_ETH_FILTER_RULES) || (cmd == RAMROD_CMD_ID_ETH_MULTICAST_RULES) || (cmd == RAMROD_CMD_ID_ETH_SET_MAC) || (cmd == RAMROD_CMD_ID_ETH_RSS_UPDATE)) { return (TRUE); } else { return (FALSE); } } /** * bxe_sp_post - place a single command on an SP ring * * @sc: driver handle * @command: command to place (e.g. SETUP, FILTER_RULES, etc.) * @cid: SW CID the command is related to * @data_hi: command private data address (high 32 bits) * @data_lo: command private data address (low 32 bits) * @cmd_type: command type (e.g. NONE, ETH) * * SP data is handled as if it's always an address pair, thus data fields are * not swapped to little endian in upper functions. Instead this function swaps * data as if it's two uint32 fields. 
*/
int
bxe_sp_post(struct bxe_softc *sc,
            int              command,
            int              cid,
            uint32_t         data_hi,
            uint32_t         data_lo,
            int              cmd_type)
{
    struct eth_spe *entry;
    uint16_t hdr_type;
    int on_eq;

    /* contextless ramrods are accounted against the EQ, the rest the CQ */
    on_eq = bxe_is_contextless_ramrod(command, cmd_type);

    BXE_SP_LOCK(sc);

    if (on_eq && !atomic_load_acq_long(&sc->eq_spq_left)) {
        BLOGE(sc, "EQ ring is full!\n");
        BXE_SP_UNLOCK(sc);
        return (-1);
    }

    if (!on_eq && !atomic_load_acq_long(&sc->cq_spq_left)) {
        BLOGE(sc, "SPQ ring is full!\n");
        BXE_SP_UNLOCK(sc);
        return (-1);
    }

    entry = bxe_sp_get_next(sc);

    /* CID needs port number to be encoded in it */
    entry->hdr.conn_and_cmd_data =
        htole32((command << SPE_HDR_CMD_ID_SHIFT) | HW_CID(sc, cid));

    hdr_type = (cmd_type << SPE_HDR_CONN_TYPE_SHIFT) & SPE_HDR_CONN_TYPE;

    /* TBD: Check if it works for VFs */
    hdr_type |= ((SC_FUNC(sc) << SPE_HDR_FUNCTION_ID_SHIFT) &
                 SPE_HDR_FUNCTION_ID);

    entry->hdr.type = htole16(hdr_type);

    entry->data.update_data_addr.hi = htole32(data_hi);
    entry->data.update_data_addr.lo = htole32(data_lo);

    /*
     * It's ok if the actual decrement is issued towards the memory
     * somewhere between the lock and unlock. Thus no more explicit
     * memory barrier is needed.
     */
    if (on_eq) {
        atomic_subtract_acq_long(&sc->eq_spq_left, 1);
    } else {
        atomic_subtract_acq_long(&sc->cq_spq_left, 1);
    }

    BLOGD(sc, DBG_SP, "SPQE -> %#jx\n", (uintmax_t)sc->spq_dma.paddr);
    BLOGD(sc, DBG_SP, "FUNC_RDATA -> %p / %#jx\n",
          BXE_SP(sc, func_rdata), (uintmax_t)BXE_SP_MAPPING(sc, func_rdata));
    BLOGD(sc, DBG_SP,
          "SPQE[%x] (%x:%x) (cmd, common?) (%d,%d) hw_cid %x data (%x:%x) type(0x%x) left (CQ, EQ) (%lx,%lx)\n",
          sc->spq_prod_idx,
          (uint32_t)U64_HI(sc->spq_dma.paddr),
          (uint32_t)(U64_LO(sc->spq_dma.paddr) + (uint8_t *)sc->spq_prod_bd - (uint8_t *)sc->spq),
          command,
          on_eq,
          HW_CID(sc, cid),
          data_hi,
          data_lo,
          hdr_type,
          atomic_load_acq_long(&sc->cq_spq_left),
          atomic_load_acq_long(&sc->eq_spq_left));

    /* hand the new entry to the chip */
    bxe_sp_prod_update(sc);

    BXE_SP_UNLOCK(sc);
    return (0);
}

/**
 * bxe_debug_print_ind_table - prints the indirection table configuration.
* * @sc: driver hanlde * @p: pointer to rss configuration */ #if 0 static void bxe_debug_print_ind_table(struct bxe_softc *sc, struct ecore_config_rss_params *p) { int i; BLOGD(sc, DBG_LOAD, "Setting indirection table to:\n"); BLOGD(sc, DBG_LOAD, " 0x0000: "); for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) { BLOGD(sc, DBG_LOAD, "0x%02x ", p->ind_table[i]); /* Print 4 bytes in a line */ if ((i + 1 < T_ETH_INDIRECTION_TABLE_SIZE) && (((i + 1) & 0x3) == 0)) { BLOGD(sc, DBG_LOAD, "\n"); BLOGD(sc, DBG_LOAD, "0x%04x: ", i + 1); } } BLOGD(sc, DBG_LOAD, "\n"); } #endif /* * FreeBSD Device probe function. * * Compares the device found to the driver's list of supported devices and * reports back to the bsd loader whether this is the right driver for the device. * This is the driver entry function called from the "kldload" command. * * Returns: * BUS_PROBE_DEFAULT on success, positive value on failure. */ static int bxe_probe(device_t dev) { struct bxe_softc *sc; struct bxe_device_type *t; char *descbuf; uint16_t did, sdid, svid, vid; /* Find our device structure */ sc = device_get_softc(dev); sc->dev = dev; t = bxe_devs; /* Get the data for the device to be probed. */ vid = pci_get_vendor(dev); did = pci_get_device(dev); svid = pci_get_subvendor(dev); sdid = pci_get_subdevice(dev); BLOGD(sc, DBG_LOAD, "%s(); VID = 0x%04X, DID = 0x%04X, SVID = 0x%04X, " "SDID = 0x%04X\n", __FUNCTION__, vid, did, svid, sdid); /* Look through the list of known devices for a match. */ while (t->bxe_name != NULL) { if ((vid == t->bxe_vid) && (did == t->bxe_did) && ((svid == t->bxe_svid) || (t->bxe_svid == PCI_ANY_ID)) && ((sdid == t->bxe_sdid) || (t->bxe_sdid == PCI_ANY_ID))) { descbuf = malloc(BXE_DEVDESC_MAX, M_TEMP, M_NOWAIT); if (descbuf == NULL) return (ENOMEM); /* Print out the device identity. 
*/ snprintf(descbuf, BXE_DEVDESC_MAX, "%s (%c%d) BXE v:%s\n", t->bxe_name, (((pci_read_config(dev, PCIR_REVID, 4) & 0xf0) >> 4) + 'A'), (pci_read_config(dev, PCIR_REVID, 4) & 0xf), BXE_DRIVER_VERSION); device_set_desc_copy(dev, descbuf); free(descbuf, M_TEMP); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static void bxe_init_mutexes(struct bxe_softc *sc) { #ifdef BXE_CORE_LOCK_SX snprintf(sc->core_sx_name, sizeof(sc->core_sx_name), "bxe%d_core_lock", sc->unit); sx_init(&sc->core_sx, sc->core_sx_name); #else snprintf(sc->core_mtx_name, sizeof(sc->core_mtx_name), "bxe%d_core_lock", sc->unit); mtx_init(&sc->core_mtx, sc->core_mtx_name, NULL, MTX_DEF); #endif snprintf(sc->sp_mtx_name, sizeof(sc->sp_mtx_name), "bxe%d_sp_lock", sc->unit); mtx_init(&sc->sp_mtx, sc->sp_mtx_name, NULL, MTX_DEF); snprintf(sc->dmae_mtx_name, sizeof(sc->dmae_mtx_name), "bxe%d_dmae_lock", sc->unit); mtx_init(&sc->dmae_mtx, sc->dmae_mtx_name, NULL, MTX_DEF); snprintf(sc->port.phy_mtx_name, sizeof(sc->port.phy_mtx_name), "bxe%d_phy_lock", sc->unit); mtx_init(&sc->port.phy_mtx, sc->port.phy_mtx_name, NULL, MTX_DEF); snprintf(sc->fwmb_mtx_name, sizeof(sc->fwmb_mtx_name), "bxe%d_fwmb_lock", sc->unit); mtx_init(&sc->fwmb_mtx, sc->fwmb_mtx_name, NULL, MTX_DEF); snprintf(sc->print_mtx_name, sizeof(sc->print_mtx_name), "bxe%d_print_lock", sc->unit); mtx_init(&(sc->print_mtx), sc->print_mtx_name, NULL, MTX_DEF); snprintf(sc->stats_mtx_name, sizeof(sc->stats_mtx_name), "bxe%d_stats_lock", sc->unit); mtx_init(&(sc->stats_mtx), sc->stats_mtx_name, NULL, MTX_DEF); snprintf(sc->mcast_mtx_name, sizeof(sc->mcast_mtx_name), "bxe%d_mcast_lock", sc->unit); mtx_init(&(sc->mcast_mtx), sc->mcast_mtx_name, NULL, MTX_DEF); } static void bxe_release_mutexes(struct bxe_softc *sc) { #ifdef BXE_CORE_LOCK_SX sx_destroy(&sc->core_sx); #else if (mtx_initialized(&sc->core_mtx)) { mtx_destroy(&sc->core_mtx); } #endif if (mtx_initialized(&sc->sp_mtx)) { mtx_destroy(&sc->sp_mtx); } if (mtx_initialized(&sc->dmae_mtx)) { 
mtx_destroy(&sc->dmae_mtx);
    }

    if (mtx_initialized(&sc->port.phy_mtx)) {
        mtx_destroy(&sc->port.phy_mtx);
    }

    if (mtx_initialized(&sc->fwmb_mtx)) {
        mtx_destroy(&sc->fwmb_mtx);
    }

    if (mtx_initialized(&sc->print_mtx)) {
        mtx_destroy(&sc->print_mtx);
    }

    if (mtx_initialized(&sc->stats_mtx)) {
        mtx_destroy(&sc->stats_mtx);
    }

    if (mtx_initialized(&sc->mcast_mtx)) {
        mtx_destroy(&sc->mcast_mtx);
    }
}

/*
 * Mark the interface as stopped by clearing every driver flag on the ifnet
 * (if one is attached).
 */
static void
bxe_tx_disable(struct bxe_softc* sc)
{
    struct ifnet *ifp = sc->ifnet;

    /* tell the stack the driver is stopped and TX queue is full */
    if (ifp != NULL) {
        /* note: this zeroes all of if_drv_flags, not a single bit */
        ifp->if_drv_flags = 0;
    }
}

/*
 * Publish the driver's pulse sequence number (sc->fw_drv_pulse_wr_seq) in
 * the drv_pulse_mb field of this function's firmware mailbox.
 */
static void
bxe_drv_pulse(struct bxe_softc *sc)
{
    SHMEM_WR(sc, func_mb[SC_FW_MB_IDX(sc)].drv_pulse_mb,
             sc->fw_drv_pulse_wr_seq);
}

/*
 * Return the number of free TX buffer descriptors for a fastpath:
 * sc->tx_ring_size minus the signed 16-bit distance between the BD producer
 * and the BD consumer.
 */
static inline uint16_t
bxe_tx_avail(struct bxe_softc *sc,
             struct bxe_fastpath *fp)
{
    int16_t  used;
    uint16_t prod;
    uint16_t cons;

    prod = fp->tx_bd_prod;
    cons = fp->tx_bd_cons;

    used = SUB_S16(prod, cons);

#if 0
    KASSERT((used < 0), ("used tx bds < 0"));
    KASSERT((used > sc->tx_ring_size), ("used tx bds > tx_ring_size"));
    KASSERT(((sc->tx_ring_size - used) > MAX_TX_AVAIL),
            ("invalid number of tx bds used"));
#endif

    return (int16_t)(sc->tx_ring_size) - used;
}

/*
 * Return nonzero when the TX consumer index in the status block differs
 * from the driver's packet consumer, i.e. there are completions to reap.
 */
static inline int
bxe_tx_queue_has_work(struct bxe_fastpath *fp)
{
    uint16_t hw_cons;

    mb(); /* status block fields can change */
    hw_cons = le16toh(*fp->tx_cons_sb);
    return (hw_cons != fp->tx_pkt_cons);
}

/* Return TRUE/FALSE wrapper around bxe_tx_queue_has_work(). */
static inline uint8_t
bxe_has_tx_work(struct bxe_fastpath *fp)
{
    /* expand this for multi-cos if ever supported */
    return (bxe_tx_queue_has_work(fp)) ? TRUE : FALSE;
}

/*
 * Return nonzero when the RX completion queue consumer in the status block
 * differs from the driver's rx_cq_cons.
 */
static inline int
bxe_has_rx_work(struct bxe_fastpath *fp)
{
    uint16_t rx_cq_cons_sb;

    mb(); /* status block fields can change */
    rx_cq_cons_sb = le16toh(*fp->rx_cq_cons_sb);
    /*
     * Bump the index by one when all RCQ_MAX bits are set.
     * NOTE(review): presumably this skips the page's "next" element --
     * confirm against the RCQ layout.
     */
    if ((rx_cq_cons_sb & RCQ_MAX) == RCQ_MAX)
        rx_cq_cons_sb++;
    return (fp->rx_cq_cons != rx_cq_cons_sb);
}

/*
 * Handle a slowpath (ramrod) completion found on an RX completion queue.
 *
 * The connection id and command are decoded from the CQE, the command is
 * mapped to the matching ecore queue command, and the queue object's
 * complete_cmd() callback is invoked. On success sc->cq_spq_left is
 * incremented. Unknown commands and failed completions return early without
 * touching cq_spq_left.
 */
static void
bxe_sp_event(struct bxe_softc    *sc,
             struct bxe_fastpath *fp,
             union eth_rx_cqe    *rr_cqe)
{
    int cid = SW_CID(rr_cqe->ramrod_cqe.conn_and_cmd_data);
    int command = CQE_CMD(rr_cqe->ramrod_cqe.conn_and_cmd_data);
    enum ecore_queue_cmd drv_cmd = ECORE_Q_CMD_MAX;
    struct ecore_queue_sp_obj *q_obj = &BXE_SP_OBJ(sc, fp).q_obj;

    BLOGD(sc, DBG_SP, "fp=%d cid=%d got ramrod #%d state is %x type is %d\n",
          fp->index, cid, command, sc->state, rr_cqe->ramrod_cqe.ramrod_type);

#if 0
    /*
     * If cid is within VF range, replace the slowpath object with the
     * one corresponding to this VF
     */
    if ((cid >= BXE_FIRST_VF_CID) && (cid < BXE_FIRST_VF_CID + BXE_VF_CIDS)) {
        bxe_iov_set_queue_sp_obj(sc, cid, &q_obj);
    }
#endif

    /* translate the firmware ramrod id into the ecore queue command */
    switch (command) {
    case (RAMROD_CMD_ID_ETH_CLIENT_UPDATE):
        BLOGD(sc, DBG_SP, "got UPDATE ramrod. CID %d\n", cid);
        drv_cmd = ECORE_Q_CMD_UPDATE;
        break;

    case (RAMROD_CMD_ID_ETH_CLIENT_SETUP):
        BLOGD(sc, DBG_SP, "got MULTI[%d] setup ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_SETUP;
        break;

    case (RAMROD_CMD_ID_ETH_TX_QUEUE_SETUP):
        BLOGD(sc, DBG_SP, "got MULTI[%d] tx-only setup ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_SETUP_TX_ONLY;
        break;

    case (RAMROD_CMD_ID_ETH_HALT):
        BLOGD(sc, DBG_SP, "got MULTI[%d] halt ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_HALT;
        break;

    case (RAMROD_CMD_ID_ETH_TERMINATE):
        BLOGD(sc, DBG_SP, "got MULTI[%d] teminate ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_TERMINATE;
        break;

    case (RAMROD_CMD_ID_ETH_EMPTY):
        BLOGD(sc, DBG_SP, "got MULTI[%d] empty ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_EMPTY;
        break;

    default:
        BLOGD(sc, DBG_SP, "ERROR: unexpected MC reply (%d) on fp[%d]\n",
              command, fp->index);
        return;
    }

    if ((drv_cmd != ECORE_Q_CMD_MAX) &&
        q_obj->complete_cmd(sc, q_obj, drv_cmd)) {
        /*
         * q_obj->complete_cmd() failure means that this was
         * an unexpected completion.
         *
         * In this case we don't want to increase the sc->spq_left
         * because apparently we haven't sent this command the first
         * place.
         */
        // bxe_panic(sc, ("Unexpected SP completion\n"));
        return;
    }

#if 0
    /* SRIOV: reschedule any 'in_progress' operations */
    bxe_iov_sp_event(sc, cid, TRUE);
#endif

    /* the completion frees one slot; account for it */
    atomic_add_acq_long(&sc->cq_spq_left, 1);

    BLOGD(sc, DBG_SP, "sc->cq_spq_left 0x%lx\n",
          atomic_load_acq_long(&sc->cq_spq_left));

#if 0
    if ((drv_cmd == ECORE_Q_CMD_UPDATE) && (IS_FCOE_FP(fp)) &&
        (!!bxe_test_bit(ECORE_AFEX_FCOE_Q_UPDATE_PENDING, &sc->sp_state))) {
        /*
         * If Queue update ramrod is completed for last Queue in AFEX VIF set
         * flow, then ACK MCP at the end. Mark pending ACK to MCP bit to
         * prevent case that both bits are cleared. At the end of load/unload
         * driver checks that sp_state is cleared and this order prevents
         * races.
*/
        bxe_set_bit(ECORE_AFEX_PENDING_VIFSET_MCP_ACK, &sc->sp_state);
        wmb();
        bxe_clear_bit(ECORE_AFEX_FCOE_Q_UPDATE_PENDING, &sc->sp_state);

        /* schedule the sp task as MCP ack is required */
        bxe_schedule_sp_task(sc);
    }
#endif
}

/*
 * The current mbuf is part of an aggregation. Move the mbuf into the TPA
 * aggregation queue, put an empty mbuf back onto the receive chain, and mark
 * the current aggregation queue as in-progress.
 *
 * 'queue' selects the entry of fp->rx_tpa_info[]; 'cons' and 'prod' are the
 * RX BD consumer/producer indices for this completion; 'cqe' supplies the
 * placement offset, parsing flags, VLAN tag and on-BD length that are cached
 * in the TPA info for use when the aggregation stops.
 */
static void
bxe_tpa_start(struct bxe_softc            *sc,
              struct bxe_fastpath         *fp,
              uint16_t                    queue,
              uint16_t                    cons,
              uint16_t                    prod,
              struct eth_fast_path_rx_cqe *cqe)
{
    struct bxe_sw_rx_bd tmp_bd;
    struct bxe_sw_rx_bd *rx_buf;
    struct eth_rx_bd *rx_bd;
    int max_agg_queues;
    struct bxe_sw_tpa_info *tpa_info = &fp->rx_tpa_info[queue];
    uint16_t index;

    BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA START "
                       "cons=%d prod=%d\n",
          fp->index, queue, cons, prod);

    max_agg_queues = MAX_AGG_QS(sc);

    KASSERT((queue < max_agg_queues),
            ("fp[%02d] invalid aggr queue (%d >= %d)!",
             fp->index, queue, max_agg_queues));

    KASSERT((tpa_info->state == BXE_TPA_STATE_STOP),
            ("fp[%02d].tpa[%02d] starting aggr on queue not stopped!",
             fp->index, queue));

    /* copy the existing mbuf and mapping from the TPA pool */
    tmp_bd = tpa_info->bd;

    if (tmp_bd.m == NULL) {
        BLOGE(sc, "fp[%02d].tpa[%02d] mbuf not allocated!\n",
              fp->index, queue);
        /* XXX Error handling? */
        return;
    }

    /* change the TPA queue to the start state */
    tpa_info->state            = BXE_TPA_STATE_START;
    tpa_info->placement_offset = cqe->placement_offset;
    tpa_info->parsing_flags    = le16toh(cqe->pars_flags.flags);
    tpa_info->vlan_tag         = le16toh(cqe->vlan_tag);
    tpa_info->len_on_bd        = le16toh(cqe->len_on_bd);

    fp->rx_tpa_queue_used |= (1 << queue);

    /*
     * If all the buffer descriptors are filled with mbufs then fill in
     * the current consumer index with a new BD. Else if a maximum Rx
     * buffer limit is imposed then fill in the next producer index.
     */
    index = (sc->max_rx_bufs != RX_BD_USABLE) ?
                prod : cons;

    /* move the received mbuf and mapping to TPA pool */
    tpa_info->bd = fp->rx_mbuf_chain[cons];

    /* release any existing RX BD mbuf mappings */
    if (cons != index) {
        rx_buf = &fp->rx_mbuf_chain[cons];

        if (rx_buf->m_map != NULL) {
            bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map,
                            BUS_DMASYNC_POSTREAD);
            bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map);
        }

        /*
         * We get here when the maximum number of rx buffers is less than
         * RX_BD_USABLE. The mbuf is already saved above so it's OK to NULL
         * it out here without concern of a memory leak.
         */
        fp->rx_mbuf_chain[cons].m = NULL;
    }

    /* update the Rx SW BD with the mbuf info from the TPA pool */
    fp->rx_mbuf_chain[index] = tmp_bd;

    /* update the Rx BD with the empty mbuf phys address from the TPA pool */
    rx_bd = &fp->rx_chain[index];
    rx_bd->addr_hi = htole32(U64_HI(tpa_info->seg.ds_addr));
    rx_bd->addr_lo = htole32(U64_LO(tpa_info->seg.ds_addr));
}

/*
 * When a TPA aggregation is completed, loop through the individual mbufs
 * of the aggregation, combining them into a single mbuf which will be sent
 * up the stack. Refill all freed SGEs with mbufs as we go along.
 *
 * 'm' is the head mbuf the fragments are appended to and 'pages' is the
 * number of SGE pages covered by the aggregation. Returns 0 on success,
 * EINVAL when 'pages' exceeds 8 * PAGES_PER_SGE, or the error returned by
 * bxe_alloc_rx_sge_mbuf() when a replacement SGE mbuf cannot be allocated
 * (in which case the remaining SGEs are left untouched).
 */
static int
bxe_fill_frag_mbuf(struct bxe_softc          *sc,
                   struct bxe_fastpath       *fp,
                   struct bxe_sw_tpa_info    *tpa_info,
                   uint16_t                  queue,
                   uint16_t                  pages,
                   struct mbuf               *m,
                   struct eth_end_agg_rx_cqe *cqe,
                   uint16_t                  cqe_idx)
{
    struct mbuf *m_frag;
    uint32_t frag_len, frag_size, i;
    uint16_t sge_idx;
    int rc = 0;
    int j;

    /* bytes that live in SGEs rather than in the first BD */
    frag_size = le16toh(cqe->pkt_len) - tpa_info->len_on_bd;

    BLOGD(sc, DBG_LRO,
          "fp[%02d].tpa[%02d] TPA fill len_on_bd=%d frag_size=%d pages=%d\n",
          fp->index, queue, tpa_info->len_on_bd, frag_size, pages);

    /* make sure the aggregated frame is not too big to handle */
    if (pages > 8 * PAGES_PER_SGE) {
        BLOGE(sc, "fp[%02d].sge[0x%04x] has too many pages (%d)! "
                  "pkt_len=%d len_on_bd=%d frag_size=%d\n",
              fp->index, cqe_idx, pages, le16toh(cqe->pkt_len),
              tpa_info->len_on_bd, frag_size);
        bxe_panic(sc, ("sge page count error\n"));
        return (EINVAL);
    }

    /*
     * Scan through the scatter gather list pulling individual mbufs into a
     * single mbuf for the host stack.
     */
    for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
        sge_idx = RX_SGE(le16toh(cqe->sgl_or_raw_data.sgl[j]));

        /*
         * Firmware gives the indices of the SGE as if the ring is an array
         * (meaning that the "next" element will consume 2 indices).
         */
        frag_len = min(frag_size, (uint32_t)(SGE_PAGES));

        BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill i=%d j=%d "
                           "sge_idx=%d frag_size=%d frag_len=%d\n",
              fp->index, queue, i, j, sge_idx, frag_size, frag_len);

        /* remember the filled mbuf before its slot is refilled below */
        m_frag = fp->rx_sge_mbuf_chain[sge_idx].m;

        /* allocate a new mbuf for the SGE */
        rc = bxe_alloc_rx_sge_mbuf(fp, sge_idx);
        if (rc) {
            /* Leave all remaining SGEs in the ring! */
            return (rc);
        }

        /* update the fragment length */
        m_frag->m_len = frag_len;

        /* concatenate the fragment to the head mbuf */
        m_cat(m, m_frag);
        fp->eth_q_stats.mbuf_alloc_sge--;

        /* update the TPA mbuf size and remaining fragment size */
        m->m_pkthdr.len += frag_len;
        frag_size -= frag_len;
    }

    BLOGD(sc, DBG_LRO,
          "fp[%02d].tpa[%02d] TPA fill done frag_size=%d\n",
          fp->index, queue, frag_size);

    return (rc);
}

/*
 * Clear the mask bits of the last two SGE indices of every SGE page
 * (RX_SGE_NUM_PAGES pages of RX_SGE_TOTAL_PER_PAGE entries each).
 */
static inline void
bxe_clear_sge_mask_next_elems(struct bxe_fastpath *fp)
{
    int i, j;

    for (i = 1; i <= RX_SGE_NUM_PAGES; i++) {
        /* last index of page i; the inner loop also clears the one before */
        int idx = RX_SGE_TOTAL_PER_PAGE * i - 1;

        for (j = 0; j < 2; j++) {
            BIT_VEC64_CLEAR_BIT(fp->sge_mask, idx);
            idx--;
        }
    }
}

/*
 * Reset the SGE bit mask: set every bit, then clear the bits belonging to
 * the page-end entries.
 */
static inline void
bxe_init_sge_ring_bit_mask(struct bxe_fastpath *fp)
{
    /* set the mask to all 1's, it's faster to compare to 0 than to 0xf's */
    memset(fp->sge_mask, 0xff, sizeof(fp->sge_mask));

    /*
     * Clear the last two indices of each page. These are the indices that
     * correspond to the "next" element, hence will never be indicated and
     * should be removed from the calculations.
*/
    bxe_clear_sge_mask_next_elems(fp);
}

/*
 * Advance fp->last_max_sge to 'idx' when 'idx' is ahead of it, using a
 * signed 16-bit difference so that index wrap-around is handled.
 */
static inline void
bxe_update_last_max_sge(struct bxe_fastpath *fp,
                        uint16_t            idx)
{
    uint16_t last_max = fp->last_max_sge;

    if (SUB_S16(idx, last_max) > 0) {
        fp->last_max_sge = idx;
    }
}

/*
 * Account for the SGEs consumed by a completed aggregation and advance the
 * SGE producer.
 *
 * The mask bit of each of the 'sge_len' SGL entries in 'cqe' is cleared, the
 * highest SGE index seen is recorded, and then every mask element that has
 * become entirely zero (starting at the element holding the producer) is
 * re-armed to all ones while the producer is advanced by one element's worth
 * of entries. Does nothing when 'sge_len' is 0.
 */
static inline void
bxe_update_sge_prod(struct bxe_softc          *sc,
                    struct bxe_fastpath       *fp,
                    uint16_t                  sge_len,
                    struct eth_end_agg_rx_cqe *cqe)
{
    uint16_t last_max, last_elem, first_elem;
    uint16_t delta = 0;
    uint16_t i;

    if (!sge_len) {
        return;
    }

    /* first mark all used pages */
    for (i = 0; i < sge_len; i++) {
        BIT_VEC64_CLEAR_BIT(fp->sge_mask,
                            RX_SGE(le16toh(cqe->sgl_or_raw_data.sgl[i])));
    }

    BLOGD(sc, DBG_LRO,
          "fp[%02d] fp_cqe->sgl[%d] = %d\n",
          fp->index, sge_len - 1,
          le16toh(cqe->sgl_or_raw_data.sgl[sge_len - 1]));

    /* assume that the last SGE index is the biggest */
    bxe_update_last_max_sge(fp,
                            le16toh(cqe->sgl_or_raw_data.sgl[sge_len - 1]));

    /* convert SGE indices into indices of 64-bit mask elements */
    last_max = RX_SGE(fp->last_max_sge);
    last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
    first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;

    /* if ring is not full */
    if (last_elem + 1 != first_elem) {
        last_elem++;
    }

    /* now update the prod */
    for (i = first_elem; i != last_elem; i = RX_SGE_NEXT_MASK_ELEM(i)) {
        /* stop at the first element that still has unconsumed entries */
        if (__predict_true(fp->sge_mask[i])) {
            break;
        }

        fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
        delta += BIT_VEC64_ELEM_SZ;
    }

    if (delta > 0) {
        fp->rx_sge_prod += delta;
        /* clear page-end entries */
        bxe_clear_sge_mask_next_elems(fp);
    }

    BLOGD(sc, DBG_LRO,
          "fp[%02d] fp->last_max_sge=%d fp->rx_sge_prod=%d\n",
          fp->index, fp->last_max_sge, fp->rx_sge_prod);
}

/*
 * The aggregation on the current TPA queue has completed. Pull the individual
 * mbuf fragments together into a single mbuf, perform all necessary checksum
 * calculations, and send the resulting mbuf to the stack.
 *
 * If no replacement TPA mbuf can be allocated the frame is dropped and the
 * existing mbuf stays with the queue. In every case the queue is returned to
 * BXE_TPA_STATE_STOP and its bit is cleared from fp->rx_tpa_queue_used.
 */
static void
bxe_tpa_stop(struct bxe_softc          *sc,
             struct bxe_fastpath       *fp,
             struct bxe_sw_tpa_info    *tpa_info,
             uint16_t                  queue,
             uint16_t                  pages,
             struct eth_end_agg_rx_cqe *cqe,
             uint16_t                  cqe_idx)
{
    struct ifnet *ifp = sc->ifnet;
    struct mbuf *m;
    int rc = 0;

    BLOGD(sc, DBG_LRO,
          "fp[%02d].tpa[%02d] pad=%d pkt_len=%d pages=%d vlan=%d\n",
          fp->index, queue, tpa_info->placement_offset,
          le16toh(cqe->pkt_len), pages, tpa_info->vlan_tag);

    m = tpa_info->bd.m;

    /* allocate a replacement before modifying existing mbuf */
    rc = bxe_alloc_rx_tpa_mbuf(fp, queue);
    if (rc) {
        /* drop the frame and log an error */
        fp->eth_q_stats.rx_soft_errors++;
        goto bxe_tpa_stop_exit;
    }

    /* we have a replacement, fixup the current mbuf */
    m_adj(m, tpa_info->placement_offset);
    m->m_pkthdr.len = m->m_len = tpa_info->len_on_bd;

    /* mark the checksums valid (taken care of by the firmware) */
    fp->eth_q_stats.rx_ofld_frames_csum_ip++;
    fp->eth_q_stats.rx_ofld_frames_csum_tcp_udp++;
    m->m_pkthdr.csum_data = 0xffff;
    m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
                               CSUM_IP_VALID |
                               CSUM_DATA_VALID |
                               CSUM_PSEUDO_HDR);

    /* aggregate all of the SGEs into a single mbuf */
    rc = bxe_fill_frag_mbuf(sc, fp, tpa_info, queue, pages, m, cqe, cqe_idx);
    if (rc) {
        /* drop the packet and log an error */
        fp->eth_q_stats.rx_soft_errors++;
        m_freem(m);
    } else {
        if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN) {
            m->m_pkthdr.ether_vtag = tpa_info->vlan_tag;
            m->m_flags |= M_VLANTAG;
        }

        /* assign packet to this interface */
        m->m_pkthdr.rcvif = ifp;

#if __FreeBSD_version >= 800000
        /* specify what RSS queue was used for this flow */
        m->m_pkthdr.flowid = fp->index;
        m->m_flags |= M_FLOWID;
#endif

        ifp->if_ipackets++;
        fp->eth_q_stats.rx_tpa_pkts++;

        /* pass the frame to the stack */
        (*ifp->if_input)(ifp, m);
    }

    /* we passed an mbuf up the stack or dropped the frame */
    fp->eth_q_stats.mbuf_alloc_tpa--;

bxe_tpa_stop_exit:

    fp->rx_tpa_info[queue].state = BXE_TPA_STATE_STOP;
    fp->rx_tpa_queue_used &= ~(1 << queue);
}

/*
 * Process RX completions for one fastpath, under the fastpath RX lock.
 *
 * Walks the completion queue from the driver's consumer up to the consumer
 * index read from the status block, dispatching slowpath events, TPA
 * start/stop events and regular packets, and stops early once sc->rx_budget
 * packets have been passed up. Returns nonzero when completions remain
 * unprocessed.
 */
static uint8_t
bxe_rxeof(struct bxe_softc    *sc,
          struct bxe_fastpath *fp)
{
    struct ifnet *ifp = sc->ifnet;
    uint16_t bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
    uint16_t hw_cq_cons, sw_cq_cons, sw_cq_prod;
    int rx_pkts = 0;
    int rc;

    BXE_FP_RX_LOCK(fp);

    /* CQ "next element" is of the size of the regular element */
    hw_cq_cons = le16toh(*fp->rx_cq_cons_sb);
    if ((hw_cq_cons & RCQ_USABLE_PER_PAGE) == RCQ_USABLE_PER_PAGE) {
        hw_cq_cons++;
    }

    /* work on local copies; they are written back after the loop */
    bd_cons = fp->rx_bd_cons;
    bd_prod = fp->rx_bd_prod;
    bd_prod_fw = bd_prod;
    sw_cq_cons = fp->rx_cq_cons;
    sw_cq_prod = fp->rx_cq_prod;

    /*
     * Memory barrier necessary as speculative reads of the rx
     * buffer can be ahead of the index in the status block
     */
    rmb();

    BLOGD(sc, DBG_RX,
          "fp[%02d] Rx START hw_cq_cons=%u sw_cq_cons=%u\n",
          fp->index, hw_cq_cons, sw_cq_cons);

    while (sw_cq_cons != hw_cq_cons) {
        struct bxe_sw_rx_bd *rx_buf = NULL;
        union eth_rx_cqe *cqe;
        struct eth_fast_path_rx_cqe *cqe_fp;
        uint8_t cqe_fp_flags;
        enum eth_rx_cqe_type cqe_fp_type;
        uint16_t len, pad;
        struct mbuf *m = NULL;

        /* reduce the free-running indices to ring positions */
        comp_ring_cons = RCQ(sw_cq_cons);
        bd_prod = RX_BD(bd_prod);
        bd_cons = RX_BD(bd_cons);

        cqe          = &fp->rcq_chain[comp_ring_cons];
        cqe_fp       = &cqe->fast_path_cqe;
        cqe_fp_flags = cqe_fp->type_error_flags;
        cqe_fp_type  = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;

        BLOGD(sc, DBG_RX,
              "fp[%02d] Rx hw_cq_cons=%d hw_sw_cons=%d "
              "BD prod=%d cons=%d CQE type=0x%x err=0x%x "
              "status=0x%x rss_hash=0x%x vlan=0x%x len=%u\n",
              fp->index,
              hw_cq_cons,
              sw_cq_cons,
              bd_prod,
              bd_cons,
              CQE_TYPE(cqe_fp_flags),
              cqe_fp_flags,
              cqe_fp->status_flags,
              le32toh(cqe_fp->rss_hash_result),
              le16toh(cqe_fp->vlan_tag),
              le16toh(cqe_fp->pkt_len_or_gro_seg_len));

        /* is this a slowpath msg?
*/
        if (__predict_false(CQE_TYPE_SLOW(cqe_fp_type))) {
            /* slowpath CQEs consume no RX BD, so skip the BD advance */
            bxe_sp_event(sc, fp, cqe);
            goto next_cqe;
        }

        rx_buf = &fp->rx_mbuf_chain[bd_cons];

        if (!CQE_TYPE_FAST(cqe_fp_type)) {
            struct bxe_sw_tpa_info *tpa_info;
            uint16_t frag_size, pages;
            uint8_t queue;

#if 0
            /* sanity check */
            if (!fp->tpa_enable &&
                (CQE_TYPE_START(cqe_fp_type) || CQE_TYPE_STOP(cqe_fp_type))) {
                BLOGE(sc, "START/STOP packet while !tpa_enable type (0x%x)\n",
                      CQE_TYPE(cqe_fp_type));
            }
#endif

            if (CQE_TYPE_START(cqe_fp_type)) {
                bxe_tpa_start(sc, fp, cqe_fp->queue_index,
                              bd_cons, bd_prod, cqe_fp);
                m = NULL; /* packet not ready yet */
                goto next_rx;
            }

            KASSERT(CQE_TYPE_STOP(cqe_fp_type),
                    ("CQE type is not STOP! (0x%x)\n", cqe_fp_type));

            queue = cqe->end_agg_cqe.queue_index;
            tpa_info = &fp->rx_tpa_info[queue];

            BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA STOP\n",
                  fp->index, queue);

            /* number of SGE pages holding the data beyond the first BD */
            frag_size = (le16toh(cqe->end_agg_cqe.pkt_len) -
                         tpa_info->len_on_bd);
            pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT;

            bxe_tpa_stop(sc, fp, tpa_info, queue, pages,
                         &cqe->end_agg_cqe, comp_ring_cons);

            bxe_update_sge_prod(sc, fp, pages, &cqe->end_agg_cqe);

            goto next_cqe;
        }

        /* non TPA */

        /* is this an error packet? */
        if (__predict_false(cqe_fp_flags &
                            ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG)) {
            BLOGE(sc, "flags 0x%x rx packet %u\n", cqe_fp_flags, sw_cq_cons);
            fp->eth_q_stats.rx_soft_errors++;
            goto next_rx;
        }

        len = le16toh(cqe_fp->pkt_len_or_gro_seg_len);
        pad = cqe_fp->placement_offset;

        m = rx_buf->m;

        if (__predict_false(m == NULL)) {
            BLOGE(sc, "No mbuf in rx chain descriptor %d for fp[%02d]\n",
                  bd_cons, fp->index);
            goto next_rx;
        }

        /* XXX double copy if packet length under a threshold */

        /*
         * If all the buffer descriptors are filled with mbufs then fill in
         * the current consumer index with a new BD. Else if a maximum Rx
         * buffer limit is imposed then fill in the next producer index.
         */
        rc = bxe_alloc_rx_bd_mbuf(fp, bd_cons,
                                  (sc->max_rx_bufs != RX_BD_USABLE) ?
                                      bd_prod : bd_cons);
        if (rc != 0) {
            BLOGE(sc, "mbuf alloc fail for fp[%02d] rx chain (%d)\n",
                  fp->index, rc);
            fp->eth_q_stats.rx_soft_errors++;

            if (sc->max_rx_bufs != RX_BD_USABLE) {
                /* copy this consumer index to the producer index */
                memcpy(&fp->rx_mbuf_chain[bd_prod], rx_buf,
                       sizeof(struct bxe_sw_rx_bd));
                memset(rx_buf, 0, sizeof(struct bxe_sw_rx_bd));
            }

            /*
             * NOTE(review): 'm' is still non-NULL on this path, so the code
             * at next_rx passes it to if_input -- confirm this is intended.
             */
            goto next_rx;
        }

        /* current mbuf was detached from the bd */
        fp->eth_q_stats.mbuf_alloc_rx--;

        /* we allocated a replacement mbuf, fixup the current one */
        m_adj(m, pad);
        m->m_pkthdr.len = m->m_len = len;

        /* assign packet to this interface */
        m->m_pkthdr.rcvif = ifp;

        /* assume no hardware checksum has completed */
        m->m_pkthdr.csum_flags = 0;

        /* validate checksum if offload enabled */
        if (ifp->if_capenable & IFCAP_RXCSUM) {
            /* check for a valid IP frame */
            if (!(cqe->fast_path_cqe.status_flags &
                  ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG)) {
                m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
                if (__predict_false(cqe_fp_flags &
                                    ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG)) {
                    fp->eth_q_stats.rx_hw_csum_errors++;
                } else {
                    fp->eth_q_stats.rx_ofld_frames_csum_ip++;
                    m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
                }
            }

            /* check for a valid TCP/UDP frame */
            if (!(cqe->fast_path_cqe.status_flags &
                  ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) {
                if (__predict_false(cqe_fp_flags &
                                    ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) {
                    fp->eth_q_stats.rx_hw_csum_errors++;
                } else {
                    fp->eth_q_stats.rx_ofld_frames_csum_tcp_udp++;
                    m->m_pkthdr.csum_data = 0xFFFF;
                    m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID |
                                               CSUM_PSEUDO_HDR);
                }
            }
        }

        /* if there is a VLAN tag then flag that info */
        /*
         * NOTE(review): unlike bxe_tpa_start(), these CQE fields are read
         * here without le16toh() -- confirm for big-endian hosts.
         */
        if (cqe->fast_path_cqe.pars_flags.flags & PARSING_FLAGS_VLAN) {
            m->m_pkthdr.ether_vtag = cqe->fast_path_cqe.vlan_tag;
            m->m_flags |= M_VLANTAG;
        }

#if __FreeBSD_version >= 800000
        /* specify what RSS queue was used for this flow */
        m->m_pkthdr.flowid = fp->index;
        m->m_flags |= M_FLOWID;
#endif

next_rx:

        /* this completion consumed one RX BD; advance all BD indices */
        bd_cons    = RX_BD_NEXT(bd_cons);
        bd_prod    = RX_BD_NEXT(bd_prod);
        bd_prod_fw = RX_BD_NEXT(bd_prod_fw);

        /* pass the frame to the stack */
        if (__predict_true(m != NULL)) {
            ifp->if_ipackets++;
            rx_pkts++;
            (*ifp->if_input)(ifp, m);
        }

next_cqe:

        sw_cq_prod = RCQ_NEXT(sw_cq_prod);
        sw_cq_cons = RCQ_NEXT(sw_cq_cons);

        /* limit spinning on the queue */
        if (rx_pkts == sc->rx_budget) {
            fp->eth_q_stats.rx_budget_reached++;
            break;
        }
    } /* while work to do */

    /* publish the updated indices back to the fastpath */
    fp->rx_bd_cons = bd_cons;
    fp->rx_bd_prod = bd_prod_fw;
    fp->rx_cq_cons = sw_cq_cons;
    fp->rx_cq_prod = sw_cq_prod;

    /* Update producers */
    bxe_update_rx_prod(sc, fp, bd_prod_fw, sw_cq_prod, fp->rx_sge_prod);

    fp->eth_q_stats.rx_pkts += rx_pkts;
    fp->eth_q_stats.rx_calls++;

    BXE_FP_RX_UNLOCK(fp);

    return (sw_cq_cons != hw_cq_cons);
}

/*
 * Release the mbuf of the completed TX packet stored at software index
 * 'idx': unload its DMA map, free the mbuf (or count it as lost when the
 * slot holds none) and reset the slot. Returns the BD index just past the
 * packet's descriptors (first_bd + nbd - 1), which the caller uses as the
 * new BD consumer.
 */
static uint16_t
bxe_free_tx_pkt(struct bxe_softc    *sc,
                struct bxe_fastpath *fp,
                uint16_t            idx)
{
    struct bxe_sw_tx_bd *tx_buf = &fp->tx_mbuf_chain[idx];
    struct eth_tx_start_bd *tx_start_bd;
    uint16_t bd_idx = TX_BD(tx_buf->first_bd);
    uint16_t new_cons;
    int nbd;

    /* unmap the mbuf from non-paged memory */
    bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map);

    tx_start_bd = &fp->tx_chain[bd_idx].start_bd;
    nbd = le16toh(tx_start_bd->nbd) - 1;

#if 0
    if ((nbd - 1) > (MAX_MBUF_FRAGS + 2)) {
        bxe_panic(sc, ("BAD nbd!\n"));
    }
#endif

    new_cons = (tx_buf->first_bd + nbd);

#if 0
    struct eth_tx_bd *tx_data_bd;

    /*
     * The following code doesn't do anything but is left here
     * for clarity on what the new value of new_cons skipped.
*/

    /* get the next bd */
    bd_idx = TX_BD(TX_BD_NEXT(bd_idx));

    /* skip the parse bd */
    --nbd;
    bd_idx = TX_BD(TX_BD_NEXT(bd_idx));

    /* skip the TSO split header bd since they have no mapping */
    if (tx_buf->flags & BXE_TSO_SPLIT_BD) {
        --nbd;
        bd_idx = TX_BD(TX_BD_NEXT(bd_idx));
    }

    /* now free frags */
    while (nbd > 0) {
        tx_data_bd = &fp->tx_chain[bd_idx].reg_bd;
        if (--nbd) {
            bd_idx = TX_BD(TX_BD_NEXT(bd_idx));
        }
    }
#endif

    /* free the mbuf */
    if (__predict_true(tx_buf->m != NULL)) {
        m_freem(tx_buf->m);
        fp->eth_q_stats.mbuf_alloc_tx--;
    } else {
        fp->eth_q_stats.tx_chain_lost_mbuf++;
    }

    tx_buf->m = NULL;
    tx_buf->first_bd = 0;

    return (new_cons);
}

/*
 * transmit timeout watchdog
 *
 * Decrements the fastpath watchdog timer under the TX lock. Returns 0 while
 * the timer is disarmed (0) or has not yet expired. When the decrement
 * reaches 0 a chip re-initialization is queued on sc->chip_tq and -1 is
 * returned.
 */
static int
bxe_watchdog(struct bxe_softc    *sc,
             struct bxe_fastpath *fp)
{
    BXE_FP_TX_LOCK(fp);

    if ((fp->watchdog_timer == 0) || (--fp->watchdog_timer)) {
        BXE_FP_TX_UNLOCK(fp);
        return (0);
    }

    BLOGE(sc, "TX watchdog timeout on fp[%02d], resetting!\n", fp->index);

    BXE_FP_TX_UNLOCK(fp);

    atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_REINIT);
    taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task);

    return (-1);
}

/*
 * processes transmit completions
 *
 * Must be called with the fastpath TX lock held. Frees every packet between
 * the driver's packet consumer and the consumer reported in the status
 * block, updates IFF_DRV_OACTIVE according to the free BD count, and
 * re-arms or clears the watchdog. Returns TRUE when transmits are still
 * pending, FALSE otherwise.
 */
static uint8_t
bxe_txeof(struct bxe_softc    *sc,
          struct bxe_fastpath *fp)
{
    struct ifnet *ifp = sc->ifnet;
    uint16_t bd_cons, hw_cons, sw_cons, pkt_cons;
    uint16_t tx_bd_avail;

    BXE_FP_TX_LOCK_ASSERT(fp);

    bd_cons = fp->tx_bd_cons;
    hw_cons = le16toh(*fp->tx_cons_sb);
    sw_cons = fp->tx_pkt_cons;

    while (sw_cons != hw_cons) {
        pkt_cons = TX_BD(sw_cons);

        BLOGD(sc, DBG_TX,
              "TX: fp[%d]: hw_cons=%u sw_cons=%u pkt_cons=%u\n",
              fp->index, hw_cons, sw_cons, pkt_cons);

        bd_cons = bxe_free_tx_pkt(sc, fp, pkt_cons);

        sw_cons++;
    }

    fp->tx_pkt_cons = sw_cons;
    fp->tx_bd_cons  = bd_cons;

    BLOGD(sc, DBG_TX,
          "TX done: fp[%d]: hw_cons=%u sw_cons=%u sw_prod=%u\n",
          fp->index, hw_cons, fp->tx_pkt_cons, fp->tx_pkt_prod);

    mb();

    tx_bd_avail = bxe_tx_avail(sc, fp);

    /* keep the queue marked busy until enough BDs have been reclaimed */
    if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) {
        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
    } else {
        ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
    }

    if (fp->tx_pkt_prod != fp->tx_pkt_cons) {
        /* reset the watchdog timer if there are pending transmits */
        fp->watchdog_timer = BXE_TX_TIMEOUT;
        return (TRUE);
    } else {
        /* clear watchdog when there are no pending transmits */
        fp->watchdog_timer = 0;
        return (FALSE);
    }
}

/*
 * Reap TX completions on every queue until none has outstanding work.
 * Each queue is polled with a 1000 us delay between attempts; when the
 * retry counter (initially 1000) runs out the failure is logged,
 * bxe_panic() is invoked and the function returns.
 */
static void
bxe_drain_tx_queues(struct bxe_softc *sc)
{
    struct bxe_fastpath *fp;
    int i, count;

    /* wait until all TX fastpath tasks have completed */
    for (i = 0; i < sc->num_queues; i++) {
        fp = &sc->fp[i];

        count = 1000;

        while (bxe_has_tx_work(fp)) {

            BXE_FP_TX_LOCK(fp);
            bxe_txeof(sc, fp);
            BXE_FP_TX_UNLOCK(fp);

            if (count == 0) {
                BLOGE(sc, "Timeout waiting for fp[%d] "
                          "transmits to complete!\n", i);
                bxe_panic(sc, ("tx drain failure\n"));
                return;
            }

            count--;
            DELAY(1000);
            rmb();
        }
    }

    return;
}

/*
 * Delete every MAC of type 'mac_type' from 'mac_obj' through its
 * delete_all() callback. When 'wait_for_comp' is nonzero RAMROD_COMP_WAIT is
 * set in the ramrod flags. Returns the callback's result; negative values
 * are logged as failures.
 */
static int
bxe_del_all_macs(struct bxe_softc          *sc,
                 struct ecore_vlan_mac_obj *mac_obj,
                 int                       mac_type,
                 uint8_t                   wait_for_comp)
{
    unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
    int rc;

    /* wait for completion of requested */
    if (wait_for_comp) {
        bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
    }

    /* Set the mac type of addresses we want to clear */
    bxe_set_bit(mac_type, &vlan_mac_flags);

    rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
    if (rc < 0) {
        BLOGE(sc, "Failed to delete MACs (%d)\n", rc);
    }

    return (rc);
}

/*
 * Translate an rx_mode value into the RX and TX accept-flag bit sets.
 * Both outputs are zeroed first. Returns 0 on success or -1 for an unknown
 * rx_mode.
 */
static int
bxe_fill_accept_flags(struct bxe_softc *sc,
                      uint32_t         rx_mode,
                      unsigned long    *rx_accept_flags,
                      unsigned long    *tx_accept_flags)
{
    /* Clear the flags first */
    *rx_accept_flags = 0;
    *tx_accept_flags = 0;

    switch (rx_mode) {
    case BXE_RX_MODE_NONE:
        /*
         * 'drop all' supersedes any accept flags that may have been
         * passed to the function.
*/
        break;

    case BXE_RX_MODE_NORMAL:
        bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);

        /* internal switching mode */
        bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);

        break;

    case BXE_RX_MODE_ALLMULTI:
        bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);

        /* internal switching mode */
        bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);

        break;

    case BXE_RX_MODE_PROMISC:
        /*
         * According to definition of SI mode, iface in promisc mode
         * should receive matched and unmatched (in resolution of port)
         * unicast packets.
         */
        bxe_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);

        /* internal switching mode */
        bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);

        /* the TX unicast flag depends on the multi-function SI mode */
        if (IS_MF_SI(sc)) {
            bxe_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
        } else {
            bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
        }

        break;

    default:
        BLOGE(sc, "Unknown rx_mode (%d)\n", rx_mode);
        return (-1);
    }

    /* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
    if (rx_mode != BXE_RX_MODE_NONE) {
        bxe_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
    }

    return (0);
}

/*
 * Build an rx-mode ramrod request for client 'cl_id' from the supplied flag
 * sets and submit it with ecore_config_rx_mode(). The
 * ECORE_FILTER_RX_MODE_PENDING bit is set in sc->sp_state before the request
 * is sent. Returns 0 on success or the negative ecore error.
 */
static int
bxe_set_q_rx_mode(struct bxe_softc *sc,
                  uint8_t          cl_id,
                  unsigned long    rx_mode_flags,
                  unsigned long    rx_accept_flags,
                  unsigned long    tx_accept_flags,
                  unsigned long    ramrod_flags)
{
    struct ecore_rx_mode_ramrod_params ramrod_param;
    int rc;
memset(&ramrod_param, 0, sizeof(ramrod_param));

    /* Prepare ramrod parameters */
    ramrod_param.cid = 0;
    ramrod_param.cl_id = cl_id;
    ramrod_param.rx_mode_obj = &sc->rx_mode_obj;
    ramrod_param.func_id = SC_FUNC(sc);

    ramrod_param.pstate = &sc->sp_state;
    ramrod_param.state = ECORE_FILTER_RX_MODE_PENDING;

    ramrod_param.rdata = BXE_SP(sc, rx_mode_rdata);
    ramrod_param.rdata_mapping = BXE_SP_MAPPING(sc, rx_mode_rdata);

    /* mark the request pending before it is submitted */
    bxe_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);

    ramrod_param.ramrod_flags = ramrod_flags;
    ramrod_param.rx_mode_flags = rx_mode_flags;

    ramrod_param.rx_accept_flags = rx_accept_flags;
    ramrod_param.tx_accept_flags = tx_accept_flags;

    rc = ecore_config_rx_mode(sc, &ramrod_param);
    if (rc < 0) {
        BLOGE(sc, "Set rx_mode %d failed\n", sc->rx_mode);
        return (rc);
    }

    return (0);
}

/*
 * Apply sc->rx_mode to the hardware: derive the accept flags, request both
 * the RX and TX sides (RAMROD_RX | RAMROD_TX) and program them for the
 * client id of fastpath 0. Returns the error from bxe_fill_accept_flags() or
 * bxe_set_q_rx_mode().
 */
static int
bxe_set_storm_rx_mode(struct bxe_softc *sc)
{
    unsigned long rx_mode_flags = 0, ramrod_flags = 0;
    unsigned long rx_accept_flags = 0, tx_accept_flags = 0;
    int rc;

    rc = bxe_fill_accept_flags(sc, sc->rx_mode, &rx_accept_flags,
                               &tx_accept_flags);
    if (rc) {
        return (rc);
    }

    bxe_set_bit(RAMROD_RX, &ramrod_flags);
    bxe_set_bit(RAMROD_TX, &ramrod_flags);

    /* XXX ensure all fastpath have same cl_id and/or move it to bxe_softc */
    return (bxe_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
                              rx_accept_flags, tx_accept_flags,
                              ramrod_flags));
}

/*
 * returns the "mcp load_code" according to global load_count array
 *
 * load_count[path][0] counts loads on the whole path and
 * load_count[path][1 + port] counts loads on this port. Both are
 * incremented; the first load on the path yields COMMON, the first on the
 * port yields PORT, anything else FUNCTION.
 */
static int
bxe_nic_load_no_mcp(struct bxe_softc *sc)
{
    int path = SC_PATH(sc);
    int port = SC_PORT(sc);

    BLOGI(sc, "NO MCP - load counts[%d]      %d, %d, %d\n",
          path, load_count[path][0], load_count[path][1],
          load_count[path][2]);
    load_count[path][0]++;
    load_count[path][1 + port]++;
    BLOGI(sc, "NO MCP - new load counts[%d]  %d, %d, %d\n",
          path, load_count[path][0], load_count[path][1],
          load_count[path][2]);
    if (load_count[path][0] == 1) {
        return (FW_MSG_CODE_DRV_LOAD_COMMON);
    } else if (load_count[path][1 + port] == 1) {
        return (FW_MSG_CODE_DRV_LOAD_PORT);
    } else {
        return (FW_MSG_CODE_DRV_LOAD_FUNCTION);
    }
}

/*
 * returns the "mcp load_code" according to global load_count array
 *
 * Mirror of bxe_nic_load_no_mcp(): both counters are decremented; the last
 * unload on the path yields UNLOAD_COMMON, the last on the port yields
 * UNLOAD_PORT, anything else UNLOAD_FUNCTION.
 */
static int
bxe_nic_unload_no_mcp(struct bxe_softc *sc)
{
    int port = SC_PORT(sc);
    int path = SC_PATH(sc);

    BLOGI(sc, "NO MCP - load counts[%d]      %d, %d, %d\n",
          path, load_count[path][0], load_count[path][1],
          load_count[path][2]);
    load_count[path][0]--;
    load_count[path][1 + port]--;
    BLOGI(sc, "NO MCP - new load counts[%d]  %d, %d, %d\n",
          path, load_count[path][0], load_count[path][1],
          load_count[path][2]);
    if (load_count[path][0] == 0) {
        return (FW_MSG_CODE_DRV_UNLOAD_COMMON);
    } else if (load_count[path][1 + port] == 0) {
        return (FW_MSG_CODE_DRV_UNLOAD_PORT);
    } else {
        return (FW_MSG_CODE_DRV_UNLOAD_FUNCTION);
    }
}

/*
 * request unload mode from the MCP: COMMON, PORT or FUNCTION
 *
 * With the wake-on-LAN branches compiled out, the request code is
 * DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS for every unload_mode. The request is sent
 * through bxe_fw_command() when an MCP is present; otherwise the answer is
 * computed locally by bxe_nic_unload_no_mcp(). Returns the resulting reset
 * code.
 */
static uint32_t
bxe_send_unload_req(struct bxe_softc *sc,
                    int              unload_mode)
{
    uint32_t reset_code = 0;
#if 0
    int port = SC_PORT(sc);
    int path = SC_PATH(sc);
#endif

    /* Select the UNLOAD request mode */
    if (unload_mode == UNLOAD_NORMAL) {
        reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS;
    }
#if 0
    else if (sc->flags & BXE_NO_WOL_FLAG) {
        reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP;
    } else if (sc->wol) {
        uint32_t emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
        uint8_t *mac_addr = sc->dev->dev_addr;
        uint32_t val;
        uint16_t pmc;

        /*
         * The mac address is written to entries 1-4 to
         * preserve entry 0 which is used by the PMF
         */
        uint8_t entry = (SC_VN(sc) + 1)*8;

        val = (mac_addr[0] << 8) | mac_addr[1];
        EMAC_WR(sc, EMAC_REG_EMAC_MAC_MATCH + entry, val);

        val = (mac_addr[2] << 24) | (mac_addr[3] << 16) |
              (mac_addr[4] << 8) | mac_addr[5];
        EMAC_WR(sc, EMAC_REG_EMAC_MAC_MATCH + entry + 4, val);

        /* Enable the PME and clear the status */
        pmc = pci_read_config(sc->dev,
                              (sc->devinfo.pcie_pm_cap_reg +
                               PCIR_POWER_STATUS),
                              2);
        pmc |= PCIM_PSTAT_PMEENABLE | PCIM_PSTAT_PME;
        pci_write_config(sc->dev,
                         (sc->devinfo.pcie_pm_cap_reg +
                          PCIR_POWER_STATUS),
                         pmc, 4);

        reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_EN;
    }
#endif
    else {
        reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS;
    }

    /* Send the request to the MCP */
    if (!BXE_NOMCP(sc)) {
        reset_code = bxe_fw_command(sc, reset_code, 0);
    } else {
        reset_code = bxe_nic_unload_no_mcp(sc);
    }

    return (reset_code);
}

/*
 * send UNLOAD_DONE command to the MCP
 *
 * When 'keep_link' is nonzero the command carries
 * DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET as its parameter. Nothing is sent when
 * no MCP is present.
 */
static void
bxe_send_unload_done(struct bxe_softc *sc,
                     uint8_t          keep_link)
{
    uint32_t reset_param =
        keep_link ? DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET : 0;

    /* Report UNLOAD_DONE to MCP */
    if (!BXE_NOMCP(sc)) {
        bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, reset_param);
    }
}

/*
 * Wait (up to 50 polls of 20000 us) for the function object to reach
 * ECORE_F_STATE_STARTED. Only the PMF waits; other functions return 0
 * immediately. If the state is still not STARTED afterwards, the
 * TX_STOP/TX_START transitions are forced in driver state only
 * (RAMROD_DRV_CLR_ONLY) and the result of the final transition is returned.
 */
static int
bxe_func_wait_started(struct bxe_softc *sc)
{
    int tout = 50;

    if (!sc->port.pmf) {
        return (0);
    }

    /*
     * (assumption: No Attention from MCP at this stage)
     * PMF probably in the middle of TX disable/enable transaction
     * 1. Sync IRS for default SB
     * 2. Sync SP queue - this guarantees us that attention handling started
     * 3. Wait, that TX disable/enable transaction completes
     *
     * 1+2 guarantee that if DCBX attention was scheduled it already changed
     * pending bit of transaction from STARTED-->TX_STOPPED, if we already
     * received completion for the transaction the state is TX_STOPPED.
     * State will return to STARTED after completion of TX_STOPPED-->STARTED
     * transaction.
*/

    /* XXX make sure default SB ISR is done */
    /* need a way to synchronize an irq (intr_mtx?) */

    /* XXX flush any work queues */

    /* poll the function state, giving up after the retry budget is spent */
    while (ecore_func_get_state(sc, &sc->func_obj) !=
           ECORE_F_STATE_STARTED && tout--) {
        DELAY(20000);
    }

    if (ecore_func_get_state(sc, &sc->func_obj) != ECORE_F_STATE_STARTED) {
        /*
         * Failed to complete the transaction in a "good way"
         * Force both transactions with CLR bit.
         */
        struct ecore_func_state_params func_params = { NULL };

        BLOGE(sc, "Unexpected function state! "
                  "Forcing STARTED-->TX_STOPPED-->STARTED\n");

        func_params.f_obj = &sc->func_obj;
        bxe_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);

        /* STARTED-->TX_STOPPED */
        func_params.cmd = ECORE_F_CMD_TX_STOP;
        ecore_func_state_change(sc, &func_params);

        /* TX_STOPPED-->STARTED */
        func_params.cmd = ECORE_F_CMD_TX_START;
        return (ecore_func_state_change(sc, &func_params));
    }

    return (0);
}

/*
 * Stop the queue of fastpath 'index' by issuing, in order and each waiting
 * for completion (RAMROD_COMP_WAIT), the HALT, TERMINATE and CFC_DEL queue
 * state changes. Returns the first non-zero ecore error, or the result of
 * the final CFC_DEL transition.
 */
static int
bxe_stop_queue(struct bxe_softc *sc,
               int              index)
{
    struct bxe_fastpath *fp = &sc->fp[index];
    struct ecore_queue_state_params q_params = { NULL };
    int rc;

    BLOGD(sc, DBG_LOAD, "stopping queue %d cid %d\n", index, fp->index);

    q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
    /* We want to wait for completion in this context */
    bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags);

    /* Stop the primary connection: */

    /* ...halt the connection */
    q_params.cmd = ECORE_Q_CMD_HALT;
    rc = ecore_queue_state_change(sc, &q_params);
    if (rc) {
        return (rc);
    }

    /* ...terminate the connection */
    q_params.cmd = ECORE_Q_CMD_TERMINATE;
    memset(&q_params.params.terminate, 0, sizeof(q_params.params.terminate));
    q_params.params.terminate.cid_index = FIRST_TX_COS_INDEX;
    rc = ecore_queue_state_change(sc, &q_params);
    if (rc) {
        return (rc);
    }

    /* ...delete cfc entry */
    q_params.cmd = ECORE_Q_CMD_CFC_DEL;
    memset(&q_params.params.cfc_del, 0, sizeof(q_params.params.cfc_del));
    q_params.params.cfc_del.cid_index = FIRST_TX_COS_INDEX;
    return (ecore_queue_state_change(sc, &q_params));
}

/* wait for the outstanding SP commands */
static inline uint8_t bxe_wait_sp_comp(struct bxe_softc *sc, unsigned long mask) { unsigned long tmp; int tout = 5000; /* wait for 5 secs tops */ while (tout--) { mb(); if (!(atomic_load_acq_long(&sc->sp_state) & mask)) { return (TRUE); } DELAY(1000); } mb(); tmp = atomic_load_acq_long(&sc->sp_state); if (tmp & mask) { BLOGE(sc, "Filtering completion timed out: " "sp_state 0x%lx, mask 0x%lx\n", tmp, mask); return (FALSE); } return (FALSE); } static int bxe_func_stop(struct bxe_softc *sc) { struct ecore_func_state_params func_params = { NULL }; int rc; /* prepare parameters for function state transitions */ bxe_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_STOP; /* * Try to stop the function the 'good way'. If it fails (in case * of a parity error during bxe_chip_cleanup()) and we are * not in a debug mode, perform a state transaction in order to * enable further HW_RESET transaction. */ rc = ecore_func_state_change(sc, &func_params); if (rc) { BLOGE(sc, "FUNC_STOP ramrod failed. " "Running a dry transaction\n"); bxe_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags); return (ecore_func_state_change(sc, &func_params)); } return (0); } static int bxe_reset_hw(struct bxe_softc *sc, uint32_t load_code) { struct ecore_func_state_params func_params = { NULL }; /* Prepare parameters for function state transitions */ bxe_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_HW_RESET; func_params.params.hw_init.load_phase = load_code; return (ecore_func_state_change(sc, &func_params)); } static void bxe_int_disable_sync(struct bxe_softc *sc, int disable_hw) { if (disable_hw) { /* prevent the HW from sending interrupts */ bxe_int_disable(sc); } /* XXX need a way to synchronize ALL irqs (intr_mtx?) 
*/ /* make sure all ISRs are done */ /* XXX make sure sp_task is not running */ /* cancel and flush work queues */ } static void bxe_chip_cleanup(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link) { int port = SC_PORT(sc); struct ecore_mcast_ramrod_params rparam = { NULL }; uint32_t reset_code; int i, rc = 0; bxe_drain_tx_queues(sc); /* give HW time to discard old tx messages */ DELAY(1000); /* Clean all ETH MACs */ rc = bxe_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_ETH_MAC, FALSE); if (rc < 0) { BLOGE(sc, "Failed to delete all ETH MACs (%d)\n", rc); } /* Clean up UC list */ rc = bxe_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_UC_LIST_MAC, TRUE); if (rc < 0) { BLOGE(sc, "Failed to delete UC MACs list (%d)\n", rc); } /* Disable LLH */ if (!CHIP_IS_E1(sc)) { REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 0); } /* Set "drop all" to stop Rx */ /* * We need to take the BXE_MCAST_LOCK() here in order to prevent * a race between the completion code and this code. */ BXE_MCAST_LOCK(sc); if (bxe_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) { bxe_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state); } else { bxe_set_storm_rx_mode(sc); } /* Clean up multicast configuration */ rparam.mcast_obj = &sc->mcast_obj; rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to send DEL MCAST command (%d)\n", rc); } BXE_MCAST_UNLOCK(sc); // XXX bxe_iov_chip_cleanup(sc); /* * Send the UNLOAD_REQUEST to the MCP. This will return if * this function should perform FUNCTION, PORT, or COMMON HW * reset. 
*/ reset_code = bxe_send_unload_req(sc, unload_mode); /* * (assumption: No Attention from MCP at this stage) * PMF probably in the middle of TX disable/enable transaction */ rc = bxe_func_wait_started(sc); if (rc) { BLOGE(sc, "bxe_func_wait_started failed\n"); } /* * Close multi and leading connections * Completions for ramrods are collected in a synchronous way */ for (i = 0; i < sc->num_queues; i++) { if (bxe_stop_queue(sc, i)) { goto unload_error; } } /* * If SP settings didn't get completed so far - something * very wrong has happen. */ if (!bxe_wait_sp_comp(sc, ~0x0UL)) { BLOGE(sc, "Common slow path ramrods got stuck!\n"); } unload_error: rc = bxe_func_stop(sc); if (rc) { BLOGE(sc, "Function stop failed!\n"); } /* disable HW interrupts */ bxe_int_disable_sync(sc, TRUE); /* detach interrupts */ bxe_interrupt_detach(sc); /* Reset the chip */ rc = bxe_reset_hw(sc, reset_code); if (rc) { BLOGE(sc, "Hardware reset failed\n"); } /* Report UNLOAD_DONE to MCP */ bxe_send_unload_done(sc, keep_link); } static void bxe_disable_close_the_gate(struct bxe_softc *sc) { uint32_t val; int port = SC_PORT(sc); BLOGD(sc, DBG_LOAD, "Disabling 'close the gates'\n"); if (CHIP_IS_E1(sc)) { uint32_t addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; val = REG_RD(sc, addr); val &= ~(0x300); REG_WR(sc, addr, val); } else { val = REG_RD(sc, MISC_REG_AEU_GENERAL_MASK); val &= ~(MISC_AEU_GENERAL_MASK_REG_AEU_PXP_CLOSE_MASK | MISC_AEU_GENERAL_MASK_REG_AEU_NIG_CLOSE_MASK); REG_WR(sc, MISC_REG_AEU_GENERAL_MASK, val); } } /* * Cleans the object that have internal lists without sending * ramrods. Should be run when interrutps are disabled. */ static void bxe_squeeze_objects(struct bxe_softc *sc) { unsigned long ramrod_flags = 0, vlan_mac_flags = 0; struct ecore_mcast_ramrod_params rparam = { NULL }; struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj; int rc; /* Cleanup MACs' object first... 
*/ /* Wait for completion of requested */ bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Perform a dry cleanup */ bxe_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags); /* Clean ETH primary MAC */ bxe_set_bit(ECORE_ETH_MAC, &vlan_mac_flags); rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc != 0) { BLOGE(sc, "Failed to clean ETH MACs (%d)\n", rc); } /* Cleanup UC list */ vlan_mac_flags = 0; bxe_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags); rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc != 0) { BLOGE(sc, "Failed to clean UC list MACs (%d)\n", rc); } /* Now clean mcast object... */ rparam.mcast_obj = &sc->mcast_obj; bxe_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags); /* Add a DEL command... */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to send DEL MCAST command (%d)\n", rc); } /* now wait until all pending commands are cleared */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); while (rc != 0) { if (rc < 0) { BLOGE(sc, "Failed to clean MCAST object (%d)\n", rc); return; } rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); } } /* stop the controller */ static __noinline int bxe_nic_unload(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link) { uint8_t global = FALSE; uint32_t val; BXE_CORE_LOCK_ASSERT(sc); BLOGD(sc, DBG_LOAD, "Starting NIC unload...\n"); /* mark driver as unloaded in shmem2 */ if (IS_PF(sc) && SHMEM2_HAS(sc, drv_capabilities_flag)) { val = SHMEM2_RD(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)]); SHMEM2_WR(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)], val & ~DRV_FLAGS_CAPABILITIES_LOADED_L2); } if (IS_PF(sc) && sc->recovery_state != BXE_RECOVERY_DONE && (sc->state == BXE_STATE_CLOSED || sc->state == BXE_STATE_ERROR)) { /* * We can get here if the driver has been unloaded * during parity error recovery and is either waiting for a * leader to complete or for other functions to unload and * then ifconfig down 
has been issued. In this case we want to * unload and let other functions to complete a recovery * process. */ sc->recovery_state = BXE_RECOVERY_DONE; sc->is_leader = 0; bxe_release_leader_lock(sc); mb(); BLOGD(sc, DBG_LOAD, "Releasing a leadership...\n"); BLOGE(sc, "Can't unload in closed or error state\n"); return (-1); } /* * Nothing to do during unload if previous bxe_nic_load() * did not completed succesfully - all resourses are released. */ if ((sc->state == BXE_STATE_CLOSED) || (sc->state == BXE_STATE_ERROR)) { return (0); } sc->state = BXE_STATE_CLOSING_WAITING_HALT; mb(); /* stop tx */ bxe_tx_disable(sc); sc->rx_mode = BXE_RX_MODE_NONE; /* XXX set rx mode ??? */ if (IS_PF(sc)) { /* set ALWAYS_ALIVE bit in shmem */ sc->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; bxe_drv_pulse(sc); bxe_stats_handle(sc, STATS_EVENT_STOP); bxe_save_statistics(sc); } /* wait till consumers catch up with producers in all queues */ bxe_drain_tx_queues(sc); /* if VF indicate to PF this function is going down (PF will delete sp * elements and clear initializations */ if (IS_VF(sc)) { ; /* bxe_vfpf_close_vf(sc); */ } else if (unload_mode != UNLOAD_RECOVERY) { /* if this is a normal/close unload need to clean up chip */ bxe_chip_cleanup(sc, unload_mode, keep_link); } else { /* Send the UNLOAD_REQUEST to the MCP */ bxe_send_unload_req(sc, unload_mode); /* * Prevent transactions to host from the functions on the * engine that doesn't reset global blocks in case of global * attention once gloabl blocks are reset and gates are opened * (the engine which leader will perform the recovery * last). */ if (!CHIP_IS_E1x(sc)) { bxe_pf_disable(sc); } /* disable HW interrupts */ bxe_int_disable_sync(sc, TRUE); /* detach interrupts */ bxe_interrupt_detach(sc); /* Report UNLOAD_DONE to MCP */ bxe_send_unload_done(sc, FALSE); } /* * At this stage no more interrupts will arrive so we may safely clean * the queue'able objects here in case they failed to get cleaned so far. 
*/ if (IS_PF(sc)) { bxe_squeeze_objects(sc); } /* There should be no more pending SP commands at this stage */ sc->sp_state = 0; sc->port.pmf = 0; bxe_free_fp_buffers(sc); if (IS_PF(sc)) { bxe_free_mem(sc); } bxe_free_fw_stats_mem(sc); sc->state = BXE_STATE_CLOSED; /* * Check if there are pending parity attentions. If there are - set * RECOVERY_IN_PROGRESS. */ if (IS_PF(sc) && bxe_chk_parity_attn(sc, &global, FALSE)) { bxe_set_reset_in_progress(sc); /* Set RESET_IS_GLOBAL if needed */ if (global) { bxe_set_reset_global(sc); } } /* * The last driver must disable a "close the gate" if there is no * parity attention or "process kill" pending. */ if (IS_PF(sc) && !bxe_clear_pf_load(sc) && bxe_reset_is_done(sc, SC_PATH(sc))) { bxe_disable_close_the_gate(sc); } BLOGD(sc, DBG_LOAD, "Ended NIC unload\n"); return (0); } /* * Called by the OS to set various media options (i.e. link, speed, etc.) when * the user runs "ifconfig bxe media ..." or "ifconfig bxe mediaopt ...". */ static int bxe_ifmedia_update(struct ifnet *ifp) { struct bxe_softc *sc = (struct bxe_softc *)ifp->if_softc; struct ifmedia *ifm; ifm = &sc->ifmedia; /* We only support Ethernet media type. */ if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) { return (EINVAL); } switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_CX4: case IFM_10G_SR: case IFM_10G_T: case IFM_10G_TWINAX: default: /* We don't support changing the media type. */ BLOGD(sc, DBG_LOAD, "Invalid media type (%d)\n", IFM_SUBTYPE(ifm->ifm_media)); return (EINVAL); } return (0); } /* * Called by the OS to get the current media status (i.e. link, speed, etc.). */ static void bxe_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct bxe_softc *sc = ifp->if_softc; /* Report link down if the driver isn't running. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { ifmr->ifm_active |= IFM_NONE; return; } /* Setup the default interface info. 
*/ ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->link_vars.link_up) { ifmr->ifm_status |= IFM_ACTIVE; } else { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_active |= sc->media; if (sc->link_vars.duplex == DUPLEX_FULL) { ifmr->ifm_active |= IFM_FDX; } else { ifmr->ifm_active |= IFM_HDX; } } static int bxe_ioctl_nvram(struct bxe_softc *sc, uint32_t priv_op, struct ifreq *ifr) { struct bxe_nvram_data nvdata_base; struct bxe_nvram_data *nvdata; int len; int error = 0; copyin(ifr->ifr_data, &nvdata_base, sizeof(nvdata_base)); len = (sizeof(struct bxe_nvram_data) + nvdata_base.len - sizeof(uint32_t)); if (len > sizeof(struct bxe_nvram_data)) { if ((nvdata = (struct bxe_nvram_data *) malloc(len, M_DEVBUF, (M_NOWAIT | M_ZERO))) == NULL) { BLOGE(sc, "BXE_IOC_RD_NVRAM malloc failed\n"); return (1); } memcpy(nvdata, &nvdata_base, sizeof(struct bxe_nvram_data)); } else { nvdata = &nvdata_base; } if (priv_op == BXE_IOC_RD_NVRAM) { BLOGD(sc, DBG_IOCTL, "IOC_RD_NVRAM 0x%x %d\n", nvdata->offset, nvdata->len); error = bxe_nvram_read(sc, nvdata->offset, (uint8_t *)nvdata->value, nvdata->len); copyout(nvdata, ifr->ifr_data, len); } else { /* BXE_IOC_WR_NVRAM */ BLOGD(sc, DBG_IOCTL, "IOC_WR_NVRAM 0x%x %d\n", nvdata->offset, nvdata->len); copyin(ifr->ifr_data, nvdata, len); error = bxe_nvram_write(sc, nvdata->offset, (uint8_t *)nvdata->value, nvdata->len); } if (len > sizeof(struct bxe_nvram_data)) { free(nvdata, M_DEVBUF); } return (error); } static int bxe_ioctl_stats_show(struct bxe_softc *sc, uint32_t priv_op, struct ifreq *ifr) { const size_t str_size = (BXE_NUM_ETH_STATS * STAT_NAME_LEN); const size_t stats_size = (BXE_NUM_ETH_STATS * sizeof(uint64_t)); caddr_t p_tmp; uint32_t *offset; int i; switch (priv_op) { case BXE_IOC_STATS_SHOW_NUM: memset(ifr->ifr_data, 0, sizeof(union bxe_stats_show_data)); ((union bxe_stats_show_data *)ifr->ifr_data)->desc.num = BXE_NUM_ETH_STATS; ((union bxe_stats_show_data *)ifr->ifr_data)->desc.len = STAT_NAME_LEN; return 
(0); case BXE_IOC_STATS_SHOW_STR: memset(ifr->ifr_data, 0, str_size); p_tmp = ifr->ifr_data; for (i = 0; i < BXE_NUM_ETH_STATS; i++) { strcpy(p_tmp, bxe_eth_stats_arr[i].string); p_tmp += STAT_NAME_LEN; } return (0); case BXE_IOC_STATS_SHOW_CNT: memset(ifr->ifr_data, 0, stats_size); p_tmp = ifr->ifr_data; for (i = 0; i < BXE_NUM_ETH_STATS; i++) { offset = ((uint32_t *)&sc->eth_stats + bxe_eth_stats_arr[i].offset); switch (bxe_eth_stats_arr[i].size) { case 4: *((uint64_t *)p_tmp) = (uint64_t)*offset; break; case 8: *((uint64_t *)p_tmp) = HILO_U64(*offset, *(offset + 1)); break; default: *((uint64_t *)p_tmp) = 0; } p_tmp += sizeof(uint64_t); } return (0); default: return (-1); } } static void bxe_handle_chip_tq(void *context, int pending) { struct bxe_softc *sc = (struct bxe_softc *)context; long work = atomic_load_acq_long(&sc->chip_tq_flags); switch (work) { case CHIP_TQ_START: if ((sc->ifnet->if_flags & IFF_UP) && !(sc->ifnet->if_drv_flags & IFF_DRV_RUNNING)) { /* start the interface */ BLOGD(sc, DBG_LOAD, "Starting the interface...\n"); BXE_CORE_LOCK(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } break; case CHIP_TQ_STOP: if (!(sc->ifnet->if_flags & IFF_UP) && (sc->ifnet->if_drv_flags & IFF_DRV_RUNNING)) { /* bring down the interface */ BLOGD(sc, DBG_LOAD, "Stopping the interface...\n"); bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_stop_locked(sc); BXE_CORE_UNLOCK(sc); } break; case CHIP_TQ_REINIT: if (sc->ifnet->if_drv_flags & IFF_DRV_RUNNING) { /* restart the interface */ BLOGD(sc, DBG_LOAD, "Restarting the interface...\n"); bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_stop_locked(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } break; default: break; } } /* * Handles any IOCTL calls from the operating system. 
* * Returns: * 0 = Success, >0 Failure */ static int bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct bxe_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct bxe_nvram_data *nvdata; uint32_t priv_op; int mask = 0; int reinit = 0; int error = 0; int mtu_min = (ETH_MIN_PACKET_SIZE - ETH_HLEN); int mtu_max = (MJUM9BYTES - ETH_OVERHEAD - IP_HEADER_ALIGNMENT_PADDING); switch (command) { case SIOCSIFMTU: BLOGD(sc, DBG_IOCTL, "Received SIOCSIFMTU ioctl (mtu=%d)\n", ifr->ifr_mtu); if (sc->mtu == ifr->ifr_mtu) { /* nothing to change */ break; } if ((ifr->ifr_mtu < mtu_min) || (ifr->ifr_mtu > mtu_max)) { BLOGE(sc, "Unsupported MTU size %d (range is %d-%d)\n", ifr->ifr_mtu, mtu_min, mtu_max); error = EINVAL; break; } atomic_store_rel_int((volatile unsigned int *)&sc->mtu, (unsigned long)ifr->ifr_mtu); atomic_store_rel_long((volatile unsigned long *)&ifp->if_mtu, (unsigned long)ifr->ifr_mtu); reinit = 1; break; case SIOCSIFFLAGS: /* toggle the interface state up or down */ BLOGD(sc, DBG_IOCTL, "Received SIOCSIFFLAGS ioctl\n"); /* check if the interface is up */ if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* set the receive mode flags */ bxe_set_rx_mode(sc); } else { atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_START); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_STOP); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); } } break; case SIOCADDMULTI: case SIOCDELMULTI: /* add/delete multicast addresses */ BLOGD(sc, DBG_IOCTL, "Received SIOCADDMULTI/SIOCDELMULTI ioctl\n"); /* check if the interface is up */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* set the receive mode flags */ bxe_set_rx_mode(sc); } break; case SIOCSIFCAP: /* find out which capabilities have changed */ mask = (ifr->ifr_reqcap ^ ifp->if_capenable); BLOGD(sc, DBG_IOCTL, "Received SIOCSIFCAP ioctl (mask=0x%08x)\n", mask); /* 
toggle the LRO capabilites enable flag */ if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; BLOGD(sc, DBG_IOCTL, "Turning LRO %s\n", (ifp->if_capenable & IFCAP_LRO) ? "ON" : "OFF"); reinit = 1; } /* toggle the TXCSUM checksum capabilites enable flag */ if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; BLOGD(sc, DBG_IOCTL, "Turning TXCSUM %s\n", (ifp->if_capenable & IFCAP_TXCSUM) ? "ON" : "OFF"); if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } else { ifp->if_hwassist = 0; } } /* toggle the RXCSUM checksum capabilities enable flag */ if (mask & IFCAP_RXCSUM) { ifp->if_capenable ^= IFCAP_RXCSUM; BLOGD(sc, DBG_IOCTL, "Turning RXCSUM %s\n", (ifp->if_capenable & IFCAP_RXCSUM) ? "ON" : "OFF"); if (ifp->if_capenable & IFCAP_RXCSUM) { ifp->if_hwassist = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } else { ifp->if_hwassist = 0; } } /* toggle TSO4 capabilities enabled flag */ if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; BLOGD(sc, DBG_IOCTL, "Turning TSO4 %s\n", (ifp->if_capenable & IFCAP_TSO4) ? "ON" : "OFF"); } /* toggle TSO6 capabilities enabled flag */ if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; BLOGD(sc, DBG_IOCTL, "Turning TSO6 %s\n", (ifp->if_capenable & IFCAP_TSO6) ? "ON" : "OFF"); } /* toggle VLAN_HWTSO capabilities enabled flag */ if (mask & IFCAP_VLAN_HWTSO) { ifp->if_capenable ^= IFCAP_VLAN_HWTSO; BLOGD(sc, DBG_IOCTL, "Turning VLAN_HWTSO %s\n", (ifp->if_capenable & IFCAP_VLAN_HWTSO) ? "ON" : "OFF"); } /* toggle VLAN_HWCSUM capabilities enabled flag */ if (mask & IFCAP_VLAN_HWCSUM) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWCSUM is not supported!\n"); error = EINVAL; } /* toggle VLAN_MTU capabilities enable flag */ if (mask & IFCAP_VLAN_MTU) { /* XXX investigate this... 
*/ BLOGE(sc, "Changing VLAN_MTU is not supported!\n"); error = EINVAL; } /* toggle VLAN_HWTAGGING capabilities enabled flag */ if (mask & IFCAP_VLAN_HWTAGGING) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWTAGGING is not supported!\n"); error = EINVAL; } /* toggle VLAN_HWFILTER capabilities enabled flag */ if (mask & IFCAP_VLAN_HWFILTER) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWFILTER is not supported!\n"); error = EINVAL; } /* XXX not yet... * IFCAP_WOL_MAGIC */ break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: /* set/get interface media */ BLOGD(sc, DBG_IOCTL, "Received SIOCSIFMEDIA/SIOCGIFMEDIA ioctl (cmd=%lu)\n", (command & 0xff)); error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCGPRIVATE_0: copyin(ifr->ifr_data, &priv_op, sizeof(priv_op)); switch (priv_op) { case BXE_IOC_RD_NVRAM: case BXE_IOC_WR_NVRAM: nvdata = (struct bxe_nvram_data *)ifr->ifr_data; BLOGD(sc, DBG_IOCTL, "Received Private NVRAM ioctl addr=0x%x size=%u\n", nvdata->offset, nvdata->len); error = bxe_ioctl_nvram(sc, priv_op, ifr); break; case BXE_IOC_STATS_SHOW_NUM: case BXE_IOC_STATS_SHOW_STR: case BXE_IOC_STATS_SHOW_CNT: BLOGD(sc, DBG_IOCTL, "Received Private Stats ioctl (%d)\n", priv_op); error = bxe_ioctl_stats_show(sc, priv_op, ifr); break; default: BLOGW(sc, "Received Private Unknown ioctl (%d)\n", priv_op); error = EINVAL; break; } break; default: BLOGD(sc, DBG_IOCTL, "Received Unknown Ioctl (cmd=%lu)\n", (command & 0xff)); error = ether_ioctl(ifp, command, data); break; } if (reinit && (sc->ifnet->if_drv_flags & IFF_DRV_RUNNING)) { BLOGD(sc, DBG_LOAD | DBG_IOCTL, "Re-initializing hardware from IOCTL change\n"); atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_REINIT); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); } return (error); } static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m, uint8_t contents) { char * type; int i = 0; if (!(sc->debug & DBG_MBUF)) { return; } if (m == NULL) { BLOGD(sc, DBG_MBUF, "mbuf: 
null pointer\n"); return; } while (m) { BLOGD(sc, DBG_MBUF, "%02d: mbuf=%p m_len=%d m_flags=0x%b m_data=%p\n", i, m, m->m_len, m->m_flags, "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY", m->m_data); if (m->m_flags & M_PKTHDR) { BLOGD(sc, DBG_MBUF, "%02d: - m_pkthdr: tot_len=%d flags=0x%b csum_flags=%b\n", i, m->m_pkthdr.len, m->m_flags, "\20\12M_BCAST\13M_MCAST\14M_FRAG" "\15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG" "\22M_PROMISC\23M_NOFREE", (int)m->m_pkthdr.csum_flags, "\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP\4CSUM_IP_FRAGS" "\5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED" "\12CSUM_IP_VALID\13CSUM_DATA_VALID" "\14CSUM_PSEUDO_HDR"); } if (m->m_flags & M_EXT) { switch (m->m_ext.ext_type) { case EXT_CLUSTER: type = "EXT_CLUSTER"; break; case EXT_SFBUF: type = "EXT_SFBUF"; break; case EXT_JUMBOP: type = "EXT_JUMBOP"; break; case EXT_JUMBO9: type = "EXT_JUMBO9"; break; case EXT_JUMBO16: type = "EXT_JUMBO16"; break; case EXT_PACKET: type = "EXT_PACKET"; break; case EXT_MBUF: type = "EXT_MBUF"; break; case EXT_NET_DRV: type = "EXT_NET_DRV"; break; case EXT_MOD_TYPE: type = "EXT_MOD_TYPE"; break; case EXT_DISPOSABLE: type = "EXT_DISPOSABLE"; break; case EXT_EXTREF: type = "EXT_EXTREF"; break; default: type = "UNKNOWN"; break; } BLOGD(sc, DBG_MBUF, "%02d: - m_ext: %p ext_size=%d type=%s\n", i, m->m_ext.ext_buf, m->m_ext.ext_size, type); } if (contents) { bxe_dump_mbuf_data(sc, "mbuf data", m, TRUE); } m = m->m_next; i++; } } /* * Checks to ensure the 13 bd sliding window is >= MSS for TSO. * Check that (13 total bds - 3 bds) = 10 bd window >= MSS. * The window: 3 bds are = 1 for headers BD + 2 for parse BD and last BD * The headers comes in a seperate bd in FreeBSD so 13-3=10. 
* Returns: 0 if OK to send, 1 if packet needs further defragmentation */ static int bxe_chktso_window(struct bxe_softc *sc, int nsegs, bus_dma_segment_t *segs, struct mbuf *m) { uint32_t num_wnds, wnd_size, wnd_sum; int32_t frag_idx, wnd_idx; unsigned short lso_mss; int defrag; defrag = 0; wnd_sum = 0; wnd_size = 10; num_wnds = nsegs - wnd_size; lso_mss = htole16(m->m_pkthdr.tso_segsz); /* * Total header lengths Eth+IP+TCP in first FreeBSD mbuf so calculate the * first window sum of data while skipping the first assuming it is the * header in FreeBSD. */ for (frag_idx = 1; (frag_idx <= wnd_size); frag_idx++) { wnd_sum += htole16(segs[frag_idx].ds_len); } /* check the first 10 bd window size */ if (wnd_sum < lso_mss) { return (1); } /* run through the windows */ for (wnd_idx = 0; wnd_idx < num_wnds; wnd_idx++, frag_idx++) { /* subtract the first mbuf->m_len of the last wndw(-header) */ wnd_sum -= htole16(segs[wnd_idx+1].ds_len); /* add the next mbuf len to the len of our new window */ wnd_sum += htole16(segs[frag_idx].ds_len); if (wnd_sum < lso_mss) { return (1); } } return (0); } static uint8_t bxe_set_pbd_csum_e2(struct bxe_fastpath *fp, struct mbuf *m, uint32_t *parsing_data) { struct ether_vlan_header *eh = NULL; struct ip *ip4 = NULL; struct ip6_hdr *ip6 = NULL; caddr_t ip = NULL; struct tcphdr *th = NULL; int e_hlen, ip_hlen, l4_off; uint16_t proto; if (m->m_pkthdr.csum_flags == CSUM_IP) { /* no L4 checksum offload needed */ return (0); } /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { e_hlen = (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); proto = ntohs(eh->evl_proto); } else { e_hlen = ETHER_HDR_LEN; proto = ntohs(eh->evl_encap_proto); } switch (proto) { case ETHERTYPE_IP: /* get the IP header, if mbuf len < 20 then header in next mbuf */ ip4 = (m->m_len < sizeof(struct ip)) ? 
(struct ip *)m->m_next->m_data : (struct ip *)(m->m_data + e_hlen); /* ip_hl is number of 32-bit words */ ip_hlen = (ip4->ip_hl << 2); ip = (caddr_t)ip4; break; case ETHERTYPE_IPV6: /* get the IPv6 header, if mbuf len < 40 then header in next mbuf */ ip6 = (m->m_len < sizeof(struct ip6_hdr)) ? (struct ip6_hdr *)m->m_next->m_data : (struct ip6_hdr *)(m->m_data + e_hlen); /* XXX cannot support offload with IPv6 extensions */ ip_hlen = sizeof(struct ip6_hdr); ip = (caddr_t)ip6; break; default: /* We can't offload in this case... */ /* XXX error stat ??? */ return (0); } /* XXX assuming L4 header is contiguous to IPv4/IPv6 in the same mbuf */ l4_off = (e_hlen + ip_hlen); *parsing_data |= (((l4_off >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_tcp++; th = (struct tcphdr *)(ip + ip_hlen); /* th_off is number of 32-bit words */ *parsing_data |= ((th->th_off << ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) & ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW); return (l4_off + (th->th_off << 2)); /* entire header length */ } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_udp++; return (l4_off + sizeof(struct udphdr)); /* entire header length */ } else { /* XXX error stat ??? 
*/ return (0); } } static uint8_t bxe_set_pbd_csum(struct bxe_fastpath *fp, struct mbuf *m, struct eth_tx_parse_bd_e1x *pbd) { struct ether_vlan_header *eh = NULL; struct ip *ip4 = NULL; struct ip6_hdr *ip6 = NULL; caddr_t ip = NULL; struct tcphdr *th = NULL; struct udphdr *uh = NULL; int e_hlen, ip_hlen; uint16_t proto; uint8_t hlen; uint16_t tmp_csum; uint32_t *tmp_uh; /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { e_hlen = (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); proto = ntohs(eh->evl_proto); } else { e_hlen = ETHER_HDR_LEN; proto = ntohs(eh->evl_encap_proto); } switch (proto) { case ETHERTYPE_IP: /* get the IP header, if mbuf len < 20 then header in next mbuf */ ip4 = (m->m_len < sizeof(struct ip)) ? (struct ip *)m->m_next->m_data : (struct ip *)(m->m_data + e_hlen); /* ip_hl is number of 32-bit words */ ip_hlen = (ip4->ip_hl << 1); ip = (caddr_t)ip4; break; case ETHERTYPE_IPV6: /* get the IPv6 header, if mbuf len < 40 then header in next mbuf */ ip6 = (m->m_len < sizeof(struct ip6_hdr)) ? (struct ip6_hdr *)m->m_next->m_data : (struct ip6_hdr *)(m->m_data + e_hlen); /* XXX cannot support offload with IPv6 extensions */ ip_hlen = (sizeof(struct ip6_hdr) >> 1); ip = (caddr_t)ip6; break; default: /* We can't offload in this case... */ /* XXX error stat ??? 
*/ return (0); } hlen = (e_hlen >> 1); /* note that rest of global_data is indirectly zeroed here */ if (m->m_flags & M_VLANTAG) { pbd->global_data = htole16(hlen | (1 << ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT)); } else { pbd->global_data = htole16(hlen); } pbd->ip_hlen_w = ip_hlen; hlen += pbd->ip_hlen_w; /* XXX assuming L4 header is contiguous to IPv4/IPv6 in the same mbuf */ if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { th = (struct tcphdr *)(ip + (ip_hlen << 1)); /* th_off is number of 32-bit words */ hlen += (uint16_t)(th->th_off << 1); } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { uh = (struct udphdr *)(ip + (ip_hlen << 1)); hlen += (sizeof(struct udphdr) / 2); } else { /* valid case as only CSUM_IP was set */ return (0); } pbd->total_hlen_w = htole16(hlen); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_tcp++; pbd->tcp_pseudo_csum = ntohs(th->th_sum); } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_udp++; /* * Everest1 (i.e. 57710, 57711, 57711E) does not natively support UDP * checksums and does not know anything about the UDP header and where * the checksum field is located. It only knows about TCP. Therefore * we "lie" to the hardware for outgoing UDP packets w/ checksum * offload. Since the checksum field offset for TCP is 16 bytes and * for UDP it is 6 bytes we pass a pointer to the hardware that is 10 * bytes less than the start of the UDP header. This allows the * hardware to write the checksum in the correct spot. But the * hardware will compute a checksum which includes the last 10 bytes * of the IP header. To correct this we tweak the stack computed * pseudo checksum by folding in the calculation of the inverse * checksum for those final 10 bytes of the IP header. This allows * the correct checksum to be computed by the hardware. 
*/

        /* set pointer 10 bytes before UDP header */
        tmp_uh = (uint32_t *)((uint8_t *)uh - 10);

        /* calculate a pseudo header checksum over the first 10 bytes */
        tmp_csum = in_pseudo(*tmp_uh,
                             *(tmp_uh + 1),
                             *(uint16_t *)(tmp_uh + 2));

        pbd->tcp_pseudo_csum = ntohs(in_addword(uh->uh_sum, ~tmp_csum));
    }

    return (hlen * 2); /* entire header length, number of bytes */
}

/*
 * Add the LSO (TSO) information to an E2 parsing BD value.
 *
 * ORs the packet's TSO segment size (m_pkthdr.tso_segsz), shifted by
 * ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT and masked with
 * ETH_TX_PARSE_BD_E2_LSO_MSS, into *parsing_data. Bits already set in
 * *parsing_data are preserved.
 */
static void
bxe_set_pbd_lso_e2(struct mbuf *m,
                   uint32_t    *parsing_data)
{
    *parsing_data |= ((m->m_pkthdr.tso_segsz <<
                       ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) &
                      ETH_TX_PARSE_BD_E2_LSO_MSS);

    /* XXX test for IPv6 with extension header... */
#if 0
    struct ip6_hdr *ip6;
    if (ip6 && ip6->ip6_nxt == 'some ipv6 extension header')
        *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR;
#endif
}

/*
 * Fill in the LSO (TSO) fields of an E1x parsing BD.
 *
 * The Ethernet, IP and TCP headers are read directly from the data of the
 * first mbuf, so all three must be contiguous there. Only IPv4 is handled
 * (see the XXX notes below). Sets lso_mss, tcp_send_seq, tcp_flags, ip_id
 * and tcp_pseudo_csum and ORs the "pseudo checksum without length" flag
 * into global_data.
 */
static void
bxe_set_pbd_lso(struct mbuf                *m,
                struct eth_tx_parse_bd_e1x *pbd)
{
    struct ether_vlan_header *eh = NULL;
    struct ip *ip = NULL;
    struct tcphdr *th = NULL;
    int e_hlen;

    /* get the Ethernet header */
    eh = mtod(m, struct ether_vlan_header *);

    /* handle VLAN encapsulation if present */
    e_hlen = (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ?
                 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) : ETHER_HDR_LEN;

    /* get the IP and TCP header, with LSO entire header in first mbuf */
    /* XXX assuming IPv4 */
    ip = (struct ip *)(m->m_data + e_hlen);
    th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));

    pbd->lso_mss = htole16(m->m_pkthdr.tso_segsz);
    pbd->tcp_send_seq = ntohl(th->th_seq);
    /* the flag bits are taken from the fourth 32-bit word of the TCP header */
    pbd->tcp_flags = ((ntohl(((uint32_t *)th)[3]) >> 16) & 0xff);

#if 1
    /* XXX IPv4 */
    pbd->ip_id = ntohs(ip->ip_id);
    pbd->tcp_pseudo_csum =
        ntohs(in_pseudo(ip->ip_src.s_addr,
                        ip->ip_dst.s_addr,
                        htons(IPPROTO_TCP)));
#else
    /* XXX IPv6 */
    pbd->tcp_pseudo_csum =
        ntohs(in_pseudo(&ip6->ip6_src,
                        &ip6->ip6_dst,
                        htons(IPPROTO_TCP)));
#endif

    pbd->global_data |=
        htole16(ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN);
}

/*
 * Encapsulate an mbuf cluster into the tx bd chain and make the memory
 * visible to the controller.
 *
 * If an mbuf is submitted to this routine and cannot be given to the
 * controller (e.g. it has too many fragments) then the function may free
 * the mbuf and return to the caller.
 *
 * On ENOMEM the mbuf is left untouched in *m_head so the caller can retry
 * later; on any other failure the mbuf is freed and *m_head is set to NULL.
 * A successful defragmentation replaces *m_head with the new chain.
 *
 * Returns:
 *   0 = Success, !0 = Failure
 *   Note the side effect that an mbuf may be freed if it causes a problem.
 */
static int
bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head)
{
    bus_dma_segment_t segs[32];
    struct mbuf *m0;
    struct bxe_sw_tx_bd *tx_buf;
    struct eth_tx_parse_bd_e1x *pbd_e1x = NULL;
    struct eth_tx_parse_bd_e2 *pbd_e2 = NULL;
    /* struct eth_tx_parse_2nd_bd *pbd2 = NULL; */
    struct eth_tx_bd *tx_data_bd;
    struct eth_tx_bd *tx_total_pkt_size_bd;
    struct eth_tx_start_bd *tx_start_bd;
    uint16_t bd_prod, pkt_prod, total_pkt_size;
    uint8_t mac_type;
    /* NOTE(review): defragged, vlan_off and ovlan are only ever zeroed below */
    int defragged, error, nsegs, rc, nbds, vlan_off, ovlan;
    struct bxe_softc *sc;
    uint16_t tx_bd_avail;
    struct ether_vlan_header *eh;
    uint32_t pbd_e2_parsing_data = 0;
    uint8_t hlen = 0;
    int tmp_bd;
    int i;

    sc = fp->sc;

    M_ASSERTPKTHDR(*m_head);

    m0 = *m_head;
    rc = defragged = nbds = ovlan = vlan_off = total_pkt_size = 0;
    tx_start_bd = NULL;
    tx_data_bd = NULL;
    tx_total_pkt_size_bd = NULL;

    /* get the H/W pointer for packets and BDs */
    pkt_prod = fp->tx_pkt_prod;
    bd_prod = fp->tx_bd_prod;

    mac_type = UNICAST_ADDRESS;

    /* map the mbuf into the next open DMAable memory */
    tx_buf = &fp->tx_mbuf_chain[TX_BD(pkt_prod)];
    error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag,
                                    tx_buf->m_map, m0,
                                    segs, &nsegs, BUS_DMA_NOWAIT);

    /* mapping errors */
    if(__predict_false(error != 0)) {
        fp->eth_q_stats.tx_dma_mapping_failure++;
        if (error == ENOMEM) {
            /* resource issue, try again later */
            rc = ENOMEM;
        } else if (error == EFBIG) {
            /* possibly recoverable with defragmentation */
            fp->eth_q_stats.mbuf_defrag_attempts++;
            m0 = m_defrag(*m_head, M_DONTWAIT);
            if (m0 == NULL) {
                fp->eth_q_stats.mbuf_defrag_failures++;
                rc = ENOBUFS;
            } else {
                /* defrag successful, try mapping again */
                *m_head = m0;
                error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag,
                                                tx_buf->m_map, m0,
                                                segs, &nsegs, BUS_DMA_NOWAIT);
                if (error) {
                    fp->eth_q_stats.tx_dma_mapping_failure++;
                    rc = error;
                }
            }
        } else {
            /* unknown, unrecoverable mapping error */
            BLOGE(sc, "Unknown TX mapping error rc=%d\n", error);
            bxe_dump_mbuf(sc, m0, FALSE);
            rc = error;
        }

        /* rc is still 0 here only if the remap after defrag succeeded */
        goto bxe_tx_encap_continue;
    }

    tx_bd_avail = bxe_tx_avail(sc, fp);

    /* make sure there is enough room in the send queue */
    if (__predict_false(tx_bd_avail < (nsegs + 2))) {
        /* Recoverable, try again later. */
        fp->eth_q_stats.tx_hw_queue_full++;
        bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map);
        rc = ENOMEM;
        goto bxe_tx_encap_continue;
    }

    /* capture the current H/W TX chain high watermark */
    if (__predict_false(fp->eth_q_stats.tx_hw_max_queue_depth <
                        (TX_BD_USABLE - tx_bd_avail))) {
        fp->eth_q_stats.tx_hw_max_queue_depth = (TX_BD_USABLE - tx_bd_avail);
    }

    /* make sure it fits in the packet window */
    if (__predict_false(nsegs > BXE_MAX_SEGMENTS)) {
        /*
         * The mbuf may be too big for the controller to handle. If the frame
         * is a TSO frame we'll need to do an additional check.
         */
        if (m0->m_pkthdr.csum_flags & CSUM_TSO) {
            if (bxe_chktso_window(sc, nsegs, segs, m0) == 0) {
                goto bxe_tx_encap_continue; /* OK to send */
            } else {
                fp->eth_q_stats.tx_window_violation_tso++;
            }
        } else {
            fp->eth_q_stats.tx_window_violation_std++;
        }

        /* lets try to defragment this mbuf and remap it */
        fp->eth_q_stats.mbuf_defrag_attempts++;
        bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map);

        m0 = m_defrag(*m_head, M_DONTWAIT);
        if (m0 == NULL) {
            fp->eth_q_stats.mbuf_defrag_failures++;
            /* Ugh, just drop the frame... :( */
            rc = ENOBUFS;
        } else {
            /* defrag successful, try mapping again */
            *m_head = m0;
            error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag,
                                            tx_buf->m_map, m0,
                                            segs, &nsegs, BUS_DMA_NOWAIT);
            if (error) {
                fp->eth_q_stats.tx_dma_mapping_failure++;
                /*
                 * No sense in trying to defrag/copy chain, drop it. :(
                 * NOTE(review): if error is ENOMEM the frame is not dropped;
                 * the check below treats ENOMEM as "retry later".
                 */
                rc = error;
            } else {
                /* if the chain is still too long then drop it */
                if (__predict_false(nsegs > BXE_MAX_SEGMENTS)) {
                    bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map);
                    rc = ENODEV;
                }
            }
        }
    }

bxe_tx_encap_continue:

    /* Check for errors */
    if (rc) {
        if (rc == ENOMEM) {
            /* recoverable try again later */
        } else {
            fp->eth_q_stats.tx_soft_errors++;
            fp->eth_q_stats.mbuf_alloc_tx--;
            m_freem(*m_head);
            *m_head = NULL;
        }

        return (rc);
    }

    /* set flag according to packet type (UNICAST_ADDRESS is default) */
    if (m0->m_flags & M_BCAST) {
        mac_type = BROADCAST_ADDRESS;
    } else if (m0->m_flags & M_MCAST) {
        mac_type = MULTICAST_ADDRESS;
    }

    /* store the mbuf into the mbuf ring */
    tx_buf->m = m0;
    tx_buf->first_bd = fp->tx_bd_prod;
    tx_buf->flags = 0;

    /* prepare the first transmit (start) BD for the mbuf */
    tx_start_bd = &fp->tx_chain[TX_BD(bd_prod)].start_bd;

    BLOGD(sc, DBG_TX,
          "sending pkt_prod=%u tx_buf=%p next_idx=%u bd=%u tx_start_bd=%p\n",
          pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, tx_start_bd);

    tx_start_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr));
    tx_start_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr));
    tx_start_bd->nbytes = htole16(segs[0].ds_len);
    /* NOTE(review): adds the htole16()-converted value, not segs[0].ds_len */
    total_pkt_size += tx_start_bd->nbytes;
    tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;

    tx_start_bd->general_data = (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT);

    /* all frames have at least Start BD + Parsing BD */
    nbds = nsegs + 1;
    tx_start_bd->nbd = htole16(nbds);

    if (m0->m_flags & M_VLANTAG) {
        tx_start_bd->vlan_or_ethertype = htole16(m0->m_pkthdr.ether_vtag);
        tx_start_bd->bd_flags.as_bitfield |=
            (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
    } else {
        /* vf tx, start bd must hold the ethertype for fw to enforce it */
        if (IS_VF(sc)) {
            /* map ethernet header to find type and header length */
            eh = mtod(m0, struct ether_vlan_header *);
            tx_start_bd->vlan_or_ethertype = eh->evl_encap_proto;
        } else {
            /* used by FW for packet accounting */
            tx_start_bd->vlan_or_ethertype = htole16(fp->tx_pkt_prod);
#if 0
            /*
             * If NPAR-SD is active then FW should do the tagging regardless
             * of value of priority. Otherwise, if priority indicates this is
             * a control packet we need to indicate to FW to avoid tagging.
             */
            if (!IS_MF_AFEX(sc) && (mbuf priority == PRIO_CONTROL)) {
                SET_FLAG(tx_start_bd->general_data,
                         ETH_TX_START_BD_FORCE_VLAN_MODE, 1);
            }
#endif
        }
    }

    /*
     * add a parsing BD from the chain. The parsing BD is always added
     * though it is only used for TSO and chksum
     */
    bd_prod = TX_BD_NEXT(bd_prod);

    if (m0->m_pkthdr.csum_flags) {
        if (m0->m_pkthdr.csum_flags & CSUM_IP) {
            fp->eth_q_stats.tx_ofld_frames_csum_ip++;
            tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IP_CSUM;
        }

        if (m0->m_pkthdr.csum_flags & CSUM_TCP_IPV6) {
            tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_IPV6 |
                                                  ETH_TX_BD_FLAGS_L4_CSUM);
        } else if (m0->m_pkthdr.csum_flags & CSUM_UDP_IPV6) {
            tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_IPV6   |
                                                  ETH_TX_BD_FLAGS_IS_UDP |
                                                  ETH_TX_BD_FLAGS_L4_CSUM);
        } else if ((m0->m_pkthdr.csum_flags & CSUM_TCP) ||
                   (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
            tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
        } else if (m0->m_pkthdr.csum_flags & CSUM_UDP) {
            tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_L4_CSUM |
                                                  ETH_TX_BD_FLAGS_IS_UDP);
        }
    }

    /* the parsing BD layout depends on the chip family (E1x vs. later) */
    if (!CHIP_IS_E1x(sc)) {
        pbd_e2 = &fp->tx_chain[TX_BD(bd_prod)].parse_bd_e2;
        memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));

        if (m0->m_pkthdr.csum_flags) {
            hlen = bxe_set_pbd_csum_e2(fp, m0, &pbd_e2_parsing_data);
        }

#if 0
        /*
         * Add the MACs to the parsing BD if the module param was
         * explicitly set, if this is a vf, or in switch independent
         * mode.
         */
        if (sc->flags & BXE_TX_SWITCHING || IS_VF(sc) || IS_MF_SI(sc)) {
            eh = mtod(m0, struct ether_vlan_header *);
            bxe_set_fw_mac_addr(&pbd_e2->data.mac_addr.src_hi,
                                &pbd_e2->data.mac_addr.src_mid,
                                &pbd_e2->data.mac_addr.src_lo,
                                eh->evl_shost);
            bxe_set_fw_mac_addr(&pbd_e2->data.mac_addr.dst_hi,
                                &pbd_e2->data.mac_addr.dst_mid,
                                &pbd_e2->data.mac_addr.dst_lo,
                                eh->evl_dhost);
        }
#endif

        SET_FLAG(pbd_e2_parsing_data, ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE,
                 mac_type);
    } else {
        uint16_t global_data = 0;

        pbd_e1x = &fp->tx_chain[TX_BD(bd_prod)].parse_bd_e1x;
        memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));

        if (m0->m_pkthdr.csum_flags) {
            hlen = bxe_set_pbd_csum(fp, m0, pbd_e1x);
        }

        SET_FLAG(global_data,
                 ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE, mac_type);
        pbd_e1x->global_data |= htole16(global_data);
    }

    /* setup the parsing BD with TSO specific info */
    if (m0->m_pkthdr.csum_flags & CSUM_TSO) {
        fp->eth_q_stats.tx_ofld_frames_lso++;
        tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;

        if (__predict_false(tx_start_bd->nbytes > hlen)) {
            fp->eth_q_stats.tx_ofld_frames_lso_hdr_splits++;

            /* split the first BD into header/data making the fw job easy */
            nbds++;
            tx_start_bd->nbd = htole16(nbds);
            tx_start_bd->nbytes = htole16(hlen);

            bd_prod = TX_BD_NEXT(bd_prod);

            /* new transmit BD after the tx_parse_bd */
            tx_data_bd = &fp->tx_chain[TX_BD(bd_prod)].reg_bd;
            tx_data_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr + hlen));
            tx_data_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr + hlen));
            tx_data_bd->nbytes = htole16(segs[0].ds_len - hlen);
            if (tx_total_pkt_size_bd == NULL) {
                tx_total_pkt_size_bd = tx_data_bd;
            }

            BLOGD(sc, DBG_TX,
                  "TSO split header size is %d (%x:%x) nbds %d\n",
                  le16toh(tx_start_bd->nbytes),
                  le32toh(tx_start_bd->addr_hi),
                  le32toh(tx_start_bd->addr_lo),
                  nbds);
        }

        if (!CHIP_IS_E1x(sc)) {
            bxe_set_pbd_lso_e2(m0, &pbd_e2_parsing_data);
        } else {
            bxe_set_pbd_lso(m0, pbd_e1x);
        }
    }

    /* pbd_e2_parsing_data is only ever modified on the non-E1x paths above */
    if (pbd_e2_parsing_data) {
        pbd_e2->parsing_data = htole32(pbd_e2_parsing_data);
    }

    /* prepare remaining BDs, start tx bd contains first seg/frag */
    for (i = 1; i < nsegs ; i++) {
        bd_prod = TX_BD_NEXT(bd_prod);
        tx_data_bd = &fp->tx_chain[TX_BD(bd_prod)].reg_bd;
        tx_data_bd->addr_lo = htole32(U64_LO(segs[i].ds_addr));
        tx_data_bd->addr_hi = htole32(U64_HI(segs[i].ds_addr));
        tx_data_bd->nbytes = htole16(segs[i].ds_len);
        if (tx_total_pkt_size_bd == NULL) {
            tx_total_pkt_size_bd = tx_data_bd;
        }
        total_pkt_size += tx_data_bd->nbytes;
    }

    BLOGD(sc, DBG_TX, "last bd %p\n", tx_data_bd);

    /* the total size is recorded in the first data BD that follows the parse BD */
    if (tx_total_pkt_size_bd != NULL) {
        tx_total_pkt_size_bd->total_pkt_bytes = total_pkt_size;
    }

    /* debug only: walk and log every BD that was just built for this frame */
    if (__predict_false(sc->debug & DBG_TX)) {
        tmp_bd = tx_buf->first_bd;
        for (i = 0; i < nbds; i++)
        {
            if (i == 0) {
                BLOGD(sc, DBG_TX,
                      "TX Strt: %p bd=%d nbd=%d vlan=0x%x "
                      "bd_flags=0x%x hdr_nbds=%d\n",
                      tx_start_bd,
                      tmp_bd,
                      le16toh(tx_start_bd->nbd),
                      le16toh(tx_start_bd->vlan_or_ethertype),
                      tx_start_bd->bd_flags.as_bitfield,
                      (tx_start_bd->general_data & ETH_TX_START_BD_HDR_NBDS));
            } else if (i == 1) {
                if (pbd_e1x) {
                    BLOGD(sc, DBG_TX,
                          "-> Prse: %p bd=%d global=0x%x ip_hlen_w=%u "
                          "ip_id=%u lso_mss=%u tcp_flags=0x%x csum=0x%x "
                          "tcp_seq=%u total_hlen_w=%u\n",
                          pbd_e1x,
                          tmp_bd,
                          pbd_e1x->global_data,
                          pbd_e1x->ip_hlen_w,
                          pbd_e1x->ip_id,
                          pbd_e1x->lso_mss,
                          pbd_e1x->tcp_flags,
                          pbd_e1x->tcp_pseudo_csum,
                          pbd_e1x->tcp_send_seq,
                          le16toh(pbd_e1x->total_hlen_w));
                } else { /* if (pbd_e2) */
                    BLOGD(sc, DBG_TX,
                          "-> Parse: %p bd=%d dst=%02x:%02x:%02x "
                          "src=%02x:%02x:%02x parsing_data=0x%x\n",
                          pbd_e2,
                          tmp_bd,
                          pbd_e2->data.mac_addr.dst_hi,
                          pbd_e2->data.mac_addr.dst_mid,
                          pbd_e2->data.mac_addr.dst_lo,
                          pbd_e2->data.mac_addr.src_hi,
                          pbd_e2->data.mac_addr.src_mid,
                          pbd_e2->data.mac_addr.src_lo,
                          pbd_e2->parsing_data);
                }
            }

            if (i != 1) { /* skip parse bd as it doesn't hold data */
                tx_data_bd = &fp->tx_chain[TX_BD(tmp_bd)].reg_bd;
                BLOGD(sc, DBG_TX,
                      "-> Frag: %p bd=%d nbytes=%d hi=0x%x lo: 0x%x\n",
                      tx_data_bd,
                      tmp_bd,
                      le16toh(tx_data_bd->nbytes),
                      le32toh(tx_data_bd->addr_hi),
                      le32toh(tx_data_bd->addr_lo));
            }

            tmp_bd = TX_BD_NEXT(tmp_bd);
        }
    }

    BLOGD(sc, DBG_TX, "doorbell: nbds=%d bd=%u\n", nbds, bd_prod);

    /* update TX BD producer index value for next TX */
    bd_prod = TX_BD_NEXT(bd_prod);

    /*
     * If the chain of tx_bd's describing this frame is adjacent to or spans
     * an eth_tx_next_bd element then we need to increment the nbds value.
     */
    if (TX_BD_IDX(bd_prod) < nbds) {
        nbds++;
    }

    /* don't allow reordering of writes for nbd and packets */
    mb();

    fp->tx_db.data.prod += nbds;

    /* producer points to the next free tx_bd at this point */
    fp->tx_pkt_prod++;
    fp->tx_bd_prod = bd_prod;

    DOORBELL(sc, fp->index, fp->tx_db.raw);

    fp->eth_q_stats.tx_pkts++;

    /* Prevent speculative reads from getting ahead of the status block. */
    bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle,
                      0, 0, BUS_SPACE_BARRIER_READ);

    /* Prevent speculative reads from getting ahead of the doorbell. */
    bus_space_barrier(sc->bar[BAR2].tag, sc->bar[BAR2].handle,
                      0, 0, BUS_SPACE_BARRIER_READ);

    return (0);
}

/*
 * Drain the interface send queue (ifp->if_snd) into the TX ring of fp.
 *
 * Frames are dequeued and handed to bxe_tx_encap() until the queue is
 * empty, an encapsulation fails, or IFF_DRV_OACTIVE is found set after
 * bxe_txeof() has run. A frame that bxe_tx_encap() rejects without freeing
 * is put back at the head of the queue and IFF_DRV_OACTIVE is set. The TX
 * watchdog timer is re-armed if at least one frame was queued.
 *
 * The fastpath TX lock must be held (see BXE_FP_TX_LOCK_ASSERT below).
 */
static void
bxe_tx_start_locked(struct bxe_softc *sc,
                    struct ifnet     *ifp,
                    struct bxe_fastpath *fp)
{
    struct mbuf *m = NULL;
    int tx_count = 0;
    uint16_t tx_bd_avail;

    BXE_FP_TX_LOCK_ASSERT(fp);

    /* keep adding entries while there are frames to send */
    while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

        /*
         * check for any frames to send
         * dequeue can still be NULL even if queue is not empty
         */
        IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
        if (__predict_false(m == NULL)) {
            break;
        }

        /* the mbuf now belongs to us */
        fp->eth_q_stats.mbuf_alloc_tx++;

        /*
         * Put the frame into the transmit ring. If we don't have room,
         * place the mbuf back at the head of the TX queue, set the
         * OACTIVE flag, and wait for the NIC to drain the chain.
         */
        if (__predict_false(bxe_tx_encap(fp, &m))) {
            fp->eth_q_stats.tx_encap_failures++;
            if (m != NULL) {
                /* mark the TX queue as full and return the frame */
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                IFQ_DRV_PREPEND(&ifp->if_snd, m);
                fp->eth_q_stats.mbuf_alloc_tx--;
                fp->eth_q_stats.tx_queue_xoff++;
            }

            /* stop looking for more work */
            break;
        }

        /* the frame was enqueued successfully */
        tx_count++;

        /* send a copy of the frame to any BPF listeners. */
        BPF_MTAP(ifp, m);

        tx_bd_avail = bxe_tx_avail(sc, fp);

        /* handle any completions if we're running low */
        if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) {
            /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */
            bxe_txeof(sc, fp);
            if (ifp->if_drv_flags & IFF_DRV_OACTIVE) {
                break;
            }
        }
    }

    /* all TX packets were dequeued and/or the tx ring is full */
    if (tx_count > 0) {
        /* reset the TX watchdog timeout timer */
        fp->watchdog_timer = BXE_TX_TIMEOUT;
    }
}

/*
 * Legacy (non-RSS) dispatch routine.
 *
 * Ignores the request (with a warning) when the interface is not running,
 * the TX queue is marked full, or the link is down. Otherwise transmits
 * from the interface send queue on the first fastpath (sc->fp[0]) with its
 * TX lock held.
 */
static void
bxe_tx_start(struct ifnet *ifp)
{
    struct bxe_softc *sc;
    struct bxe_fastpath *fp;

    sc = ifp->if_softc;

    if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
        BLOGW(sc, "Interface not running, ignoring transmit request\n");
        return;
    }

    if (ifp->if_drv_flags & IFF_DRV_OACTIVE) {
        BLOGW(sc, "Interface TX queue is full, ignoring transmit request\n");
        return;
    }

    if (!sc->link_vars.link_up) {
        BLOGW(sc, "Interface link is down, ignoring transmit request\n");
        return;
    }

    fp = &sc->fp[0];

    BXE_FP_TX_LOCK(fp);
    bxe_tx_start_locked(sc, ifp, fp);
    BXE_FP_TX_UNLOCK(fp);
}

#if __FreeBSD_version >= 800000

/*
 * Multiqueue transmit worker: send m (if any) plus whatever is pending in
 * the fastpath's buf_ring.
 *
 * If m is NULL only pending frames are sent. If the ring already needs
 * enqueueing, m is appended to it first so packet order is maintained.
 * A frame that bxe_tx_encap() rejects without freeing is put back on the
 * buf_ring and IFF_DRV_OACTIVE is set.
 *
 * Returns 0, EINVAL when the fastpath has no buf_ring, or the last
 * non-zero status from drbr_enqueue()/bxe_tx_encap().
 * NOTE(review): on the EINVAL path m is neither queued nor freed here --
 * confirm who owns it in that case.
 *
 * The fastpath TX lock must be held (see BXE_FP_TX_LOCK_ASSERT below).
 */
static int
bxe_tx_mq_start_locked(struct bxe_softc    *sc,
                       struct ifnet        *ifp,
                       struct bxe_fastpath *fp,
                       struct mbuf         *m)
{
    struct buf_ring *tx_br = fp->tx_br;
    struct mbuf *next;
    int depth, rc, tx_count;
    uint16_t tx_bd_avail;

    rc = tx_count = 0;

    if (!tx_br) {
        BLOGE(sc, "Multiqueue TX and no buf_ring!\n");
        return (EINVAL);
    }

    /* fetch the depth of the driver queue */
    depth = drbr_inuse(ifp, tx_br);
    if (depth > fp->eth_q_stats.tx_max_drbr_queue_depth) {
        fp->eth_q_stats.tx_max_drbr_queue_depth = depth;
    }

    BXE_FP_TX_LOCK_ASSERT(fp);

    if (m == NULL) {
        /* no new work, check for pending frames */
        next = drbr_dequeue(ifp, tx_br);
    } else if (drbr_needs_enqueue(ifp, tx_br)) {
        /* have both new and pending work, maintain packet order */
        rc = drbr_enqueue(ifp, tx_br, m);
        if (rc != 0) {
            fp->eth_q_stats.tx_soft_errors++;
            goto bxe_tx_mq_start_locked_exit;
        }
        next = drbr_dequeue(ifp, tx_br);
    } else {
        /* new work only and nothing pending */
        next = m;
    }

    /* keep adding entries while there are frames to send */
    while (next != NULL) {

        /* the mbuf now belongs to us */
        fp->eth_q_stats.mbuf_alloc_tx++;

        /*
         * Put the frame into the transmit ring. If we don't have room,
         * place the mbuf back at the head of the TX queue, set the
         * OACTIVE flag, and wait for the NIC to drain the chain.
         */
        rc = bxe_tx_encap(fp, &next);
        if (__predict_false(rc != 0)) {
            fp->eth_q_stats.tx_encap_failures++;
            if (next != NULL) {
                /* mark the TX queue as full and save the frame */
                ifp->if_drv_flags |= IFF_DRV_OACTIVE;
                /* XXX this may reorder the frame */
                rc = drbr_enqueue(ifp, tx_br, next);
                fp->eth_q_stats.mbuf_alloc_tx--;
                fp->eth_q_stats.tx_frames_deferred++;
            }

            /* stop looking for more work */
            break;
        }

        /* the transmit frame was enqueued successfully */
        tx_count++;

        /* send a copy of the frame to any BPF listeners */
        BPF_MTAP(ifp, next);

        tx_bd_avail = bxe_tx_avail(sc, fp);

        /* handle any completions if we're running low */
        if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) {
            /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */
            bxe_txeof(sc, fp);
            if (ifp->if_drv_flags & IFF_DRV_OACTIVE) {
                break;
            }
        }

        next = drbr_dequeue(ifp, tx_br);
    }

    /* all TX packets were dequeued and/or the tx ring is full */
    if (tx_count > 0) {
        /* reset the TX watchdog timeout timer */
        fp->watchdog_timer = BXE_TX_TIMEOUT;
    }

bxe_tx_mq_start_locked_exit:

    return (rc);
}

/* Multiqueue (TSS) dispatch routine.
*/ static int bxe_tx_mq_start(struct ifnet *ifp, struct mbuf *m) { struct bxe_softc *sc = ifp->if_softc; struct bxe_fastpath *fp; int fp_index, rc; fp_index = 0; /* default is the first queue */ /* change the queue if using flow ID */ if ((m->m_flags & M_FLOWID) != 0) { fp_index = (m->m_pkthdr.flowid % sc->num_queues); } fp = &sc->fp[fp_index]; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { BLOGW(sc, "Interface not running, ignoring transmit request\n"); return (ENETDOWN); } if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { BLOGW(sc, "Interface TX queue is full, ignoring transmit request\n"); return (EBUSY); } if (!sc->link_vars.link_up) { BLOGW(sc, "Interface link is down, ignoring transmit request\n"); return (ENETDOWN); } /* XXX change to TRYLOCK here and if failed then schedule taskqueue */ BXE_FP_TX_LOCK(fp); rc = bxe_tx_mq_start_locked(sc, ifp, fp, m); BXE_FP_TX_UNLOCK(fp); return (rc); } static void bxe_mq_flush(struct ifnet *ifp) { struct bxe_softc *sc = ifp->if_softc; struct bxe_fastpath *fp; struct mbuf *m; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->state != BXE_FP_STATE_OPEN) { BLOGD(sc, DBG_LOAD, "Not clearing fp[%02d] buf_ring (state=%d)\n", fp->index, fp->state); continue; } if (fp->tx_br != NULL) { BLOGD(sc, DBG_LOAD, "Clearing fp[%02d] buf_ring\n", fp->index); BXE_FP_TX_LOCK(fp); while ((m = buf_ring_dequeue_sc(fp->tx_br)) != NULL) { m_freem(m); } BXE_FP_TX_UNLOCK(fp); } } if_qflush(ifp); } #endif /* FreeBSD_version >= 800000 */ static uint16_t bxe_cid_ilt_lines(struct bxe_softc *sc) { if (IS_SRIOV(sc)) { return ((BXE_FIRST_VF_CID + BXE_VF_CIDS) / ILT_PAGE_CIDS); } return (L2_ILT_LINES(sc)); } static void bxe_ilt_set_info(struct bxe_softc *sc) { struct ilt_client_info *ilt_client; struct ecore_ilt *ilt = sc->ilt; uint16_t line = 0; ilt->start_line = FUNC_ILT_BASE(SC_FUNC(sc)); BLOGD(sc, DBG_LOAD, "ilt starts at line %d\n", ilt->start_line); /* CDU */ ilt_client = &ilt->clients[ILT_CLIENT_CDU]; ilt_client->client_num = 
ILT_CLIENT_CDU; ilt_client->page_size = CDU_ILT_PAGE_SZ; ilt_client->flags = ILT_CLIENT_SKIP_MEM; ilt_client->start = line; line += bxe_cid_ilt_lines(sc); if (CNIC_SUPPORT(sc)) { line += CNIC_ILT_LINES; } ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[CDU]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); /* QM */ if (QM_INIT(sc->qm_cid_count)) { ilt_client = &ilt->clients[ILT_CLIENT_QM]; ilt_client->client_num = ILT_CLIENT_QM; ilt_client->page_size = QM_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; /* 4 bytes for each cid */ line += DIV_ROUND_UP(sc->qm_cid_count * QM_QUEUES_PER_FUNC * 4, QM_ILT_PAGE_SZ); ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[QM]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); } if (CNIC_SUPPORT(sc)) { /* SRC */ ilt_client = &ilt->clients[ILT_CLIENT_SRC]; ilt_client->client_num = ILT_CLIENT_SRC; ilt_client->page_size = SRC_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; line += SRC_ILT_LINES; ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[SRC]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); /* TM */ ilt_client = &ilt->clients[ILT_CLIENT_TM]; ilt_client->client_num = ILT_CLIENT_TM; ilt_client->page_size = TM_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; line += TM_ILT_LINES; ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[TM]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); } KASSERT((line <= ILT_MAX_LINES), ("Invalid number of ILT lines!")); } 
static void bxe_set_fp_rx_buf_size(struct bxe_softc *sc) { int i; BLOGD(sc, DBG_LOAD, "mtu = %d\n", sc->mtu); for (i = 0; i < sc->num_queues; i++) { /* get the Rx buffer size for RX frames */ sc->fp[i].rx_buf_size = (IP_HEADER_ALIGNMENT_PADDING + ETH_OVERHEAD + sc->mtu); BLOGD(sc, DBG_LOAD, "rx_buf_size for fp[%02d] = %d\n", i, sc->fp[i].rx_buf_size); /* get the mbuf allocation size for RX frames */ if (sc->fp[i].rx_buf_size <= MCLBYTES) { sc->fp[i].mbuf_alloc_size = MCLBYTES; } else if (sc->fp[i].rx_buf_size <= BCM_PAGE_SIZE) { sc->fp[i].mbuf_alloc_size = PAGE_SIZE; } else { sc->fp[i].mbuf_alloc_size = MJUM9BYTES; } BLOGD(sc, DBG_LOAD, "mbuf_alloc_size for fp[%02d] = %d\n", i, sc->fp[i].mbuf_alloc_size); } } static int bxe_alloc_ilt_mem(struct bxe_softc *sc) { int rc = 0; if ((sc->ilt = (struct ecore_ilt *)malloc(sizeof(struct ecore_ilt), M_BXE_ILT, (M_NOWAIT | M_ZERO))) == NULL) { rc = 1; } return (rc); } static int bxe_alloc_ilt_lines_mem(struct bxe_softc *sc) { int rc = 0; if ((sc->ilt->lines = (struct ilt_line *)malloc((sizeof(struct ilt_line) * ILT_MAX_LINES), M_BXE_ILT, (M_NOWAIT | M_ZERO))) == NULL) { rc = 1; } return (rc); } static void bxe_free_ilt_mem(struct bxe_softc *sc) { if (sc->ilt != NULL) { free(sc->ilt, M_BXE_ILT); sc->ilt = NULL; } } static void bxe_free_ilt_lines_mem(struct bxe_softc *sc) { if (sc->ilt->lines != NULL) { free(sc->ilt->lines, M_BXE_ILT); sc->ilt->lines = NULL; } } static void bxe_free_mem(struct bxe_softc *sc) { int i; #if 0 if (!CONFIGURE_NIC_MODE(sc)) { /* free searcher T2 table */ bxe_dma_free(sc, &sc->t2); } #endif for (i = 0; i < L2_ILT_LINES(sc); i++) { bxe_dma_free(sc, &sc->context[i].vcxt_dma); sc->context[i].vcxt = NULL; sc->context[i].size = 0; } ecore_ilt_mem_op(sc, ILT_MEMOP_FREE); bxe_free_ilt_lines_mem(sc); #if 0 bxe_iov_free_mem(sc); #endif } static int bxe_alloc_mem(struct bxe_softc *sc) { int context_size; int allocated; int i; #if 0 if (!CONFIGURE_NIC_MODE(sc)) { /* allocate searcher T2 table */ if 
(bxe_dma_alloc(sc, SRC_T2_SZ, &sc->t2, "searcher t2 table") != 0) { return (-1); } } #endif /* * Allocate memory for CDU context: * This memory is allocated separately and not in the generic ILT * functions because CDU differs in few aspects: * 1. There can be multiple entities allocating memory for context - * regular L2, CNIC, and SRIOV drivers. Each separately controls * its own ILT lines. * 2. Since CDU page-size is not a single 4KB page (which is the case * for the other ILT clients), to be efficient we want to support * allocation of sub-page-size in the last entry. * 3. Context pointers are used by the driver to pass to FW / update * the context (for the other ILT clients the pointers are used just to * free the memory during unload). */ context_size = (sizeof(union cdu_context) * BXE_L2_CID_COUNT(sc)); for (i = 0, allocated = 0; allocated < context_size; i++) { sc->context[i].size = min(CDU_ILT_PAGE_SZ, (context_size - allocated)); if (bxe_dma_alloc(sc, sc->context[i].size, &sc->context[i].vcxt_dma, "cdu context") != 0) { bxe_free_mem(sc); return (-1); } sc->context[i].vcxt = (union cdu_context *)sc->context[i].vcxt_dma.vaddr; allocated += sc->context[i].size; } bxe_alloc_ilt_lines_mem(sc); BLOGD(sc, DBG_LOAD, "ilt=%p start_line=%u lines=%p\n", sc->ilt, sc->ilt->start_line, sc->ilt->lines); { for (i = 0; i < 4; i++) { BLOGD(sc, DBG_LOAD, "c%d page_size=%u start=%u end=%u num=%u flags=0x%x\n", i, sc->ilt->clients[i].page_size, sc->ilt->clients[i].start, sc->ilt->clients[i].end, sc->ilt->clients[i].client_num, sc->ilt->clients[i].flags); } } if (ecore_ilt_mem_op(sc, ILT_MEMOP_ALLOC)) { BLOGE(sc, "ecore_ilt_mem_op ILT_MEMOP_ALLOC failed\n"); bxe_free_mem(sc); return (-1); } #if 0 if (bxe_iov_alloc_mem(sc)) { BLOGE(sc, "Failed to allocate memory for SRIOV\n"); bxe_free_mem(sc); return (-1); } #endif return (0); } static void bxe_free_rx_bd_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i; sc = fp->sc; if (fp->rx_mbuf_tag == NULL) { return; } /* free 
all mbufs and unload all maps */ for (i = 0; i < RX_BD_TOTAL; i++) { if (fp->rx_mbuf_chain[i].m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_mbuf_chain[i].m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_chain[i].m_map); } if (fp->rx_mbuf_chain[i].m != NULL) { m_freem(fp->rx_mbuf_chain[i].m); fp->rx_mbuf_chain[i].m = NULL; fp->eth_q_stats.mbuf_alloc_rx--; } } } static void bxe_free_tpa_pool(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i, max_agg_queues; sc = fp->sc; if (fp->rx_mbuf_tag == NULL) { return; } max_agg_queues = MAX_AGG_QS(sc); /* release all mbufs and unload all DMA maps in the TPA pool */ for (i = 0; i < max_agg_queues; i++) { if (fp->rx_tpa_info[i].bd.m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_tpa_info[i].bd.m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info[i].bd.m_map); } if (fp->rx_tpa_info[i].bd.m != NULL) { m_freem(fp->rx_tpa_info[i].bd.m); fp->rx_tpa_info[i].bd.m = NULL; fp->eth_q_stats.mbuf_alloc_tpa--; } } } static void bxe_free_sge_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i; sc = fp->sc; if (fp->rx_sge_mbuf_tag == NULL) { return; } /* rree all mbufs and unload all maps */ for (i = 0; i < RX_SGE_TOTAL; i++) { if (fp->rx_sge_mbuf_chain[i].m_map != NULL) { bus_dmamap_sync(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[i].m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[i].m_map); } if (fp->rx_sge_mbuf_chain[i].m != NULL) { m_freem(fp->rx_sge_mbuf_chain[i].m); fp->rx_sge_mbuf_chain[i].m = NULL; fp->eth_q_stats.mbuf_alloc_sge--; } } } static void bxe_free_fp_buffers(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; #if __FreeBSD_version >= 800000 if (fp->tx_br != NULL) { struct mbuf *m; /* just in case bxe_mq_flush() wasn't called */ while ((m = buf_ring_dequeue_sc(fp->tx_br)) != NULL) { m_freem(m); } buf_ring_free(fp->tx_br, M_DEVBUF); 
fp->tx_br = NULL; } #endif /* free all RX buffers */ bxe_free_rx_bd_chain(fp); bxe_free_tpa_pool(fp); bxe_free_sge_chain(fp); if (fp->eth_q_stats.mbuf_alloc_rx != 0) { BLOGE(sc, "failed to claim all rx mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_rx); } if (fp->eth_q_stats.mbuf_alloc_sge != 0) { BLOGE(sc, "failed to claim all sge mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_sge); } if (fp->eth_q_stats.mbuf_alloc_tpa != 0) { BLOGE(sc, "failed to claim all sge mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_tpa); } if (fp->eth_q_stats.mbuf_alloc_tx != 0) { BLOGE(sc, "failed to release tx mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_tx); } /* XXX verify all mbufs were reclaimed */ if (mtx_initialized(&fp->tx_mtx)) { mtx_destroy(&fp->tx_mtx); } if (mtx_initialized(&fp->rx_mtx)) { mtx_destroy(&fp->rx_mtx); } } } static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t prev_index, uint16_t index) { struct bxe_sw_rx_bd *rx_buf; struct eth_rx_bd *rx_bd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs, rc; rc = 0; /* allocate the new RX BD mbuf */ m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, fp->mbuf_alloc_size); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_bd_alloc_failed++; return (ENOBUFS); } fp->eth_q_stats.mbuf_alloc_rx++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = fp->rx_buf_size; /* map the mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_bd_mapping_failed++; m_freem(m); fp->eth_q_stats.mbuf_alloc_rx--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); /* release any existing RX BD mbuf mappings */ if (prev_index != index) { rx_buf = &fp->rx_mbuf_chain[prev_index]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); 
bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* * We only get here from bxe_rxeof() when the maximum number * of rx buffers is less than RX_BD_USABLE. bxe_rxeof() already * holds the mbuf in the prev_index so it's OK to NULL it out * here without concern of a memory leak. */ fp->rx_mbuf_chain[prev_index].m = NULL; } rx_buf = &fp->rx_mbuf_chain[index]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* save the mbuf and mapping info for a future packet */ map = (prev_index != index) ? fp->rx_mbuf_chain[prev_index].m_map : rx_buf->m_map; rx_buf->m_map = fp->rx_mbuf_spare_map; fp->rx_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_PREREAD); rx_buf->m = m; rx_bd = &fp->rx_chain[index]; rx_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); rx_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); return (rc); } static int bxe_alloc_rx_tpa_mbuf(struct bxe_fastpath *fp, int queue) { struct bxe_sw_tpa_info *tpa_info = &fp->rx_tpa_info[queue]; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs; int rc = 0; /* allocate the new TPA mbuf */ m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, fp->mbuf_alloc_size); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_tpa_alloc_failed++; return (ENOBUFS); } fp->eth_q_stats.mbuf_alloc_tpa++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = fp->rx_buf_size; /* map the mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_tpa_mapping_failed++; m_free(m); fp->eth_q_stats.mbuf_alloc_tpa--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); /* release any existing TPA mbuf mapping */ if (tpa_info->bd.m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, 
tpa_info->bd.m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, tpa_info->bd.m_map); } /* save the mbuf and mapping info for the TPA mbuf */ map = tpa_info->bd.m_map; tpa_info->bd.m_map = fp->rx_tpa_info_mbuf_spare_map; fp->rx_tpa_info_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_mbuf_tag, tpa_info->bd.m_map, BUS_DMASYNC_PREREAD); tpa_info->bd.m = m; tpa_info->seg = segs[0]; return (rc); } /* * Allocate an mbuf and assign it to the receive scatter gather chain. The * caller must take care to save a copy of the existing mbuf in the SG mbuf * chain. */ static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index) { struct bxe_sw_rx_bd *sge_buf; struct eth_rx_sge *sge; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs; int rc = 0; /* allocate a new SGE mbuf */ m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, SGE_PAGE_SIZE); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_sge_alloc_failed++; return (ENOMEM); } fp->eth_q_stats.mbuf_alloc_sge++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = SGE_PAGE_SIZE; /* map the SGE mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_sge_mapping_failed++; m_freem(m); fp->eth_q_stats.mbuf_alloc_sge--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); sge_buf = &fp->rx_sge_mbuf_chain[index]; /* release any existing SGE mbuf mapping */ if (sge_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_sge_mbuf_tag, sge_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_sge_mbuf_tag, sge_buf->m_map); } /* save the mbuf and mapping info for a future packet */ map = sge_buf->m_map; sge_buf->m_map = fp->rx_sge_mbuf_spare_map; fp->rx_sge_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_sge_mbuf_tag, sge_buf->m_map, BUS_DMASYNC_PREREAD); sge_buf->m = m; sge = 
&fp->rx_sge_chain[index]; sge->addr_hi = htole32(U64_HI(segs[0].ds_addr)); sge->addr_lo = htole32(U64_LO(segs[0].ds_addr)); return (rc); } static __noinline int bxe_alloc_fp_buffers(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i, j, rc = 0; int ring_prod, cqe_ring_prod; int max_agg_queues; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; #if __FreeBSD_version >= 800000 fp->tx_br = buf_ring_alloc(BXE_BR_SIZE, M_DEVBUF, M_DONTWAIT, &fp->tx_mtx); if (fp->tx_br == NULL) { BLOGE(sc, "buf_ring alloc fail for fp[%02d]\n", i); goto bxe_alloc_fp_buffers_error; } #endif ring_prod = cqe_ring_prod = 0; fp->rx_bd_cons = 0; fp->rx_cq_cons = 0; /* allocate buffers for the RX BDs in RX BD chain */ for (j = 0; j < sc->max_rx_bufs; j++) { rc = bxe_alloc_rx_bd_mbuf(fp, ring_prod, ring_prod); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] rx chain (%d)\n", i, rc); goto bxe_alloc_fp_buffers_error; } ring_prod = RX_BD_NEXT(ring_prod); cqe_ring_prod = RCQ_NEXT(cqe_ring_prod); } fp->rx_bd_prod = ring_prod; fp->rx_cq_prod = cqe_ring_prod; fp->eth_q_stats.rx_calls = fp->eth_q_stats.rx_pkts = 0; if (sc->ifnet->if_capenable & IFCAP_LRO) { max_agg_queues = MAX_AGG_QS(sc); fp->tpa_enable = TRUE; /* fill the TPA pool */ for (j = 0; j < max_agg_queues; j++) { rc = bxe_alloc_rx_tpa_mbuf(fp, j); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] TPA queue %d\n", i, j); fp->tpa_enable = FALSE; goto bxe_alloc_fp_buffers_error; } fp->rx_tpa_info[j].state = BXE_TPA_STATE_STOP; } if (fp->tpa_enable) { /* fill the RX SGE chain */ ring_prod = 0; for (j = 0; j < RX_SGE_USABLE; j++) { rc = bxe_alloc_rx_sge_mbuf(fp, ring_prod); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] SGE %d\n", i, ring_prod); fp->tpa_enable = FALSE; ring_prod = 0; goto bxe_alloc_fp_buffers_error; } ring_prod = RX_SGE_NEXT(ring_prod); } fp->rx_sge_prod = ring_prod; } } } return (0); bxe_alloc_fp_buffers_error: /* unwind what was already allocated */ bxe_free_rx_bd_chain(fp); bxe_free_tpa_pool(fp); 
bxe_free_sge_chain(fp); return (ENOBUFS); } static void bxe_free_fw_stats_mem(struct bxe_softc *sc) { bxe_dma_free(sc, &sc->fw_stats_dma); sc->fw_stats_num = 0; sc->fw_stats_req_size = 0; sc->fw_stats_req = NULL; sc->fw_stats_req_mapping = 0; sc->fw_stats_data_size = 0; sc->fw_stats_data = NULL; sc->fw_stats_data_mapping = 0; } static int bxe_alloc_fw_stats_mem(struct bxe_softc *sc) { uint8_t num_queue_stats; int num_groups; /* number of queues for statistics is number of eth queues */ num_queue_stats = BXE_NUM_ETH_QUEUES(sc); /* * Total number of FW statistics requests = * 1 for port stats + 1 for PF stats + num of queues */ sc->fw_stats_num = (2 + num_queue_stats); /* * Request is built from stats_query_header and an array of * stats_query_cmd_group each of which contains STATS_QUERY_CMD_COUNT * rules. The real number or requests is configured in the * stats_query_header. */ num_groups = ((sc->fw_stats_num / STATS_QUERY_CMD_COUNT) + ((sc->fw_stats_num % STATS_QUERY_CMD_COUNT) ? 1 : 0)); BLOGD(sc, DBG_LOAD, "stats fw_stats_num %d num_groups %d\n", sc->fw_stats_num, num_groups); sc->fw_stats_req_size = (sizeof(struct stats_query_header) + (num_groups * sizeof(struct stats_query_cmd_group))); /* * Data for statistics requests + stats_counter. * stats_counter holds per-STORM counters that are incremented when * STORM has finished with the current request. Memory for FCoE * offloaded statistics are counted anyway, even if they will not be sent. * VF stats are not accounted for here as the data of VF stats is stored * in memory allocated by the VF, not here. 
*/ sc->fw_stats_data_size = (sizeof(struct stats_counter) + sizeof(struct per_port_stats) + sizeof(struct per_pf_stats) + /* sizeof(struct fcoe_statistics_params) + */ (sizeof(struct per_queue_stats) * num_queue_stats)); if (bxe_dma_alloc(sc, (sc->fw_stats_req_size + sc->fw_stats_data_size), &sc->fw_stats_dma, "fw stats") != 0) { bxe_free_fw_stats_mem(sc); return (-1); } /* set up the shortcuts */ sc->fw_stats_req = (struct bxe_fw_stats_req *)sc->fw_stats_dma.vaddr; sc->fw_stats_req_mapping = sc->fw_stats_dma.paddr; sc->fw_stats_data = (struct bxe_fw_stats_data *)((uint8_t *)sc->fw_stats_dma.vaddr + sc->fw_stats_req_size); sc->fw_stats_data_mapping = (sc->fw_stats_dma.paddr + sc->fw_stats_req_size); BLOGD(sc, DBG_LOAD, "statistics request base address set to %#jx\n", (uintmax_t)sc->fw_stats_req_mapping); BLOGD(sc, DBG_LOAD, "statistics data base address set to %#jx\n", (uintmax_t)sc->fw_stats_data_mapping); return (0); } /* * Bits map: * 0-7 - Engine0 load counter. * 8-15 - Engine1 load counter. * 16 - Engine0 RESET_IN_PROGRESS bit. * 17 - Engine1 RESET_IN_PROGRESS bit. * 18 - Engine0 ONE_IS_LOADED. Set when there is at least one active * function on the engine * 19 - Engine1 ONE_IS_LOADED. * 20 - Chip reset flow bit. When set none-leader must wait for both engines * leader to complete (check for both RESET_IN_PROGRESS bits and not * for just the one belonging to its engine). 
*/ #define BXE_RECOVERY_GLOB_REG MISC_REG_GENERIC_POR_1 #define BXE_PATH0_LOAD_CNT_MASK 0x000000ff #define BXE_PATH0_LOAD_CNT_SHIFT 0 #define BXE_PATH1_LOAD_CNT_MASK 0x0000ff00 #define BXE_PATH1_LOAD_CNT_SHIFT 8 #define BXE_PATH0_RST_IN_PROG_BIT 0x00010000 #define BXE_PATH1_RST_IN_PROG_BIT 0x00020000 #define BXE_GLOBAL_RESET_BIT 0x00040000 /* set the GLOBAL_RESET bit, should be run under rtnl lock */ static void bxe_set_reset_global(struct bxe_softc *sc) { uint32_t val; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val | BXE_GLOBAL_RESET_BIT); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* clear the GLOBAL_RESET bit, should be run under rtnl lock */ static void bxe_clear_reset_global(struct bxe_softc *sc) { uint32_t val; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val & (~BXE_GLOBAL_RESET_BIT)); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* checks the GLOBAL_RESET bit, should be run under rtnl lock */ static uint8_t bxe_reset_is_global(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "GLOB_REG=0x%08x\n", val); return (val & BXE_GLOBAL_RESET_BIT) ? TRUE : FALSE; } /* clear RESET_IN_PROGRESS bit for the engine, should be run under rtnl lock */ static void bxe_set_reset_done(struct bxe_softc *sc) { uint32_t val; uint32_t bit = SC_PATH(sc) ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); /* Clear the bit */ val &= ~bit; REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* set RESET_IN_PROGRESS for the engine, should be run under rtnl lock */ static void bxe_set_reset_in_progress(struct bxe_softc *sc) { uint32_t val; uint32_t bit = SC_PATH(sc) ? 
BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); /* Set the bit */ val |= bit; REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* check RESET_IN_PROGRESS bit for an engine, should be run under rtnl lock */ static uint8_t bxe_reset_is_done(struct bxe_softc *sc, int engine) { uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); uint32_t bit = engine ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; /* return false if bit is set */ return (val & bit) ? FALSE : TRUE; } /* get the load status for an engine, should be run under rtnl lock */ static uint8_t bxe_get_load_status(struct bxe_softc *sc, int engine) { uint32_t mask = engine ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = engine ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old value for GLOB_REG=0x%08x\n", val); val = ((val & mask) >> shift); BLOGD(sc, DBG_LOAD, "Load mask engine %d = 0x%08x\n", engine, val); return (val != 0); } /* set pf load mark */ /* XXX needs to be under rtnl lock */ static void bxe_set_pf_load(struct bxe_softc *sc) { uint32_t val; uint32_t val1; uint32_t mask = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = SC_PATH(sc) ? 
BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old value for GLOB_REG=0x%08x\n", val); /* get the current counter value */ val1 = ((val & mask) >> shift); /* set bit of this PF */ val1 |= (1 << SC_ABS_FUNC(sc)); /* clear the old value */ val &= ~mask; /* set the new one */ val |= ((val1 << shift) & mask); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* clear pf load mark */ /* XXX needs to be under rtnl lock */ static uint8_t bxe_clear_pf_load(struct bxe_softc *sc) { uint32_t val1, val; uint32_t mask = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old GEN_REG_VAL=0x%08x\n", val); /* get the current counter value */ val1 = (val & mask) >> shift; /* clear bit of that PF */ val1 &= ~(1 << SC_ABS_FUNC(sc)); /* clear the old value */ val &= ~mask; /* set the new one */ val |= ((val1 << shift) & mask); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); return (val1 != 0); } /* send load requrest to mcp and analyze response */ static int bxe_nic_load_request(struct bxe_softc *sc, uint32_t *load_code) { /* init fw_seq */ sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); BLOGD(sc, DBG_LOAD, "initial fw_seq 0x%04x\n", sc->fw_seq); /* get the current FW pulse sequence */ sc->fw_drv_pulse_wr_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_pulse_mb) & DRV_PULSE_SEQ_MASK); BLOGD(sc, DBG_LOAD, "initial drv_pulse 0x%04x\n", sc->fw_drv_pulse_wr_seq); /* load request */ (*load_code) = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_REQ, DRV_MSG_CODE_LOAD_REQ_WITH_LFA); /* if the MCP fails to respond we must 
abort */ if (!(*load_code)) { BLOGE(sc, "MCP response failure!\n"); return (-1); } /* if MCP refused then must abort */ if ((*load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED) { BLOGE(sc, "MCP refused load request\n"); return (-1); } return (0); } /* * Check whether another PF has already loaded FW to chip. In virtualized * environments a pf from anoth VM may have already initialized the device * including loading FW. */ static int bxe_nic_load_analyze_req(struct bxe_softc *sc, uint32_t load_code) { uint32_t my_fw, loaded_fw; /* is another pf loaded on this engine? */ if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) && (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) { /* build my FW version dword */ my_fw = (BCM_5710_FW_MAJOR_VERSION + (BCM_5710_FW_MINOR_VERSION << 8 ) + (BCM_5710_FW_REVISION_VERSION << 16) + (BCM_5710_FW_ENGINEERING_VERSION << 24)); /* read loaded FW from chip */ loaded_fw = REG_RD(sc, XSEM_REG_PRAM); BLOGD(sc, DBG_LOAD, "loaded FW 0x%08x / my FW 0x%08x\n", loaded_fw, my_fw); /* abort nic load if version mismatch */ if (my_fw != loaded_fw) { BLOGE(sc, "FW 0x%08x already loaded (mine is 0x%08x)", loaded_fw, my_fw); return (-1); } } return (0); } /* mark PMF if applicable */ static void bxe_nic_load_pmf(struct bxe_softc *sc, uint32_t load_code) { uint32_t ncsi_oem_data_addr; if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) || (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) || (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) { /* * Barrier here for ordering between the writing to sc->port.pmf here * and reading it from the periodic task. */ sc->port.pmf = 1; mb(); } else { sc->port.pmf = 0; } BLOGD(sc, DBG_LOAD, "pmf %d\n", sc->port.pmf); /* XXX needed? 
*/ if (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) { if (SHMEM2_HAS(sc, ncsi_oem_data_addr)) { ncsi_oem_data_addr = SHMEM2_RD(sc, ncsi_oem_data_addr); if (ncsi_oem_data_addr) { REG_WR(sc, (ncsi_oem_data_addr + offsetof(struct glob_ncsi_oem_data, driver_version)), 0); } } } } static void bxe_read_mf_cfg(struct bxe_softc *sc) { int n = (CHIP_IS_MODE_4_PORT(sc) ? 2 : 1); int abs_func; int vn; if (BXE_NOMCP(sc)) { return; /* what should be the default bvalue in this case */ } /* * The formula for computing the absolute function number is... * For 2 port configuration (4 functions per port): * abs_func = 2 * vn + SC_PORT + SC_PATH * For 4 port configuration (2 functions per port): * abs_func = 4 * vn + 2 * SC_PORT + SC_PATH */ for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { abs_func = (n * (2 * vn + SC_PORT(sc)) + SC_PATH(sc)); if (abs_func >= E1H_FUNC_MAX) { break; } sc->devinfo.mf_info.mf_config[vn] = MFCFG_RD(sc, func_mf_config[abs_func].config); } if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) { BLOGD(sc, DBG_LOAD, "mf_cfg function disabled\n"); sc->flags |= BXE_MF_FUNC_DIS; } else { BLOGD(sc, DBG_LOAD, "mf_cfg function enabled\n"); sc->flags &= ~BXE_MF_FUNC_DIS; } } /* acquire split MCP access lock register */ static int bxe_acquire_alr(struct bxe_softc *sc) { uint32_t j, val; for (j = 0; j < 1000; j++) { val = (1UL << 31); REG_WR(sc, GRCBASE_MCP + 0x9c, val); val = REG_RD(sc, GRCBASE_MCP + 0x9c); if (val & (1L << 31)) break; DELAY(5000); } if (!(val & (1L << 31))) { BLOGE(sc, "Cannot acquire MCP access lock register\n"); return (-1); } return (0); } /* release split MCP access lock register */ static void bxe_release_alr(struct bxe_softc *sc) { REG_WR(sc, GRCBASE_MCP + 0x9c, 0); } static void bxe_fan_failure(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t ext_phy_config; /* mark the failure */ ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); ext_phy_config &= 
~PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK; ext_phy_config |= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE; SHMEM_WR(sc, dev_info.port_hw_config[port].external_phy_config, ext_phy_config); /* log the failure */ BLOGW(sc, "Fan Failure has caused the driver to shutdown " "the card to prevent permanent damage. " "Please contact OEM Support for assistance\n"); /* XXX */ #if 1 bxe_panic(sc, ("Schedule task to handle fan failure\n")); #else /* * Schedule device reset (unload) * This is due to some boards consuming sufficient power when driver is * up to overheat if fan fails. */ bxe_set_bit(BXE_SP_RTNL_FAN_FAILURE, &sc->sp_rtnl_state); schedule_delayed_work(&sc->sp_rtnl_task, 0); #endif } /* this function is called upon a link interrupt */ static void bxe_link_attn(struct bxe_softc *sc) { uint32_t pause_enabled = 0; struct host_port_stats *pstats; int cmng_fns; /* Make sure that we are synced with the current statistics */ bxe_stats_handle(sc, STATS_EVENT_STOP); elink_link_update(&sc->link_params, &sc->link_vars); if (sc->link_vars.link_up) { /* dropless flow control */ if (!CHIP_IS_E1(sc) && sc->dropless_fc) { pause_enabled = 0; if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) { pause_enabled = 1; } REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_ETH_PAUSE_ENABLED_OFFSET(SC_PORT(sc))), pause_enabled); } if (sc->link_vars.mac_type != ELINK_MAC_TYPE_EMAC) { pstats = BXE_SP(sc, port_stats); /* reset old mac stats */ memset(&(pstats->mac_stx[0]), 0, sizeof(struct mac_stx)); } if (sc->state == BXE_STATE_OPEN) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } } if (sc->link_vars.link_up && sc->link_vars.line_speed) { cmng_fns = bxe_get_cmng_fns_mode(sc); if (cmng_fns != CMNG_FNS_NONE) { bxe_cmng_fns_init(sc, FALSE, cmng_fns); storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } else { /* rate shaping and fairness are disabled */ BLOGD(sc, DBG_LOAD, "single function mode without fairness\n"); } } bxe_link_report_locked(sc); if (IS_MF(sc)) { ; // XXX bxe_link_sync_notify(sc); } } static void 
bxe_attn_int_asserted(struct bxe_softc *sc, uint32_t asserted) { int port = SC_PORT(sc); uint32_t aeu_addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; uint32_t nig_int_mask_addr = port ? NIG_REG_MASK_INTERRUPT_PORT1 : NIG_REG_MASK_INTERRUPT_PORT0; uint32_t aeu_mask; uint32_t nig_mask = 0; uint32_t reg_addr; uint32_t igu_acked; uint32_t cnt; if (sc->attn_state & asserted) { BLOGE(sc, "IGU ERROR attn=0x%08x\n", asserted); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); aeu_mask = REG_RD(sc, aeu_addr); BLOGD(sc, DBG_INTR, "aeu_mask 0x%08x newly asserted 0x%08x\n", aeu_mask, asserted); aeu_mask &= ~(asserted & 0x3ff); BLOGD(sc, DBG_INTR, "new mask 0x%08x\n", aeu_mask); REG_WR(sc, aeu_addr, aeu_mask); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); BLOGD(sc, DBG_INTR, "attn_state 0x%08x\n", sc->attn_state); sc->attn_state |= asserted; BLOGD(sc, DBG_INTR, "new state 0x%08x\n", sc->attn_state); if (asserted & ATTN_HARD_WIRED_MASK) { if (asserted & ATTN_NIG_FOR_FUNC) { BXE_PHY_LOCK(sc); /* save nig interrupt mask */ nig_mask = REG_RD(sc, nig_int_mask_addr); /* If nig_mask is not set, no need to call the update function */ if (nig_mask) { REG_WR(sc, nig_int_mask_addr, 0); bxe_link_attn(sc); } /* handle unicore attn? 
*/ } if (asserted & ATTN_SW_TIMER_4_FUNC) { BLOGD(sc, DBG_INTR, "ATTN_SW_TIMER_4_FUNC!\n"); } if (asserted & GPIO_2_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_2_FUNC!\n"); } if (asserted & GPIO_3_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_3_FUNC!\n"); } if (asserted & GPIO_4_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_4_FUNC!\n"); } if (port == 0) { if (asserted & ATTN_GENERAL_ATTN_1) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_1!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_1, 0x0); } if (asserted & ATTN_GENERAL_ATTN_2) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_2!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_2, 0x0); } if (asserted & ATTN_GENERAL_ATTN_3) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_3!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_3, 0x0); } } else { if (asserted & ATTN_GENERAL_ATTN_4) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_4!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_4, 0x0); } if (asserted & ATTN_GENERAL_ATTN_5) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_5!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_5, 0x0); } if (asserted & ATTN_GENERAL_ATTN_6) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_6!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_6, 0x0); } } } /* hardwired */ if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_addr = (HC_REG_COMMAND_REG + port*32 + COMMAND_REG_ATTN_BITS_SET); } else { reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_SET_UPPER*8); } BLOGD(sc, DBG_INTR, "about to mask 0x%08x at %s addr 0x%08x\n", asserted, (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU", reg_addr); REG_WR(sc, reg_addr, asserted); /* now set back the mask */ if (asserted & ATTN_NIG_FOR_FUNC) { /* * Verify that IGU ack through BAR was written before restoring * NIG mask. This loop should exit after 2-3 iterations max. 
*/ if (sc->devinfo.int_block != INT_BLOCK_HC) { cnt = 0; do { igu_acked = REG_RD(sc, IGU_REG_ATTENTION_ACK_BITS); } while (((igu_acked & ATTN_NIG_FOR_FUNC) == 0) && (++cnt < MAX_IGU_ATTN_ACK_TO)); if (!igu_acked) { BLOGE(sc, "Failed to verify IGU ack on time\n"); } mb(); } REG_WR(sc, nig_int_mask_addr, nig_mask); BXE_PHY_UNLOCK(sc); } } static void bxe_print_next_block(struct bxe_softc *sc, int idx, const char *blk) { BLOGI(sc, "%s%s", idx ? ", " : "", blk); } static int bxe_check_blocks_with_parity0(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "BRB"); break; case AEU_INPUTS_ATTN_BITS_PARSER_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PARSER"); break; case AEU_INPUTS_ATTN_BITS_TSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TSDM"); break; case AEU_INPUTS_ATTN_BITS_SEARCHER_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "SEARCHER"); break; case AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TCM"); break; case AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TSEMI"); break; case AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XPB"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity1(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t *global, uint8_t print) { int i = 0; uint32_t cur_bit = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_PBF_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PBF"); break; case AEU_INPUTS_ATTN_BITS_QM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "QM"); break; 
case AEU_INPUTS_ATTN_BITS_TIMERS_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TM"); break; case AEU_INPUTS_ATTN_BITS_XSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XSDM"); break; case AEU_INPUTS_ATTN_BITS_XCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XCM"); break; case AEU_INPUTS_ATTN_BITS_XSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XSEMI"); break; case AEU_INPUTS_ATTN_BITS_DOORBELLQ_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DOORBELLQ"); break; case AEU_INPUTS_ATTN_BITS_NIG_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "NIG"); break; case AEU_INPUTS_ATTN_BITS_VAUX_PCI_CORE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "VAUX PCI CORE"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_DEBUG_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DEBUG"); break; case AEU_INPUTS_ATTN_BITS_USDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "USDM"); break; case AEU_INPUTS_ATTN_BITS_UCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "UCM"); break; case AEU_INPUTS_ATTN_BITS_USEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "USEMI"); break; case AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "UPB"); break; case AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CSDM"); break; case AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CCM"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity2(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_CSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CSEMI"); break; case 
AEU_INPUTS_ATTN_BITS_PXP_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PXP"); break; case AEU_IN_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PXPPCICLOCKCLIENT"); break; case AEU_INPUTS_ATTN_BITS_CFC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CFC"); break; case AEU_INPUTS_ATTN_BITS_CDU_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CDU"); break; case AEU_INPUTS_ATTN_BITS_DMAE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DMAE"); break; case AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "IGU"); break; case AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "MISC"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity3(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t *global, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP ROM"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP UMP RX"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP UMP TX"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP SCPAD"); *global = TRUE; break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity4(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR: if (print) 
bxe_print_next_block(sc, par_num++, "PGLUE_B"); break; case AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "ATC"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static uint8_t bxe_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print, uint32_t *sig) { int par_num = 0; if ((sig[0] & HW_PRTY_ASSERT_SET_0) || (sig[1] & HW_PRTY_ASSERT_SET_1) || (sig[2] & HW_PRTY_ASSERT_SET_2) || (sig[3] & HW_PRTY_ASSERT_SET_3) || (sig[4] & HW_PRTY_ASSERT_SET_4)) { BLOGE(sc, "Parity error: HW block parity attention:\n" "[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x\n", (uint32_t)(sig[0] & HW_PRTY_ASSERT_SET_0), (uint32_t)(sig[1] & HW_PRTY_ASSERT_SET_1), (uint32_t)(sig[2] & HW_PRTY_ASSERT_SET_2), (uint32_t)(sig[3] & HW_PRTY_ASSERT_SET_3), (uint32_t)(sig[4] & HW_PRTY_ASSERT_SET_4)); if (print) BLOGI(sc, "Parity errors detected in blocks: "); par_num = bxe_check_blocks_with_parity0(sc, sig[0] & HW_PRTY_ASSERT_SET_0, par_num, print); par_num = bxe_check_blocks_with_parity1(sc, sig[1] & HW_PRTY_ASSERT_SET_1, par_num, global, print); par_num = bxe_check_blocks_with_parity2(sc, sig[2] & HW_PRTY_ASSERT_SET_2, par_num, print); par_num = bxe_check_blocks_with_parity3(sc, sig[3] & HW_PRTY_ASSERT_SET_3, par_num, global, print); par_num = bxe_check_blocks_with_parity4(sc, sig[4] & HW_PRTY_ASSERT_SET_4, par_num, print); if (print) BLOGI(sc, "\n"); return (TRUE); } return (FALSE); } static uint8_t bxe_chk_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print) { struct attn_route attn = { {0} }; int port = SC_PORT(sc); attn.sig[0] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + port*4); attn.sig[1] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_2_FUNC_0 + port*4); attn.sig[2] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_3_FUNC_0 + port*4); attn.sig[3] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4); if (!CHIP_IS_E1x(sc)) attn.sig[4] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_5_FUNC_0 + port*4); return 
(bxe_parity_attn(sc, global, print, attn.sig)); } static void bxe_attn_int_deasserted4(struct bxe_softc *sc, uint32_t attn) { uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_PGLUE_HW_INTERRUPT) { val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS_CLR); BLOGE(sc, "PGLUE hw attention 0x%08x\n", val); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN\n"); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW) BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW\n"); } if (attn & AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT) { val = REG_RD(sc, ATC_REG_ATC_INT_STS_CLR); BLOGE(sc, "ATC hw attention 0x%08x\n", val); if (val & ATC_ATC_INT_STS_REG_ADDRESS_ERROR) BLOGE(sc, "ATC_ATC_INT_STS_REG_ADDRESS_ERROR\n"); if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND\n"); if (val & ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS\n"); if (val & ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT\n"); if (val 
& ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR\n"); if (val & ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU) BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU\n"); } if (attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)) { BLOGE(sc, "FATAL parity attention set4 0x%08x\n", (uint32_t)(attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR))); } } static void bxe_e1h_disable(struct bxe_softc *sc) { int port = SC_PORT(sc); bxe_tx_disable(sc); REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 0); } static void bxe_e1h_enable(struct bxe_softc *sc) { int port = SC_PORT(sc); REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 1); // XXX bxe_tx_enable(sc); } /* * called due to MCP event (on pmf): * reread new bandwidth configuration * configure FW * notify others function about the change */ static void bxe_config_mf_bw(struct bxe_softc *sc) { if (sc->link_vars.link_up) { bxe_cmng_fns_init(sc, TRUE, CMNG_FNS_MINMAX); // XXX bxe_link_sync_notify(sc); } storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } static void bxe_set_mf_bw(struct bxe_softc *sc) { bxe_config_mf_bw(sc); bxe_fw_command(sc, DRV_MSG_CODE_SET_MF_BW_ACK, 0); } static void bxe_handle_eee_event(struct bxe_softc *sc) { BLOGD(sc, DBG_INTR, "EEE - LLDP event\n"); bxe_fw_command(sc, DRV_MSG_CODE_EEE_RESULTS_ACK, 0); } #define DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED 3 static void bxe_drv_info_ether_stat(struct bxe_softc *sc) { struct eth_stats_info *ether_stat = &sc->sp->drv_info_to_mcp.ether_stat; strlcpy(ether_stat->version, BXE_DRIVER_VERSION, ETH_STAT_INFO_VERSION_LEN); /* XXX (+ MAC_PAD) taken from other driver... 
verify this is right */ sc->sp_objs[0].mac_obj.get_n_elements(sc, &sc->sp_objs[0].mac_obj, DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED, ether_stat->mac_local + MAC_PAD, MAC_PAD, ETH_ALEN); ether_stat->mtu_size = sc->mtu; ether_stat->feature_flags |= FEATURE_ETH_CHKSUM_OFFLOAD_MASK; if (sc->ifnet->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) { ether_stat->feature_flags |= FEATURE_ETH_LSO_MASK; } // XXX ether_stat->feature_flags |= ???; ether_stat->promiscuous_mode = 0; // (flags & PROMISC) ? 1 : 0; ether_stat->txq_size = sc->tx_ring_size; ether_stat->rxq_size = sc->rx_ring_size; } static void bxe_handle_drv_info_req(struct bxe_softc *sc) { enum drv_info_opcode op_code; uint32_t drv_info_ctl = SHMEM2_RD(sc, drv_info_control); /* if drv_info version supported by MFW doesn't match - send NACK */ if ((drv_info_ctl & DRV_INFO_CONTROL_VER_MASK) != DRV_INFO_CUR_VER) { bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_NACK, 0); return; } op_code = ((drv_info_ctl & DRV_INFO_CONTROL_OP_CODE_MASK) >> DRV_INFO_CONTROL_OP_CODE_SHIFT); memset(&sc->sp->drv_info_to_mcp, 0, sizeof(union drv_info_to_mcp)); switch (op_code) { case ETH_STATS_OPCODE: bxe_drv_info_ether_stat(sc); break; case FCOE_STATS_OPCODE: case ISCSI_STATS_OPCODE: default: /* if op code isn't supported - send NACK */ bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_NACK, 0); return; } /* * If we got drv_info attn from MFW then these fields are defined in * shmem2 for sure */ SHMEM2_WR(sc, drv_info_host_addr_lo, U64_LO(BXE_SP_MAPPING(sc, drv_info_to_mcp))); SHMEM2_WR(sc, drv_info_host_addr_hi, U64_HI(BXE_SP_MAPPING(sc, drv_info_to_mcp))); bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_ACK, 0); } static void bxe_dcc_event(struct bxe_softc *sc, uint32_t dcc_event) { BLOGD(sc, DBG_INTR, "dcc_event 0x%08x\n", dcc_event); if (dcc_event & DRV_STATUS_DCC_DISABLE_ENABLE_PF) { /* * This is the only place besides the function initialization * where the sc->flags can change so it is done without any * locks */ if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] & 
FUNC_MF_CFG_FUNC_DISABLED) { BLOGD(sc, DBG_INTR, "mf_cfg function disabled\n"); sc->flags |= BXE_MF_FUNC_DIS; bxe_e1h_disable(sc); } else { BLOGD(sc, DBG_INTR, "mf_cfg function enabled\n"); sc->flags &= ~BXE_MF_FUNC_DIS; bxe_e1h_enable(sc); } dcc_event &= ~DRV_STATUS_DCC_DISABLE_ENABLE_PF; } if (dcc_event & DRV_STATUS_DCC_BANDWIDTH_ALLOCATION) { bxe_config_mf_bw(sc); dcc_event &= ~DRV_STATUS_DCC_BANDWIDTH_ALLOCATION; } /* Report results to MCP */ if (dcc_event) bxe_fw_command(sc, DRV_MSG_CODE_DCC_FAILURE, 0); else bxe_fw_command(sc, DRV_MSG_CODE_DCC_OK, 0); } static void bxe_pmf_update(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t val; sc->port.pmf = 1; BLOGD(sc, DBG_INTR, "pmf %d\n", sc->port.pmf); /* * We need the mb() to ensure the ordering between the writing to * sc->port.pmf here and reading it from the bxe_periodic_task(). */ mb(); /* queue a periodic task */ // XXX schedule task... // XXX bxe_dcbx_pmf_update(sc); /* enable nig attention */ val = (0xff0f | (1 << (SC_VN(sc) + 4))); if (sc->devinfo.int_block == INT_BLOCK_HC) { REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, val); REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, val); } else if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, val); } bxe_stats_handle(sc, STATS_EVENT_PMF); } static int bxe_mc_assert(struct bxe_softc *sc) { char last_idx; int i, rc = 0; uint32_t row0, row1, row2, row3; /* XSTORM */ last_idx = REG_RD8(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) BLOGE(sc, "XSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != 
COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "XSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* TSTORM */ last_idx = REG_RD8(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "TSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "TSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* CSTORM */ last_idx = REG_RD8(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "CSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "CSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* USTORM */ last_idx = REG_RD8(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "USTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_USTRORM_INTMEM + 
USTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "USTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } return (rc); } static void bxe_attn_int_deasserted3(struct bxe_softc *sc, uint32_t attn) { int func = SC_FUNC(sc); uint32_t val; if (attn & EVEREST_GEN_ATTN_IN_USE_MASK) { if (attn & BXE_PMF_LINK_ASSERT(sc)) { REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); bxe_read_mf_cfg(sc); sc->devinfo.mf_info.mf_config[SC_VN(sc)] = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); val = SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_status); if (val & DRV_STATUS_DCC_EVENT_MASK) bxe_dcc_event(sc, (val & DRV_STATUS_DCC_EVENT_MASK)); if (val & DRV_STATUS_SET_MF_BW) bxe_set_mf_bw(sc); if (val & DRV_STATUS_DRV_INFO_REQ) bxe_handle_drv_info_req(sc); #if 0 if (val & DRV_STATUS_VF_DISABLED) bxe_vf_handle_flr_event(sc); #endif if ((sc->port.pmf == 0) && (val & DRV_STATUS_PMF)) bxe_pmf_update(sc); #if 0 if (sc->port.pmf && (val & DRV_STATUS_DCBX_NEGOTIATION_RESULTS) && (sc->dcbx_enabled > 0)) /* start dcbx state machine */ bxe_dcbx_set_params(sc, BXE_DCBX_STATE_NEG_RECEIVED); #endif #if 0 if (val & DRV_STATUS_AFEX_EVENT_MASK) bxe_handle_afex_cmd(sc, val & DRV_STATUS_AFEX_EVENT_MASK); #endif if (val & DRV_STATUS_EEE_NEGOTIATION_RESULTS) bxe_handle_eee_event(sc); if (sc->link_vars.periodic_flags & ELINK_PERIODIC_FLAGS_LINK_EVENT) { /* sync with link */ BXE_PHY_LOCK(sc); sc->link_vars.periodic_flags &= ~ELINK_PERIODIC_FLAGS_LINK_EVENT; BXE_PHY_UNLOCK(sc); if (IS_MF(sc)) ; // XXX bxe_link_sync_notify(sc); bxe_link_report(sc); } /* * Always call it here: bxe_link_report() will * prevent the link indication duplication. 
/*
 * Handle the CFC, PXP and PXP2 hardware attention bits found in 'attn'.
 *
 * CFC and PXP attentions are reported by reading and logging their
 * interrupt status registers. For PXP2 on non-E1x chips, an attention that
 * consists solely of the write-PGLUE EOP error bit is logged and then
 * removed from 'attn' so that it does not reach the fatal path below
 * (CQ47854 workaround). Any bit still set in HW_INTERRUT_ASSERT_SET_2 is
 * cleared in the per-port AEU enable register, logged, and ends in
 * bxe_panic().
 */
static void
bxe_attn_int_deasserted2(struct bxe_softc *sc,
                         uint32_t         attn)
{
    int port = SC_PORT(sc);
    int reg_offset;
    uint32_t val0, mask0, val1, mask1;
    uint32_t val;

    if (attn & AEU_INPUTS_ATTN_BITS_CFC_HW_INTERRUPT) {
        /* NOTE(review): *_STS_CLR registers presumably clear on read - confirm */
        val = REG_RD(sc, CFC_REG_CFC_INT_STS_CLR);
        BLOGE(sc, "CFC hw attention 0x%08x\n", val);
        /* CFC error attention */
        if (val & 0x2) {
            BLOGE(sc, "FATAL error from CFC\n");
        }
    }

    if (attn & AEU_INPUTS_ATTN_BITS_PXP_HW_INTERRUPT) {
        val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_0);
        BLOGE(sc, "PXP hw attention-0 0x%08x\n", val);
        /* RQ_USDMDP_FIFO_OVERFLOW */
        if (val & 0x18000) {
            BLOGE(sc, "FATAL error from PXP\n");
        }

        /* the second PXP status register is only read on non-E1x chips */
        if (!CHIP_IS_E1x(sc)) {
            val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_1);
            BLOGE(sc, "PXP hw attention-1 0x%08x\n", val);
        }
    }

/* short local aliases for the two long register-bit names used below */
#define PXP2_EOP_ERROR_BIT  PXP2_PXP2_INT_STS_CLR_0_REG_WR_PGLUE_EOP_ERROR
#define AEU_PXP2_HW_INT_BIT AEU_INPUTS_ATTN_BITS_PXPPCICLOCKCLIENT_HW_INTERRUPT

    if (attn & AEU_PXP2_HW_INT_BIT) {
        /*
         * CQ47854 workaround: do not panic on
         * PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR
         */
        if (!CHIP_IS_E1x(sc)) {
            mask0 = REG_RD(sc, PXP2_REG_PXP2_INT_MASK_0);
            val1 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_1);
            mask1 = REG_RD(sc, PXP2_REG_PXP2_INT_MASK_1);
            val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_0);
            /*
             * If only PXP2_EOP_ERROR_BIT is set in STS0 and nothing
             * unmasked is set in STS1, clear it by reading STS_CLR0.
             *
             * Additional attentions raised between the STS0 read and the
             * STS_CLR0 read are probably lost; in that case the user will
             * not be notified about them.
             */
            if (val0 & mask0 & PXP2_EOP_ERROR_BIT && !(val1 & mask1))
                val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_CLR_0);

            /* print the register, since no one can restore it */
            BLOGE(sc, "PXP2_REG_PXP2_INT_STS_CLR_0 0x%08x\n", val0);

            /*
             * if PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR
             * then notify
             */
            if (val0 & PXP2_EOP_ERROR_BIT) {
                BLOGE(sc, "PXP2_WR_PGLUE_EOP_ERROR\n");

                /*
                 * if only PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR is
                 * set then clear attention from PXP2 block without panic
                 */
                if (((val0 & mask0) == PXP2_EOP_ERROR_BIT) &&
                    ((val1 & mask1) == 0))
                    attn &= ~AEU_PXP2_HW_INT_BIT;
            }
        }
    }

    if (attn & HW_INTERRUT_ASSERT_SET_2) {
        /* stop the offending bits from asserting again, then give up */
        reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_2 :
                             MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2);

        val = REG_RD(sc, reg_offset);
        val &= ~(attn & HW_INTERRUT_ASSERT_SET_2);
        REG_WR(sc, reg_offset, val);

        BLOGE(sc, "FATAL HW block attention set2 0x%x\n",
              (uint32_t)(attn & HW_INTERRUT_ASSERT_SET_2));
        bxe_panic(sc, ("HW block attention set2\n"));
    }
}
MISC_REG_AEU_ENABLE1_FUNC_1_OUT_1 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1); val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_1); REG_WR(sc, reg_offset, val); BLOGE(sc, "FATAL HW block attention set1 0x%08x\n", (uint32_t)(attn & HW_INTERRUT_ASSERT_SET_1)); bxe_panic(sc, ("HW block attention set1\n")); } } static void bxe_attn_int_deasserted0(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val; reg_offset = (port) ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0; if (attn & AEU_INPUTS_ATTN_BITS_SPIO5) { val = REG_RD(sc, reg_offset); val &= ~AEU_INPUTS_ATTN_BITS_SPIO5; REG_WR(sc, reg_offset, val); BLOGW(sc, "SPIO5 hw attention\n"); /* Fan failure attention */ elink_hw_reset_phy(&sc->link_params); bxe_fan_failure(sc); } if ((attn & sc->link_vars.aeu_int_mask) && sc->port.pmf) { BXE_PHY_LOCK(sc); elink_handle_module_detect_int(&sc->link_params); BXE_PHY_UNLOCK(sc); } if (attn & HW_INTERRUT_ASSERT_SET_0) { val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_0); REG_WR(sc, reg_offset, val); bxe_panic(sc, ("FATAL HW block attention set0 0x%lx\n", (attn & HW_INTERRUT_ASSERT_SET_0))); } } static void bxe_attn_int_deasserted(struct bxe_softc *sc, uint32_t deasserted) { struct attn_route attn; struct attn_route *group_mask; int port = SC_PORT(sc); int index; uint32_t reg_addr; uint32_t val; uint32_t aeu_mask; uint8_t global = FALSE; /* * Need to take HW lock because MCP or other port might also * try to handle this event. */ bxe_acquire_alr(sc); if (bxe_chk_parity_attn(sc, &global, TRUE)) { /* XXX * In case of parity errors don't handle attentions so that * other function would "see" parity errors. */ sc->recovery_state = BXE_RECOVERY_INIT; // XXX schedule a recovery task... 
/* disable HW interrupts */ bxe_int_disable(sc); bxe_release_alr(sc); return; } attn.sig[0] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + port*4); attn.sig[1] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_2_FUNC_0 + port*4); attn.sig[2] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_3_FUNC_0 + port*4); attn.sig[3] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4); if (!CHIP_IS_E1x(sc)) { attn.sig[4] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_5_FUNC_0 + port*4); } else { attn.sig[4] = 0; } BLOGD(sc, DBG_INTR, "attn: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", attn.sig[0], attn.sig[1], attn.sig[2], attn.sig[3], attn.sig[4]); for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { if (deasserted & (1 << index)) { group_mask = &sc->attn_group[index]; BLOGD(sc, DBG_INTR, "group[%d]: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", index, group_mask->sig[0], group_mask->sig[1], group_mask->sig[2], group_mask->sig[3], group_mask->sig[4]); bxe_attn_int_deasserted4(sc, attn.sig[4] & group_mask->sig[4]); bxe_attn_int_deasserted3(sc, attn.sig[3] & group_mask->sig[3]); bxe_attn_int_deasserted1(sc, attn.sig[1] & group_mask->sig[1]); bxe_attn_int_deasserted2(sc, attn.sig[2] & group_mask->sig[2]); bxe_attn_int_deasserted0(sc, attn.sig[0] & group_mask->sig[0]); } } bxe_release_alr(sc); if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_addr = (HC_REG_COMMAND_REG + port*32 + COMMAND_REG_ATTN_BITS_CLR); } else { reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_CLR_UPPER*8); } val = ~deasserted; BLOGD(sc, DBG_INTR, "about to mask 0x%08x at %s addr 0x%08x\n", val, (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU", reg_addr); REG_WR(sc, reg_addr, val); if (~sc->attn_state & deasserted) { BLOGE(sc, "IGU error\n"); } reg_addr = port ? 
MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); aeu_mask = REG_RD(sc, reg_addr); BLOGD(sc, DBG_INTR, "aeu_mask 0x%08x newly deasserted 0x%08x\n", aeu_mask, deasserted); aeu_mask |= (deasserted & 0x3ff); BLOGD(sc, DBG_INTR, "new mask 0x%08x\n", aeu_mask); REG_WR(sc, reg_addr, aeu_mask); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); BLOGD(sc, DBG_INTR, "attn_state 0x%08x\n", sc->attn_state); sc->attn_state &= ~deasserted; BLOGD(sc, DBG_INTR, "new state 0x%08x\n", sc->attn_state); } static void bxe_attn_int(struct bxe_softc *sc) { /* read local copy of bits */ uint32_t attn_bits = le32toh(sc->def_sb->atten_status_block.attn_bits); uint32_t attn_ack = le32toh(sc->def_sb->atten_status_block.attn_bits_ack); uint32_t attn_state = sc->attn_state; /* look for changed bits */ uint32_t asserted = attn_bits & ~attn_ack & ~attn_state; uint32_t deasserted = ~attn_bits & attn_ack & attn_state; BLOGD(sc, DBG_INTR, "attn_bits 0x%08x attn_ack 0x%08x asserted 0x%08x deasserted 0x%08x\n", attn_bits, attn_ack, asserted, deasserted); if (~(attn_bits ^ attn_ack) & (attn_bits ^ attn_state)) { BLOGE(sc, "BAD attention state\n"); } /* handle bits that were raised */ if (asserted) { bxe_attn_int_asserted(sc, asserted); } if (deasserted) { bxe_attn_int_deasserted(sc, deasserted); } } static uint16_t bxe_update_dsb_idx(struct bxe_softc *sc) { struct host_sp_status_block *def_sb = sc->def_sb; uint16_t rc = 0; mb(); /* status block is written to by the chip */ if (sc->def_att_idx != def_sb->atten_status_block.attn_bits_index) { sc->def_att_idx = def_sb->atten_status_block.attn_bits_index; rc |= BXE_DEF_SB_ATT_IDX; } if (sc->def_idx != def_sb->sp_sb.running_index) { sc->def_idx = def_sb->sp_sb.running_index; rc |= BXE_DEF_SB_IDX; } mb(); return (rc); } static inline struct ecore_queue_sp_obj * bxe_cid_to_q_obj(struct bxe_softc *sc, uint32_t cid) { BLOGD(sc, DBG_SP, "retrieving fp from cid %d\n", 
cid); return (&sc->sp_objs[CID_TO_FP(cid, sc)].q_obj); } static void bxe_handle_mcast_eqe(struct bxe_softc *sc) { struct ecore_mcast_ramrod_params rparam; int rc; memset(&rparam, 0, sizeof(rparam)); rparam.mcast_obj = &sc->mcast_obj; BXE_MCAST_LOCK(sc); /* clear pending state for the last command */ sc->mcast_obj.raw.clear_pending(&sc->mcast_obj.raw); /* if there are pending mcast commands - send them */ if (sc->mcast_obj.check_pending(&sc->mcast_obj)) { rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); if (rc < 0) { BLOGD(sc, DBG_SP, "ERROR: Failed to send pending mcast commands (%d)\n", rc); } } BXE_MCAST_UNLOCK(sc); } static void bxe_handle_classification_eqe(struct bxe_softc *sc, union event_ring_elem *elem) { unsigned long ramrod_flags = 0; int rc = 0; uint32_t cid = elem->message.data.eth_event.echo & BXE_SWCID_MASK; struct ecore_vlan_mac_obj *vlan_mac_obj; /* always push next commands out, don't wait here */ bit_set(&ramrod_flags, RAMROD_CONT); switch (le32toh(elem->message.data.eth_event.echo) >> BXE_SWCID_SHIFT) { case ECORE_FILTER_MAC_PENDING: BLOGD(sc, DBG_SP, "Got SETUP_MAC completions\n"); vlan_mac_obj = &sc->sp_objs[cid].mac_obj; break; case ECORE_FILTER_MCAST_PENDING: BLOGD(sc, DBG_SP, "Got SETUP_MCAST completions\n"); /* * This is only relevant for 57710 where multicast MACs are * configured as unicast MACs using the same ramrod. 
*/ bxe_handle_mcast_eqe(sc); return; default: BLOGE(sc, "Unsupported classification command: %d\n", elem->message.data.eth_event.echo); return; } rc = vlan_mac_obj->complete(sc, vlan_mac_obj, elem, &ramrod_flags); if (rc < 0) { BLOGE(sc, "Failed to schedule new commands (%d)\n", rc); } else if (rc > 0) { BLOGD(sc, DBG_SP, "Scheduled next pending commands...\n"); } } static void bxe_handle_rx_mode_eqe(struct bxe_softc *sc, union event_ring_elem *elem) { bxe_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state); /* send rx_mode command again if was requested */ if (bxe_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) { bxe_set_storm_rx_mode(sc); } #if 0 else if (bxe_test_and_clear_bit(ECORE_FILTER_ISCSI_ETH_START_SCHED, &sc->sp_state)) { bxe_set_iscsi_eth_rx_mode(sc, TRUE); } else if (bxe_test_and_clear_bit(ECORE_FILTER_ISCSI_ETH_STOP_SCHED, &sc->sp_state)) { bxe_set_iscsi_eth_rx_mode(sc, FALSE); } #endif } static void bxe_update_eq_prod(struct bxe_softc *sc, uint16_t prod) { storm_memset_eq_prod(sc, prod, SC_FUNC(sc)); wmb(); /* keep prod updates ordered */ } static void bxe_eq_int(struct bxe_softc *sc) { uint16_t hw_cons, sw_cons, sw_prod; union event_ring_elem *elem; uint8_t echo; uint32_t cid; uint8_t opcode; int spqe_cnt = 0; struct ecore_queue_sp_obj *q_obj; struct ecore_func_sp_obj *f_obj = &sc->func_obj; struct ecore_raw_obj *rss_raw = &sc->rss_conf_obj.raw; hw_cons = le16toh(*sc->eq_cons_sb); /* * The hw_cons range is 1-255, 257 - the sw_cons range is 0-254, 256. * when we get to the next-page we need to adjust so the loop * condition below will be met. The next element is the size of a * regular element and hence incrementing by 1 */ if ((hw_cons & EQ_DESC_MAX_PAGE) == EQ_DESC_MAX_PAGE) { hw_cons++; } /* * This function may never run in parallel with itself for a * specific sc and no need for a read memory barrier here. 
*/ sw_cons = sc->eq_cons; sw_prod = sc->eq_prod; BLOGD(sc, DBG_SP,"EQ: hw_cons=%u sw_cons=%u eq_spq_left=0x%lx\n", hw_cons, sw_cons, atomic_load_acq_long(&sc->eq_spq_left)); for (; sw_cons != hw_cons; sw_prod = NEXT_EQ_IDX(sw_prod), sw_cons = NEXT_EQ_IDX(sw_cons)) { elem = &sc->eq[EQ_DESC(sw_cons)]; #if 0 int rc; rc = bxe_iov_eq_sp_event(sc, elem); if (!rc) { BLOGE(sc, "bxe_iov_eq_sp_event returned %d\n", rc); goto next_spqe; } #endif /* elem CID originates from FW, actually LE */ cid = SW_CID(elem->message.data.cfc_del_event.cid); opcode = elem->message.opcode; /* handle eq element */ switch (opcode) { #if 0 case EVENT_RING_OPCODE_VF_PF_CHANNEL: BLOGD(sc, DBG_SP, "vf/pf channel element on eq\n"); bxe_vf_mbx(sc, &elem->message.data.vf_pf_event); continue; #endif case EVENT_RING_OPCODE_STAT_QUERY: BLOGD(sc, DBG_SP, "got statistics completion event %d\n", sc->stats_comp++); /* nothing to do with stats comp */ goto next_spqe; case EVENT_RING_OPCODE_CFC_DEL: /* handle according to cid range */ /* we may want to verify here that the sc state is HALTING */ BLOGD(sc, DBG_SP, "got delete ramrod for MULTI[%d]\n", cid); q_obj = bxe_cid_to_q_obj(sc, cid); if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_CFC_DEL)) { break; } goto next_spqe; case EVENT_RING_OPCODE_STOP_TRAFFIC: BLOGD(sc, DBG_SP, "got STOP TRAFFIC\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_STOP)) { break; } // XXX bxe_dcbx_set_params(sc, BXE_DCBX_STATE_TX_PAUSED); goto next_spqe; case EVENT_RING_OPCODE_START_TRAFFIC: BLOGD(sc, DBG_SP, "got START TRAFFIC\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_START)) { break; } // XXX bxe_dcbx_set_params(sc, BXE_DCBX_STATE_TX_RELEASED); goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_UPDATE: echo = elem->message.data.function_update_event.echo; if (echo == SWITCH_UPDATE) { BLOGD(sc, DBG_SP, "got FUNC_SWITCH_UPDATE ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_SWITCH_UPDATE)) { break; } } else { BLOGD(sc, DBG_SP, "AFEX: ramrod completed 
FUNCTION_UPDATE\n"); #if 0 f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_AFEX_UPDATE); /* * We will perform the queues update from the sp_core_task as * all queue SP operations should run with CORE_LOCK. */ bxe_set_bit(BXE_SP_CORE_AFEX_F_UPDATE, &sc->sp_core_state); taskqueue_enqueue(sc->sp_tq, &sc->sp_tq_task); #endif } goto next_spqe; #if 0 case EVENT_RING_OPCODE_AFEX_VIF_LISTS: f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_AFEX_VIFLISTS); bxe_after_afex_vif_lists(sc, elem); goto next_spqe; #endif case EVENT_RING_OPCODE_FORWARD_SETUP: q_obj = &bxe_fwd_sp_obj(sc, q_obj); if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_SETUP_TX_ONLY)) { break; } goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_START: BLOGD(sc, DBG_SP, "got FUNC_START ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_START)) { break; } goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_STOP: BLOGD(sc, DBG_SP, "got FUNC_STOP ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_STOP)) { break; } goto next_spqe; } switch (opcode | sc->state) { case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BXE_STATE_OPENING_WAITING_PORT): cid = elem->message.data.eth_event.echo & BXE_SWCID_MASK; BLOGD(sc, DBG_SP, "got RSS_UPDATE ramrod. 
/*
 * Slowpath taskqueue handler.
 *
 * 'context' is the driver softc; 'pending' is not used. The default status
 * block indices are refreshed, attention and event-queue work is run
 * according to which index moved, and finally the attention index is
 * acknowledged with IGU_INT_ENABLE.
 */
static void
bxe_handle_sp_tq(void *context,
                 int  pending)
{
    struct bxe_softc *sc = (struct bxe_softc *)context;
    uint16_t status;

    BLOGD(sc, DBG_SP, "---> SP TASK <---\n");

    /* what work needs to be performed? */
    status = bxe_update_dsb_idx(sc);

    BLOGD(sc, DBG_SP, "dsb status 0x%04x\n", status);

    /* HW attentions */
    if (status & BXE_DEF_SB_ATT_IDX) {
        BLOGD(sc, DBG_SP, "---> ATTN INTR <---\n");
        bxe_attn_int(sc);
        status &= ~BXE_DEF_SB_ATT_IDX;
    }

    /* SP events: STAT_QUERY and others */
    if (status & BXE_DEF_SB_IDX) {
        /* handle EQ completions */
        BLOGD(sc, DBG_SP, "---> EQ INTR <---\n");
        bxe_eq_int(sc);
        /* acknowledge the slowpath index with IGU_INT_NOP */
        bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID,
                   le16toh(sc->def_idx), IGU_INT_NOP, 1);
        status &= ~BXE_DEF_SB_IDX;
    }

    /* if status is non zero then something went wrong */
    if (__predict_false(status)) {
        BLOGE(sc, "Got an unknown SP interrupt! (0x%04x)\n", status);
    }

    /*
     * Acknowledge the attention index and re-enable the interrupt. This
     * is done on every invocation, whether or not any work was handled.
     */
    bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID,
               le16toh(sc->def_att_idx), IGU_INT_ENABLE, 1);

    /*
     * Must be called after the EQ processing (since eq leads to sriov
     * ramrod completion flows).
     * This flow may have been scheduled by the arrival of a ramrod
     * completion, or by the sriov code rescheduling itself.
     */
    // XXX bxe_iov_sp_task(sc);

#if 0
    /* AFEX - poll to check if VIFSET_ACK should be sent to MFW */
    if (bxe_test_and_clear_bit(ECORE_AFEX_PENDING_VIFSET_MCP_ACK,
                               &sc->sp_state)) {
        bxe_link_report(sc);
        bxe_fw_command(sc, DRV_MSG_CODE_AFEX_VIFSET_ACK, 0);
    }
#endif
}
*/ #if 0 if (!(sc->ifnet->if_drv_flags & IFF_DRV_RUNNING)) { return; } #endif /* update the fastpath index */ bxe_update_fp_sb_idx(fp); /* XXX add loop here if ever support multiple tx CoS */ /* fp->txdata[cos] */ if (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); more_tx = bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); } if (bxe_has_rx_work(fp)) { more_rx = bxe_rxeof(sc, fp); } if (more_rx /*|| more_tx*/) { /* still more work to do */ taskqueue_enqueue_fast(fp->tq, &fp->tq_task); return; } bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1); } static void bxe_task_fp(struct bxe_fastpath *fp) { struct bxe_softc *sc = fp->sc; uint8_t more_tx = FALSE; uint8_t more_rx = FALSE; BLOGD(sc, DBG_INTR, "---> FP TASK ISR (%d) <---\n", fp->index); /* update the fastpath index */ bxe_update_fp_sb_idx(fp); /* XXX add loop here if ever support multiple tx CoS */ /* fp->txdata[cos] */ if (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); more_tx = bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); } if (bxe_has_rx_work(fp)) { more_rx = bxe_rxeof(sc, fp); } if (more_rx /*|| more_tx*/) { /* still more work to do, bail out if this ISR and process later */ taskqueue_enqueue_fast(fp->tq, &fp->tq_task); return; } /* * Here we write the fastpath index taken before doing any tx or rx work. * It is very well possible other hw events occurred up to this point and * they were actually processed accordingly above. Since we're going to * write an older fastpath index, an interrupt is coming which we might * not do any work in. */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1); } /* * Legacy interrupt entry point. * * Verifies that the controller generated the interrupt and * then calls a separate routine to handle the various * interrupt causes: link, RX, and TX. 
*/ static void bxe_intr_legacy(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; struct bxe_fastpath *fp; uint16_t status, mask; int i; BLOGD(sc, DBG_INTR, "---> BXE INTx <---\n"); #if 0 /* Don't handle any interrupts if we're not ready. */ if (__predict_false(sc->intr_sem != 0)) { return; } #endif /* * 0 for ustorm, 1 for cstorm * the bits returned from ack_int() are 0-15 * bit 0 = attention status block * bit 1 = fast path status block * a mask of 0x2 or more = tx/rx event * a mask of 1 = slow path event */ status = bxe_ack_int(sc); /* the interrupt is not for us */ if (__predict_false(status == 0)) { BLOGD(sc, DBG_INTR, "Not our interrupt!\n"); return; } BLOGD(sc, DBG_INTR, "Interrupt status 0x%04x\n", status); FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; mask = (0x2 << (fp->index + CNIC_SUPPORT(sc))); if (status & mask) { /* acknowledge and disable further fastpath interrupts */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); bxe_task_fp(fp); status &= ~mask; } } #if 0 if (CNIC_SUPPORT(sc)) { mask = 0x2; if (status & (mask | 0x1)) { ... 
status &= ~mask; } } #endif if (__predict_false(status & 0x1)) { /* acknowledge and disable further slowpath interrupts */ bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); /* schedule slowpath handler */ taskqueue_enqueue_fast(sc->sp_tq, &sc->sp_tq_task); status &= ~0x1; } if (__predict_false(status)) { BLOGW(sc, "Unexpected fastpath status (0x%08x)!\n", status); } } /* slowpath interrupt entry point */ static void bxe_intr_sp(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; BLOGD(sc, (DBG_INTR | DBG_SP), "---> SP INTR <---\n"); /* acknowledge and disable further slowpath interrupts */ bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); /* schedule slowpath handler */ taskqueue_enqueue_fast(sc->sp_tq, &sc->sp_tq_task); } /* fastpath interrupt entry point */ static void bxe_intr_fp(void *xfp) { struct bxe_fastpath *fp = (struct bxe_fastpath *)xfp; struct bxe_softc *sc = fp->sc; BLOGD(sc, DBG_INTR, "---> FP INTR %d <---\n", fp->index); BLOGD(sc, DBG_INTR, "(cpu=%d) MSI-X fp=%d fw_sb=%d igu_sb=%d\n", curcpu, fp->index, fp->fw_sb_id, fp->igu_sb_id); #if 0 /* Don't handle any interrupts if we're not ready. */ if (__predict_false(sc->intr_sem != 0)) { return; } #endif /* acknowledge and disable further fastpath interrupts */ bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0); bxe_task_fp(fp); } /* Release all interrupts allocated by the driver. 
*/ static void bxe_interrupt_free(struct bxe_softc *sc) { int i; switch (sc->interrupt_mode) { case INTR_MODE_INTX: BLOGD(sc, DBG_LOAD, "Releasing legacy INTx vector\n"); if (sc->intr[0].resource != NULL) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[0].rid, sc->intr[0].resource); } break; case INTR_MODE_MSI: for (i = 0; i < sc->intr_count; i++) { BLOGD(sc, DBG_LOAD, "Releasing MSI vector %d\n", i); if (sc->intr[i].resource && sc->intr[i].rid) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[i].rid, sc->intr[i].resource); } } pci_release_msi(sc->dev); break; case INTR_MODE_MSIX: for (i = 0; i < sc->intr_count; i++) { BLOGD(sc, DBG_LOAD, "Releasing MSI-X vector %d\n", i); if (sc->intr[i].resource && sc->intr[i].rid) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr[i].rid, sc->intr[i].resource); } } pci_release_msi(sc->dev); break; default: /* nothing to do as initial allocation failed */ break; } } /* * This function determines and allocates the appropriate * interrupt based on system capabilites and user request. * * The user may force a particular interrupt mode, specify * the number of receive queues, specify the method for * distribuitng received frames to receive queues, or use * the default settings which will automatically select the * best supported combination. In addition, the OS may or * may not support certain combinations of these settings. * This routine attempts to reconcile the settings requested * by the user with the capabilites available from the system * to select the optimal combination of features. * * Returns: * 0 = Success, !0 = Failure. 
/*
 * Allocate interrupt resources, falling back MSI-X -> MSI -> INTx.
 *
 * Three do { } while (0) stages run in sequence. Each stage acts only when
 * sc->interrupt_mode names it, and on failure it rewrites
 * sc->interrupt_mode to the next mode so that the following stage takes
 * over. A failed INTx stage sets sc->interrupt_mode to -1.
 *
 * On success sc->intr[], sc->intr_count and sc->num_queues describe what
 * was allocated.
 *
 * Returns:
 *   0 = Success, 1 = Failure (no interrupt could be allocated).
 */
static int
bxe_interrupt_alloc(struct bxe_softc *sc)
{
    int msix_count = 0;
    int msi_count = 0;
    int num_requested = 0;
    int num_allocated = 0;
    int rid, i, j;
    int rc;

    /* get the number of available MSI/MSI-X interrupts from the OS */
    if (sc->interrupt_mode > 0) {
        if (sc->devinfo.pcie_cap_flags & BXE_MSIX_CAPABLE_FLAG) {
            msix_count = pci_msix_count(sc->dev);
        }

        if (sc->devinfo.pcie_cap_flags & BXE_MSI_CAPABLE_FLAG) {
            msi_count = pci_msi_count(sc->dev);
        }

        BLOGD(sc, DBG_LOAD, "%d MSI and %d MSI-X vectors available\n",
              msi_count, msix_count);
    }

    do { /* try allocating MSI-X interrupt resources (at least 2) */
        if (sc->interrupt_mode != INTR_MODE_MSIX) {
            break;
        }

        if (((sc->devinfo.pcie_cap_flags & BXE_MSIX_CAPABLE_FLAG) == 0) ||
            (msix_count < 2)) {
            sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */
            break;
        }

        /* ask for the necessary number of MSI-X vectors */
        num_requested = min((sc->num_queues + 1), msix_count);

        BLOGD(sc, DBG_LOAD, "Requesting %d MSI-X vectors\n", num_requested);

        num_allocated = num_requested;
        if ((rc = pci_alloc_msix(sc->dev, &num_allocated)) != 0) {
            BLOGE(sc, "MSI-X alloc failed! (%d)\n", rc);
            sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */
            break;
        }

        if (num_allocated < 2) { /* possible? */
            BLOGE(sc, "MSI-X allocation less than 2!\n");
            sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */
            pci_release_msi(sc->dev);
            break;
        }

        BLOGI(sc, "MSI-X vectors Requested %d and Allocated %d\n",
              num_requested, num_allocated);

        /* best effort so use the number of vectors allocated to us */
        sc->intr_count = num_allocated;
        /* one of the allocated vectors is not counted as a queue */
        sc->num_queues = num_allocated - 1;

        rid = 1; /* initial resource identifier */

        /* allocate the MSI-X vectors */
        for (i = 0; i < num_allocated; i++) {
            sc->intr[i].rid = (rid + i);

            if ((sc->intr[i].resource =
                 bus_alloc_resource_any(sc->dev,
                                        SYS_RES_IRQ,
                                        &sc->intr[i].rid,
                                        RF_ACTIVE)) == NULL) {
                BLOGE(sc, "Failed to map MSI-X[%d] (rid=%d)!\n",
                      i, (rid + i));

                /* undo the vectors mapped so far, newest first */
                for (j = (i - 1); j >= 0; j--) {
                    bus_release_resource(sc->dev,
                                         SYS_RES_IRQ,
                                         sc->intr[j].rid,
                                         sc->intr[j].resource);
                }

                sc->intr_count = 0;
                sc->num_queues = 0;
                sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */
                pci_release_msi(sc->dev);
                /* leaves the for loop; the MSI-X stage ends right after it */
                break;
            }

            BLOGD(sc, DBG_LOAD, "Mapped MSI-X[%d] (rid=%d)\n", i, (rid + i));
        }
    } while (0);

    do { /* try allocating a single MSI vector */
        if (sc->interrupt_mode != INTR_MODE_MSI) {
            break;
        }

        if (((sc->devinfo.pcie_cap_flags & BXE_MSI_CAPABLE_FLAG) == 0) ||
            (msi_count < 1)) {
            sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */
            break;
        }

        /* ask for a single MSI vector */
        num_requested = 1;

        BLOGD(sc, DBG_LOAD, "Requesting %d MSI vectors\n", num_requested);

        num_allocated = num_requested;
        if ((rc = pci_alloc_msi(sc->dev, &num_allocated)) != 0) {
            BLOGE(sc, "MSI alloc failed (%d)!\n", rc);
            sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */
            break;
        }

        if (num_allocated != 1) { /* possible? */
            BLOGE(sc, "MSI allocation is not 1!\n");
            sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */
            pci_release_msi(sc->dev);
            break;
        }

        BLOGI(sc, "MSI vectors Requested %d and Allocated %d\n",
              num_requested, num_allocated);

        /* best effort so use the number of vectors allocated to us */
        sc->intr_count = num_allocated;
        sc->num_queues = num_allocated;

        rid = 1; /* initial resource identifier */

        sc->intr[0].rid = rid;

        if ((sc->intr[0].resource =
             bus_alloc_resource_any(sc->dev,
                                    SYS_RES_IRQ,
                                    &sc->intr[0].rid,
                                    RF_ACTIVE)) == NULL) {
            BLOGE(sc, "Failed to map MSI[0] (rid=%d)!\n", rid);
            sc->intr_count = 0;
            sc->num_queues = 0;
            sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */
            pci_release_msi(sc->dev);
            break;
        }

        BLOGD(sc, DBG_LOAD, "Mapped MSI[0] (rid=%d)\n", rid);
    } while (0);

    do { /* try allocating INTx vector resources */
        if (sc->interrupt_mode != INTR_MODE_INTX) {
            break;
        }

        BLOGD(sc, DBG_LOAD, "Requesting legacy INTx interrupt\n");

        /* only one vector for INTx */
        sc->intr_count = 1;
        sc->num_queues = 1;

        rid = 0; /* initial resource identifier */

        sc->intr[0].rid = rid;

        /* unlike the MSI/MSI-X stages, the IRQ is requested as shareable */
        if ((sc->intr[0].resource =
             bus_alloc_resource_any(sc->dev,
                                    SYS_RES_IRQ,
                                    &sc->intr[0].rid,
                                    (RF_ACTIVE | RF_SHAREABLE))) == NULL) {
            BLOGE(sc, "Failed to map INTx (rid=%d)!\n", rid);
            sc->intr_count = 0;
            sc->num_queues = 0;
            sc->interrupt_mode = -1; /* Failed! */
            break;
        }

        BLOGD(sc, DBG_LOAD, "Mapped INTx (rid=%d)\n", rid);
    } while (0);

    /* -1 is only ever set by the INTx stage, the last fallback */
    if (sc->interrupt_mode == -1) {
        BLOGE(sc, "Interrupt Allocation: FAILED!!!\n");
        rc = 1;
    } else {
        BLOGD(sc, DBG_LOAD,
              "Interrupt Allocation: interrupt_mode=%d, num_queues=%d\n",
              sc->interrupt_mode, sc->num_queues);
        rc = 0;
    }

    return (rc);
}
*/ static int bxe_interrupt_attach(struct bxe_softc *sc) { struct bxe_fastpath *fp; int rc = 0; int i; snprintf(sc->sp_tq_name, sizeof(sc->sp_tq_name), "bxe%d_sp_tq", sc->unit); TASK_INIT(&sc->sp_tq_task, 0, bxe_handle_sp_tq, sc); sc->sp_tq = taskqueue_create_fast(sc->sp_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->sp_tq); taskqueue_start_threads(&sc->sp_tq, 1, PWAIT, /* lower priority */ "%s", sc->sp_tq_name); snprintf(sc->rx_mode_tq_name, sizeof(sc->rx_mode_tq_name), "bxe%d_rx_mode_tq", sc->unit); TASK_INIT(&sc->rx_mode_tq_task, 0, bxe_handle_rx_mode_tq, sc); sc->rx_mode_tq = taskqueue_create_fast(sc->rx_mode_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->rx_mode_tq); taskqueue_start_threads(&sc->rx_mode_tq, 1, PWAIT, /* lower priority */ "%s", sc->rx_mode_tq_name); for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; snprintf(fp->tq_name, sizeof(fp->tq_name), "bxe%d_fp%d_tq", sc->unit, i); TASK_INIT(&fp->tq_task, 0, bxe_handle_fp_tq, fp); fp->tq = taskqueue_create_fast(fp->tq_name, M_NOWAIT, taskqueue_thread_enqueue, &fp->tq); taskqueue_start_threads(&fp->tq, 1, PI_NET, /* higher priority */ "%s", fp->tq_name); } /* setup interrupt handlers */ if (sc->interrupt_mode == INTR_MODE_MSIX) { BLOGD(sc, DBG_LOAD, "Enabling slowpath MSI-X[0] vector\n"); /* * Setup the interrupt handler. Note that we pass the driver instance * to the interrupt handler for the slowpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_sp, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI-X[0] vector (%d)\n", rc); goto bxe_interrupt_attach_exit; } bus_describe_intr(sc->dev, sc->intr[0].resource, sc->intr[0].tag, "sp"); /* bus_bind_intr(sc->dev, sc->intr[0].resource, 0); */ /* initialize the fastpath vectors (note the first was used for sp) */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; BLOGD(sc, DBG_LOAD, "Enabling MSI-X[%d] vector\n", (i + 1)); /* * Setup the interrupt handler. 
Note that we pass the * fastpath context to the interrupt handler in this * case. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[i + 1].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_fp, fp, &sc->intr[i + 1].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI-X[%d] vector (%d)\n", (i + 1), rc); goto bxe_interrupt_attach_exit; } bus_describe_intr(sc->dev, sc->intr[i + 1].resource, sc->intr[i + 1].tag, "fp%02d", i); /* bind the fastpath instance to a cpu */ if (sc->num_queues > 1) { bus_bind_intr(sc->dev, sc->intr[i + 1].resource, i); } fp->state = BXE_FP_STATE_IRQ; } } else if (sc->interrupt_mode == INTR_MODE_MSI) { BLOGD(sc, DBG_LOAD, "Enabling MSI[0] vector\n"); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_legacy, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI[0] vector (%d)\n", rc); goto bxe_interrupt_attach_exit; } } else { /* (sc->interrupt_mode == INTR_MODE_INTX) */ BLOGD(sc, DBG_LOAD, "Enabling INTx interrupts\n"); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. 
*/ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_legacy, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate INTx interrupt (%d)\n", rc); goto bxe_interrupt_attach_exit; } } bxe_interrupt_attach_exit: return (rc); } static int bxe_init_hw_common_chip(struct bxe_softc *sc); static int bxe_init_hw_common(struct bxe_softc *sc); static int bxe_init_hw_port(struct bxe_softc *sc); static int bxe_init_hw_func(struct bxe_softc *sc); static void bxe_reset_common(struct bxe_softc *sc); static void bxe_reset_port(struct bxe_softc *sc); static void bxe_reset_func(struct bxe_softc *sc); static int bxe_gunzip_init(struct bxe_softc *sc); static void bxe_gunzip_end(struct bxe_softc *sc); static int bxe_init_firmware(struct bxe_softc *sc); static void bxe_release_firmware(struct bxe_softc *sc); static struct ecore_func_sp_drv_ops bxe_func_sp_drv = { .init_hw_cmn_chip = bxe_init_hw_common_chip, .init_hw_cmn = bxe_init_hw_common, .init_hw_port = bxe_init_hw_port, .init_hw_func = bxe_init_hw_func, .reset_hw_cmn = bxe_reset_common, .reset_hw_port = bxe_reset_port, .reset_hw_func = bxe_reset_func, .gunzip_init = bxe_gunzip_init, .gunzip_end = bxe_gunzip_end, .init_fw = bxe_init_firmware, .release_fw = bxe_release_firmware, }; static void bxe_init_func_obj(struct bxe_softc *sc) { sc->dmae_ready = 0; ecore_init_func_obj(sc, &sc->func_obj, BXE_SP(sc, func_rdata), BXE_SP_MAPPING(sc, func_rdata), BXE_SP(sc, func_afex_rdata), BXE_SP_MAPPING(sc, func_afex_rdata), &bxe_func_sp_drv); } static int bxe_init_hw(struct bxe_softc *sc, uint32_t load_code) { struct ecore_func_state_params func_params = { NULL }; int rc; /* prepare the parameters for function state transitions */ bit_set(&func_params.ramrod_flags, RAMROD_COMP_WAIT); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_HW_INIT; func_params.params.hw_init.load_phase = load_code; /* * Via a plethora of function pointers, we will eventually reach * 
bxe_init_hw_common(), bxe_init_hw_port(), or bxe_init_hw_func(). */ rc = ecore_func_state_change(sc, &func_params); return (rc); } static void bxe_fill(struct bxe_softc *sc, uint32_t addr, int fill, uint32_t len) { uint32_t i; if (!(len % 4) && !(addr % 4)) { for (i = 0; i < len; i += 4) { REG_WR(sc, (addr + i), fill); } } else { for (i = 0; i < len; i++) { REG_WR8(sc, (addr + i), fill); } } } /* writes FP SP data to FW - data_size in dwords */ static void bxe_wr_fp_sb_data(struct bxe_softc *sc, int fw_sb_id, uint32_t *sb_data_p, uint32_t data_size) { int index; for (index = 0; index < data_size; index++) { REG_WR(sc, (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_OFFSET(fw_sb_id) + (sizeof(uint32_t) * index)), *(sb_data_p + index)); } } static void bxe_zero_fp_sb(struct bxe_softc *sc, int fw_sb_id) { struct hc_status_block_data_e2 sb_data_e2; struct hc_status_block_data_e1x sb_data_e1x; uint32_t *sb_data_p; uint32_t data_size = 0; if (!CHIP_IS_E1x(sc)) { memset(&sb_data_e2, 0, sizeof(struct hc_status_block_data_e2)); sb_data_e2.common.state = SB_DISABLED; sb_data_e2.common.p_func.vf_valid = FALSE; sb_data_p = (uint32_t *)&sb_data_e2; data_size = (sizeof(struct hc_status_block_data_e2) / sizeof(uint32_t)); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); sb_data_e1x.common.state = SB_DISABLED; sb_data_e1x.common.p_func.vf_valid = FALSE; sb_data_p = (uint32_t *)&sb_data_e1x; data_size = (sizeof(struct hc_status_block_data_e1x) / sizeof(uint32_t)); } bxe_wr_fp_sb_data(sc, fw_sb_id, sb_data_p, data_size); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_OFFSET(fw_sb_id)), 0, CSTORM_STATUS_BLOCK_SIZE); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SYNC_BLOCK_OFFSET(fw_sb_id)), 0, CSTORM_SYNC_BLOCK_SIZE); } static void bxe_wr_sp_sb_data(struct bxe_softc *sc, struct hc_sp_status_block_data *sp_sb_data) { int i; for (i = 0; i < (sizeof(struct hc_sp_status_block_data) / sizeof(uint32_t)); i++) { REG_WR(sc, (BAR_CSTRORM_INTMEM + 
CSTORM_SP_STATUS_BLOCK_DATA_OFFSET(SC_FUNC(sc)) + (i * sizeof(uint32_t))), *((uint32_t *)sp_sb_data + i)); } } static void bxe_zero_sp_sb(struct bxe_softc *sc) { struct hc_sp_status_block_data sp_sb_data; memset(&sp_sb_data, 0, sizeof(struct hc_sp_status_block_data)); sp_sb_data.state = SB_DISABLED; sp_sb_data.p_func.vf_valid = FALSE; bxe_wr_sp_sb_data(sc, &sp_sb_data); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_OFFSET(SC_FUNC(sc))), 0, CSTORM_SP_STATUS_BLOCK_SIZE); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_SYNC_BLOCK_OFFSET(SC_FUNC(sc))), 0, CSTORM_SP_SYNC_BLOCK_SIZE); } static void bxe_setup_ndsb_state_machine(struct hc_status_block_sm *hc_sm, int igu_sb_id, int igu_seg_id) { hc_sm->igu_sb_id = igu_sb_id; hc_sm->igu_seg_id = igu_seg_id; hc_sm->timer_value = 0xFF; hc_sm->time_to_expire = 0xFFFFFFFF; } static void bxe_map_sb_state_machines(struct hc_index_data *index_data) { /* zero out state machine indices */ /* rx indices */ index_data[HC_INDEX_ETH_RX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; /* tx indices */ index_data[HC_INDEX_OOO_TX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags &= ~HC_INDEX_DATA_SM_ID; /* map indices */ /* rx indices */ index_data[HC_INDEX_ETH_RX_CQ_CONS].flags |= (SM_RX_ID << HC_INDEX_DATA_SM_ID_SHIFT); /* tx indices */ index_data[HC_INDEX_OOO_TX_CQ_CONS].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); } static void bxe_init_sb(struct bxe_softc *sc, bus_addr_t busaddr, int vfid, uint8_t vf_valid, int fw_sb_id, int igu_sb_id) { struct hc_status_block_data_e2 sb_data_e2; 
struct hc_status_block_data_e1x sb_data_e1x; struct hc_status_block_sm *hc_sm_p; uint32_t *sb_data_p; int igu_seg_id; int data_size; if (CHIP_INT_MODE_IS_BC(sc)) { igu_seg_id = HC_SEG_ACCESS_NORM; } else { igu_seg_id = IGU_SEG_ACCESS_NORM; } bxe_zero_fp_sb(sc, fw_sb_id); if (!CHIP_IS_E1x(sc)) { memset(&sb_data_e2, 0, sizeof(struct hc_status_block_data_e2)); sb_data_e2.common.state = SB_ENABLED; sb_data_e2.common.p_func.pf_id = SC_FUNC(sc); sb_data_e2.common.p_func.vf_id = vfid; sb_data_e2.common.p_func.vf_valid = vf_valid; sb_data_e2.common.p_func.vnic_id = SC_VN(sc); sb_data_e2.common.same_igu_sb_1b = TRUE; sb_data_e2.common.host_sb_addr.hi = U64_HI(busaddr); sb_data_e2.common.host_sb_addr.lo = U64_LO(busaddr); hc_sm_p = sb_data_e2.common.state_machine; sb_data_p = (uint32_t *)&sb_data_e2; data_size = (sizeof(struct hc_status_block_data_e2) / sizeof(uint32_t)); bxe_map_sb_state_machines(sb_data_e2.index_data); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); sb_data_e1x.common.state = SB_ENABLED; sb_data_e1x.common.p_func.pf_id = SC_FUNC(sc); sb_data_e1x.common.p_func.vf_id = 0xff; sb_data_e1x.common.p_func.vf_valid = FALSE; sb_data_e1x.common.p_func.vnic_id = SC_VN(sc); sb_data_e1x.common.same_igu_sb_1b = TRUE; sb_data_e1x.common.host_sb_addr.hi = U64_HI(busaddr); sb_data_e1x.common.host_sb_addr.lo = U64_LO(busaddr); hc_sm_p = sb_data_e1x.common.state_machine; sb_data_p = (uint32_t *)&sb_data_e1x; data_size = (sizeof(struct hc_status_block_data_e1x) / sizeof(uint32_t)); bxe_map_sb_state_machines(sb_data_e1x.index_data); } bxe_setup_ndsb_state_machine(&hc_sm_p[SM_RX_ID], igu_sb_id, igu_seg_id); bxe_setup_ndsb_state_machine(&hc_sm_p[SM_TX_ID], igu_sb_id, igu_seg_id); BLOGD(sc, DBG_LOAD, "Init FW SB %d\n", fw_sb_id); /* write indices to HW - PCI guarantees endianity of regpairs */ bxe_wr_fp_sb_data(sc, fw_sb_id, sb_data_p, data_size); } static inline uint8_t bxe_fp_qzone_id(struct bxe_fastpath *fp) { if (CHIP_IS_E1x(fp->sc)) { return 
(fp->cl_id + SC_PORT(fp->sc) * ETH_MAX_RX_CLIENTS_E1H); } else { return (fp->cl_id); } } static inline uint32_t bxe_rx_ustorm_prods_offset(struct bxe_softc *sc, struct bxe_fastpath *fp) { uint32_t offset = BAR_USTRORM_INTMEM; #if 0 if (IS_VF(sc)) { return (PXP_VF_ADDR_USDM_QUEUES_START + (sc->acquire_resp.resc.hw_qid[fp->index] * sizeof(struct ustorm_queue_zone_data))); } else #endif if (!CHIP_IS_E1x(sc)) { offset += USTORM_RX_PRODS_E2_OFFSET(fp->cl_qzone_id); } else { offset += USTORM_RX_PRODS_E1X_OFFSET(SC_PORT(sc), fp->cl_id); } return (offset); } static void bxe_init_eth_fp(struct bxe_softc *sc, int idx) { struct bxe_fastpath *fp = &sc->fp[idx]; uint32_t cids[ECORE_MULTI_TX_COS] = { 0 }; unsigned long q_type = 0; int cos; fp->sc = sc; fp->index = idx; snprintf(fp->tx_mtx_name, sizeof(fp->tx_mtx_name), "bxe%d_fp%d_tx_lock", sc->unit, idx); mtx_init(&fp->tx_mtx, fp->tx_mtx_name, NULL, MTX_DEF); snprintf(fp->rx_mtx_name, sizeof(fp->rx_mtx_name), "bxe%d_fp%d_rx_lock", sc->unit, idx); mtx_init(&fp->rx_mtx, fp->rx_mtx_name, NULL, MTX_DEF); fp->igu_sb_id = (sc->igu_base_sb + idx + CNIC_SUPPORT(sc)); fp->fw_sb_id = (sc->base_fw_ndsb + idx + CNIC_SUPPORT(sc)); fp->cl_id = (CHIP_IS_E1x(sc)) ? (SC_L_ID(sc) + idx) : /* want client ID same as IGU SB ID for non-E1 */ fp->igu_sb_id; fp->cl_qzone_id = bxe_fp_qzone_id(fp); /* setup sb indices */ if (!CHIP_IS_E1x(sc)) { fp->sb_index_values = fp->status_block.e2_sb->sb.index_values; fp->sb_running_index = fp->status_block.e2_sb->sb.running_index; } else { fp->sb_index_values = fp->status_block.e1x_sb->sb.index_values; fp->sb_running_index = fp->status_block.e1x_sb->sb.running_index; } /* init shortcut */ fp->ustorm_rx_prods_offset = bxe_rx_ustorm_prods_offset(sc, fp); fp->rx_cq_cons_sb = &fp->sb_index_values[HC_INDEX_ETH_RX_CQ_CONS]; /* * XXX If multiple CoS is ever supported then each fastpath structure * will need to maintain tx producer/consumer/dma/etc values *per* CoS. 
*/ for (cos = 0; cos < sc->max_cos; cos++) { cids[cos] = idx; } fp->tx_cons_sb = &fp->sb_index_values[HC_INDEX_ETH_TX_CQ_CONS_COS0]; /* nothing more for a VF to do */ if (IS_VF(sc)) { return; } bxe_init_sb(sc, fp->sb_dma.paddr, BXE_VF_ID_INVALID, FALSE, fp->fw_sb_id, fp->igu_sb_id); bxe_update_fp_sb_idx(fp); /* Configure Queue State object */ bit_set(&q_type, ECORE_Q_TYPE_HAS_RX); bit_set(&q_type, ECORE_Q_TYPE_HAS_TX); ecore_init_queue_obj(sc, &sc->sp_objs[idx].q_obj, fp->cl_id, cids, sc->max_cos, SC_FUNC(sc), BXE_SP(sc, q_rdata), BXE_SP_MAPPING(sc, q_rdata), q_type); /* configure classification DBs */ ecore_init_mac_obj(sc, &sc->sp_objs[idx].mac_obj, fp->cl_id, idx, SC_FUNC(sc), BXE_SP(sc, mac_rdata), BXE_SP_MAPPING(sc, mac_rdata), ECORE_FILTER_MAC_PENDING, &sc->sp_state, ECORE_OBJ_TYPE_RX_TX, &sc->macs_pool); BLOGD(sc, DBG_LOAD, "fp[%d]: sb=%p cl_id=%d fw_sb=%d igu_sb=%d\n", idx, fp->status_block.e2_sb, fp->cl_id, fp->fw_sb_id, fp->igu_sb_id); } static inline void bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t rx_bd_prod, uint16_t rx_cq_prod, uint16_t rx_sge_prod) { struct ustorm_eth_rx_producers rx_prods = { 0 }; uint32_t i; /* update producers */ rx_prods.bd_prod = rx_bd_prod; rx_prods.cqe_prod = rx_cq_prod; rx_prods.sge_prod = rx_sge_prod; /* * Make sure that the BD and SGE data is updated before updating the * producers since FW might read the BD/SGE right after the producer * is updated. * This is only applicable for weak-ordered memory model archs such * as IA-64. The following barrier is also mandatory since FW will * assumes BDs must have buffers. 
*/ wmb(); for (i = 0; i < (sizeof(rx_prods) / 4); i++) { REG_WR(sc, (fp->ustorm_rx_prods_offset + (i * 4)), ((uint32_t *)&rx_prods)[i]); } wmb(); /* keep prod updates ordered */ BLOGD(sc, DBG_RX, "RX fp[%d]: wrote prods bd_prod=%u cqe_prod=%u sge_prod=%u\n", fp->index, rx_bd_prod, rx_cq_prod, rx_sge_prod); } static void bxe_init_rx_rings(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->rx_bd_cons = 0; /* * Activate the BD ring... * Warning, this will generate an interrupt (to the TSTORM) * so this can only be done after the chip is initialized */ bxe_update_rx_prod(sc, fp, fp->rx_bd_prod, fp->rx_cq_prod, fp->rx_sge_prod); if (i != 0) { continue; } if (CHIP_IS_E1(sc)) { REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(SC_FUNC(sc))), U64_LO(fp->rcq_dma.paddr)); REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(SC_FUNC(sc)) + 4), U64_HI(fp->rcq_dma.paddr)); } } } static void bxe_init_tx_ring_one(struct bxe_fastpath *fp) { SET_FLAG(fp->tx_db.data.header.header, DOORBELL_HDR_DB_TYPE, 1); fp->tx_db.data.zero_fill1 = 0; fp->tx_db.data.prod = 0; fp->tx_pkt_prod = 0; fp->tx_pkt_cons = 0; fp->tx_bd_prod = 0; fp->tx_bd_cons = 0; fp->eth_q_stats.tx_pkts = 0; } static inline void bxe_init_tx_rings(struct bxe_softc *sc) { int i; for (i = 0; i < sc->num_queues; i++) { #if 0 uint8_t cos; for (cos = 0; cos < sc->max_cos; cos++) { bxe_init_tx_ring_one(&sc->fp[i].txdata[cos]); } #else bxe_init_tx_ring_one(&sc->fp[i]); #endif } } static void bxe_init_def_sb(struct bxe_softc *sc) { struct host_sp_status_block *def_sb = sc->def_sb; bus_addr_t mapping = sc->def_sb_dma.paddr; int igu_sp_sb_index; int igu_seg_id; int port = SC_PORT(sc); int func = SC_FUNC(sc); int reg_offset, reg_offset_en5; uint64_t section; int index, sindex; struct hc_sp_status_block_data sp_sb_data; memset(&sp_sb_data, 0, sizeof(struct hc_sp_status_block_data)); if (CHIP_INT_MODE_IS_BC(sc)) { igu_sp_sb_index = 
DEF_SB_IGU_ID; igu_seg_id = HC_SEG_ACCESS_DEF; } else { igu_sp_sb_index = sc->igu_dsb_id; igu_seg_id = IGU_SEG_ACCESS_DEF; } /* attentions */ section = ((uint64_t)mapping + offsetof(struct host_sp_status_block, atten_status_block)); def_sb->atten_status_block.status_block_id = igu_sp_sb_index; sc->attn_state = 0; reg_offset = (port) ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0; reg_offset_en5 = (port) ? MISC_REG_AEU_ENABLE5_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE5_FUNC_0_OUT_0; for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { /* take care of sig[0]..sig[4] */ for (sindex = 0; sindex < 4; sindex++) { sc->attn_group[index].sig[sindex] = REG_RD(sc, (reg_offset + (sindex * 0x4) + (0x10 * index))); } if (!CHIP_IS_E1x(sc)) { /* * enable5 is separate from the rest of the registers, * and the address skip is 4 and not 16 between the * different groups */ sc->attn_group[index].sig[4] = REG_RD(sc, (reg_offset_en5 + (0x4 * index))); } else { sc->attn_group[index].sig[4] = 0; } } if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_offset = (port) ? 
HC_REG_ATTN_MSG1_ADDR_L : HC_REG_ATTN_MSG0_ADDR_L; REG_WR(sc, reg_offset, U64_LO(section)); REG_WR(sc, (reg_offset + 4), U64_HI(section)); } else if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_ATTN_MSG_ADDR_L, U64_LO(section)); REG_WR(sc, IGU_REG_ATTN_MSG_ADDR_H, U64_HI(section)); } section = ((uint64_t)mapping + offsetof(struct host_sp_status_block, sp_sb)); bxe_zero_sp_sb(sc); /* PCI guarantees endianity of regpair */ sp_sb_data.state = SB_ENABLED; sp_sb_data.host_sb_addr.lo = U64_LO(section); sp_sb_data.host_sb_addr.hi = U64_HI(section); sp_sb_data.igu_sb_id = igu_sp_sb_index; sp_sb_data.igu_seg_id = igu_seg_id; sp_sb_data.p_func.pf_id = func; sp_sb_data.p_func.vnic_id = SC_VN(sc); sp_sb_data.p_func.vf_id = 0xff; bxe_wr_sp_sb_data(sc, &sp_sb_data); bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); } static void bxe_init_sp_ring(struct bxe_softc *sc) { atomic_store_rel_long(&sc->cq_spq_left, MAX_SPQ_PENDING); sc->spq_prod_idx = 0; sc->dsb_sp_prod = &sc->def_sb->sp_sb.index_values[HC_SP_INDEX_ETH_DEF_CONS]; sc->spq_prod_bd = sc->spq; sc->spq_last_bd = (sc->spq_prod_bd + MAX_SP_DESC_CNT); } static void bxe_init_eq_ring(struct bxe_softc *sc) { union event_ring_elem *elem; int i; for (i = 1; i <= NUM_EQ_PAGES; i++) { elem = &sc->eq[EQ_DESC_CNT_PAGE * i - 1]; elem->next_page.addr.hi = htole32(U64_HI(sc->eq_dma.paddr + BCM_PAGE_SIZE * (i % NUM_EQ_PAGES))); elem->next_page.addr.lo = htole32(U64_LO(sc->eq_dma.paddr + BCM_PAGE_SIZE * (i % NUM_EQ_PAGES))); } sc->eq_cons = 0; sc->eq_prod = NUM_EQ_DESC; sc->eq_cons_sb = &sc->def_sb->sp_sb.index_values[HC_SP_INDEX_EQ_CONS]; atomic_store_rel_long(&sc->eq_spq_left, (min((MAX_SP_DESC_CNT - MAX_SPQ_PENDING), NUM_EQ_DESC) - 1)); } static void bxe_init_internal_common(struct bxe_softc *sc) { int i; if (IS_MF_SI(sc)) { /* * In switch independent mode, the TSTORM needs to accept * packets that failed classification, since approximate match * mac addresses aren't written to NIG LLH. 
*/ REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_ACCEPT_CLASSIFY_FAILED_OFFSET), 2); } else if (!CHIP_IS_E1(sc)) { /* 57710 doesn't support MF */ REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_ACCEPT_CLASSIFY_FAILED_OFFSET), 0); } /* * Zero this manually as its initialization is currently missing * in the initTool. */ for (i = 0; i < (USTORM_AGG_DATA_SIZE >> 2); i++) { REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_AGG_DATA_OFFSET + (i * 4)), 0); } if (!CHIP_IS_E1x(sc)) { REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_IGU_MODE_OFFSET), CHIP_INT_MODE_IS_BC(sc) ? HC_IGU_BC_MODE : HC_IGU_NBC_MODE); } } static void bxe_init_internal(struct bxe_softc *sc, uint32_t load_code) { switch (load_code) { case FW_MSG_CODE_DRV_LOAD_COMMON: case FW_MSG_CODE_DRV_LOAD_COMMON_CHIP: bxe_init_internal_common(sc); /* no break */ case FW_MSG_CODE_DRV_LOAD_PORT: /* nothing to do */ /* no break */ case FW_MSG_CODE_DRV_LOAD_FUNCTION: /* internal memory per function is initialized inside bxe_pf_init */ break; default: BLOGE(sc, "Unknown load_code (0x%x) from MCP\n", load_code); break; } } static void storm_memset_func_cfg(struct bxe_softc *sc, struct tstorm_eth_function_common_config *tcfg, uint16_t abs_fid) { uint32_t addr; size_t size; addr = (BAR_TSTRORM_INTMEM + TSTORM_FUNCTION_COMMON_CONFIG_OFFSET(abs_fid)); size = sizeof(struct tstorm_eth_function_common_config); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)tcfg); } static void bxe_func_init(struct bxe_softc *sc, struct bxe_func_init_params *p) { struct tstorm_eth_function_common_config tcfg = { 0 }; if (CHIP_IS_E1x(sc)) { storm_memset_func_cfg(sc, &tcfg, p->func_id); } /* Enable the function in the FW */ storm_memset_vf_to_pf(sc, p->func_id, p->pf_id); storm_memset_func_en(sc, p->func_id, 1); /* spq */ if (p->func_flgs & FUNC_FLG_SPQ) { storm_memset_spq_addr(sc, p->spq_map, p->func_id); REG_WR(sc, (XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PROD_OFFSET(p->func_id)), p->spq_prod); } } /* * Calculates the sum of vn_min_rates. 
* It's needed for further normalizing of the min_rates. * Returns: * sum of vn_min_rates. * or * 0 - if all the min_rates are 0. * In the later case fainess algorithm should be deactivated. * If all min rates are not zero then those that are zeroes will be set to 1. */ static void bxe_calc_vn_min(struct bxe_softc *sc, struct cmng_init_input *input) { uint32_t vn_cfg; uint32_t vn_min_rate; int all_zero = 1; int vn; for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { vn_cfg = sc->devinfo.mf_info.mf_config[vn]; vn_min_rate = (((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT) * 100); if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE) { /* skip hidden VNs */ vn_min_rate = 0; } else if (!vn_min_rate) { /* If min rate is zero - set it to 100 */ vn_min_rate = DEF_MIN_RATE; } else { all_zero = 0; } input->vnic_min_rate[vn] = vn_min_rate; } /* if ETS or all min rates are zeros - disable fairness */ if (BXE_IS_ETS_ENABLED(sc)) { input->flags.cmng_enables &= ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN; BLOGD(sc, DBG_LOAD, "Fairness disabled (ETS)\n"); } else if (all_zero) { input->flags.cmng_enables &= ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN; BLOGD(sc, DBG_LOAD, "Fariness disabled (all MIN values are zeroes)\n"); } else { input->flags.cmng_enables |= CMNG_FLAGS_PER_PORT_FAIRNESS_VN; } } static inline uint16_t bxe_extract_max_cfg(struct bxe_softc *sc, uint32_t mf_cfg) { uint16_t max_cfg = ((mf_cfg & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT); if (!max_cfg) { BLOGD(sc, DBG_LOAD, "Max BW configured to 0 - using 100 instead\n"); max_cfg = 100; } return (max_cfg); } static void bxe_calc_vn_max(struct bxe_softc *sc, int vn, struct cmng_init_input *input) { uint16_t vn_max_rate; uint32_t vn_cfg = sc->devinfo.mf_info.mf_config[vn]; uint32_t max_cfg; if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE) { vn_max_rate = 0; } else { max_cfg = bxe_extract_max_cfg(sc, vn_cfg); if (IS_MF_SI(sc)) { /* max_cfg in percents of linkspeed */ vn_max_rate = ((sc->link_vars.line_speed * max_cfg) / 100); } else { /* SD modes */ 
/* max_cfg is absolute in 100Mb units */ vn_max_rate = (max_cfg * 100); } } BLOGD(sc, DBG_LOAD, "vn %d: vn_max_rate %d\n", vn, vn_max_rate); input->vnic_max_rate[vn] = vn_max_rate; } static void bxe_cmng_fns_init(struct bxe_softc *sc, uint8_t read_cfg, uint8_t cmng_type) { struct cmng_init_input input; int vn; memset(&input, 0, sizeof(struct cmng_init_input)); input.port_rate = sc->link_vars.line_speed; if (cmng_type == CMNG_FNS_MINMAX) { /* read mf conf from shmem */ if (read_cfg) { bxe_read_mf_cfg(sc); } /* get VN min rate and enable fairness if not 0 */ bxe_calc_vn_min(sc, &input); /* get VN max rate */ if (sc->port.pmf) { for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { bxe_calc_vn_max(sc, vn, &input); } } /* always enable rate shaping and fairness */ input.flags.cmng_enables |= CMNG_FLAGS_PER_PORT_RATE_SHAPING_VN; ecore_init_cmng(&input, &sc->cmng); return; } /* rate shaping and fairness are disabled */ BLOGD(sc, DBG_LOAD, "rate shaping and fairness have been disabled\n"); } static int bxe_get_cmng_fns_mode(struct bxe_softc *sc) { if (CHIP_REV_IS_SLOW(sc)) { return (CMNG_FNS_NONE); } if (IS_MF(sc)) { return (CMNG_FNS_MINMAX); } return (CMNG_FNS_NONE); } static void storm_memset_cmng(struct bxe_softc *sc, struct cmng_init *cmng, uint8_t port) { int vn; int func; uint32_t addr; size_t size; addr = (BAR_XSTRORM_INTMEM + XSTORM_CMNG_PER_PORT_VARS_OFFSET(port)); size = sizeof(struct cmng_struct_per_port); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->port); for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { func = func_by_vn(sc, vn); addr = (BAR_XSTRORM_INTMEM + XSTORM_RATE_SHAPING_PER_VN_VARS_OFFSET(func)); size = sizeof(struct rate_shaping_vars_per_vn); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->vnic.vnic_max_rate[vn]); addr = (BAR_XSTRORM_INTMEM + XSTORM_FAIRNESS_PER_VN_VARS_OFFSET(func)); size = sizeof(struct fairness_vars_per_vn); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->vnic.vnic_min_rate[vn]); } } static void 
bxe_pf_init(struct bxe_softc *sc) { struct bxe_func_init_params func_init = { 0 }; struct event_ring_data eq_data = { { 0 } }; uint16_t flags; if (!CHIP_IS_E1x(sc)) { /* reset IGU PF statistics: MSIX + ATTN */ /* PF */ REG_WR(sc, (IGU_REG_STATISTIC_NUM_MESSAGE_SENT + (BXE_IGU_STAS_MSG_VF_CNT * 4) + ((CHIP_IS_MODE_4_PORT(sc) ? SC_FUNC(sc) : SC_VN(sc)) * 4)), 0); /* ATTN */ REG_WR(sc, (IGU_REG_STATISTIC_NUM_MESSAGE_SENT + (BXE_IGU_STAS_MSG_VF_CNT * 4) + (BXE_IGU_STAS_MSG_PF_CNT * 4) + ((CHIP_IS_MODE_4_PORT(sc) ? SC_FUNC(sc) : SC_VN(sc)) * 4)), 0); } /* function setup flags */ flags = (FUNC_FLG_STATS | FUNC_FLG_LEADING | FUNC_FLG_SPQ); /* * This flag is relevant for E1x only. * E2 doesn't have a TPA configuration in a function level. */ flags |= (sc->ifnet->if_capenable & IFCAP_LRO) ? FUNC_FLG_TPA : 0; func_init.func_flgs = flags; func_init.pf_id = SC_FUNC(sc); func_init.func_id = SC_FUNC(sc); func_init.spq_map = sc->spq_dma.paddr; func_init.spq_prod = sc->spq_prod_idx; bxe_func_init(sc, &func_init); memset(&sc->cmng, 0, sizeof(struct cmng_struct_per_port)); /* * Congestion management values depend on the link rate. * There is no active link so initial link rate is set to 10Gbps. * When the link comes up the congestion management values are * re-calculated according to the actual link rate. */ sc->link_vars.line_speed = SPEED_10000; bxe_cmng_fns_init(sc, TRUE, bxe_get_cmng_fns_mode(sc)); /* Only the PMF sets the HW */ if (sc->port.pmf) { storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } /* init Event Queue - PCI bus guarantees correct endainity */ eq_data.base_addr.hi = U64_HI(sc->eq_dma.paddr); eq_data.base_addr.lo = U64_LO(sc->eq_dma.paddr); eq_data.producer = sc->eq_prod; eq_data.index_id = HC_SP_INDEX_EQ_CONS; eq_data.sb_id = DEF_SB_ID; storm_memset_eq_data(sc, &eq_data, SC_FUNC(sc)); } static void bxe_hc_int_enable(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t addr = (port) ? 
HC_REG_CONFIG_1 : HC_REG_CONFIG_0; uint32_t val = REG_RD(sc, addr); uint8_t msix = (sc->interrupt_mode == INTR_MODE_MSIX) ? TRUE : FALSE; uint8_t single_msix = ((sc->interrupt_mode == INTR_MODE_MSIX) && (sc->intr_count == 1)) ? TRUE : FALSE; uint8_t msi = (sc->interrupt_mode == INTR_MODE_MSI) ? TRUE : FALSE; if (msix) { val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0); val |= (HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); if (single_msix) { val |= HC_CONFIG_0_REG_SINGLE_ISR_EN_0; } } else if (msi) { val &= ~HC_CONFIG_0_REG_INT_LINE_EN_0; val |= (HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } else { val |= (HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); if (!CHIP_IS_E1(sc)) { BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x)\n", val, port, addr); REG_WR(sc, addr, val); val &= ~HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0; } } if (CHIP_IS_E1(sc)) { REG_WR(sc, (HC_REG_INT_MASK + port*4), 0x1FFFF); } BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x) mode %s\n", val, port, addr, ((msix) ? "MSI-X" : ((msi) ? "MSI" : "INTx"))); REG_WR(sc, addr, val); /* ensure that HC_CONFIG is written before leading/trailing edge config */ mb(); if (!CHIP_IS_E1(sc)) { /* init leading/trailing edge */ if (IS_MF(sc)) { val = (0xee0f | (1 << (SC_VN(sc) + 4))); if (sc->port.pmf) { /* enable nig and gpio3 attention */ val |= 0x1100; } } else { val = 0xffff; } REG_WR(sc, (HC_REG_TRAILING_EDGE_0 + port*8), val); REG_WR(sc, (HC_REG_LEADING_EDGE_0 + port*8), val); } /* make sure that interrupts are indeed enabled from here on */ mb(); } static void bxe_igu_int_enable(struct bxe_softc *sc) { uint32_t val; uint8_t msix = (sc->interrupt_mode == INTR_MODE_MSIX) ? TRUE : FALSE; uint8_t single_msix = ((sc->interrupt_mode == INTR_MODE_MSIX) && (sc->intr_count == 1)) ? 
TRUE : FALSE; uint8_t msi = (sc->interrupt_mode == INTR_MODE_MSI) ? TRUE : FALSE; val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); if (msix) { val &= ~(IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_SINGLE_ISR_EN); val |= (IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_ATTN_BIT_EN); if (single_msix) { val |= IGU_PF_CONF_SINGLE_ISR_EN; } } else if (msi) { val &= ~IGU_PF_CONF_INT_LINE_EN; val |= (IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_ATTN_BIT_EN | IGU_PF_CONF_SINGLE_ISR_EN); } else { val &= ~IGU_PF_CONF_MSI_MSIX_EN; val |= (IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN | IGU_PF_CONF_SINGLE_ISR_EN); } /* clean previous status - need to configure igu prior to ack*/ if ((!msix) || single_msix) { REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); bxe_ack_int(sc); } val |= IGU_PF_CONF_FUNC_EN; BLOGD(sc, DBG_INTR, "write 0x%x to IGU mode %s\n", val, ((msix) ? "MSI-X" : ((msi) ? "MSI" : "INTx"))); REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); mb(); /* init leading/trailing edge */ if (IS_MF(sc)) { val = (0xee0f | (1 << (SC_VN(sc) + 4))); if (sc->port.pmf) { /* enable nig and gpio3 attention */ val |= 0x1100; } } else { val = 0xffff; } REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, val); /* make sure that interrupts are indeed enabled from here on */ mb(); } static void bxe_int_enable(struct bxe_softc *sc) { if (sc->devinfo.int_block == INT_BLOCK_HC) { bxe_hc_int_enable(sc); } else { bxe_igu_int_enable(sc); } } static void bxe_hc_int_disable(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t addr = (port) ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0; uint32_t val = REG_RD(sc, addr); /* * In E1 we must use only PCI configuration space to disable MSI/MSIX * capablility. 
It's forbidden to disable IGU_PF_CONF_MSI_MSIX_EN in HC * block */ if (CHIP_IS_E1(sc)) { /* * Since IGU_PF_CONF_MSI_MSIX_EN still always on use mask register * to prevent from HC sending interrupts after we exit the function */ REG_WR(sc, (HC_REG_INT_MASK + port*4), 0); val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } else { val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x)\n", val, port, addr); /* flush all outstanding writes */ mb(); REG_WR(sc, addr, val); if (REG_RD(sc, addr) != val) { BLOGE(sc, "proper val not read from HC IGU!\n"); } } static void bxe_igu_int_disable(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); val &= ~(IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN); BLOGD(sc, DBG_INTR, "write %x to IGU\n", val); /* flush all outstanding writes */ mb(); REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); if (REG_RD(sc, IGU_REG_PF_CONFIGURATION) != val) { BLOGE(sc, "proper val not read from IGU!\n"); } } static void bxe_int_disable(struct bxe_softc *sc) { if (sc->devinfo.int_block == INT_BLOCK_HC) { bxe_hc_int_disable(sc); } else { bxe_igu_int_disable(sc); } } static void bxe_nic_init(struct bxe_softc *sc, int load_code) { int i; for (i = 0; i < sc->num_queues; i++) { bxe_init_eth_fp(sc, i); } rmb(); /* ensure status block indices were read */ bxe_init_rx_rings(sc); bxe_init_tx_rings(sc); if (IS_VF(sc)) { return; } /* initialize MOD_ABS interrupts */ elink_init_mod_abs_int(sc, &sc->link_vars, sc->devinfo.chip_id, sc->devinfo.shmem_base, sc->devinfo.shmem2_base, SC_PORT(sc)); bxe_init_def_sb(sc); bxe_update_dsb_idx(sc); bxe_init_sp_ring(sc); bxe_init_eq_ring(sc); bxe_init_internal(sc, load_code); bxe_pf_init(sc); bxe_stats_init(sc); /* flush all before enabling interrupts */ mb(); bxe_int_enable(sc); /* 
check for SPIO5 */ bxe_attn_int_deasserted0(sc, REG_RD(sc, (MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + SC_PORT(sc)*4)) & AEU_INPUTS_ATTN_BITS_SPIO5); } static inline void bxe_init_objs(struct bxe_softc *sc) { /* mcast rules must be added to tx if tx switching is enabled */ ecore_obj_type o_type = (sc->flags & BXE_TX_SWITCHING) ? ECORE_OBJ_TYPE_RX_TX : ECORE_OBJ_TYPE_RX; /* RX_MODE controlling object */ ecore_init_rx_mode_obj(sc, &sc->rx_mode_obj); /* multicast configuration controlling object */ ecore_init_mcast_obj(sc, &sc->mcast_obj, sc->fp[0].cl_id, sc->fp[0].index, SC_FUNC(sc), SC_FUNC(sc), BXE_SP(sc, mcast_rdata), BXE_SP_MAPPING(sc, mcast_rdata), ECORE_FILTER_MCAST_PENDING, &sc->sp_state, o_type); /* Setup CAM credit pools */ ecore_init_mac_credit_pool(sc, &sc->macs_pool, SC_FUNC(sc), CHIP_IS_E1x(sc) ? VNICS_PER_PORT(sc) : VNICS_PER_PATH(sc)); ecore_init_vlan_credit_pool(sc, &sc->vlans_pool, SC_ABS_FUNC(sc) >> 1, CHIP_IS_E1x(sc) ? VNICS_PER_PORT(sc) : VNICS_PER_PATH(sc)); /* RSS configuration object */ ecore_init_rss_config_obj(sc, &sc->rss_conf_obj, sc->fp[0].cl_id, sc->fp[0].index, SC_FUNC(sc), SC_FUNC(sc), BXE_SP(sc, rss_rdata), BXE_SP_MAPPING(sc, rss_rdata), ECORE_FILTER_RSS_CONF_PENDING, &sc->sp_state, ECORE_OBJ_TYPE_RX); } /* * Initialize the function. This must be called before sending CLIENT_SETUP * for the first client. 
*/ static inline int bxe_func_start(struct bxe_softc *sc) { struct ecore_func_state_params func_params = { NULL }; struct ecore_func_start_params *start_params = &func_params.params.start; /* Prepare parameters for function state transitions */ bit_set(&func_params.ramrod_flags, RAMROD_COMP_WAIT); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_START; /* Function parameters */ start_params->mf_mode = sc->devinfo.mf_info.mf_mode; start_params->sd_vlan_tag = OVLAN(sc); if (CHIP_IS_E2(sc) || CHIP_IS_E3(sc)) { start_params->network_cos_mode = STATIC_COS; } else { /* CHIP_IS_E1X */ start_params->network_cos_mode = FW_WRR; } start_params->gre_tunnel_mode = 0; start_params->gre_tunnel_rss = 0; return (ecore_func_state_change(sc, &func_params)); } static int bxe_set_power_state(struct bxe_softc *sc, uint8_t state) { uint16_t pmcsr; /* If there is no power capability, silently succeed */ if (!(sc->devinfo.pcie_cap_flags & BXE_PM_CAPABLE_FLAG)) { BLOGW(sc, "No power capability\n"); return (0); } pmcsr = pci_read_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), 2); switch (state) { case PCI_PM_D0: pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), ((pmcsr & ~PCIM_PSTAT_DMASK) | PCIM_PSTAT_PME), 2); if (pmcsr & PCIM_PSTAT_DMASK) { /* delay required during transition out of D3hot */ DELAY(20000); } break; case PCI_PM_D3hot: /* XXX if there are other clients above don't shut down the power */ /* don't shut down the power for emulation and FPGA */ if (CHIP_REV_IS_SLOW(sc)) { return (0); } pmcsr &= ~PCIM_PSTAT_DMASK; pmcsr |= PCIM_PSTAT_D3; if (sc->wol) { pmcsr |= PCIM_PSTAT_PMEENABLE; } pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), pmcsr, 4); /* * No more memory access after this point until device is brought back * to D0 state. 
*/ break; default: BLOGE(sc, "Can't support PCI power state = %d\n", state); return (-1); } return (0); } /* return true if succeeded to acquire the lock */ static uint8_t bxe_trylock_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; BLOGD(sc, DBG_LOAD, "Trying to take a resource lock 0x%x\n", resource); /* Validating that the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGD(sc, DBG_LOAD, "resource(0x%x) > HW_LOCK_MAX_RESOURCE_VALUE(0x%x)\n", resource, HW_LOCK_MAX_RESOURCE_VALUE); return (FALSE); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + func*8); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + (func - 6)*8); } /* try to acquire the lock */ REG_WR(sc, hw_lock_control_reg + 4, resource_bit); lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { return (TRUE); } BLOGE(sc, "Failed to get a resource lock 0x%x\n", resource); return (FALSE); } /* * Get the recovery leader resource id according to the engine this function * belongs to. Currently only only 2 engines is supported. 
*/ static int bxe_get_leader_lock_resource(struct bxe_softc *sc) { if (SC_PATH(sc)) { return (HW_LOCK_RESOURCE_RECOVERY_LEADER_1); } else { return (HW_LOCK_RESOURCE_RECOVERY_LEADER_0); } } /* try to acquire a leader lock for current engine */ static uint8_t bxe_trylock_leader_lock(struct bxe_softc *sc) { return (bxe_trylock_hw_lock(sc, bxe_get_leader_lock_resource(sc))); } static int bxe_release_leader_lock(struct bxe_softc *sc) { return (bxe_release_hw_lock(sc, bxe_get_leader_lock_resource(sc))); } /* close gates #2, #3 and #4 */ static void bxe_set_234_gates(struct bxe_softc *sc, uint8_t close) { uint32_t val; /* gates #2 and #4a are closed/opened for "not E1" only */ if (!CHIP_IS_E1(sc)) { /* #4 */ REG_WR(sc, PXP_REG_HST_DISCARD_DOORBELLS, !!close); /* #2 */ REG_WR(sc, PXP_REG_HST_DISCARD_INTERNAL_WRITES, !!close); } /* #3 */ if (CHIP_IS_E1x(sc)) { /* prevent interrupts from HC on both ports */ val = REG_RD(sc, HC_REG_CONFIG_1); REG_WR(sc, HC_REG_CONFIG_1, (!close) ? (val | HC_CONFIG_1_REG_BLOCK_DISABLE_1) : (val & ~(uint32_t)HC_CONFIG_1_REG_BLOCK_DISABLE_1)); val = REG_RD(sc, HC_REG_CONFIG_0); REG_WR(sc, HC_REG_CONFIG_0, (!close) ? (val | HC_CONFIG_0_REG_BLOCK_DISABLE_0) : (val & ~(uint32_t)HC_CONFIG_0_REG_BLOCK_DISABLE_0)); } else { /* Prevent incomming interrupts in IGU */ val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION); REG_WR(sc, IGU_REG_BLOCK_CONFIGURATION, (!close) ? (val | IGU_BLOCK_CONFIGURATION_REG_BLOCK_ENABLE) : (val & ~(uint32_t)IGU_BLOCK_CONFIGURATION_REG_BLOCK_ENABLE)); } BLOGD(sc, DBG_LOAD, "%s gates #2, #3 and #4\n", close ? 
"closing" : "opening"); wmb(); } /* poll for pending writes bit, it should get cleared in no more than 1s */ static int bxe_er_poll_igu_vq(struct bxe_softc *sc) { uint32_t cnt = 1000; uint32_t pend_bits = 0; do { pend_bits = REG_RD(sc, IGU_REG_PENDING_BITS_STATUS); if (pend_bits == 0) { break; } DELAY(1000); } while (--cnt > 0); if (cnt == 0) { BLOGE(sc, "Still pending IGU requests bits=0x%08x!\n", pend_bits); return (-1); } return (0); } #define SHARED_MF_CLP_MAGIC 0x80000000 /* 'magic' bit */ static void bxe_clp_reset_prep(struct bxe_softc *sc, uint32_t *magic_val) { /* Do some magic... */ uint32_t val = MFCFG_RD(sc, shared_mf_config.clp_mb); *magic_val = val & SHARED_MF_CLP_MAGIC; MFCFG_WR(sc, shared_mf_config.clp_mb, val | SHARED_MF_CLP_MAGIC); } /* restore the value of the 'magic' bit */ static void bxe_clp_reset_done(struct bxe_softc *sc, uint32_t magic_val) { /* Restore the 'magic' bit value... */ uint32_t val = MFCFG_RD(sc, shared_mf_config.clp_mb); MFCFG_WR(sc, shared_mf_config.clp_mb, (val & (~SHARED_MF_CLP_MAGIC)) | magic_val); } /* prepare for MCP reset, takes care of CLP configurations */ static void bxe_reset_mcp_prep(struct bxe_softc *sc, uint32_t *magic_val) { uint32_t shmem; uint32_t validity_offset; /* set `magic' bit in order to save MF config */ if (!CHIP_IS_E1(sc)) { bxe_clp_reset_prep(sc, magic_val); } /* get shmem offset */ shmem = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); validity_offset = offsetof(struct shmem_region, validity_map[SC_PORT(sc)]); /* Clear validity map flags */ if (shmem > 0) { REG_WR(sc, shmem + validity_offset, 0); } } #define MCP_TIMEOUT 5000 /* 5 seconds (in ms) */ #define MCP_ONE_TIMEOUT 100 /* 100 ms */ static void bxe_mcp_wait_one(struct bxe_softc *sc) { /* special handling for emulation and FPGA (10 times longer) */ if (CHIP_REV_IS_SLOW(sc)) { DELAY((MCP_ONE_TIMEOUT*10) * 1000); } else { DELAY((MCP_ONE_TIMEOUT) * 1000); } } /* initialize shmem_base and waits for validity signature to appear */ static int 
bxe_init_shmem(struct bxe_softc *sc) { int cnt = 0; uint32_t val = 0; do { sc->devinfo.shmem_base = sc->link_params.shmem_base = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); if (sc->devinfo.shmem_base) { val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]); if (val & SHR_MEM_VALIDITY_MB) return (0); } bxe_mcp_wait_one(sc); } while (cnt++ < (MCP_TIMEOUT / MCP_ONE_TIMEOUT)); BLOGE(sc, "BAD MCP validity signature\n"); return (-1); } static int bxe_reset_mcp_comp(struct bxe_softc *sc, uint32_t magic_val) { int rc = bxe_init_shmem(sc); /* Restore the `magic' bit value */ if (!CHIP_IS_E1(sc)) { bxe_clp_reset_done(sc, magic_val); } return (rc); } static void bxe_pxp_prep(struct bxe_softc *sc) { if (!CHIP_IS_E1(sc)) { REG_WR(sc, PXP2_REG_RD_START_INIT, 0); REG_WR(sc, PXP2_REG_RQ_RBC_DONE, 0); wmb(); } } /* * Reset the whole chip except for: * - PCIE core * - PCI Glue, PSWHST, PXP/PXP2 RF (all controlled by one reset bit) * - IGU * - MISC (including AEU) * - GRC * - RBCN, RBCP */ static void bxe_process_kill_chip_reset(struct bxe_softc *sc, uint8_t global) { uint32_t not_reset_mask1, reset_mask1, not_reset_mask2, reset_mask2; uint32_t global_bits2, stay_reset2; /* * Bits that have to be set in reset_mask2 if we want to reset 'global' * (per chip) blocks. */ global_bits2 = MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_CMN_CPU | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_CMN_CORE; /* * Don't reset the following blocks. * Important: per port blocks (such as EMAC, BMAC, UMAC) can't be * reset, as in 4 port device they might still be owned * by the MCP (there is only one leader per path). 
*/ not_reset_mask1 = MISC_REGISTERS_RESET_REG_1_RST_HC | MISC_REGISTERS_RESET_REG_1_RST_PXPV | MISC_REGISTERS_RESET_REG_1_RST_PXP; not_reset_mask2 = MISC_REGISTERS_RESET_REG_2_RST_PCI_MDIO | MISC_REGISTERS_RESET_REG_2_RST_EMAC0_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_EMAC1_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_MISC_CORE | MISC_REGISTERS_RESET_REG_2_RST_RBCN | MISC_REGISTERS_RESET_REG_2_RST_GRC | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_REG_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_HARD_CORE_RST_B | MISC_REGISTERS_RESET_REG_2_RST_ATC | MISC_REGISTERS_RESET_REG_2_PGLC | MISC_REGISTERS_RESET_REG_2_RST_BMAC0 | MISC_REGISTERS_RESET_REG_2_RST_BMAC1 | MISC_REGISTERS_RESET_REG_2_RST_EMAC0 | MISC_REGISTERS_RESET_REG_2_RST_EMAC1 | MISC_REGISTERS_RESET_REG_2_UMAC0 | MISC_REGISTERS_RESET_REG_2_UMAC1; /* * Keep the following blocks in reset: * - all xxMACs are handled by the elink code. */ stay_reset2 = MISC_REGISTERS_RESET_REG_2_XMAC | MISC_REGISTERS_RESET_REG_2_XMAC_SOFT; /* Full reset masks according to the chip */ reset_mask1 = 0xffffffff; if (CHIP_IS_E1(sc)) reset_mask2 = 0xffff; else if (CHIP_IS_E1H(sc)) reset_mask2 = 0x1ffff; else if (CHIP_IS_E2(sc)) reset_mask2 = 0xfffff; else /* CHIP_IS_E3 */ reset_mask2 = 0x3ffffff; /* Don't reset global blocks unless we need to */ if (!global) reset_mask2 &= ~global_bits2; /* * In case of attention in the QM, we need to reset PXP * (MISC_REGISTERS_RESET_REG_2_RST_PXP_RQ_RD_WR) before QM * because otherwise QM reset would release 'close the gates' shortly * before resetting the PXP, then the PSWRQ would send a write * request to PGLUE. Then when PXP is reset, PGLUE would try to * read the payload data from PSWWR, but PSWWR would not * respond. The write queue in PGLUE would stuck, dmae commands * would not return. 
Therefore it's important to reset the second * reset register (containing the * MISC_REGISTERS_RESET_REG_2_RST_PXP_RQ_RD_WR bit) before the * first one (containing the MISC_REGISTERS_RESET_REG_1_RST_QM * bit). */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, reset_mask2 & (~not_reset_mask2)); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, reset_mask1 & (~not_reset_mask1)); mb(); wmb(); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, reset_mask2 & (~stay_reset2)); mb(); wmb(); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, reset_mask1); wmb(); } static int bxe_process_kill(struct bxe_softc *sc, uint8_t global) { int cnt = 1000; uint32_t val = 0; uint32_t sr_cnt, blk_cnt, port_is_idle_0, port_is_idle_1, pgl_exp_rom2; uint32_t tags_63_32 = 0; /* Empty the Tetris buffer, wait for 1s */ do { sr_cnt = REG_RD(sc, PXP2_REG_RD_SR_CNT); blk_cnt = REG_RD(sc, PXP2_REG_RD_BLK_CNT); port_is_idle_0 = REG_RD(sc, PXP2_REG_RD_PORT_IS_IDLE_0); port_is_idle_1 = REG_RD(sc, PXP2_REG_RD_PORT_IS_IDLE_1); pgl_exp_rom2 = REG_RD(sc, PXP2_REG_PGL_EXP_ROM2); if (CHIP_IS_E3(sc)) { tags_63_32 = REG_RD(sc, PGLUE_B_REG_TAGS_63_32); } if ((sr_cnt == 0x7e) && (blk_cnt == 0xa0) && ((port_is_idle_0 & 0x1) == 0x1) && ((port_is_idle_1 & 0x1) == 0x1) && (pgl_exp_rom2 == 0xffffffff) && (!CHIP_IS_E3(sc) || (tags_63_32 == 0xffffffff))) break; DELAY(1000); } while (cnt-- > 0); if (cnt <= 0) { BLOGE(sc, "ERROR: Tetris buffer didn't get empty or there " "are still outstanding read requests after 1s! 
" "sr_cnt=0x%08x, blk_cnt=0x%08x, port_is_idle_0=0x%08x, " "port_is_idle_1=0x%08x, pgl_exp_rom2=0x%08x\n", sr_cnt, blk_cnt, port_is_idle_0, port_is_idle_1, pgl_exp_rom2); return (-1); } mb(); /* Close gates #2, #3 and #4 */ bxe_set_234_gates(sc, TRUE); /* Poll for IGU VQs for 57712 and newer chips */ if (!CHIP_IS_E1x(sc) && bxe_er_poll_igu_vq(sc)) { return (-1); } /* XXX indicate that "process kill" is in progress to MCP */ /* clear "unprepared" bit */ REG_WR(sc, MISC_REG_UNPREPARED, 0); mb(); /* Make sure all is written to the chip before the reset */ wmb(); /* * Wait for 1ms to empty GLUE and PCI-E core queues, * PSWHST, GRC and PSWRD Tetris buffer. */ DELAY(1000); /* Prepare to chip reset: */ /* MCP */ if (global) { bxe_reset_mcp_prep(sc, &val); } /* PXP */ bxe_pxp_prep(sc); mb(); /* reset the chip */ bxe_process_kill_chip_reset(sc, global); mb(); /* clear errors in PGB */ if (!CHIP_IS_E1(sc)) REG_WR(sc, PGLUE_B_REG_LATCHED_ERRORS_CLR, 0x7f); /* Recover after reset: */ /* MCP */ if (global && bxe_reset_mcp_comp(sc, val)) { return (-1); } /* XXX add resetting the NO_MCP mode DB here */ /* Open the gates #2, #3 and #4 */ bxe_set_234_gates(sc, FALSE); /* XXX * IGU/AEU preparation bring back the AEU/IGU to a reset state * re-enable attentions */ return (0); } static int bxe_leader_reset(struct bxe_softc *sc) { int rc = 0; uint8_t global = bxe_reset_is_global(sc); uint32_t load_code; /* * If not going to reset MCP, load "fake" driver to reset HW while * driver is owner of the HW. 
*/ if (!global && !BXE_NOMCP(sc)) { load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_REQ, DRV_MSG_CODE_LOAD_REQ_WITH_LFA); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; goto exit_leader_reset; } if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) && (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) { BLOGE(sc, "MCP unexpected response, aborting\n"); rc = -1; goto exit_leader_reset2; } load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; goto exit_leader_reset2; } } /* try to recover after the failure */ if (bxe_process_kill(sc, global)) { BLOGE(sc, "Something bad occurred on engine %d!\n", SC_PATH(sc)); rc = -1; goto exit_leader_reset2; } /* * Clear the RESET_IN_PROGRESS and RESET_GLOBAL bits and update the driver * state. */ bxe_set_reset_done(sc); if (global) { bxe_clear_reset_global(sc); } exit_leader_reset2: /* unload "fake driver" if it was loaded */ if (!global && !BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, 0); } exit_leader_reset: sc->is_leader = 0; bxe_release_leader_lock(sc); mb(); return (rc); } /* * prepare INIT transition, parameters configured: * - HC configuration * - Queue's CDU context */ static void bxe_pf_q_prep_init(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_queue_init_params *init_params) { uint8_t cos; int cxt_index, cxt_offset; bxe_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags); bxe_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags); bxe_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags); bxe_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags); /* HC rate */ init_params->rx.hc_rate = sc->hc_rx_ticks ? (1000000 / sc->hc_rx_ticks) : 0; init_params->tx.hc_rate = sc->hc_tx_ticks ? 
(1000000 / sc->hc_tx_ticks) : 0; /* FW SB ID */ init_params->rx.fw_sb_id = init_params->tx.fw_sb_id = fp->fw_sb_id; /* CQ index among the SB indices */ init_params->rx.sb_cq_index = HC_INDEX_ETH_RX_CQ_CONS; init_params->tx.sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS; /* set maximum number of COSs supported by this queue */ init_params->max_cos = sc->max_cos; BLOGD(sc, DBG_LOAD, "fp %d setting queue params max cos to %d\n", fp->index, init_params->max_cos); /* set the context pointers queue object */ for (cos = FIRST_TX_COS_INDEX; cos < init_params->max_cos; cos++) { /* XXX change index/cid here if ever support multiple tx CoS */ /* fp->txdata[cos]->cid */ cxt_index = fp->index / ILT_PAGE_CIDS; cxt_offset = fp->index - (cxt_index * ILT_PAGE_CIDS); init_params->cxts[cos] = &sc->context[cxt_index].vcxt[cxt_offset].eth; } } /* set flags that are common for the Tx-only and not normal connections */ static unsigned long bxe_get_common_flags(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t zero_stats) { unsigned long flags = 0; /* PF driver will always initialize the Queue to an ACTIVE state */ bxe_set_bit(ECORE_Q_FLG_ACTIVE, &flags); /* * tx only connections collect statistics (on the same index as the * parent connection). The statistics are zeroed when the parent * connection is initialized. 
*/ bxe_set_bit(ECORE_Q_FLG_STATS, &flags); if (zero_stats) { bxe_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags); } /* * tx only connections can support tx-switching, though their * CoS-ness doesn't survive the loopback */ if (sc->flags & BXE_TX_SWITCHING) { bxe_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags); } bxe_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags); return (flags); } static unsigned long bxe_get_q_flags(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t leading) { unsigned long flags = 0; if (IS_MF_SD(sc)) { bxe_set_bit(ECORE_Q_FLG_OV, &flags); } if (sc->ifnet->if_capenable & IFCAP_LRO) { bxe_set_bit(ECORE_Q_FLG_TPA, &flags); bxe_set_bit(ECORE_Q_FLG_TPA_IPV6, &flags); #if 0 if (fp->mode == TPA_MODE_GRO) __set_bit(ECORE_Q_FLG_TPA_GRO, &flags); #endif } if (leading) { bxe_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags); bxe_set_bit(ECORE_Q_FLG_MCAST, &flags); } bxe_set_bit(ECORE_Q_FLG_VLAN, &flags); #if 0 /* configure silent vlan removal */ if (IS_MF_AFEX(sc)) { bxe_set_bit(ECORE_Q_FLG_SILENT_VLAN_REM, &flags); } #endif /* merge with common flags */ return (flags | bxe_get_common_flags(sc, fp, TRUE)); } static void bxe_pf_q_prep_general(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_general_setup_params *gen_init, uint8_t cos) { gen_init->stat_id = bxe_stats_id(fp); gen_init->spcl_id = fp->cl_id; gen_init->mtu = sc->mtu; gen_init->cos = cos; } static void bxe_pf_rx_q_prep(struct bxe_softc *sc, struct bxe_fastpath *fp, struct rxq_pause_params *pause, struct ecore_rxq_setup_params *rxq_init) { uint8_t max_sge = 0; uint16_t sge_sz = 0; uint16_t tpa_agg_size = 0; if (sc->ifnet->if_capenable & IFCAP_LRO) { pause->sge_th_lo = SGE_TH_LO(sc); pause->sge_th_hi = SGE_TH_HI(sc); /* validate SGE ring has enough to cross high threshold */ if (sc->dropless_fc && (pause->sge_th_hi + FW_PREFETCH_CNT) > (RX_SGE_USABLE_PER_PAGE * RX_SGE_NUM_PAGES)) { BLOGW(sc, "sge ring threshold limit\n"); } /* minimum max_aggregation_size is 2*MTU (two full buffers) */ tpa_agg_size = (2 * 
sc->mtu); if (tpa_agg_size < sc->max_aggregation_size) { tpa_agg_size = sc->max_aggregation_size; } max_sge = SGE_PAGE_ALIGN(sc->mtu) >> SGE_PAGE_SHIFT; max_sge = ((max_sge + PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >> PAGES_PER_SGE_SHIFT; sge_sz = (uint16_t)min(SGE_PAGES, 0xffff); } /* pause - not for e1 */ if (!CHIP_IS_E1(sc)) { pause->bd_th_lo = BD_TH_LO(sc); pause->bd_th_hi = BD_TH_HI(sc); pause->rcq_th_lo = RCQ_TH_LO(sc); pause->rcq_th_hi = RCQ_TH_HI(sc); /* validate rings have enough entries to cross high thresholds */ if (sc->dropless_fc && pause->bd_th_hi + FW_PREFETCH_CNT > sc->rx_ring_size) { BLOGW(sc, "rx bd ring threshold limit\n"); } if (sc->dropless_fc && pause->rcq_th_hi + FW_PREFETCH_CNT > RCQ_NUM_PAGES * RCQ_USABLE_PER_PAGE) { BLOGW(sc, "rcq ring threshold limit\n"); } pause->pri_map = 1; } /* rxq setup */ rxq_init->dscr_map = fp->rx_dma.paddr; rxq_init->sge_map = fp->rx_sge_dma.paddr; rxq_init->rcq_map = fp->rcq_dma.paddr; rxq_init->rcq_np_map = (fp->rcq_dma.paddr + BCM_PAGE_SIZE); /* * This should be a maximum number of data bytes that may be * placed on the BD (not including paddings). */ rxq_init->buf_sz = (fp->rx_buf_size - IP_HEADER_ALIGNMENT_PADDING); rxq_init->cl_qzone_id = fp->cl_qzone_id; rxq_init->tpa_agg_sz = tpa_agg_size; rxq_init->sge_buf_sz = sge_sz; rxq_init->max_sges_pkt = max_sge; rxq_init->rss_engine_id = SC_FUNC(sc); rxq_init->mcast_engine_id = SC_FUNC(sc); /* * Maximum number or simultaneous TPA aggregation for this Queue. * For PF Clients it should be the maximum available number. * VF driver(s) may want to define it to a smaller value. 
*/ rxq_init->max_tpa_queues = MAX_AGG_QS(sc); rxq_init->cache_line_log = BXE_RX_ALIGN_SHIFT; rxq_init->fw_sb_id = fp->fw_sb_id; rxq_init->sb_cq_index = HC_INDEX_ETH_RX_CQ_CONS; /* * configure silent vlan removal * if multi function mode is afex, then mask default vlan */ if (IS_MF_AFEX(sc)) { rxq_init->silent_removal_value = sc->devinfo.mf_info.afex_def_vlan_tag; rxq_init->silent_removal_mask = EVL_VLID_MASK; } } static void bxe_pf_tx_q_prep(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_txq_setup_params *txq_init, uint8_t cos) { /* * XXX If multiple CoS is ever supported then each fastpath structure * will need to maintain tx producer/consumer/dma/etc values *per* CoS. * fp->txdata[cos]->tx_dma.paddr; */ txq_init->dscr_map = fp->tx_dma.paddr; txq_init->sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS + cos; txq_init->traffic_type = LLFC_TRAFFIC_TYPE_NW; txq_init->fw_sb_id = fp->fw_sb_id; /* * set the TSS leading client id for TX classfication to the * leading RSS client id */ txq_init->tss_leading_cl_id = BXE_FP(sc, 0, cl_id); } /* * This function performs 2 steps in a queue state machine: * 1) RESET->INIT * 2) INIT->SETUP */ static int bxe_setup_queue(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t leading) { struct ecore_queue_state_params q_params = { NULL }; struct ecore_queue_setup_params *setup_params = &q_params.params.setup; #if 0 struct ecore_queue_setup_tx_only_params *tx_only_params = &q_params.params.tx_only; uint8_t tx_index; #endif int rc; BLOGD(sc, DBG_LOAD, "setting up queue %d\n", fp->index); bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); q_params.q_obj = &BXE_SP_OBJ(sc, fp).q_obj; /* we want to wait for completion in this context */ bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); /* prepare the INIT parameters */ bxe_pf_q_prep_init(sc, fp, &q_params.params.init); /* Set the command */ q_params.cmd = ECORE_Q_CMD_INIT; /* Change the state to INIT */ rc = ecore_queue_state_change(sc, &q_params); if (rc) { 
BLOGE(sc, "Queue(%d) INIT failed\n", fp->index); return (rc); } BLOGD(sc, DBG_LOAD, "init complete\n"); /* now move the Queue to the SETUP state */ memset(setup_params, 0, sizeof(*setup_params)); /* set Queue flags */ setup_params->flags = bxe_get_q_flags(sc, fp, leading); /* set general SETUP parameters */ bxe_pf_q_prep_general(sc, fp, &setup_params->gen_params, FIRST_TX_COS_INDEX); bxe_pf_rx_q_prep(sc, fp, &setup_params->pause_params, &setup_params->rxq_params); bxe_pf_tx_q_prep(sc, fp, &setup_params->txq_params, FIRST_TX_COS_INDEX); /* Set the command */ q_params.cmd = ECORE_Q_CMD_SETUP; /* change the state to SETUP */ rc = ecore_queue_state_change(sc, &q_params); if (rc) { BLOGE(sc, "Queue(%d) SETUP failed\n", fp->index); return (rc); } #if 0 /* loop through the relevant tx-only indices */ for (tx_index = FIRST_TX_ONLY_COS_INDEX; tx_index < sc->max_cos; tx_index++) { /* prepare and send tx-only ramrod*/ rc = bxe_setup_tx_only(sc, fp, &q_params, tx_only_params, tx_index, leading); if (rc) { BLOGE(sc, "Queue(%d.%d) TX_ONLY_SETUP failed\n", fp->index, tx_index); return (rc); } } #endif return (rc); } static int bxe_setup_leading(struct bxe_softc *sc) { return (bxe_setup_queue(sc, &sc->fp[0], TRUE)); } static int bxe_config_rss_pf(struct bxe_softc *sc, struct ecore_rss_config_obj *rss_obj, uint8_t config_hash) { struct ecore_config_rss_params params = { NULL }; int i; /* * Although RSS is meaningless when there is a single HW queue we * still need it enabled in order to have HW Rx hash generated. 
*/ params.rss_obj = rss_obj; bxe_set_bit(RAMROD_COMP_WAIT, ¶ms.ramrod_flags); bxe_set_bit(ECORE_RSS_MODE_REGULAR, ¶ms.rss_flags); /* RSS configuration */ bxe_set_bit(ECORE_RSS_IPV4, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV4_TCP, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV6, ¶ms.rss_flags); bxe_set_bit(ECORE_RSS_IPV6_TCP, ¶ms.rss_flags); if (rss_obj->udp_rss_v4) { bxe_set_bit(ECORE_RSS_IPV4_UDP, ¶ms.rss_flags); } if (rss_obj->udp_rss_v6) { bxe_set_bit(ECORE_RSS_IPV6_UDP, ¶ms.rss_flags); } /* Hash bits */ params.rss_result_mask = MULTI_MASK; memcpy(params.ind_table, rss_obj->ind_table, sizeof(params.ind_table)); if (config_hash) { /* RSS keys */ for (i = 0; i < sizeof(params.rss_key) / 4; i++) { params.rss_key[i] = arc4random(); } bxe_set_bit(ECORE_RSS_SET_SRCH, ¶ms.rss_flags); } return (ecore_config_rss(sc, ¶ms)); } static int bxe_config_rss_eth(struct bxe_softc *sc, uint8_t config_hash) { return (bxe_config_rss_pf(sc, &sc->rss_conf_obj, config_hash)); } static int bxe_init_rss_pf(struct bxe_softc *sc) { uint8_t num_eth_queues = BXE_NUM_ETH_QUEUES(sc); int i; /* * Prepare the initial contents of the indirection table if * RSS is enabled */ for (i = 0; i < sizeof(sc->rss_conf_obj.ind_table); i++) { sc->rss_conf_obj.ind_table[i] = (sc->fp->cl_id + (i % num_eth_queues)); } if (sc->udp_rss) { sc->rss_conf_obj.udp_rss_v4 = sc->rss_conf_obj.udp_rss_v6 = 1; } /* * For 57710 and 57711 SEARCHER configuration (rss_keys) is * per-port, so if explicit configuration is needed, do it only * for a PMF. * * For 57712 and newer it's a per-function configuration. 
*/ return (bxe_config_rss_eth(sc, sc->port.pmf || !CHIP_IS_E1x(sc))); } static int bxe_set_mac_one(struct bxe_softc *sc, uint8_t *mac, struct ecore_vlan_mac_obj *obj, uint8_t set, int mac_type, unsigned long *ramrod_flags) { struct ecore_vlan_mac_ramrod_params ramrod_param; int rc; memset(&ramrod_param, 0, sizeof(ramrod_param)); /* fill in general parameters */ ramrod_param.vlan_mac_obj = obj; ramrod_param.ramrod_flags = *ramrod_flags; /* fill a user request section if needed */ if (!bxe_test_bit(RAMROD_CONT, ramrod_flags)) { memcpy(ramrod_param.user_req.u.mac.mac, mac, ETH_ALEN); bxe_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags); /* Set the command: ADD or DEL */ ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD : ECORE_VLAN_MAC_DEL; } rc = ecore_config_vlan_mac(sc, &ramrod_param); if (rc == ECORE_EXISTS) { BLOGD(sc, DBG_SP, "Failed to schedule ADD operations (EEXIST)\n"); /* do not treat adding same MAC as error */ rc = 0; } else if (rc < 0) { BLOGE(sc, "%s MAC failed (%d)\n", (set ? 
"Set" : "Delete"), rc); } return (rc); } static int bxe_set_eth_mac(struct bxe_softc *sc, uint8_t set) { unsigned long ramrod_flags = 0; BLOGD(sc, DBG_LOAD, "Adding Ethernet MAC\n"); bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Eth MAC is set on RSS leading client (fp[0]) */ return (bxe_set_mac_one(sc, sc->link_params.mac_addr, &sc->sp_objs->mac_obj, set, ECORE_ETH_MAC, &ramrod_flags)); } #if 0 static void bxe_update_max_mf_config(struct bxe_softc *sc, uint32_t value) { /* load old values */ uint32_t mf_cfg = sc->devinfo.mf_info.mf_config[SC_VN(sc)]; if (value != bxe_extract_max_cfg(sc, mf_cfg)) { /* leave all but MAX value */ mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK; /* set new MAX value */ mf_cfg |= ((value << FUNC_MF_CFG_MAX_BW_SHIFT) & FUNC_MF_CFG_MAX_BW_MASK); bxe_fw_command(sc, DRV_MSG_CODE_SET_MF_BW, mf_cfg); } } #endif static int bxe_get_cur_phy_idx(struct bxe_softc *sc) { uint32_t sel_phy_idx = 0; if (sc->link_params.num_phys <= 1) { return (ELINK_INT_PHY); } if (sc->link_vars.link_up) { sel_phy_idx = ELINK_EXT_PHY1; /* In case link is SERDES, check if the ELINK_EXT_PHY2 is the one */ if ((sc->link_vars.link_status & LINK_STATUS_SERDES_LINK) && (sc->link_params.phy[ELINK_EXT_PHY2].supported & ELINK_SUPPORTED_FIBRE)) sel_phy_idx = ELINK_EXT_PHY2; } else { switch (elink_phy_selection(&sc->link_params)) { case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY: sel_phy_idx = ELINK_EXT_PHY1; break; case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY: case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY: sel_phy_idx = ELINK_EXT_PHY2; break; } } return (sel_phy_idx); } static int bxe_get_link_cfg_idx(struct bxe_softc *sc) { uint32_t sel_phy_idx = bxe_get_cur_phy_idx(sc); /* * The selected activated PHY is always after swapping (in case PHY * swapping is enabled). 
So when swapping is enabled, we need to reverse * the configuration */ if (sc->link_params.multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED) { if (sel_phy_idx == ELINK_EXT_PHY1) sel_phy_idx = ELINK_EXT_PHY2; else if (sel_phy_idx == ELINK_EXT_PHY2) sel_phy_idx = ELINK_EXT_PHY1; } return (ELINK_LINK_CONFIG_IDX(sel_phy_idx)); } static void bxe_set_requested_fc(struct bxe_softc *sc) { /* * Initialize link parameters structure variables * It is recommended to turn off RX FC for jumbo frames * for better performance */ if (CHIP_IS_E1x(sc) && (sc->mtu > 5000)) { sc->link_params.req_fc_auto_adv = ELINK_FLOW_CTRL_TX; } else { sc->link_params.req_fc_auto_adv = ELINK_FLOW_CTRL_BOTH; } } static void bxe_calc_fc_adv(struct bxe_softc *sc) { uint8_t cfg_idx = bxe_get_link_cfg_idx(sc); switch (sc->link_vars.ieee_fc & MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) { case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_NONE: default: sc->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause | ADVERTISED_Pause); break; case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH: sc->port.advertising[cfg_idx] |= (ADVERTISED_Asym_Pause | ADVERTISED_Pause); break; case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC: sc->port.advertising[cfg_idx] |= ADVERTISED_Asym_Pause; break; } } static uint16_t bxe_get_mf_speed(struct bxe_softc *sc) { uint16_t line_speed = sc->link_vars.line_speed; if (IS_MF(sc)) { uint16_t maxCfg = bxe_extract_max_cfg(sc, sc->devinfo.mf_info.mf_config[SC_VN(sc)]); /* calculate the current MAX line speed limit for the MF devices */ if (IS_MF_SI(sc)) { line_speed = (line_speed * maxCfg) / 100; } else { /* SD mode */ uint16_t vn_max_rate = maxCfg * 100; if (vn_max_rate < line_speed) { line_speed = vn_max_rate; } } } return (line_speed); } static void bxe_fill_report_data(struct bxe_softc *sc, struct bxe_link_report_data *data) { uint16_t line_speed = bxe_get_mf_speed(sc); memset(data, 0, sizeof(*data)); /* fill the report data with the effective line speed */ data->line_speed = line_speed; /* Link 
is down */
    if (!sc->link_vars.link_up || (sc->flags & BXE_MF_FUNC_DIS)) {
        bxe_set_bit(BXE_LINK_REPORT_LINK_DOWN, &data->link_report_flags);
    }

    /* Full DUPLEX */
    if (sc->link_vars.duplex == DUPLEX_FULL) {
        bxe_set_bit(BXE_LINK_REPORT_FULL_DUPLEX, &data->link_report_flags);
    }

    /* Rx Flow Control is ON */
    if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) {
        bxe_set_bit(BXE_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
    }

    /* Tx Flow Control is ON */
    if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) {
        bxe_set_bit(BXE_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
    }
}

/*
 * report link status to OS, should be called under phy_lock
 *
 * Builds a fresh link report, compares it with sc->last_reported_link and,
 * when something changed, stores the new report, bumps sc->link_cnt,
 * notifies the network stack via if_link_state_change() and logs the new
 * state.
 */
static void
bxe_link_report_locked(struct bxe_softc *sc)
{
    struct bxe_link_report_data cur_data;

    /* reread mf_cfg */
    if (IS_PF(sc) && !CHIP_IS_E1(sc)) {
        bxe_read_mf_cfg(sc);
    }

    /* Read the current link report info */
    bxe_fill_report_data(sc, &cur_data);

    /* Don't report link down or exactly the same link status twice */
    if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) ||
        (bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN,
                      &sc->last_reported_link.link_report_flags) &&
         bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN,
                      &cur_data.link_report_flags))) {
        return;
    }

    sc->link_cnt++;

    /* report new link params and remember the state for the next time */
    memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));

    if (bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN,
                     &cur_data.link_report_flags)) {
        if_link_state_change(sc->ifnet, LINK_STATE_DOWN);
        BLOGI(sc, "NIC Link is Down\n");
    } else {
        const char *duplex;
        const char *flow;

        /*
         * The duplex bit is cleared from the local copy while it is tested
         * (the saved copy in sc->last_reported_link is not affected).
         */
        if (bxe_test_and_clear_bit(BXE_LINK_REPORT_FULL_DUPLEX,
                                   &cur_data.link_report_flags)) {
            duplex = "full";
        } else {
            duplex = "half";
        }

        /*
         * Handle the FC at the end so that only these flags would be
         * possibly set. This way we may easily check if there is no FC
         * enabled.
*/
        if (cur_data.link_report_flags) {
            if (bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON,
                             &cur_data.link_report_flags) &&
                bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON,
                             &cur_data.link_report_flags)) {
                flow = "ON - receive & transmit";
            } else if (bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON,
                                    &cur_data.link_report_flags) &&
                       !bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON,
                                     &cur_data.link_report_flags)) {
                flow = "ON - receive";
            } else if (!bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON,
                                     &cur_data.link_report_flags) &&
                       bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON,
                                    &cur_data.link_report_flags)) {
                flow = "ON - transmit";
            } else {
                flow = "none"; /* possible? */
            }
        } else {
            flow = "none";
        }

        if_link_state_change(sc->ifnet, LINK_STATE_UP);
        BLOGI(sc, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n",
              cur_data.line_speed, duplex, flow);
    }
}

/* Report the link status to the OS, taking the PHY lock around the call. */
static void
bxe_link_report(struct bxe_softc *sc)
{
    BXE_PHY_LOCK(sc);
    bxe_link_report_locked(sc);
    BXE_PHY_UNLOCK(sc);
}

/*
 * Refresh the link state and report it. Does nothing unless the device
 * state is BXE_STATE_OPEN.
 *
 * A PF on a chip revision that is not "slow" asks the elink layer for the
 * status; in every other case a fixed full-duplex link is filled in by
 * hand (1G on FPGA revisions, 10G otherwise).
 */
static void
bxe_link_status_update(struct bxe_softc *sc)
{
    if (sc->state != BXE_STATE_OPEN) {
        return;
    }

#if 0
    /* read updated dcb configuration */
    if (IS_PF(sc))
        bxe_dcbx_pmf_update(sc);
#endif

    if (IS_PF(sc) && !CHIP_REV_IS_SLOW(sc)) {
        elink_link_status_update(&sc->link_params, &sc->link_vars);
    } else {
        sc->port.supported[0] |= (ELINK_SUPPORTED_10baseT_Half |
                                  ELINK_SUPPORTED_10baseT_Full |
                                  ELINK_SUPPORTED_100baseT_Half |
                                  ELINK_SUPPORTED_100baseT_Full |
                                  ELINK_SUPPORTED_1000baseT_Full |
                                  ELINK_SUPPORTED_2500baseX_Full |
                                  ELINK_SUPPORTED_10000baseT_Full |
                                  ELINK_SUPPORTED_TP |
                                  ELINK_SUPPORTED_FIBRE |
                                  ELINK_SUPPORTED_Autoneg |
                                  ELINK_SUPPORTED_Pause |
                                  ELINK_SUPPORTED_Asym_Pause);
        sc->port.advertising[0] = sc->port.supported[0];

        sc->link_params.sc                = sc;
        sc->link_params.port              = SC_PORT(sc);
        sc->link_params.req_duplex[0]     = DUPLEX_FULL;
        sc->link_params.req_flow_ctrl[0]  = ELINK_FLOW_CTRL_NONE;
        sc->link_params.req_line_speed[0] = SPEED_10000;
        sc->link_params.speed_cap_mask[0] = 0x7f0000;
        sc->link_params.switch_cfg        = ELINK_SWITCH_CFG_10G;

        if (CHIP_REV_IS_FPGA(sc)) {
sc->link_vars.mac_type = ELINK_MAC_TYPE_EMAC;
            sc->link_vars.line_speed = ELINK_SPEED_1000;
            sc->link_vars.link_status = (LINK_STATUS_LINK_UP |
                                         LINK_STATUS_SPEED_AND_DUPLEX_1000TFD);
        } else {
            sc->link_vars.mac_type = ELINK_MAC_TYPE_BMAC;
            sc->link_vars.line_speed = ELINK_SPEED_10000;
            sc->link_vars.link_status = (LINK_STATUS_LINK_UP |
                                         LINK_STATUS_SPEED_AND_DUPLEX_10GTFD);
        }

        sc->link_vars.link_up = 1;

        sc->link_vars.duplex = DUPLEX_FULL;
        sc->link_vars.flow_ctrl = ELINK_FLOW_CTRL_NONE;

        if (IS_PF(sc)) {
            REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + sc->link_params.port*4, 0);
            bxe_stats_handle(sc, STATS_EVENT_LINK_UP);
            bxe_link_report(sc);
        }
    }

    /* PF: pick the stats event from the link state, then report the link */
    if (IS_PF(sc)) {
        if (sc->link_vars.link_up) {
            bxe_stats_handle(sc, STATS_EVENT_LINK_UP);
        } else {
            bxe_stats_handle(sc, STATS_EVENT_STOP);
        }
        bxe_link_report(sc);
    } else {
        bxe_link_report(sc);
        bxe_stats_handle(sc, STATS_EVENT_LINK_UP);
    }
}

/*
 * Perform the first PHY initialization of a load.
 *
 * load_mode selects an optional loopback setup (LOAD_DIAG or
 * LOAD_LOOPBACK_EXT). The requested line speed of the selected link
 * configuration is saved on entry and restored before returning, so a
 * speed forced for loopback does not persist.
 *
 * Returns the result of elink_phy_init().
 */
static int
bxe_initial_phy_init(struct bxe_softc *sc,
                     int              load_mode)
{
    int rc, cfg_idx = bxe_get_link_cfg_idx(sc);
    uint16_t req_line_speed = sc->link_params.req_line_speed[cfg_idx];
    struct elink_params *lp = &sc->link_params;

    bxe_set_requested_fc(sc);

    /* slow chip revisions: choose which MAC block to disable */
    if (CHIP_REV_IS_SLOW(sc)) {
        uint32_t bond = CHIP_BOND_ID(sc);
        uint32_t feat = 0;

        if (CHIP_IS_E2(sc) && CHIP_IS_MODE_4_PORT(sc)) {
            feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC;
        } else if (bond & 0x4) {
            if (CHIP_IS_E3(sc)) {
                feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC;
            } else {
                feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC;
            }
        } else if (bond & 0x8) {
            if (CHIP_IS_E3(sc)) {
                feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC;
            } else {
                feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC;
            }
        }

        /* disable EMAC for E3 and above */
        if (bond & 0x2) {
            feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC;
        }

        sc->link_params.feature_config_flags |= feat;
    }

    BXE_PHY_LOCK(sc);

    if (load_mode == LOAD_DIAG) {
        lp->loopback_mode = ELINK_LOOPBACK_XGXS;
        /* Prefer doing PHY loopback at 10G speed, if possible */
        if (lp->req_line_speed[cfg_idx] < ELINK_SPEED_10000) {
            if
(lp->speed_cap_mask[cfg_idx] &
                PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) {
                lp->req_line_speed[cfg_idx] = ELINK_SPEED_10000;
            } else {
                lp->req_line_speed[cfg_idx] = ELINK_SPEED_1000;
            }
        }
    }

    if (load_mode == LOAD_LOOPBACK_EXT) {
        lp->loopback_mode = ELINK_LOOPBACK_EXT;
    }

    rc = elink_phy_init(&sc->link_params, &sc->link_vars);

    BXE_PHY_UNLOCK(sc);

    bxe_calc_fc_adv(sc);

    if (sc->link_vars.link_up) {
        bxe_stats_handle(sc, STATS_EVENT_LINK_UP);
        bxe_link_report(sc);
    }

    if (!CHIP_REV_IS_SLOW(sc)) {
        bxe_periodic_start(sc);
    }

    /* restore the line speed that was requested on entry */
    sc->link_params.req_line_speed[cfg_idx] = req_line_speed;
    return (rc);
}

/*
 * must be called under IF_ADDR_LOCK
 *
 * Build p->mcast_list from the AF_LINK entries of the interface's multicast
 * address list. All list elements live in one malloc'd array, which
 * bxe_free_mcast_macs_list() releases.
 *
 * Returns 0 on success (including when there are no multicast addresses)
 * and -1 when the array cannot be allocated.
 */
static int
bxe_init_mcast_macs_list(struct bxe_softc                 *sc,
                         struct ecore_mcast_ramrod_params *p)
{
    struct ifnet *ifp = sc->ifnet;
    int mc_count = 0;
    struct ifmultiaddr *ifma;
    struct ecore_mcast_list_elem *mc_mac;

    /* first pass: count the link-layer multicast addresses */
    TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
        if (ifma->ifma_addr->sa_family != AF_LINK) {
            continue;
        }

        mc_count++;
    }

    ECORE_LIST_INIT(&p->mcast_list);
    p->mcast_list_len = 0;

    if (!mc_count) {
        return (0);
    }

    mc_mac = malloc(sizeof(*mc_mac) * mc_count, M_DEVBUF,
                    (M_NOWAIT | M_ZERO));
    if (!mc_mac) {
        BLOGE(sc, "Failed to allocate temp mcast list\n");
        return (-1);
    }

    /* second pass: one array element per address, linked in list order */
    TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
        if (ifma->ifma_addr->sa_family != AF_LINK) {
            continue;
        }

        /* the element points at the address bytes; they are not copied */
        mc_mac->mac = (uint8_t *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
        ECORE_LIST_PUSH_TAIL(&mc_mac->link, &p->mcast_list);

        BLOGD(sc, DBG_LOAD,
              "Setting MCAST %02X:%02X:%02X:%02X:%02X:%02X\n",
              mc_mac->mac[0], mc_mac->mac[1], mc_mac->mac[2],
              mc_mac->mac[3], mc_mac->mac[4], mc_mac->mac[5]);

        mc_mac++;
    }

    p->mcast_list_len = mc_count;

    return (0);
}

/*
 * Free the element array built by bxe_init_mcast_macs_list(). The first
 * list entry is the start of that array.
 */
static void
bxe_free_mcast_macs_list(struct ecore_mcast_ramrod_params *p)
{
    struct ecore_mcast_list_elem *mc_mac =
        ECORE_LIST_FIRST_ENTRY(&p->mcast_list,
                               struct ecore_mcast_list_elem,
                               link);

    if (mc_mac) {
        /* only a single free as all mc_macs are in the same heap array */
        free(mc_mac, M_DEVBUF);
    }
}

static int
bxe_set_mc_list(struct
bxe_softc *sc) { struct ecore_mcast_ramrod_params rparam = { NULL }; int rc = 0; rparam.mcast_obj = &sc->mcast_obj; BXE_MCAST_LOCK(sc); /* first, clear all configured multicast MACs */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to clear multicast configuration: %d\n", rc); return (rc); } /* configure a new MACs list */ rc = bxe_init_mcast_macs_list(sc, &rparam); if (rc) { BLOGE(sc, "Failed to create mcast MACs list (%d)\n", rc); BXE_MCAST_UNLOCK(sc); return (rc); } /* Now add the new MACs */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_ADD); if (rc < 0) { BLOGE(sc, "Failed to set new mcast config (%d)\n", rc); } bxe_free_mcast_macs_list(&rparam); BXE_MCAST_UNLOCK(sc); return (rc); } static int bxe_set_uc_list(struct bxe_softc *sc) { struct ifnet *ifp = sc->ifnet; struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj; struct ifaddr *ifa; unsigned long ramrod_flags = 0; int rc; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_addr_rlock(ifp); #endif /* first schedule a cleanup up of old configuration */ rc = bxe_del_all_macs(sc, mac_obj, ECORE_UC_LIST_MAC, FALSE); if (rc < 0) { BLOGE(sc, "Failed to schedule delete of all ETH MACs (%d)\n", rc); #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif return (rc); } ifa = ifp->if_addr; while (ifa) { if (ifa->ifa_addr->sa_family != AF_LINK) { ifa = TAILQ_NEXT(ifa, ifa_link); continue; } rc = bxe_set_mac_one(sc, (uint8_t *)LLADDR((struct sockaddr_dl *)ifa->ifa_addr), mac_obj, TRUE, ECORE_UC_LIST_MAC, &ramrod_flags); if (rc == -EEXIST) { BLOGD(sc, DBG_SP, "Failed to schedule ADD operations (EEXIST)\n"); /* do not treat adding same MAC as an error */ rc = 0; } else if (rc < 0) { BLOGE(sc, "Failed to schedule ADD operations (%d)\n", rc); #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif return (rc); } ifa = TAILQ_NEXT(ifa, ifa_link); } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); 
#else
    if_addr_runlock(ifp);
#endif

    /* Execute the pending commands */
    bit_set(&ramrod_flags, RAMROD_CONT);
    return (bxe_set_mac_one(sc, NULL, mac_obj, FALSE /* don't care */,
                            ECORE_UC_LIST_MAC, &ramrod_flags));
}

/*
 * Taskqueue handler that works out the RX filtering mode from the
 * interface flags and applies it. Runs under the core lock and does
 * nothing unless the device state is BXE_STATE_OPEN.
 *
 * A failure to program the multicast list falls back to all-multicast; a
 * failure to program the unicast list falls back to promiscuous mode.
 */
static void
bxe_handle_rx_mode_tq(void *context,
                      int  pending)
{
    struct bxe_softc *sc = (struct bxe_softc *)context;
    struct ifnet *ifp = sc->ifnet;
    uint32_t rx_mode = BXE_RX_MODE_NORMAL;

    BXE_CORE_LOCK(sc);

    if (sc->state != BXE_STATE_OPEN) {
        BLOGD(sc, DBG_SP, "state is %x, returning\n", sc->state);
        BXE_CORE_UNLOCK(sc);
        return;
    }

    BLOGD(sc, DBG_SP, "ifp->if_flags=0x%x\n", ifp->if_flags);

    if (ifp->if_flags & IFF_PROMISC) {
        rx_mode = BXE_RX_MODE_PROMISC;
    } else if ((ifp->if_flags & IFF_ALLMULTI) ||
               ((ifp->if_amcount > BXE_MAX_MULTICAST) &&
                CHIP_IS_E1(sc))) {
        rx_mode = BXE_RX_MODE_ALLMULTI;
    } else {
        if (IS_PF(sc)) {
            /* some multicasts */
            if (bxe_set_mc_list(sc) < 0) {
                rx_mode = BXE_RX_MODE_ALLMULTI;
            }
            if (bxe_set_uc_list(sc) < 0) {
                rx_mode = BXE_RX_MODE_PROMISC;
            }
        }
#if 0
        else {
            /*
             * Configuring mcast to a VF involves sleeping (when we
             * wait for the PF's response). Since this function is
             * called from a non sleepable context we must schedule
             * a work item for this purpose
             */
            bxe_set_bit(BXE_SP_RTNL_VFPF_MCAST, &sc->sp_rtnl_state);
            schedule_delayed_work(&sc->sp_rtnl_task, 0);
        }
#endif
    }

    sc->rx_mode = rx_mode;

    /* schedule the rx_mode command */
    if (bxe_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
        /* a command is already pending: only mark a new one as scheduled */
        BLOGD(sc, DBG_LOAD, "Scheduled setting rx_mode with ECORE...\n");
        bxe_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
        BXE_CORE_UNLOCK(sc);
        return;
    }

    if (IS_PF(sc)) {
        bxe_set_storm_rx_mode(sc);
    }
#if 0
    else {
        /*
         * Configuring mcast to a VF involves sleeping (when we
         * wait for the PF's response).
Since this function is
         * called from a non sleepable context we must schedule
         * a work item for this purpose
         */
        bxe_set_bit(BXE_SP_RTNL_VFPF_STORM_RX_MODE, &sc->sp_rtnl_state);
        schedule_delayed_work(&sc->sp_rtnl_task, 0);
    }
#endif

    BXE_CORE_UNLOCK(sc);
}

/* Queue the RX mode task; the work is done by the rx_mode taskqueue. */
static void
bxe_set_rx_mode(struct bxe_softc *sc)
{
    taskqueue_enqueue(sc->rx_mode_tq, &sc->rx_mode_tq_task);
}

/*
 * update flags in shmem
 *
 * Sets (set != 0) or clears (set == 0) 'flags' in the shmem2 drv_flags
 * word under the DRV_FLAGS hardware lock. Does nothing when shmem2 has no
 * drv_flags field.
 */
static void
bxe_update_drv_flags(struct bxe_softc *sc,
                     uint32_t         flags,
                     uint32_t         set)
{
    uint32_t drv_flags;

    if (SHMEM2_HAS(sc, drv_flags)) {
        bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_DRV_FLAGS);
        drv_flags = SHMEM2_RD(sc, drv_flags);

        if (set) {
            SET_FLAGS(drv_flags, flags);
        } else {
            RESET_FLAGS(drv_flags, flags);
        }

        SHMEM2_WR(sc, drv_flags, drv_flags);
        BLOGD(sc, DBG_LOAD, "drv_flags 0x%08x\n", drv_flags);

        bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_DRV_FLAGS);
    }
}

/*
 * periodic timer callout routine, only runs when the interface is up
 *
 * Re-arms itself every hz ticks while the state is BXE_STATE_OPEN and
 * periodic_flags is PERIODIC_GO.
 */
static void
bxe_periodic_callout_func(void *xsc)
{
    struct bxe_softc *sc = (struct bxe_softc *)xsc;
    int i;

    if (!BXE_CORE_TRYLOCK(sc)) {
        /* just bail and try again next time */

        if ((sc->state == BXE_STATE_OPEN) &&
            (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)) {
            /* schedule the next periodic callout */
            callout_reset(&sc->periodic_callout, hz,
                          bxe_periodic_callout_func, sc);
        }

        return;
    }

    if ((sc->state != BXE_STATE_OPEN) ||
        (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_STOP)) {
        BLOGW(sc, "periodic callout exit (state=0x%x)\n", sc->state);
        BXE_CORE_UNLOCK(sc);
        return;
    }

    /* Check for TX timeouts on any fastpath. */
    FOR_EACH_QUEUE(sc, i) {
        if (bxe_watchdog(sc, &sc->fp[i]) != 0) {
            /* Ruh-Roh, chip was reset! */
            break;
        }
    }

    if (!CHIP_REV_IS_SLOW(sc)) {
        /*
         * This barrier is needed to ensure the ordering between the writing
         * to the sc->port.pmf in the bxe_nic_load() or bxe_pmf_update() and
         * the reading here.
*/
        mb();
        if (sc->port.pmf) {
            BXE_PHY_LOCK(sc);
            elink_period_func(&sc->link_params, &sc->link_vars);
            BXE_PHY_UNLOCK(sc);
        }
    }

    /* heartbeat exchange with the MCP */
    if (IS_PF(sc) && !BXE_NOMCP(sc)) {
        int mb_idx = SC_FW_MB_IDX(sc);
        uint32_t drv_pulse;
        uint32_t mcp_pulse;

        ++sc->fw_drv_pulse_wr_seq;
        sc->fw_drv_pulse_wr_seq &= DRV_PULSE_SEQ_MASK;

        drv_pulse = sc->fw_drv_pulse_wr_seq;
        bxe_drv_pulse(sc);

        mcp_pulse = (SHMEM_RD(sc, func_mb[mb_idx].mcp_pulse_mb) &
                     MCP_PULSE_SEQ_MASK);

        /*
         * The delta between driver pulse and mcp response should
         * be 1 (before mcp response) or 0 (after mcp response).
         */
        if ((drv_pulse != mcp_pulse) &&
            (drv_pulse != ((mcp_pulse + 1) & MCP_PULSE_SEQ_MASK))) {
            /* someone lost a heartbeat... */
            BLOGE(sc, "drv_pulse (0x%x) != mcp_pulse (0x%x)\n",
                  drv_pulse, mcp_pulse);
        }
    }

    /* state is BXE_STATE_OPEN */
    bxe_stats_handle(sc, STATS_EVENT_UPDATE);

#if 0
    /* sample VF bulletin board for new posts from PF */
    if (IS_VF(sc)) {
        bxe_sample_bulletin(sc);
    }
#endif

    BXE_CORE_UNLOCK(sc);

    if ((sc->state == BXE_STATE_OPEN) &&
        (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)) {
        /* schedule the next periodic callout */
        callout_reset(&sc->periodic_callout, hz,
                      bxe_periodic_callout_func, sc);
    }
}

/* Mark the periodic callout as running and arm it to fire in hz ticks. */
static void
bxe_periodic_start(struct bxe_softc *sc)
{
    atomic_store_rel_long(&sc->periodic_flags, PERIODIC_GO);
    callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc);
}

/* Mark the periodic callout as stopped and drain it. */
static void
bxe_periodic_stop(struct bxe_softc *sc)
{
    atomic_store_rel_long(&sc->periodic_flags, PERIODIC_STOP);
    callout_drain(&sc->periodic_callout);
}

/*
 * start the controller
 *
 * Must be called with the core lock held (asserted below). Returns 0 on
 * success or an errno value after unwinding through the
 * bxe_nic_load_error* labels.
 * NOTE(review): the load_mode argument is not used by the visible body;
 * LOAD_OPEN is hard-coded at the "XXX load_mode" markers.
 */
static __noinline int
bxe_nic_load(struct bxe_softc *sc,
             int              load_mode)
{
    uint32_t val;
    int load_code = 0;
    int i, rc = 0;

    BXE_CORE_LOCK_ASSERT(sc);

    BLOGD(sc, DBG_LOAD, "Starting NIC load...\n");

    sc->state = BXE_STATE_OPENING_WAITING_LOAD;

    if (IS_PF(sc)) {
        /* must be called before memory allocation and HW init */
        bxe_ilt_set_info(sc);
    }

    sc->last_reported_link_state = LINK_STATE_UNKNOWN;

    bxe_set_fp_rx_buf_size(sc);

    if (bxe_alloc_fp_buffers(sc) != 0)
{
        BLOGE(sc, "Failed to allocate fastpath memory\n");
        sc->state = BXE_STATE_CLOSED;
        rc = ENOMEM;
        goto bxe_nic_load_error0;
    }

    if (bxe_alloc_mem(sc) != 0) {
        sc->state = BXE_STATE_CLOSED;
        rc = ENOMEM;
        goto bxe_nic_load_error0;
    }

    if (bxe_alloc_fw_stats_mem(sc) != 0) {
        sc->state = BXE_STATE_CLOSED;
        rc = ENOMEM;
        goto bxe_nic_load_error0;
    }

    if (IS_PF(sc)) {
        /* set pf load just before approaching the MCP */
        bxe_set_pf_load(sc);

        /* if MCP exists send load request and analyze response */
        if (!BXE_NOMCP(sc)) {
            /* attempt to load pf */
            if (bxe_nic_load_request(sc, &load_code) != 0) {
                sc->state = BXE_STATE_CLOSED;
                rc = ENXIO;
                goto bxe_nic_load_error1;
            }

            /* what did the MCP say? */
            if (bxe_nic_load_analyze_req(sc, load_code) != 0) {
                bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0);
                sc->state = BXE_STATE_CLOSED;
                rc = ENXIO;
                goto bxe_nic_load_error2;
            }
        } else {
            BLOGI(sc, "Device has no MCP!\n");
            load_code = bxe_nic_load_no_mcp(sc);
        }

        /* mark PMF if applicable */
        bxe_nic_load_pmf(sc, load_code);

        /* Init Function state controlling object */
        bxe_init_func_obj(sc);

        /* Initialize HW */
        if (bxe_init_hw(sc, load_code) != 0) {
            BLOGE(sc, "HW init failed\n");
            bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0);
            sc->state = BXE_STATE_CLOSED;
            rc = ENXIO;
            goto bxe_nic_load_error2;
        }
    }

    /* attach interrupts */
    if (bxe_interrupt_attach(sc) != 0) {
        sc->state = BXE_STATE_CLOSED;
        rc = ENXIO;
        goto bxe_nic_load_error2;
    }

    bxe_nic_init(sc, load_code);

    /* Init per-function objects */
    if (IS_PF(sc)) {
        bxe_init_objs(sc);
        // XXX bxe_iov_nic_init(sc);

        /* set AFEX default VLAN tag to an invalid value */
        sc->devinfo.mf_info.afex_def_vlan_tag = -1;
        // XXX bxe_nic_load_afex_dcc(sc, load_code);

        sc->state = BXE_STATE_OPENING_WAITING_PORT;
        rc = bxe_func_start(sc);
        if (rc) {
            BLOGE(sc, "Function start failed!\n");
            bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0);
            sc->state = BXE_STATE_ERROR;
            goto bxe_nic_load_error3;
        }

        /* send LOAD_DONE command to MCP */
        if (!BXE_NOMCP(sc)) {
            load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE,
0);
            /* a zero reply from the MCP is treated as a failure */
            if (!load_code) {
                BLOGE(sc, "MCP response failure, aborting\n");
                sc->state = BXE_STATE_ERROR;
                rc = ENXIO;
                goto bxe_nic_load_error3;
            }
        }

        rc = bxe_setup_leading(sc);
        if (rc) {
            BLOGE(sc, "Setup leading failed!\n");
            sc->state = BXE_STATE_ERROR;
            goto bxe_nic_load_error3;
        }

        FOR_EACH_NONDEFAULT_ETH_QUEUE(sc, i) {
            rc = bxe_setup_queue(sc, &sc->fp[i], FALSE);
            if (rc) {
                BLOGE(sc, "Queue(%d) setup failed\n", i);
                sc->state = BXE_STATE_ERROR;
                goto bxe_nic_load_error3;
            }
        }

        rc = bxe_init_rss_pf(sc);
        if (rc) {
            BLOGE(sc, "PF RSS init failed\n");
            sc->state = BXE_STATE_ERROR;
            goto bxe_nic_load_error3;
        }
    }
    /* XXX VF */
#if 0
    else { /* VF */
        FOR_EACH_ETH_QUEUE(sc, i) {
            rc = bxe_vfpf_setup_q(sc, i);
            if (rc) {
                BLOGE(sc, "Queue(%d) setup failed\n", i);
                sc->state = BXE_STATE_ERROR;
                goto bxe_nic_load_error3;
            }
        }
    }
#endif

    /* now when Clients are configured we are ready to work */
    sc->state = BXE_STATE_OPEN;

    /* Configure a ucast MAC */
    if (IS_PF(sc)) {
        rc = bxe_set_eth_mac(sc, TRUE);
    }
#if 0
    else { /* IS_VF(sc) */
        rc = bxe_vfpf_set_mac(sc);
    }
#endif
    if (rc) {
        BLOGE(sc, "Setting Ethernet MAC failed\n");
        sc->state = BXE_STATE_ERROR;
        goto bxe_nic_load_error3;
    }

#if 0
    if (IS_PF(sc) && sc->pending_max) {
        /* for AFEX */
        bxe_update_max_mf_config(sc, sc->pending_max);
        sc->pending_max = 0;
    }
#endif

    if (sc->port.pmf) {
        rc = bxe_initial_phy_init(sc, /* XXX load_mode */LOAD_OPEN);
        if (rc) {
            sc->state = BXE_STATE_ERROR;
            goto bxe_nic_load_error3;
        }
    }

    sc->link_params.feature_config_flags &=
        ~ELINK_FEATURE_CONFIG_BOOT_FROM_SAN;

    /* start fast path */

    /* Initialize Rx filter */
    bxe_set_rx_mode(sc);

    /* start the Tx */
    switch (/* XXX load_mode */LOAD_OPEN) {
    case LOAD_NORMAL:
    case LOAD_OPEN:
        break;

    case LOAD_DIAG:
    case LOAD_LOOPBACK_EXT:
        sc->state = BXE_STATE_DIAG;
        break;

    default:
        break;
    }

    if (sc->port.pmf) {
        bxe_update_drv_flags(sc, 1 << DRV_FLAGS_PORT_MASK, 0);
    } else {
        bxe_link_status_update(sc);
    }

    /* start the periodic timer callout */
    bxe_periodic_start(sc);

    if (IS_PF(sc) && SHMEM2_HAS(sc,
drv_capabilities_flag)) {
        /* mark driver is loaded in shmem2 */
        val = SHMEM2_RD(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)]);
        SHMEM2_WR(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)],
                  (val |
                   DRV_FLAGS_CAPABILITIES_LOADED_SUPPORTED |
                   DRV_FLAGS_CAPABILITIES_LOADED_L2));
    }

    /* wait for all pending SP commands to complete */
    if (IS_PF(sc) && !bxe_wait_sp_comp(sc, ~0x0UL)) {
        BLOGE(sc, "Timeout waiting for all SPs to complete!\n");
        bxe_periodic_stop(sc);
        bxe_nic_unload(sc, UNLOAD_CLOSE, FALSE);
        return (ENXIO);
    }

#if 0
    /* If PMF - send ADMIN DCBX msg to MFW to initiate DCBX FSM */
    if (sc->port.pmf && (sc->state != BXE_STATE_DIAG)) {
        bxe_dcbx_init(sc, FALSE);
    }
#endif

    /* Tell the stack the driver is running! */
    sc->ifnet->if_drv_flags = IFF_DRV_RUNNING;

    BLOGD(sc, DBG_LOAD, "NIC successfully loaded\n");

    return (0);

/*
 * Error unwinding: each label undoes one more stage and falls through to
 * the labels below it.
 */
bxe_nic_load_error3:

    if (IS_PF(sc)) {
        bxe_int_disable_sync(sc, 1);

        /* clean out queued objects */
        bxe_squeeze_objects(sc);
    }

    bxe_interrupt_detach(sc);

bxe_nic_load_error2:

    if (IS_PF(sc) && !BXE_NOMCP(sc)) {
        bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
        bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, 0);
    }

    sc->port.pmf = 0;

bxe_nic_load_error1:

    /* clear pf_load status, as it was already set */
    if (IS_PF(sc)) {
        bxe_clear_pf_load(sc);
    }

bxe_nic_load_error0:

    bxe_free_fw_stats_mem(sc);
    bxe_free_fp_buffers(sc);
    bxe_free_mem(sc);

    return (rc);
}

/*
 * Bring the controller up with the core lock already held (asserted
 * below). Returns 0 when the driver is already running or the load
 * succeeded; otherwise returns an error and clears IFF_DRV_RUNNING.
 */
static int
bxe_init_locked(struct bxe_softc *sc)
{
    int other_engine = SC_PATH(sc) ? 0 : 1;
    uint8_t other_load_status, load_status;
    uint8_t global = FALSE;
    int rc;

    BXE_CORE_LOCK_ASSERT(sc);

    /* check if the driver is already running */
    if (sc->ifnet->if_drv_flags & IFF_DRV_RUNNING) {
        BLOGD(sc, DBG_LOAD, "Init called while driver is running!\n");
        return (0);
    }

    bxe_set_power_state(sc, PCI_PM_D0);

    /*
     * If parity occurred during the unload, then attentions and/or
     * RECOVERY_IN_PROGRES may still be set. If so we want the first function
     * loaded on the current engine to complete the recovery.
Parity recovery
     * is only relevant for PF driver.
     */
    if (IS_PF(sc)) {
        other_load_status = bxe_get_load_status(sc, other_engine);
        load_status = bxe_get_load_status(sc, SC_PATH(sc));

        if (!bxe_reset_is_done(sc, SC_PATH(sc)) ||
            bxe_chk_parity_attn(sc, &global, TRUE)) {
            /* single-pass loop: 'break' leaves it once recovery succeeded */
            do {
                /*
                 * If there are attentions and they are in global blocks, set
                 * the GLOBAL_RESET bit regardless whether it will be this
                 * function that will complete the recovery or not.
                 */
                if (global) {
                    bxe_set_reset_global(sc);
                }

                /*
                 * Only the first function on the current engine should try
                 * to recover in open. In case of attentions in global blocks
                 * only the first in the chip should try to recover.
                 */
                if ((!load_status && (!global || !other_load_status)) &&
                    bxe_trylock_leader_lock(sc) && !bxe_leader_reset(sc)) {
                    BLOGI(sc, "Recovered during init\n");
                    break;
                }

                /* recovery has failed... */
                bxe_set_power_state(sc, PCI_PM_D3hot);
                sc->recovery_state = BXE_RECOVERY_FAILED;

                BLOGE(sc, "Recovery flow hasn't properly "
                          "completed yet, try again later. "
                          "If you still see this message after a "
                          "few retries then power cycle is required.\n");

                rc = ENXIO;
                goto bxe_init_locked_done;
            } while (0);
        }
    }

    sc->recovery_state = BXE_RECOVERY_DONE;

    rc = bxe_nic_load(sc, LOAD_OPEN);

bxe_init_locked_done:

    if (rc) {
        /* Tell the stack the driver is NOT running! */
        BLOGE(sc, "Initialization failed, "
                  "stack notified driver is NOT running!\n");
        sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
    }

    return (rc);
}

/*
 * Stop the controller with the core lock already held (asserted below).
 * Returns the result of bxe_nic_unload().
 */
static int
bxe_stop_locked(struct bxe_softc *sc)
{
    BXE_CORE_LOCK_ASSERT(sc);
    return (bxe_nic_unload(sc, UNLOAD_NORMAL, TRUE));
}

/*
 * Handles controller initialization when called from an unlocked routine.
 * ifconfig calls this function.
*
 * Returns:
 *   void
 */
static void
bxe_init(void *xsc)
{
    struct bxe_softc *sc = (struct bxe_softc *)xsc;

    BXE_CORE_LOCK(sc);
    /* the status returned by bxe_init_locked() is not propagated */
    bxe_init_locked(sc);
    BXE_CORE_UNLOCK(sc);
}

/*
 * Set up the ifmedia entries, allocate and fill in the ifnet structure and
 * attach it as an Ethernet interface.
 *
 * Returns 0 on success or ENXIO when the ifnet cannot be allocated.
 */
static int
bxe_init_ifnet(struct bxe_softc *sc)
{
    struct ifnet *ifp;

    /* ifconfig entrypoint for media type/status reporting */
    ifmedia_init(&sc->ifmedia, IFM_IMASK,
                 bxe_ifmedia_update,
                 bxe_ifmedia_status);

    /* set the default interface values */
    ifmedia_add(&sc->ifmedia, (IFM_ETHER | IFM_FDX | sc->media), 0, NULL);
    ifmedia_add(&sc->ifmedia, (IFM_ETHER | IFM_AUTO), 0, NULL);
    ifmedia_set(&sc->ifmedia, (IFM_ETHER | IFM_AUTO));

    sc->ifmedia.ifm_media = sc->ifmedia.ifm_cur->ifm_media; /* XXX ? */

    /* allocate the ifnet structure */
    if ((ifp = if_alloc(IFT_ETHER)) == NULL) {
        /* NOTE(review): the ifmedia entries added above are not removed here */
        BLOGE(sc, "Interface allocation failed!\n");
        return (ENXIO);
    }

    ifp->if_softc = sc;
    if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev));
    ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
    ifp->if_ioctl = bxe_ioctl;
    ifp->if_start = bxe_tx_start;
#if __FreeBSD_version >= 800000
    ifp->if_transmit = bxe_tx_mq_start;
    ifp->if_qflush = bxe_mq_flush;
#endif
#ifdef FreeBSD8_0
    ifp->if_timer = 0;
#endif
    ifp->if_init = bxe_init;
    ifp->if_mtu = sc->mtu;
    ifp->if_hwassist = (CSUM_IP       |
                        CSUM_TCP      |
                        CSUM_UDP      |
                        CSUM_TSO      |
                        CSUM_TCP_IPV6 |
                        CSUM_UDP_IPV6);
    ifp->if_capabilities =
#if __FreeBSD_version < 700000
        (IFCAP_VLAN_MTU       |
         IFCAP_VLAN_HWTAGGING |
         IFCAP_HWCSUM         |
         IFCAP_JUMBO_MTU      |
         IFCAP_LRO);
#else
        (IFCAP_VLAN_MTU       |
         IFCAP_VLAN_HWTAGGING |
         IFCAP_VLAN_HWTSO     |
         IFCAP_VLAN_HWFILTER  |
         IFCAP_VLAN_HWCSUM    |
         IFCAP_HWCSUM         |
         IFCAP_JUMBO_MTU      |
         IFCAP_LRO            |
         IFCAP_TSO4           |
         IFCAP_TSO6           |
         IFCAP_WOL_MAGIC);
#endif
    /* everything advertised is enabled by default, except WOL magic */
    ifp->if_capenable = ifp->if_capabilities;
    ifp->if_capenable &= ~IFCAP_WOL_MAGIC; /* XXX not yet...
*/
#if __FreeBSD_version < 1000025
    ifp->if_baudrate = 1000000000;
#else
    if_initbaudrate(ifp, IF_Gbps(10));
#endif
    ifp->if_snd.ifq_drv_maxlen = sc->tx_ring_size;

    IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
    IFQ_SET_READY(&ifp->if_snd);

    sc->ifnet = ifp;

    /* attach to the Ethernet interface list */
    ether_ifattach(ifp, sc->link_params.mac_addr);

    return (0);
}

/* Release every PCI BAR memory resource that is currently allocated. */
static void
bxe_deallocate_bars(struct bxe_softc *sc)
{
    int i;

    for (i = 0; i < MAX_BARS; i++) {
        if (sc->bar[i].resource != NULL) {
            bus_release_resource(sc->dev,
                                 SYS_RES_MEMORY,
                                 sc->bar[i].rid,
                                 sc->bar[i].resource);
            BLOGD(sc, DBG_LOAD, "Released PCI BAR%d [%02x] memory\n",
                  i, PCIR_BAR(i));
        }
    }
}

/*
 * Allocate the memory resources behind BARs 0, 2 and 4 and record their
 * bus tag, bus handle and kernel virtual address in sc->bar[].
 *
 * Always returns 0: the first BAR that cannot be allocated ends the scan
 * without an error (see the disabled message about BAR4 on E1 below).
 */
static int
bxe_allocate_bars(struct bxe_softc *sc)
{
    u_int flags;
    int i;

    memset(sc->bar, 0, sizeof(sc->bar));

    for (i = 0; i < MAX_BARS; i++) {

        /* memory resources reside at BARs 0, 2, 4 */
        /* Run `pciconf -lb` to see mappings */
        if ((i != 0) && (i != 2) && (i != 4)) {
            continue;
        }

        sc->bar[i].rid = PCIR_BAR(i);

        flags = RF_ACTIVE;
        if (i == 0) {
            flags |= RF_SHAREABLE;
        }

        if ((sc->bar[i].resource =
             bus_alloc_resource_any(sc->dev,
                                    SYS_RES_MEMORY,
                                    &sc->bar[i].rid,
                                    flags)) == NULL) {
#if 0
            /* BAR4 doesn't exist for E1 */
            BLOGE(sc, "PCI BAR%d [%02x] memory allocation failed\n",
                  i, PCIR_BAR(i));
#endif
            return (0);
        }

        sc->bar[i].tag    = rman_get_bustag(sc->bar[i].resource);
        sc->bar[i].handle = rman_get_bushandle(sc->bar[i].resource);
        sc->bar[i].kva    = (vm_offset_t)rman_get_virtual(sc->bar[i].resource);

        BLOGI(sc, "PCI BAR%d [%02x] memory allocated: %p-%p (%ld) -> %p\n",
              i, PCIR_BAR(i),
              (void *)rman_get_start(sc->bar[i].resource),
              (void *)rman_get_end(sc->bar[i].resource),
              rman_get_size(sc->bar[i].resource),
              (void *)sc->bar[i].kva);
    }

    return (0);
}

/*
 * Read the function numbering of this device from the ME register and
 * store the relative function, path id and absolute function in the softc.
 */
static void
bxe_get_function_num(struct bxe_softc *sc)
{
    uint32_t val = 0;

    /*
     * Read the ME register to get the function number. The ME register
     * holds the relative-function number and absolute-function number. The
     * absolute-function number appears only in E2 and above.
Before that * these bits always contained zero, therefore we cannot blindly use them. */ val = REG_RD(sc, BAR_ME_REGISTER); sc->pfunc_rel = (uint8_t)((val & ME_REG_PF_NUM) >> ME_REG_PF_NUM_SHIFT); sc->path_id = (uint8_t)((val & ME_REG_ABS_PF_NUM) >> ME_REG_ABS_PF_NUM_SHIFT) & 1; if (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) { sc->pfunc_abs = ((sc->pfunc_rel << 1) | sc->path_id); } else { sc->pfunc_abs = (sc->pfunc_rel | sc->path_id); } BLOGD(sc, DBG_LOAD, "Relative function %d, Absolute function %d, Path %d\n", sc->pfunc_rel, sc->pfunc_abs, sc->path_id); } static uint32_t bxe_get_shmem_mf_cfg_base(struct bxe_softc *sc) { uint32_t shmem2_size; uint32_t offset; uint32_t mf_cfg_offset_value; /* Non 57712 */ offset = (SHMEM_RD(sc, func_mb) + (MAX_FUNC_NUM * sizeof(struct drv_func_mb))); /* 57712 plus */ if (sc->devinfo.shmem2_base != 0) { shmem2_size = SHMEM2_RD(sc, size); if (shmem2_size > offsetof(struct shmem2_region, mf_cfg_addr)) { mf_cfg_offset_value = SHMEM2_RD(sc, mf_cfg_addr); if (SHMEM_MF_CFG_ADDR_NONE != mf_cfg_offset_value) { offset = mf_cfg_offset_value; } } } return (offset); } static uint32_t bxe_pcie_capability_read(struct bxe_softc *sc, int reg, int width) { int pcie_reg; /* ensure PCIe capability is enabled */ if (pci_find_cap(sc->dev, PCIY_EXPRESS, &pcie_reg) == 0) { if (pcie_reg != 0) { BLOGD(sc, DBG_LOAD, "PCIe capability at 0x%04x\n", pcie_reg); return (pci_read_config(sc->dev, (pcie_reg + reg), width)); } } BLOGE(sc, "PCIe capability NOT FOUND!!!\n"); return (0); } static uint8_t bxe_is_pcie_pending(struct bxe_softc *sc) { return (bxe_pcie_capability_read(sc, PCIR_EXPRESS_DEVICE_STA, 2) & PCIM_EXP_STA_TRANSACTION_PND); } /* * Walk the PCI capabiites list for the device to find what features are * supported. These capabilites may be enabled/disabled by firmware so it's * best to walk the list rather than make assumptions. 
*/ static void bxe_probe_pci_caps(struct bxe_softc *sc) { uint16_t link_status; int reg; /* check if PCI Power Management is enabled */ if (pci_find_cap(sc->dev, PCIY_PMG, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found PM capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_PM_CAPABLE_FLAG; sc->devinfo.pcie_pm_cap_reg = (uint16_t)reg; } } link_status = bxe_pcie_capability_read(sc, PCIR_EXPRESS_LINK_STA, 2); /* handle PCIe 2.0 workarounds for 57710 */ if (CHIP_IS_E1(sc)) { /* workaround for 57710 errata E4_57710_27462 */ sc->devinfo.pcie_link_speed = (REG_RD(sc, 0x3d04) & (1 << 24)) ? 2 : 1; /* workaround for 57710 errata E4_57710_27488 */ sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4); if (sc->devinfo.pcie_link_speed > 1) { sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4) >> 1; } } else { sc->devinfo.pcie_link_speed = (link_status & PCIM_LINK_STA_SPEED); sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4); } BLOGD(sc, DBG_LOAD, "PCIe link speed=%d width=%d\n", sc->devinfo.pcie_link_speed, sc->devinfo.pcie_link_width); sc->devinfo.pcie_cap_flags |= BXE_PCIE_CAPABLE_FLAG; sc->devinfo.pcie_pcie_cap_reg = (uint16_t)reg; /* check if MSI capability is enabled */ if (pci_find_cap(sc->dev, PCIY_MSI, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found MSI capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_MSI_CAPABLE_FLAG; sc->devinfo.pcie_msi_cap_reg = (uint16_t)reg; } } /* check if MSI-X capability is enabled */ if (pci_find_cap(sc->dev, PCIY_MSIX, ®) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found MSI-X capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_MSIX_CAPABLE_FLAG; sc->devinfo.pcie_msix_cap_reg = (uint16_t)reg; } } } static int bxe_get_shmem_mf_cfg_info_sd(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val; /* get the outer vlan if we're in switch-dependent mode */ val = MFCFG_RD(sc, 
func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag);
    mf_info->ext_id = (uint16_t)val;

    mf_info->multi_vnics_mode = 1;

    /* a function without a valid outer VLAN is rejected */
    if (!VALID_OVLAN(mf_info->ext_id)) {
        BLOGE(sc, "Invalid VLAN (%d)\n", mf_info->ext_id);
        return (1);
    }

    /* get the capabilities */
    if ((mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_PROTOCOL_MASK) ==
        FUNC_MF_CFG_PROTOCOL_ISCSI) {
        mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_ISCSI;
    } else if ((mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_PROTOCOL_MASK) ==
               FUNC_MF_CFG_PROTOCOL_FCOE) {
        mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_FCOE;
    } else {
        mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_ETHERNET;
    }

    mf_info->vnics_per_port =
        (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4;

    return (0);
}

/*
 * Translate the extended function configuration word of this function
 * into a mask of MF_PROTO_SUPPORT_* bits. Returns 0 when the ENABLED flag
 * is not set.
 */
static uint32_t
bxe_get_shmem_ext_proto_support_flags(struct bxe_softc *sc)
{
    uint32_t retval = 0;
    uint32_t val;

    val = MFCFG_RD(sc, func_ext_config[SC_ABS_FUNC(sc)].func_cfg);

    if (val & MACP_FUNC_CFG_FLAGS_ENABLED) {
        if (val & MACP_FUNC_CFG_FLAGS_ETHERNET) {
            retval |= MF_PROTO_SUPPORT_ETHERNET;
        }
        if (val & MACP_FUNC_CFG_FLAGS_ISCSI_OFFLOAD) {
            retval |= MF_PROTO_SUPPORT_ISCSI;
        }
        if (val & MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD) {
            retval |= MF_PROTO_SUPPORT_FCOE;
        }
    }

    return (retval);
}

/*
 * Fill in the multi-function info for switch-independent mode.
 * Always returns 0.
 */
static int
bxe_get_shmem_mf_cfg_info_si(struct bxe_softc *sc)
{
    struct bxe_mf_info *mf_info = &sc->devinfo.mf_info;
    uint32_t val;

    /*
     * There is no outer vlan if we're in switch-independent mode.
     * If the mac is valid then assume multi-function.
     */

    val = MFCFG_RD(sc, func_ext_config[SC_ABS_FUNC(sc)].func_cfg);

    mf_info->multi_vnics_mode = ((val & MACP_FUNC_CFG_FLAGS_MASK) != 0);

    mf_info->mf_protos_supported = bxe_get_shmem_ext_proto_support_flags(sc);

    mf_info->vnics_per_port =
        (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ?
2 : 4; return (0); } static int bxe_get_shmem_mf_cfg_info_niv(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t e1hov_tag; uint32_t func_config; uint32_t niv_config; mf_info->multi_vnics_mode = 1; e1hov_tag = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); func_config = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); niv_config = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].afex_config); mf_info->ext_id = (uint16_t)((e1hov_tag & FUNC_MF_CFG_E1HOV_TAG_MASK) >> FUNC_MF_CFG_E1HOV_TAG_SHIFT); mf_info->default_vlan = (uint16_t)((e1hov_tag & FUNC_MF_CFG_AFEX_VLAN_MASK) >> FUNC_MF_CFG_AFEX_VLAN_SHIFT); mf_info->niv_allowed_priorities = (uint8_t)((niv_config & FUNC_MF_CFG_AFEX_COS_FILTER_MASK) >> FUNC_MF_CFG_AFEX_COS_FILTER_SHIFT); mf_info->niv_default_cos = (uint8_t)((func_config & FUNC_MF_CFG_TRANSMIT_PRIORITY_MASK) >> FUNC_MF_CFG_TRANSMIT_PRIORITY_SHIFT); mf_info->afex_vlan_mode = ((niv_config & FUNC_MF_CFG_AFEX_VLAN_MODE_MASK) >> FUNC_MF_CFG_AFEX_VLAN_MODE_SHIFT); mf_info->niv_mba_enabled = ((niv_config & FUNC_MF_CFG_AFEX_MBA_ENABLED_MASK) >> FUNC_MF_CFG_AFEX_MBA_ENABLED_SHIFT); mf_info->mf_protos_supported = bxe_get_shmem_ext_proto_support_flags(sc); mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 
2 : 4; return (0); } static int bxe_check_valid_mf_cfg(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t mf_cfg1; uint32_t mf_cfg2; uint32_t ovlan1; uint32_t ovlan2; uint8_t i, j; BLOGD(sc, DBG_LOAD, "MF config parameters for function %d\n", SC_PORT(sc)); BLOGD(sc, DBG_LOAD, "\tmf_config=0x%x\n", mf_info->mf_config[SC_VN(sc)]); BLOGD(sc, DBG_LOAD, "\tmulti_vnics_mode=%d\n", mf_info->multi_vnics_mode); BLOGD(sc, DBG_LOAD, "\tvnics_per_port=%d\n", mf_info->vnics_per_port); BLOGD(sc, DBG_LOAD, "\tovlan/vifid=%d\n", mf_info->ext_id); BLOGD(sc, DBG_LOAD, "\tmin_bw=%d/%d/%d/%d\n", mf_info->min_bw[0], mf_info->min_bw[1], mf_info->min_bw[2], mf_info->min_bw[3]); BLOGD(sc, DBG_LOAD, "\tmax_bw=%d/%d/%d/%d\n", mf_info->max_bw[0], mf_info->max_bw[1], mf_info->max_bw[2], mf_info->max_bw[3]); BLOGD(sc, DBG_LOAD, "\tmac_addr: %s\n", sc->mac_addr_str); /* various MF mode sanity checks... */ if (mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_HIDE) { BLOGE(sc, "Enumerated function %d is marked as hidden\n", SC_PORT(sc)); return (1); } if ((mf_info->vnics_per_port > 1) && !mf_info->multi_vnics_mode) { BLOGE(sc, "vnics_per_port=%d multi_vnics_mode=%d\n", mf_info->vnics_per_port, mf_info->multi_vnics_mode); return (1); } if (mf_info->mf_mode == MULTI_FUNCTION_SD) { /* vnic id > 0 must have valid ovlan in switch-dependent mode */ if ((SC_VN(sc) > 0) && !VALID_OVLAN(OVLAN(sc))) { BLOGE(sc, "mf_mode=SD vnic_id=%d ovlan=%d\n", SC_VN(sc), OVLAN(sc)); return (1); } if (!VALID_OVLAN(OVLAN(sc)) && mf_info->multi_vnics_mode) { BLOGE(sc, "mf_mode=SD multi_vnics_mode=%d ovlan=%d\n", mf_info->multi_vnics_mode, OVLAN(sc)); return (1); } /* * Verify all functions are either MF or SF mode. If MF, make sure * sure that all non-hidden functions have a valid ovlan. If SF, * make sure that all non-hidden functions have an invalid ovlan. 
*/ FOREACH_ABS_FUNC_IN_PORT(sc, i) { mf_cfg1 = MFCFG_RD(sc, func_mf_config[i].config); ovlan1 = MFCFG_RD(sc, func_mf_config[i].e1hov_tag); if (!(mf_cfg1 & FUNC_MF_CFG_FUNC_HIDE) && (((mf_info->multi_vnics_mode) && !VALID_OVLAN(ovlan1)) || ((!mf_info->multi_vnics_mode) && VALID_OVLAN(ovlan1)))) { BLOGE(sc, "mf_mode=SD function %d MF config " "mismatch, multi_vnics_mode=%d ovlan=%d\n", i, mf_info->multi_vnics_mode, ovlan1); return (1); } } /* Verify all funcs on the same port each have a different ovlan. */ FOREACH_ABS_FUNC_IN_PORT(sc, i) { mf_cfg1 = MFCFG_RD(sc, func_mf_config[i].config); ovlan1 = MFCFG_RD(sc, func_mf_config[i].e1hov_tag); /* iterate from the next function on the port to the max func */ for (j = i + 2; j < MAX_FUNC_NUM; j += 2) { mf_cfg2 = MFCFG_RD(sc, func_mf_config[j].config); ovlan2 = MFCFG_RD(sc, func_mf_config[j].e1hov_tag); if (!(mf_cfg1 & FUNC_MF_CFG_FUNC_HIDE) && VALID_OVLAN(ovlan1) && !(mf_cfg2 & FUNC_MF_CFG_FUNC_HIDE) && VALID_OVLAN(ovlan2) && (ovlan1 == ovlan2)) { BLOGE(sc, "mf_mode=SD functions %d and %d " "have the same ovlan (%d)\n", i, j, ovlan1); return (1); } } } } /* MULTI_FUNCTION_SD */ return (0); } static int bxe_get_mf_cfg_info(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val, mac_upper; uint8_t i, vnic; /* initialize mf_info defaults */ mf_info->vnics_per_port = 1; mf_info->multi_vnics_mode = FALSE; mf_info->path_has_ovlan = FALSE; mf_info->mf_mode = SINGLE_FUNCTION; if (!CHIP_IS_MF_CAP(sc)) { return (0); } if (sc->devinfo.mf_cfg_base == SHMEM_MF_CFG_ADDR_NONE) { BLOGE(sc, "Invalid mf_cfg_base!\n"); return (1); } /* get the MF mode (switch dependent / independent / single-function) */ val = SHMEM_RD(sc, dev_info.shared_feature_config.config); switch (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK) { case SHARED_FEAT_CFG_FORCE_SF_MODE_SWITCH_INDEPT: mac_upper = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); /* check for legal upper mac bytes */ if (mac_upper != 
FUNC_MF_CFG_UPPERMAC_DEFAULT) { mf_info->mf_mode = MULTI_FUNCTION_SI; } else { BLOGE(sc, "Invalid config for Switch Independent mode\n"); } break; case SHARED_FEAT_CFG_FORCE_SF_MODE_MF_ALLOWED: case SHARED_FEAT_CFG_FORCE_SF_MODE_SPIO4: /* get outer vlan configuration */ val = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); if ((val & FUNC_MF_CFG_E1HOV_TAG_MASK) != FUNC_MF_CFG_E1HOV_TAG_DEFAULT) { mf_info->mf_mode = MULTI_FUNCTION_SD; } else { BLOGE(sc, "Invalid config for Switch Dependent mode\n"); } break; case SHARED_FEAT_CFG_FORCE_SF_MODE_FORCED_SF: /* not in MF mode, vnics_per_port=1 and multi_vnics_mode=FALSE */ return (0); case SHARED_FEAT_CFG_FORCE_SF_MODE_AFEX_MODE: /* * Mark MF mode as NIV if MCP version includes NPAR-SD support * and the MAC address is valid. */ mac_upper = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); if ((SHMEM2_HAS(sc, afex_driver_support)) && (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT)) { mf_info->mf_mode = MULTI_FUNCTION_AFEX; } else { BLOGE(sc, "Invalid config for AFEX mode\n"); } break; default: BLOGE(sc, "Unknown MF mode (0x%08x)\n", (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK)); return (1); } /* set path mf_mode (which could be different than function mf_mode) */ if (mf_info->mf_mode == MULTI_FUNCTION_SD) { mf_info->path_has_ovlan = TRUE; } else if (mf_info->mf_mode == SINGLE_FUNCTION) { /* * Decide on path multi vnics mode. If we're not in MF mode and in * 4-port mode, this is good enough to check vnic-0 of the other port * on the same path */ if (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) { uint8_t other_port = !(PORT_ID(sc) & 1); uint8_t abs_func_other_port = (SC_PATH(sc) + (2 * other_port)); val = MFCFG_RD(sc, func_mf_config[abs_func_other_port].e1hov_tag); mf_info->path_has_ovlan = VALID_OVLAN((uint16_t)val) ? 
1 : 0; } } if (mf_info->mf_mode == SINGLE_FUNCTION) { /* invalid MF config */ if (SC_VN(sc) >= 1) { BLOGE(sc, "VNIC ID >= 1 in SF mode\n"); return (1); } return (0); } /* get the MF configuration */ mf_info->mf_config[SC_VN(sc)] = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); switch(mf_info->mf_mode) { case MULTI_FUNCTION_SD: bxe_get_shmem_mf_cfg_info_sd(sc); break; case MULTI_FUNCTION_SI: bxe_get_shmem_mf_cfg_info_si(sc); break; case MULTI_FUNCTION_AFEX: bxe_get_shmem_mf_cfg_info_niv(sc); break; default: BLOGE(sc, "Get MF config failed (mf_mode=0x%08x)\n", mf_info->mf_mode); return (1); } /* get the congestion management parameters */ vnic = 0; FOREACH_ABS_FUNC_IN_PORT(sc, i) { /* get min/max bw */ val = MFCFG_RD(sc, func_mf_config[i].config); mf_info->min_bw[vnic] = ((val & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT); mf_info->max_bw[vnic] = ((val & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT); vnic++; } return (bxe_check_valid_mf_cfg(sc)); } static int bxe_get_shmem_info(struct bxe_softc *sc) { int port; uint32_t mac_hi, mac_lo, val; port = SC_PORT(sc); mac_hi = mac_lo = 0; sc->link_params.sc = sc; sc->link_params.port = port; /* get the hardware config info */ sc->devinfo.hw_config = SHMEM_RD(sc, dev_info.shared_hw_config.config); sc->devinfo.hw_config2 = SHMEM_RD(sc, dev_info.shared_hw_config.config2); sc->link_params.hw_led_mode = ((sc->devinfo.hw_config & SHARED_HW_CFG_LED_MODE_MASK) >> SHARED_HW_CFG_LED_MODE_SHIFT); /* get the port feature config */ sc->port.config = SHMEM_RD(sc, dev_info.port_feature_config[port].config), /* get the link params */ sc->link_params.speed_cap_mask[0] = SHMEM_RD(sc, dev_info.port_hw_config[port].speed_capability_mask); sc->link_params.speed_cap_mask[1] = SHMEM_RD(sc, dev_info.port_hw_config[port].speed_capability_mask2); /* get the lane config */ sc->link_params.lane_config = SHMEM_RD(sc, dev_info.port_hw_config[port].lane_config); /* get the link config */ val = SHMEM_RD(sc, 
dev_info.port_feature_config[port].link_config); sc->port.link_config[ELINK_INT_PHY] = val; sc->link_params.switch_cfg = (val & PORT_FEATURE_CONNECTED_SWITCH_MASK); sc->port.link_config[ELINK_EXT_PHY1] = SHMEM_RD(sc, dev_info.port_feature_config[port].link_config2); /* get the override preemphasis flag and enable it or turn it off */ val = SHMEM_RD(sc, dev_info.shared_feature_config.config); if (val & SHARED_FEAT_CFG_OVERRIDE_PREEMPHASIS_CFG_ENABLED) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED; } else { sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED; } /* get the initial value of the link params */ sc->link_params.multi_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].multi_phy_config); /* get external phy info */ sc->port.ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); /* get the multifunction configuration */ bxe_get_mf_cfg_info(sc); /* get the mac address */ if (IS_MF(sc)) { mac_hi = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); mac_lo = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_lower); } else { mac_hi = SHMEM_RD(sc, dev_info.port_hw_config[port].mac_upper); mac_lo = SHMEM_RD(sc, dev_info.port_hw_config[port].mac_lower); } if ((mac_lo == 0) && (mac_hi == 0)) { *sc->mac_addr_str = 0; BLOGE(sc, "No Ethernet address programmed!\n"); } else { sc->link_params.mac_addr[0] = (uint8_t)(mac_hi >> 8); sc->link_params.mac_addr[1] = (uint8_t)(mac_hi); sc->link_params.mac_addr[2] = (uint8_t)(mac_lo >> 24); sc->link_params.mac_addr[3] = (uint8_t)(mac_lo >> 16); sc->link_params.mac_addr[4] = (uint8_t)(mac_lo >> 8); sc->link_params.mac_addr[5] = (uint8_t)(mac_lo); snprintf(sc->mac_addr_str, sizeof(sc->mac_addr_str), "%02x:%02x:%02x:%02x:%02x:%02x", sc->link_params.mac_addr[0], sc->link_params.mac_addr[1], sc->link_params.mac_addr[2], sc->link_params.mac_addr[3], sc->link_params.mac_addr[4], sc->link_params.mac_addr[5]); BLOGD(sc, 
DBG_LOAD, "Ethernet address: %s\n", sc->mac_addr_str); } #if 0 if (!IS_MF(sc) && ((sc->port.config & PORT_FEAT_CFG_STORAGE_PERSONALITY_MASK) == PORT_FEAT_CFG_STORAGE_PERSONALITY_FCOE)) { sc->flags |= BXE_NO_ISCSI; } if (!IS_MF(sc) && ((sc->port.config & PORT_FEAT_CFG_STORAGE_PERSONALITY_MASK) == PORT_FEAT_CFG_STORAGE_PERSONALITY_ISCSI)) { sc->flags |= BXE_NO_FCOE_FLAG; } #endif return (0); } static void bxe_get_tunable_params(struct bxe_softc *sc) { /* sanity checks */ if ((bxe_interrupt_mode != INTR_MODE_INTX) && (bxe_interrupt_mode != INTR_MODE_MSI) && (bxe_interrupt_mode != INTR_MODE_MSIX)) { BLOGW(sc, "invalid interrupt_mode value (%d)\n", bxe_interrupt_mode); bxe_interrupt_mode = INTR_MODE_MSIX; } if ((bxe_queue_count < 0) || (bxe_queue_count > MAX_RSS_CHAINS)) { BLOGW(sc, "invalid queue_count value (%d)\n", bxe_queue_count); bxe_queue_count = 0; } if ((bxe_max_rx_bufs < 1) || (bxe_max_rx_bufs > RX_BD_USABLE)) { if (bxe_max_rx_bufs == 0) { bxe_max_rx_bufs = RX_BD_USABLE; } else { BLOGW(sc, "invalid max_rx_bufs (%d)\n", bxe_max_rx_bufs); bxe_max_rx_bufs = 2048; } } if ((bxe_hc_rx_ticks < 1) || (bxe_hc_rx_ticks > 100)) { BLOGW(sc, "invalid hc_rx_ticks (%d)\n", bxe_hc_rx_ticks); bxe_hc_rx_ticks = 25; } if ((bxe_hc_tx_ticks < 1) || (bxe_hc_tx_ticks > 100)) { BLOGW(sc, "invalid hc_tx_ticks (%d)\n", bxe_hc_tx_ticks); bxe_hc_tx_ticks = 50; } if (bxe_max_aggregation_size == 0) { bxe_max_aggregation_size = TPA_AGG_SIZE; } if (bxe_max_aggregation_size > 0xffff) { BLOGW(sc, "invalid max_aggregation_size (%d)\n", bxe_max_aggregation_size); bxe_max_aggregation_size = TPA_AGG_SIZE; } if ((bxe_mrrs < -1) || (bxe_mrrs > 3)) { BLOGW(sc, "invalid mrrs (%d)\n", bxe_mrrs); bxe_mrrs = -1; } if ((bxe_autogreeen < 0) || (bxe_autogreeen > 2)) { BLOGW(sc, "invalid autogreeen (%d)\n", bxe_autogreeen); bxe_autogreeen = 0; } if ((bxe_udp_rss < 0) || (bxe_udp_rss > 1)) { BLOGW(sc, "invalid udp_rss (%d)\n", bxe_udp_rss); bxe_udp_rss = 0; } /* pull in user settings */ sc->interrupt_mode = 
bxe_interrupt_mode; sc->max_rx_bufs = bxe_max_rx_bufs; sc->hc_rx_ticks = bxe_hc_rx_ticks; sc->hc_tx_ticks = bxe_hc_tx_ticks; sc->max_aggregation_size = bxe_max_aggregation_size; sc->mrrs = bxe_mrrs; sc->autogreeen = bxe_autogreeen; sc->udp_rss = bxe_udp_rss; if (bxe_interrupt_mode == INTR_MODE_INTX) { sc->num_queues = 1; } else { /* INTR_MODE_MSI or INTR_MODE_MSIX */ sc->num_queues = min((bxe_queue_count ? bxe_queue_count : mp_ncpus), MAX_RSS_CHAINS); if (sc->num_queues > mp_ncpus) { sc->num_queues = mp_ncpus; } } BLOGD(sc, DBG_LOAD, "User Config: " "debug=0x%lx " "interrupt_mode=%d " "queue_count=%d " "hc_rx_ticks=%d " "hc_tx_ticks=%d " "rx_budget=%d " "max_aggregation_size=%d " "mrrs=%d " "autogreeen=%d " "udp_rss=%d\n", bxe_debug, sc->interrupt_mode, sc->num_queues, sc->hc_rx_ticks, sc->hc_tx_ticks, bxe_rx_budget, sc->max_aggregation_size, sc->mrrs, sc->autogreeen, sc->udp_rss); } static void bxe_media_detect(struct bxe_softc *sc) { uint32_t phy_idx = bxe_get_cur_phy_idx(sc); switch (sc->link_params.phy[phy_idx].media_type) { case ELINK_ETH_PHY_SFPP_10G_FIBER: case ELINK_ETH_PHY_XFP_FIBER: BLOGI(sc, "Found 10Gb Fiber media.\n"); sc->media = IFM_10G_SR; break; case ELINK_ETH_PHY_SFP_1G_FIBER: BLOGI(sc, "Found 1Gb Fiber media.\n"); sc->media = IFM_1000_SX; break; case ELINK_ETH_PHY_KR: case ELINK_ETH_PHY_CX4: BLOGI(sc, "Found 10GBase-CX4 media.\n"); sc->media = IFM_10G_CX4; break; case ELINK_ETH_PHY_DA_TWINAX: BLOGI(sc, "Found 10Gb Twinax media.\n"); sc->media = IFM_10G_TWINAX; break; case ELINK_ETH_PHY_BASE_T: if (sc->link_params.speed_cap_mask[0] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) { BLOGI(sc, "Found 10GBase-T media.\n"); sc->media = IFM_10G_T; } else { BLOGI(sc, "Found 1000Base-T media.\n"); sc->media = IFM_1000_T; } break; case ELINK_ETH_PHY_NOT_PRESENT: BLOGI(sc, "Media not present.\n"); sc->media = 0; break; case ELINK_ETH_PHY_UNSPECIFIED: default: BLOGI(sc, "Unknown media!\n"); sc->media = 0; break; } } #define GET_FIELD(value, fname) \ (((value) & 
(fname##_MASK)) >> (fname##_SHIFT)) #define IGU_FID(val) GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID) #define IGU_VEC(val) GET_FIELD((val), IGU_REG_MAPPING_MEMORY_VECTOR) static int bxe_get_igu_cam_info(struct bxe_softc *sc) { int pfid = SC_FUNC(sc); int igu_sb_id; uint32_t val; uint8_t fid, igu_sb_cnt = 0; sc->igu_base_sb = 0xff; if (CHIP_INT_MODE_IS_BC(sc)) { int vn = SC_VN(sc); igu_sb_cnt = sc->igu_sb_cnt; sc->igu_base_sb = ((CHIP_IS_MODE_4_PORT(sc) ? pfid : vn) * FP_SB_MAX_E1x); sc->igu_dsb_id = (E1HVN_MAX * FP_SB_MAX_E1x + (CHIP_IS_MODE_4_PORT(sc) ? pfid : vn)); return (0); } /* IGU in normal mode - read CAM */ for (igu_sb_id = 0; igu_sb_id < IGU_REG_MAPPING_MEMORY_SIZE; igu_sb_id++) { val = REG_RD(sc, IGU_REG_MAPPING_MEMORY + igu_sb_id * 4); if (!(val & IGU_REG_MAPPING_MEMORY_VALID)) { continue; } fid = IGU_FID(val); if ((fid & IGU_FID_ENCODE_IS_PF)) { if ((fid & IGU_FID_PF_NUM_MASK) != pfid) { continue; } if (IGU_VEC(val) == 0) { /* default status block */ sc->igu_dsb_id = igu_sb_id; } else { if (sc->igu_base_sb == 0xff) { sc->igu_base_sb = igu_sb_id; } igu_sb_cnt++; } } } /* * Due to new PF resource allocation by MFW T7.4 and above, it's optional * that number of CAM entries will not be equal to the value advertised in * PCI. Driver should use the minimal value of both as the actual status * block count */ sc->igu_sb_cnt = min(sc->igu_sb_cnt, igu_sb_cnt); if (igu_sb_cnt == 0) { BLOGE(sc, "CAM configuration error\n"); return (-1); } return (0); } /* * Gather various information from the device config space, the device itself, * shmem, and the user input. 
*/
static int
bxe_get_device_info(struct bxe_softc *sc)
{
    uint32_t val;
    int rc;

    /*
     * Returns 0 on success, and also when the MCP is not active or the
     * shmem validity signature is wrong (the remaining shmem-based setup
     * is then skipped).  Returns non-zero when forcing IGU normal mode
     * times out or bxe_get_igu_cam_info() fails.
     */

    /* Get the data for the device */
    sc->devinfo.vendor_id    = pci_get_vendor(sc->dev);
    sc->devinfo.device_id    = pci_get_device(sc->dev);
    sc->devinfo.subvendor_id = pci_get_subvendor(sc->dev);
    sc->devinfo.subdevice_id = pci_get_subdevice(sc->dev);

    /* get the chip revision (chip metal comes from pci config space) */
    sc->devinfo.chip_id     =
    sc->link_params.chip_id =
        (((REG_RD(sc, MISC_REG_CHIP_NUM)                   & 0xffff) << 16) |
         ((REG_RD(sc, MISC_REG_CHIP_REV)                   & 0xf)    << 12) |
         (((REG_RD(sc, PCICFG_OFFSET + PCI_ID_VAL3) >> 24) & 0xf)    << 4)  |
         ((REG_RD(sc, MISC_REG_BOND_ID)                    & 0xf)    << 0));

    /* force 57811 according to MISC register */
    if (REG_RD(sc, MISC_REG_CHIP_TYPE) & MISC_REG_CHIP_TYPE_57811_MASK) {
        if (CHIP_IS_57810(sc)) {
            sc->devinfo.chip_id = ((CHIP_NUM_57811 << 16) |
                                   (sc->devinfo.chip_id & 0x0000ffff));
        } else if (CHIP_IS_57810_MF(sc)) {
            sc->devinfo.chip_id = ((CHIP_NUM_57811_MF << 16) |
                                   (sc->devinfo.chip_id & 0x0000ffff));
        }
        sc->devinfo.chip_id |= 0x1;
    }

    BLOGD(sc, DBG_LOAD,
          "chip_id=0x%08x (num=0x%04x rev=0x%01x metal=0x%02x bond=0x%01x)\n",
          sc->devinfo.chip_id,
          ((sc->devinfo.chip_id >> 16) & 0xffff),
          ((sc->devinfo.chip_id >> 12) & 0xf),
          ((sc->devinfo.chip_id >>  4) & 0xff),
          ((sc->devinfo.chip_id >>  0) & 0xf));

    val = (REG_RD(sc, 0x2874) & 0x55);
    if ((sc->devinfo.chip_id & 0x1) ||
        (CHIP_IS_E1(sc) && val) ||
        (CHIP_IS_E1H(sc) && (val == 0x55))) {
        sc->flags |= BXE_ONE_PORT_FLAG;
        BLOGD(sc, DBG_LOAD, "single port device\n");
    }

    /* set the doorbell size */
    sc->doorbell_size = (1 << BXE_DB_SHIFT);

    /* determine whether the device is in 2 port or 4 port mode */
    sc->devinfo.chip_port_mode = CHIP_PORT_MODE_NONE; /* E1 & E1h*/
    if (CHIP_IS_E2E3(sc)) {
        /*
         * Read port4mode_en_ovwr[0]:
         *   If 1, four port mode is in port4mode_en_ovwr[1].
         *   If 0, four port mode is in port4mode_en[0].
         */
        val = REG_RD(sc, MISC_REG_PORT4MODE_EN_OVWR);
        if (val & 1) {
            val = ((val >> 1) & 1);
        } else {
            val = REG_RD(sc, MISC_REG_PORT4MODE_EN);
        }

        sc->devinfo.chip_port_mode =
            (val) ? CHIP_4_PORT_MODE : CHIP_2_PORT_MODE;

        BLOGD(sc, DBG_LOAD, "Port mode = %s\n", (val) ? "4" : "2");
    }

    /* get the function and path info for the device */
    bxe_get_function_num(sc);

    /* get the shared memory base address */
    sc->devinfo.shmem_base     =
    sc->link_params.shmem_base =
        REG_RD(sc, MISC_REG_SHARED_MEM_ADDR);
    sc->devinfo.shmem2_base =
        REG_RD(sc, (SC_PATH(sc) ? MISC_REG_GENERIC_CR_1 :
                                  MISC_REG_GENERIC_CR_0));

    BLOGD(sc, DBG_LOAD, "shmem_base=0x%08x, shmem2_base=0x%08x\n",
          sc->devinfo.shmem_base, sc->devinfo.shmem2_base);

    if (!sc->devinfo.shmem_base) {
        /* this should ONLY prevent upcoming shmem reads */
        BLOGI(sc, "MCP not active\n");
        sc->flags |= BXE_NO_MCP_FLAG;
        return (0);
    }

    /* make sure the shared memory contents are valid */
    val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]);
    if ((val & (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) !=
        (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) {
        BLOGE(sc, "Invalid SHMEM validity signature: 0x%08x\n", val);
        return (0);
    }
    BLOGD(sc, DBG_LOAD, "Valid SHMEM validity signature: 0x%08x\n", val);

    /* get the bootcode version */
    sc->devinfo.bc_ver = SHMEM_RD(sc, dev_info.bc_rev);
    snprintf(sc->devinfo.bc_ver_str,
             sizeof(sc->devinfo.bc_ver_str),
             "%d.%d.%d",
             ((sc->devinfo.bc_ver >> 24) & 0xff),
             ((sc->devinfo.bc_ver >> 16) & 0xff),
             ((sc->devinfo.bc_ver >>  8) & 0xff));
    BLOGD(sc, DBG_LOAD, "Bootcode version: %s\n", sc->devinfo.bc_ver_str);

    /* get the bootcode shmem address */
    sc->devinfo.mf_cfg_base = bxe_get_shmem_mf_cfg_base(sc);
    BLOGD(sc, DBG_LOAD, "mf_cfg_base=0x%08x \n", sc->devinfo.mf_cfg_base);

    /* clean indirect addresses as they're not used */
    pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4);
    if (IS_PF(sc)) {
        REG_WR(sc, PXP2_REG_PGL_ADDR_88_F0, 0);
        REG_WR(sc, PXP2_REG_PGL_ADDR_8C_F0, 0);
        REG_WR(sc, PXP2_REG_PGL_ADDR_90_F0, 0);
        REG_WR(sc, PXP2_REG_PGL_ADDR_94_F0, 0);
        if (CHIP_IS_E1x(sc)) {
            REG_WR(sc, PXP2_REG_PGL_ADDR_88_F1, 0);
            REG_WR(sc, PXP2_REG_PGL_ADDR_8C_F1, 0);
            REG_WR(sc, PXP2_REG_PGL_ADDR_90_F1, 0);
            REG_WR(sc, PXP2_REG_PGL_ADDR_94_F1, 0);
        }

        /*
         * Enable internal target-read (in case we are probed after PF
         * FLR). Must be done prior to any BAR read access. Only for
         * 57712 and up
         */
        if (!CHIP_IS_E1x(sc)) {
            REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1);
        }
    }

    /* get the nvram size */
    val = REG_RD(sc, MCP_REG_MCPR_NVM_CFG4);
    sc->devinfo.flash_size =
        (NVRAM_1MB_SIZE << (val & MCPR_NVM_CFG4_FLASH_SIZE));
    BLOGD(sc, DBG_LOAD, "nvram flash size: %d\n", sc->devinfo.flash_size);

    /* get PCI capabilities */
    bxe_probe_pci_caps(sc);

    bxe_set_power_state(sc, PCI_PM_D0);

    /* get various configuration parameters from shmem */
    bxe_get_shmem_info(sc);

    /* the MSI-X table size, when present, bounds the status block count */
    if (sc->devinfo.pcie_msix_cap_reg != 0) {
        val = pci_read_config(sc->dev,
                              (sc->devinfo.pcie_msix_cap_reg +
                               PCIR_MSIX_CTRL),
                              2);
        sc->igu_sb_cnt = (val & PCIM_MSIXCTRL_TABLE_SIZE);
    } else {
        sc->igu_sb_cnt = 1;
    }

    sc->igu_base_addr = BAR_IGU_INTMEM;

    /* initialize IGU parameters */
    if (CHIP_IS_E1x(sc)) {
        sc->devinfo.int_block = INT_BLOCK_HC;
        sc->igu_dsb_id = DEF_SB_IGU_ID;
        sc->igu_base_sb = 0;
    } else {
        sc->devinfo.int_block = INT_BLOCK_IGU;

        /* do not allow device reset during IGU info processing */
        bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET);

        val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION);

        if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) {
            int tout = 5000;

            BLOGD(sc, DBG_LOAD, "FORCING IGU Normal Mode\n");

            val &= ~(IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN);
            REG_WR(sc, IGU_REG_BLOCK_CONFIGURATION, val);
            REG_WR(sc, IGU_REG_RESET_MEMORIES, 0x7f);

            /* poll until the memory reset register reads back as zero */
            while (tout && REG_RD(sc, IGU_REG_RESET_MEMORIES)) {
                tout--;
                DELAY(1000);
            }

            if (REG_RD(sc, IGU_REG_RESET_MEMORIES)) {
                BLOGD(sc, DBG_LOAD, "FORCING IGU Normal Mode failed!!!\n");
                bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET);
                return (-1);
            }
        }

        /*
         * NOTE(review): the bit tested here was cleared from 'val' above
         * whenever it was set, so this branch looks unreachable - confirm
         * whether that is intended.
         */
        if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) {
            BLOGD(sc, DBG_LOAD, "IGU Backward Compatible Mode\n");
            sc->devinfo.int_block |= INT_BLOCK_MODE_BW_COMP;
        } else {
            BLOGD(sc, DBG_LOAD, "IGU Normal Mode\n");
        }

        rc = bxe_get_igu_cam_info(sc);

        bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET);

        if (rc) {
            return (rc);
        }
    }

    /*
     * Get base FW non-default (fast path) status block ID. This value is
     * used to initialize the fw_sb_id saved on the fp/queue structure to
     * determine the id used by the FW.
     */
    if (CHIP_IS_E1x(sc)) {
        sc->base_fw_ndsb = ((SC_PORT(sc) * FP_SB_MAX_E1x) + SC_L_ID(sc));
    } else {
        /*
         * 57712+ - We currently use one FW SB per IGU SB (Rx and Tx of
         * the same queue are indicated on the same IGU SB). So we prefer
         * FW and IGU SBs to be the same value.
         */
        sc->base_fw_ndsb = sc->igu_base_sb;
    }

    BLOGD(sc, DBG_LOAD,
          "igu_dsb_id=%d igu_base_sb=%d igu_sb_cnt=%d base_fw_ndsb=%d\n",
          sc->igu_dsb_id, sc->igu_base_sb,
          sc->igu_sb_cnt, sc->base_fw_ndsb);

    elink_phy_probe(&sc->link_params);

    return (0);
}

static void
bxe_link_settings_supported(struct bxe_softc *sc,
                            uint32_t         switch_cfg)
{
    uint32_t cfg_size = 0;
    uint32_t idx;
    uint8_t port = SC_PORT(sc);

    /* aggregation of supported attributes of all external phys */
    sc->port.supported[0] = 0;
    sc->port.supported[1] = 0;

    switch (sc->link_params.num_phys) {
    case 1:
        sc->port.supported[0] = sc->link_params.phy[ELINK_INT_PHY].supported;
        cfg_size = 1;
        break;
    case 2:
        sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY1].supported;
        cfg_size = 1;
        break;
    case 3:
        if (sc->link_params.multi_phy_config &
            PORT_HW_CFG_PHY_SWAPPED_ENABLED) {
            sc->port.supported[1] =
                sc->link_params.phy[ELINK_EXT_PHY1].supported;
            sc->port.supported[0] =
                sc->link_params.phy[ELINK_EXT_PHY2].supported;
        } else {
            sc->port.supported[0] =
                sc->link_params.phy[ELINK_EXT_PHY1].supported;
            sc->port.supported[1] =
                sc->link_params.phy[ELINK_EXT_PHY2].supported;
        }
        cfg_size = 2;
        break;
    }

    if (!(sc->port.supported[0] || sc->port.supported[1])) {
        BLOGE(sc, "Invalid phy config in NVRAM (PHY1=0x%08x PHY2=0x%08x)\n",
              SHMEM_RD(sc,
dev_info.port_hw_config[port].external_phy_config), SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config2)); return; } if (CHIP_IS_E3(sc)) sc->port.phy_addr = REG_RD(sc, MISC_REG_WC0_CTRL_PHY_ADDR); else { switch (switch_cfg) { case ELINK_SWITCH_CFG_1G: sc->port.phy_addr = REG_RD(sc, NIG_REG_SERDES0_CTRL_PHY_ADDR + port*0x10); break; case ELINK_SWITCH_CFG_10G: sc->port.phy_addr = REG_RD(sc, NIG_REG_XGXS0_CTRL_PHY_ADDR + port*0x18); break; default: BLOGE(sc, "Invalid switch config in link_config=0x%08x\n", sc->port.link_config[0]); return; } } BLOGD(sc, DBG_LOAD, "PHY addr 0x%08x\n", sc->port.phy_addr); /* mask what we support according to speed_cap_mask per configuration */ for (idx = 0; idx < cfg_size; idx++) { if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10baseT_Half; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_100baseT_Half; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_100baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_1000baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_2500baseX_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10000baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_20000baseKR2_Full; } } BLOGD(sc, DBG_LOAD, "PHY supported 0=0x%08x 1=0x%08x\n", 
sc->port.supported[0], sc->port.supported[1]); } static void bxe_link_settings_requested(struct bxe_softc *sc) { uint32_t link_config; uint32_t idx; uint32_t cfg_size = 0; sc->port.advertising[0] = 0; sc->port.advertising[1] = 0; switch (sc->link_params.num_phys) { case 1: case 2: cfg_size = 1; break; case 3: cfg_size = 2; break; } for (idx = 0; idx < cfg_size; idx++) { sc->link_params.req_duplex[idx] = DUPLEX_FULL; link_config = sc->port.link_config[idx]; switch (link_config & PORT_FEATURE_LINK_SPEED_MASK) { case PORT_FEATURE_LINK_SPEED_AUTO: if (sc->port.supported[idx] & ELINK_SUPPORTED_Autoneg) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_AUTO_NEG; sc->port.advertising[idx] |= sc->port.supported[idx]; if (sc->link_params.phy[ELINK_EXT_PHY1].type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) sc->port.advertising[idx] |= (ELINK_SUPPORTED_100baseT_Half | ELINK_SUPPORTED_100baseT_Full); } else { /* force 10G, no AN */ sc->link_params.req_line_speed[idx] = ELINK_SPEED_10000; sc->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); continue; } break; case PORT_FEATURE_LINK_SPEED_10M_FULL: if (sc->port.supported[idx] & ELINK_SUPPORTED_10baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10; sc->port.advertising[idx] |= (ADVERTISED_10baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_10M_HALF: if (sc->port.supported[idx] & ELINK_SUPPORTED_10baseT_Half) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10; sc->link_params.req_duplex[idx] = DUPLEX_HALF; sc->port.advertising[idx] |= (ADVERTISED_10baseT_Half | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_100M_FULL: if (sc->port.supported[idx] & 
ELINK_SUPPORTED_100baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_100; sc->port.advertising[idx] |= (ADVERTISED_100baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_100M_HALF: if (sc->port.supported[idx] & ELINK_SUPPORTED_100baseT_Half) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_100; sc->link_params.req_duplex[idx] = DUPLEX_HALF; sc->port.advertising[idx] |= (ADVERTISED_100baseT_Half | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_1G: if (sc->port.supported[idx] & ELINK_SUPPORTED_1000baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_1000; sc->port.advertising[idx] |= (ADVERTISED_1000baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_2_5G: if (sc->port.supported[idx] & ELINK_SUPPORTED_2500baseX_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_2500; sc->port.advertising[idx] |= (ADVERTISED_2500baseX_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_10G_CX4: if (sc->port.supported[idx] & ELINK_SUPPORTED_10000baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10000; sc->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_20G: sc->link_params.req_line_speed[idx] = 
ELINK_SPEED_20000; break; default: BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); sc->link_params.req_line_speed[idx] = ELINK_SPEED_AUTO_NEG; sc->port.advertising[idx] = sc->port.supported[idx]; break; } sc->link_params.req_flow_ctrl[idx] = (link_config & PORT_FEATURE_FLOW_CONTROL_MASK); if (sc->link_params.req_flow_ctrl[idx] == ELINK_FLOW_CTRL_AUTO) { if (!(sc->port.supported[idx] & ELINK_SUPPORTED_Autoneg)) { sc->link_params.req_flow_ctrl[idx] = ELINK_FLOW_CTRL_NONE; } else { bxe_set_requested_fc(sc); } } BLOGD(sc, DBG_LOAD, "req_line_speed=%d req_duplex=%d " "req_flow_ctrl=0x%x advertising=0x%x\n", sc->link_params.req_line_speed[idx], sc->link_params.req_duplex[idx], sc->link_params.req_flow_ctrl[idx], sc->port.advertising[idx]); } } static void bxe_get_phy_info(struct bxe_softc *sc) { uint8_t port = SC_PORT(sc); uint32_t config = sc->port.config; uint32_t eee_mode; /* shmem data already read in bxe_get_shmem_info() */ BLOGD(sc, DBG_LOAD, "lane_config=0x%08x speed_cap_mask0=0x%08x " "link_config0=0x%08x\n", sc->link_params.lane_config, sc->link_params.speed_cap_mask[0], sc->port.link_config[0]); bxe_link_settings_supported(sc, sc->link_params.switch_cfg); bxe_link_settings_requested(sc); if (sc->autogreeen == AUTO_GREEN_FORCE_ON) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } else if (sc->autogreeen == AUTO_GREEN_FORCE_OFF) { sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } else if (config & PORT_FEAT_CFG_AUTOGREEEN_ENABLED) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } /* configure link feature according to nvram value */ eee_mode = (((SHMEM_RD(sc, dev_info.port_feature_config[port].eee_power_mode)) & PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >> PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT); if (eee_mode != PORT_FEAT_CFG_EEE_POWER_MODE_DISABLED) { sc->link_params.eee_mode = 
(ELINK_EEE_MODE_ADV_LPI | ELINK_EEE_MODE_ENABLE_LPI | ELINK_EEE_MODE_OUTPUT_TIME); } else { sc->link_params.eee_mode = 0; } /* get the media type */ bxe_media_detect(sc); } static void bxe_get_params(struct bxe_softc *sc) { /* get user tunable params */ bxe_get_tunable_params(sc); /* select the RX and TX ring sizes */ sc->tx_ring_size = TX_BD_USABLE; sc->rx_ring_size = RX_BD_USABLE; /* XXX disable WoL */ sc->wol = 0; } static void bxe_set_modes_bitmap(struct bxe_softc *sc) { uint32_t flags = 0; if (CHIP_REV_IS_FPGA(sc)) { SET_FLAGS(flags, MODE_FPGA); } else if (CHIP_REV_IS_EMUL(sc)) { SET_FLAGS(flags, MODE_EMUL); } else { SET_FLAGS(flags, MODE_ASIC); } if (CHIP_IS_MODE_4_PORT(sc)) { SET_FLAGS(flags, MODE_PORT4); } else { SET_FLAGS(flags, MODE_PORT2); } if (CHIP_IS_E2(sc)) { SET_FLAGS(flags, MODE_E2); } else if (CHIP_IS_E3(sc)) { SET_FLAGS(flags, MODE_E3); if (CHIP_REV(sc) == CHIP_REV_Ax) { SET_FLAGS(flags, MODE_E3_A0); } else /*if (CHIP_REV(sc) == CHIP_REV_Bx)*/ { SET_FLAGS(flags, MODE_E3_B0 | MODE_COS3); } } if (IS_MF(sc)) { SET_FLAGS(flags, MODE_MF); switch (sc->devinfo.mf_info.mf_mode) { case MULTI_FUNCTION_SD: SET_FLAGS(flags, MODE_MF_SD); break; case MULTI_FUNCTION_SI: SET_FLAGS(flags, MODE_MF_SI); break; case MULTI_FUNCTION_AFEX: SET_FLAGS(flags, MODE_MF_AFEX); break; } } else { SET_FLAGS(flags, MODE_SF); } #if defined(__LITTLE_ENDIAN) SET_FLAGS(flags, MODE_LITTLE_ENDIAN); #else /* __BIG_ENDIAN */ SET_FLAGS(flags, MODE_BIG_ENDIAN); #endif INIT_MODE_FLAGS(sc) = flags; } static int bxe_alloc_hsi_mem(struct bxe_softc *sc) { struct bxe_fastpath *fp; bus_addr_t busaddr; int max_agg_queues; int max_segments; bus_size_t max_size; bus_size_t max_seg_size; char buf[32]; int rc; int i, j; /* XXX zero out all vars here and call bxe_alloc_hsi_mem on error */ /* allocate the parent bus DMA tag */ rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* 
restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ BUS_SPACE_MAXSIZE_32BIT, /* max map size */ BUS_SPACE_UNRESTRICTED, /* num discontinuous */ BUS_SPACE_MAXSIZE_32BIT, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &sc->parent_dma_tag); /* returned dma tag */ if (rc != 0) { BLOGE(sc, "Failed to alloc parent DMA tag (%d)!\n", rc); return (1); } /************************/ /* DEFAULT STATUS BLOCK */ /************************/ if (bxe_dma_alloc(sc, sizeof(struct host_sp_status_block), &sc->def_sb_dma, "default status block") != 0) { /* XXX */ bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->def_sb = (struct host_sp_status_block *)sc->def_sb_dma.vaddr; /***************/ /* EVENT QUEUE */ /***************/ if (bxe_dma_alloc(sc, BCM_PAGE_SIZE, &sc->eq_dma, "event queue") != 0) { /* XXX */ bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->eq = (union event_ring_elem * )sc->eq_dma.vaddr; /*************/ /* SLOW PATH */ /*************/ if (bxe_dma_alloc(sc, sizeof(struct bxe_slowpath), &sc->sp_dma, "slow path") != 0) { /* XXX */ bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->sp = (struct bxe_slowpath *)sc->sp_dma.vaddr; /*******************/ /* SLOW PATH QUEUE */ /*******************/ if (bxe_dma_alloc(sc, BCM_PAGE_SIZE, &sc->spq_dma, "slow path queue") != 0) { /* XXX */ bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->spq = (struct eth_spe *)sc->spq_dma.vaddr; /***************************/ /* FW DECOMPRESSION BUFFER */ /***************************/ if (bxe_dma_alloc(sc, FW_BUF_SIZE, &sc->gz_buf_dma, "fw decompression buffer") != 0) { /* XXX */ bxe_dma_free(sc, &sc->spq_dma); sc->spq = 
NULL; bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->gz_buf = (void *)sc->gz_buf_dma.vaddr; if ((sc->gz_strm = malloc(sizeof(*sc->gz_strm), M_DEVBUF, M_NOWAIT)) == NULL) { /* XXX */ bxe_dma_free(sc, &sc->gz_buf_dma); sc->gz_buf = NULL; bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } /*************/ /* FASTPATHS */ /*************/ /* allocate DMA memory for each fastpath structure */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->sc = sc; fp->index = i; /*******************/ /* FP STATUS BLOCK */ /*******************/ snprintf(buf, sizeof(buf), "fp %d status block", i); if (bxe_dma_alloc(sc, sizeof(union bxe_host_hc_status_block), &fp->sb_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { if (CHIP_IS_E2E3(sc)) { fp->status_block.e2_sb = (struct host_hc_status_block_e2 *)fp->sb_dma.vaddr; } else { fp->status_block.e1x_sb = (struct host_hc_status_block_e1x *)fp->sb_dma.vaddr; } } /******************/ /* FP TX BD CHAIN */ /******************/ snprintf(buf, sizeof(buf), "fp %d tx bd chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * TX_BD_NUM_PAGES), &fp->tx_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->tx_chain = (union eth_tx_bd_types *)fp->tx_dma.vaddr; } /* link together the tx bd chain pages */ for (j = 1; j <= TX_BD_NUM_PAGES; j++) { /* index into the tx bd chain array to last entry per page */ struct eth_tx_next_bd *tx_next_bd = &fp->tx_chain[TX_BD_TOTAL_PER_PAGE * j - 1].next_bd; /* point to the next page and wrap from last 
page */ busaddr = (fp->tx_dma.paddr + (BCM_PAGE_SIZE * (j % TX_BD_NUM_PAGES))); tx_next_bd->addr_hi = htole32(U64_HI(busaddr)); tx_next_bd->addr_lo = htole32(U64_LO(busaddr)); } /******************/ /* FP RX BD CHAIN */ /******************/ snprintf(buf, sizeof(buf), "fp %d rx bd chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RX_BD_NUM_PAGES), &fp->rx_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rx_chain = (struct eth_rx_bd *)fp->rx_dma.vaddr; } /* link together the rx bd chain pages */ for (j = 1; j <= RX_BD_NUM_PAGES; j++) { /* index into the rx bd chain array to last entry per page */ struct eth_rx_bd *rx_bd = &fp->rx_chain[RX_BD_TOTAL_PER_PAGE * j - 2]; /* point to the next page and wrap from last page */ busaddr = (fp->rx_dma.paddr + (BCM_PAGE_SIZE * (j % RX_BD_NUM_PAGES))); rx_bd->addr_hi = htole32(U64_HI(busaddr)); rx_bd->addr_lo = htole32(U64_LO(busaddr)); } /*******************/ /* FP RX RCQ CHAIN */ /*******************/ snprintf(buf, sizeof(buf), "fp %d rcq chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RCQ_NUM_PAGES), &fp->rcq_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rcq_chain = (union eth_rx_cqe *)fp->rcq_dma.vaddr; } /* link together the rcq chain pages */ for (j = 1; j <= RCQ_NUM_PAGES; j++) { /* index into the rcq chain array to last entry per page */ struct eth_rx_cqe_next_page *rx_cqe_next = (struct eth_rx_cqe_next_page *) &fp->rcq_chain[RCQ_TOTAL_PER_PAGE * j - 1]; /* point to the next page and wrap from last page */ busaddr = (fp->rcq_dma.paddr + (BCM_PAGE_SIZE * (j % RCQ_NUM_PAGES))); rx_cqe_next->addr_hi = htole32(U64_HI(busaddr)); rx_cqe_next->addr_lo = htole32(U64_LO(busaddr)); } /*******************/ /* FP RX SGE CHAIN */ /*******************/ snprintf(buf, sizeof(buf), "fp %d sge chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * 
RX_SGE_NUM_PAGES), &fp->rx_sge_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rx_sge_chain = (struct eth_rx_sge *)fp->rx_sge_dma.vaddr; } /* link together the sge chain pages */ for (j = 1; j <= RX_SGE_NUM_PAGES; j++) { /* index into the rcq chain array to last entry per page */ struct eth_rx_sge *rx_sge = &fp->rx_sge_chain[RX_SGE_TOTAL_PER_PAGE * j - 2]; /* point to the next page and wrap from last page */ busaddr = (fp->rx_sge_dma.paddr + (BCM_PAGE_SIZE * (j % RX_SGE_NUM_PAGES))); rx_sge->addr_hi = htole32(U64_HI(busaddr)); rx_sge->addr_lo = htole32(U64_LO(busaddr)); } /***********************/ /* FP TX MBUF DMA MAPS */ /***********************/ /* set required sizes before mapping to conserve resources */ if (sc->ifnet->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) { max_size = BXE_TSO_MAX_SIZE; max_segments = BXE_TSO_MAX_SEGMENTS; max_seg_size = BXE_TSO_MAX_SEG_SIZE; } else { max_size = (MCLBYTES * BXE_MAX_SEGMENTS); max_segments = BXE_MAX_SEGMENTS; max_seg_size = MCLBYTES; } /* create a dma tag for the tx mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ max_size, /* max map size */ max_segments, /* num discontinuous */ max_seg_size, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->tx_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d tx mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for each of the tx mbuf clusters */ for (j = 0; j < TX_BD_TOTAL; j++) { if (bus_dmamap_create(fp->tx_mbuf_tag, BUS_DMA_NOWAIT, &fp->tx_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " 
"'fp %d tx mbuf %d' (%d)\n", i, j, rc); return (1); } } /***********************/ /* FP RX MBUF DMA MAPS */ /***********************/ /* create a dma tag for the rx mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ MJUM9BYTES, /* max map size */ 1, /* num discontinuous */ MJUM9BYTES, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->rx_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d rx mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for each of the rx mbuf clusters */ for (j = 0; j < RX_BD_TOTAL; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx mbuf cluster */ if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx mbuf' (%d)\n", i, rc); return (1); } /***************************/ /* FP RX SGE MBUF DMA MAPS */ /***************************/ /* create a dma tag for the rx sge mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ BCM_PAGE_SIZE, /* max map size */ 1, /* num discontinuous */ BCM_PAGE_SIZE, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->rx_sge_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous 
fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d rx sge mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for the rx sge mbuf clusters */ for (j = 0; j < RX_SGE_TOTAL; j++) { if (bus_dmamap_create(fp->rx_sge_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_sge_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx sge mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx sge mbuf cluster */ if (bus_dmamap_create(fp->rx_sge_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_sge_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx sge mbuf' (%d)\n", i, rc); return (1); } /***************************/ /* FP RX TPA MBUF DMA MAPS */ /***************************/ /* create dma maps for the rx tpa mbuf clusters */ max_agg_queues = MAX_AGG_QS(sc); for (j = 0; j < max_agg_queues; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_tpa_info[j].bd.m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx tpa mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx tpa mbuf cluster */ if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_tpa_info_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx tpa mbuf' (%d)\n", i, rc); return (1); } bxe_init_sge_ring_bit_mask(fp); } return (0); } static void bxe_free_hsi_mem(struct bxe_softc *sc) { struct bxe_fastpath *fp; int max_agg_queues; int i, j; if (sc->parent_dma_tag == NULL) { return; /* assume nothing was allocated */ } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; /*******************/ /* FP STATUS BLOCK */ /*******************/ bxe_dma_free(sc, &fp->sb_dma); memset(&fp->status_block, 0, sizeof(fp->status_block)); /******************/ /* FP TX BD 
CHAIN */ /******************/ bxe_dma_free(sc, &fp->tx_dma); fp->tx_chain = NULL; /******************/ /* FP RX BD CHAIN */ /******************/ bxe_dma_free(sc, &fp->rx_dma); fp->rx_chain = NULL; /*******************/ /* FP RX RCQ CHAIN */ /*******************/ bxe_dma_free(sc, &fp->rcq_dma); fp->rcq_chain = NULL; /*******************/ /* FP RX SGE CHAIN */ /*******************/ bxe_dma_free(sc, &fp->rx_sge_dma); fp->rx_sge_chain = NULL; /***********************/ /* FP TX MBUF DMA MAPS */ /***********************/ if (fp->tx_mbuf_tag != NULL) { for (j = 0; j < TX_BD_TOTAL; j++) { if (fp->tx_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->tx_mbuf_tag, fp->tx_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->tx_mbuf_tag, fp->tx_mbuf_chain[j].m_map); } } bus_dma_tag_destroy(fp->tx_mbuf_tag); fp->tx_mbuf_tag = NULL; } /***********************/ /* FP RX MBUF DMA MAPS */ /***********************/ if (fp->rx_mbuf_tag != NULL) { for (j = 0; j < RX_BD_TOTAL; j++) { if (fp->rx_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_mbuf_chain[j].m_map); } } if (fp->rx_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map); } /***************************/ /* FP RX TPA MBUF DMA MAPS */ /***************************/ max_agg_queues = MAX_AGG_QS(sc); for (j = 0; j < max_agg_queues; j++) { if (fp->rx_tpa_info[j].bd.m_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info[j].bd.m_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_tpa_info[j].bd.m_map); } } if (fp->rx_tpa_info_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map); } bus_dma_tag_destroy(fp->rx_mbuf_tag); fp->rx_mbuf_tag = NULL; } /***************************/ /* FP RX SGE MBUF DMA MAPS */ /***************************/ if 
(fp->rx_sge_mbuf_tag != NULL) { for (j = 0; j < RX_SGE_TOTAL; j++) { if (fp->rx_sge_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[j].m_map); } } if (fp->rx_sge_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map); bus_dmamap_destroy(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map); } bus_dma_tag_destroy(fp->rx_sge_mbuf_tag); fp->rx_sge_mbuf_tag = NULL; } } /***************************/ /* FW DECOMPRESSION BUFFER */ /***************************/ bxe_dma_free(sc, &sc->gz_buf_dma); sc->gz_buf = NULL; free(sc->gz_strm, M_DEVBUF); sc->gz_strm = NULL; /*******************/ /* SLOW PATH QUEUE */ /*******************/ bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; /*************/ /* SLOW PATH */ /*************/ bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; /***************/ /* EVENT QUEUE */ /***************/ bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; /************************/ /* DEFAULT STATUS BLOCK */ /************************/ bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); sc->parent_dma_tag = NULL; } /* * Previous driver DMAE transaction may have occurred when pre-boot stage * ended and boot began. This would invalidate the addresses of the * transaction, resulting in was-error bit set in the PCI causing all * hw-to-host PCIe transactions to timeout. 
If this happened we want to clear * the interrupt which detected this from the pglueb and the was-done bit */ static void bxe_prev_interrupted_dmae(struct bxe_softc *sc) { uint32_t val; if (!CHIP_IS_E1x(sc)) { val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) { BLOGD(sc, DBG_LOAD, "Clearing 'was-error' bit that was set in pglueb"); REG_WR(sc, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, 1 << SC_FUNC(sc)); } } } static int bxe_prev_mcp_done(struct bxe_softc *sc) { uint32_t rc = bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET); if (!rc) { BLOGE(sc, "MCP response failure, aborting\n"); return (-1); } return (0); } static struct bxe_prev_list_node * bxe_prev_path_get_entry(struct bxe_softc *sc) { struct bxe_prev_list_node *tmp; LIST_FOREACH(tmp, &bxe_prev_list, node) { if ((sc->pcie_bus == tmp->bus) && (sc->pcie_device == tmp->slot) && (SC_PATH(sc) == tmp->path)) { return (tmp); } } return (NULL); } static uint8_t bxe_prev_is_path_marked(struct bxe_softc *sc) { struct bxe_prev_list_node *tmp; int rc = FALSE; mtx_lock(&bxe_prev_mtx); tmp = bxe_prev_path_get_entry(sc); if (tmp) { if (tmp->aer) { BLOGD(sc, DBG_LOAD, "Path %d/%d/%d was marked by AER\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } else { rc = TRUE; BLOGD(sc, DBG_LOAD, "Path %d/%d/%d was already cleaned from previous drivers\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } } mtx_unlock(&bxe_prev_mtx); return (rc); } static int bxe_prev_mark_path(struct bxe_softc *sc, uint8_t after_undi) { struct bxe_prev_list_node *tmp; mtx_lock(&bxe_prev_mtx); /* Check whether the entry for this path already exists */ tmp = bxe_prev_path_get_entry(sc); if (tmp) { if (!tmp->aer) { BLOGD(sc, DBG_LOAD, "Re-marking AER in path %d/%d/%d\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } else { BLOGD(sc, DBG_LOAD, "Removing AER indication from path %d/%d/%d\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); tmp->aer = 0; } mtx_unlock(&bxe_prev_mtx); 
return (0); } mtx_unlock(&bxe_prev_mtx); /* Create an entry for this path and add it */ tmp = malloc(sizeof(struct bxe_prev_list_node), M_DEVBUF, (M_NOWAIT | M_ZERO)); if (!tmp) { BLOGE(sc, "Failed to allocate 'bxe_prev_list_node'\n"); return (-1); } tmp->bus = sc->pcie_bus; tmp->slot = sc->pcie_device; tmp->path = SC_PATH(sc); tmp->aer = 0; tmp->undi = after_undi ? (1 << SC_PORT(sc)) : 0; mtx_lock(&bxe_prev_mtx); BLOGD(sc, DBG_LOAD, "Marked path %d/%d/%d - finished previous unload\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); LIST_INSERT_HEAD(&bxe_prev_list, tmp, node); mtx_unlock(&bxe_prev_mtx); return (0); } static int bxe_do_flr(struct bxe_softc *sc) { int i; /* only E2 and onwards support FLR */ if (CHIP_IS_E1x(sc)) { BLOGD(sc, DBG_LOAD, "FLR not supported in E1/E1H\n"); return (-1); } /* only bootcode REQ_BC_VER_4_INITIATE_FLR and onwards support flr */ if (sc->devinfo.bc_ver < REQ_BC_VER_4_INITIATE_FLR) { BLOGD(sc, DBG_LOAD, "FLR not supported by BC_VER: 0x%08x\n", sc->devinfo.bc_ver); return (-1); } /* Wait for Transaction Pending bit clean */ for (i = 0; i < 4; i++) { if (i) { DELAY(((1 << (i - 1)) * 100) * 1000); } if (!bxe_is_pcie_pending(sc)) { goto clear; } } BLOGE(sc, "PCIE transaction is not cleared, " "proceeding with reset anyway\n"); clear: BLOGD(sc, DBG_LOAD, "Initiating FLR\n"); bxe_fw_command(sc, DRV_MSG_CODE_INITIATE_FLR, 0); return (0); } struct bxe_mac_vals { uint32_t xmac_addr; uint32_t xmac_val; uint32_t emac_addr; uint32_t emac_val; uint32_t umac_addr; uint32_t umac_val; uint32_t bmac_addr; uint32_t bmac_val[2]; }; static void bxe_prev_unload_close_mac(struct bxe_softc *sc, struct bxe_mac_vals *vals) { uint32_t val, base_addr, offset, mask, reset_reg; uint8_t mac_stopped = FALSE; uint8_t port = SC_PORT(sc); uint32_t wb_data[2]; /* reset addresses as they also mark which values were changed */ vals->bmac_addr = 0; vals->umac_addr = 0; vals->xmac_addr = 0; vals->emac_addr = 0; reset_reg = REG_RD(sc, MISC_REG_RESET_REG_2); if 
(!CHIP_IS_E3(sc)) { val = REG_RD(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4); mask = MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port; if ((mask & reset_reg) && val) { BLOGD(sc, DBG_LOAD, "Disable BMAC Rx\n"); base_addr = SC_PORT(sc) ? NIG_REG_INGRESS_BMAC1_MEM : NIG_REG_INGRESS_BMAC0_MEM; offset = CHIP_IS_E2(sc) ? BIGMAC2_REGISTER_BMAC_CONTROL : BIGMAC_REGISTER_BMAC_CONTROL; /* * use rd/wr since we cannot use dmae. This is safe * since MCP won't access the bus due to the request * to unload, and no function on the path can be * loaded at this time. */ wb_data[0] = REG_RD(sc, base_addr + offset); wb_data[1] = REG_RD(sc, base_addr + offset + 0x4); vals->bmac_addr = base_addr + offset; vals->bmac_val[0] = wb_data[0]; vals->bmac_val[1] = wb_data[1]; wb_data[0] &= ~ELINK_BMAC_CONTROL_RX_ENABLE; REG_WR(sc, vals->bmac_addr, wb_data[0]); REG_WR(sc, vals->bmac_addr + 0x4, wb_data[1]); } BLOGD(sc, DBG_LOAD, "Disable EMAC Rx\n"); vals->emac_addr = NIG_REG_NIG_EMAC0_EN + SC_PORT(sc)*4; vals->emac_val = REG_RD(sc, vals->emac_addr); REG_WR(sc, vals->emac_addr, 0); mac_stopped = TRUE; } else { if (reset_reg & MISC_REGISTERS_RESET_REG_2_XMAC) { BLOGD(sc, DBG_LOAD, "Disable XMAC Rx\n"); base_addr = SC_PORT(sc) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; val = REG_RD(sc, base_addr + XMAC_REG_PFC_CTRL_HI); REG_WR(sc, base_addr + XMAC_REG_PFC_CTRL_HI, val & ~(1 << 1)); REG_WR(sc, base_addr + XMAC_REG_PFC_CTRL_HI, val | (1 << 1)); vals->xmac_addr = base_addr + XMAC_REG_CTRL; vals->xmac_val = REG_RD(sc, vals->xmac_addr); REG_WR(sc, vals->xmac_addr, 0); mac_stopped = TRUE; } mask = MISC_REGISTERS_RESET_REG_2_UMAC0 << port; if (mask & reset_reg) { BLOGD(sc, DBG_LOAD, "Disable UMAC Rx\n"); base_addr = SC_PORT(sc) ? 
GRCBASE_UMAC1 : GRCBASE_UMAC0; vals->umac_addr = base_addr + UMAC_REG_COMMAND_CONFIG; vals->umac_val = REG_RD(sc, vals->umac_addr); REG_WR(sc, vals->umac_addr, 0); mac_stopped = TRUE; } } if (mac_stopped) { DELAY(20000); } } #define BXE_PREV_UNDI_PROD_ADDR(p) (BAR_TSTRORM_INTMEM + 0x1508 + ((p) << 4)) #define BXE_PREV_UNDI_RCQ(val) ((val) & 0xffff) #define BXE_PREV_UNDI_BD(val) ((val) >> 16 & 0xffff) #define BXE_PREV_UNDI_PROD(rcq, bd) ((bd) << 16 | (rcq)) static void bxe_prev_unload_undi_inc(struct bxe_softc *sc, uint8_t port, uint8_t inc) { uint16_t rcq, bd; uint32_t tmp_reg = REG_RD(sc, BXE_PREV_UNDI_PROD_ADDR(port)); rcq = BXE_PREV_UNDI_RCQ(tmp_reg) + inc; bd = BXE_PREV_UNDI_BD(tmp_reg) + inc; tmp_reg = BXE_PREV_UNDI_PROD(rcq, bd); REG_WR(sc, BXE_PREV_UNDI_PROD_ADDR(port), tmp_reg); BLOGD(sc, DBG_LOAD, "UNDI producer [%d] rings bd -> 0x%04x, rcq -> 0x%04x\n", port, bd, rcq); } static int bxe_prev_unload_common(struct bxe_softc *sc) { uint32_t reset_reg, tmp_reg = 0, rc; uint8_t prev_undi = FALSE; struct bxe_mac_vals mac_vals; uint32_t timer_count = 1000; uint32_t prev_brb; /* * It is possible a previous function received 'common' answer, * but hasn't loaded yet, therefore creating a scenario of * multiple functions receiving 'common' on the same path. */ BLOGD(sc, DBG_LOAD, "Common unload Flow\n"); memset(&mac_vals, 0, sizeof(mac_vals)); if (bxe_prev_is_path_marked(sc)) { return (bxe_prev_mcp_done(sc)); } reset_reg = REG_RD(sc, MISC_REG_RESET_REG_1); /* Reset should be performed after BRB is emptied */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_BRB1) { /* Close the MAC Rx to prevent BRB from filling up */ bxe_prev_unload_close_mac(sc, &mac_vals); /* close LLH filters towards the BRB */ elink_set_rx_filter(&sc->link_params, 0); /* * Check if the UNDI driver was previously loaded. 
* UNDI driver initializes CID offset for normal bell to 0x7 */
        if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_DORQ) {
            tmp_reg = REG_RD(sc, DORQ_REG_NORM_CID_OFST);
            if (tmp_reg == 0x7) {
                BLOGD(sc, DBG_LOAD, "UNDI previously loaded\n");
                prev_undi = TRUE;
                /* clear the UNDI indication */
                REG_WR(sc, DORQ_REG_NORM_CID_OFST, 0);
                /* clear possible idle check errors */
                REG_RD(sc, NIG_REG_NIG_INT_STS_CLR_0);
            }
        }

        /* wait until BRB is empty */
        tmp_reg = REG_RD(sc, BRB1_REG_NUM_OF_FULL_BLOCKS);
        while (timer_count) {
            prev_brb = tmp_reg;

            tmp_reg = REG_RD(sc, BRB1_REG_NUM_OF_FULL_BLOCKS);
            if (!tmp_reg) {
                break;
            }

            BLOGD(sc, DBG_LOAD, "BRB still has 0x%08x\n", tmp_reg);

            /* reset timer as long as BRB actually gets emptied */
            if (prev_brb > tmp_reg) {
                timer_count = 1000;
            } else {
                timer_count--;
            }

            /* If UNDI resides in memory, manually increment it */
            if (prev_undi) {
                bxe_prev_unload_undi_inc(sc, SC_PORT(sc), 1);
            }

            DELAY(10);
        }

        /* the loop only reaches zero when the BRB never drained */
        if (!timer_count) {
            BLOGE(sc, "Failed to empty BRB\n");
        }
    }

    /* No packets are in the pipeline, path is ready for reset */
    bxe_reset_common(sc);

    /* write back every MAC register whose address was recorded (non-zero) */
    if (mac_vals.xmac_addr) {
        REG_WR(sc, mac_vals.xmac_addr, mac_vals.xmac_val);
    }
    if (mac_vals.umac_addr) {
        REG_WR(sc, mac_vals.umac_addr, mac_vals.umac_val);
    }
    if (mac_vals.emac_addr) {
        REG_WR(sc, mac_vals.emac_addr, mac_vals.emac_val);
    }
    if (mac_vals.bmac_addr) {
        REG_WR(sc, mac_vals.bmac_addr, mac_vals.bmac_val[0]);
        REG_WR(sc, mac_vals.bmac_addr + 4, mac_vals.bmac_val[1]);
    }

    /* UNLOAD_DONE is sent to the MCP whether or not marking succeeded */
    rc = bxe_prev_mark_path(sc, prev_undi);
    if (rc) {
        bxe_prev_mcp_done(sc);
        return (rc);
    }

    return (bxe_prev_mcp_done(sc));
}

/*
 * Previous-driver cleanup for a non-common reply from the MCP.
 *
 * Returns the result of bxe_prev_mcp_done() when the path is already
 * marked, 0 when the FLR was issued, and otherwise either
 * BXE_PREV_WAIT_NEEDED (UNLOAD_DONE was accepted, the caller should retry)
 * or the non-zero failure from bxe_prev_mcp_done().
 */
static int
bxe_prev_unload_uncommon(struct bxe_softc *sc)
{
    int rc;

    BLOGD(sc, DBG_LOAD, "Uncommon unload Flow\n");

    /* Test if previous unload process was already finished for this path */
    if (bxe_prev_is_path_marked(sc)) {
        return (bxe_prev_mcp_done(sc));
    }

    BLOGD(sc, DBG_LOAD, "Path is unmarked\n");

    /*
     * If function has FLR capabilities, and existing FW version matches
     * the one required, then FLR will be sufficient to clean any residue
     * left by previous driver
     */
    rc = bxe_nic_load_analyze_req(sc, FW_MSG_CODE_DRV_LOAD_FUNCTION);
    if (!rc) {
        /* fw version is good */
        BLOGD(sc, DBG_LOAD, "FW version matches our own, attempting FLR\n");
        rc = bxe_do_flr(sc);
    }

    if (!rc) {
        /* FLR was performed */
        BLOGD(sc, DBG_LOAD, "FLR successful\n");
        return (0);
    }

    BLOGD(sc, DBG_LOAD, "Could not FLR\n");

    /* Close the MCP request, return failure */
    rc = bxe_prev_mcp_done(sc);
    if (!rc) {
        rc = BXE_PREV_WAIT_NEEDED;
    }

    return (rc);
}

/*
 * Clean up whatever a previously loaded driver left behind: clear a
 * latched was-error indication, release stale HW/NVRAM/ALR locks, then
 * issue an unload request to the MCP and run the common or uncommon
 * cleanup flow, retrying up to 10 times while the uncommon flow reports
 * BXE_PREV_WAIT_NEEDED.
 */
static int
bxe_prev_unload(struct bxe_softc *sc)
{
    int time_counter = 10;
    uint32_t fw, hw_lock_reg, hw_lock_val;
    uint32_t rc = 0;

    /*
     * Clear HW from errors which may have resulted from an interrupted
     * DMAE transaction.
     */
    bxe_prev_interrupted_dmae(sc);

    /* Release previously held locks */
    /* functions 0-5 and 6-7 use different driver-control register banks */
    hw_lock_reg =
        (SC_FUNC(sc) <= 5) ?
            (MISC_REG_DRIVER_CONTROL_1 + SC_FUNC(sc) * 8) :
            (MISC_REG_DRIVER_CONTROL_7 + (SC_FUNC(sc) - 6) * 8);

    hw_lock_val = (REG_RD(sc, hw_lock_reg));
    if (hw_lock_val) {
        if (hw_lock_val & HW_LOCK_RESOURCE_NVRAM) {
            BLOGD(sc, DBG_LOAD, "Releasing previously held NVRAM lock\n");
            REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB,
                   (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << SC_PORT(sc)));
        }
        BLOGD(sc, DBG_LOAD, "Releasing previously held HW lock\n");
        REG_WR(sc, hw_lock_reg, 0xffffffff);
    } else {
        BLOGD(sc, DBG_LOAD, "No need to release HW/NVRAM locks\n");
    }

    if (MCPR_ACCESS_LOCK_LOCK & REG_RD(sc, MCP_REG_MCPR_ACCESS_LOCK)) {
        BLOGD(sc, DBG_LOAD, "Releasing previously held ALR\n");
        REG_WR(sc, MCP_REG_MCPR_ACCESS_LOCK, 0);
    }

    do {
        /* Lock MCP using an unload request */
        fw = bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS, 0);
        if (!fw) {
            BLOGE(sc, "MCP response failure, aborting\n");
            rc = -1;
            break;
        }

        if (fw == FW_MSG_CODE_DRV_UNLOAD_COMMON) {
            rc = bxe_prev_unload_common(sc);
            break;
        }

        /* non-common reply from MCP might require looping */
        rc = bxe_prev_unload_uncommon(sc);
        if (rc != BXE_PREV_WAIT_NEEDED) {
            break;
        }

        DELAY(20000);
    } while (--time_counter);

    /* any non-zero rc, including an exhausted retry budget, is a failure */
    if (!time_counter || rc) {
        BLOGE(sc, "Failed to unload
previous driver!\n"); rc = -1; } return (rc); } void bxe_dcbx_set_state(struct bxe_softc *sc, uint8_t dcb_on, uint32_t dcbx_enabled) { if (!CHIP_IS_E1x(sc)) { sc->dcb_state = dcb_on; sc->dcbx_enabled = dcbx_enabled; } else { sc->dcb_state = FALSE; sc->dcbx_enabled = BXE_DCBX_ENABLED_INVALID; } BLOGD(sc, DBG_LOAD, "DCB state [%s:%s]\n", dcb_on ? "ON" : "OFF", (dcbx_enabled == BXE_DCBX_ENABLED_OFF) ? "user-mode" : (dcbx_enabled == BXE_DCBX_ENABLED_ON_NEG_OFF) ? "on-chip static" : (dcbx_enabled == BXE_DCBX_ENABLED_ON_NEG_ON) ? "on-chip with negotiation" : "invalid"); } /* must be called after sriov-enable */ static int bxe_set_qm_cid_count(struct bxe_softc *sc) { int cid_count = BXE_L2_MAX_CID(sc); if (IS_SRIOV(sc)) { cid_count += BXE_VF_CIDS; } if (CNIC_SUPPORT(sc)) { cid_count += CNIC_CID_MAX; } return (roundup(cid_count, QM_CID_ROUND)); } static void bxe_init_multi_cos(struct bxe_softc *sc) { int pri, cos; uint32_t pri_map = 0; /* XXX change to user config */ for (pri = 0; pri < BXE_MAX_PRIORITY; pri++) { cos = ((pri_map & (0xf << (pri * 4))) >> (pri * 4)); if (cos < sc->max_cos) { sc->prio_to_cos[pri] = cos; } else { BLOGW(sc, "Invalid COS %d for priority %d " "(max COS is %d), setting to 0\n", cos, pri, (sc->max_cos - 1)); sc->prio_to_cos[pri] = 0; } } } static int bxe_sysctl_state(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc; int error, result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) { return (error); } if (result == 1) { sc = (struct bxe_softc *)arg1; BLOGI(sc, "... 
dumping driver state ...\n"); /* XXX */ } return (error); } static int bxe_sysctl_eth_stat(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t *eth_stats = (uint32_t *)&sc->eth_stats; uint32_t *offset; uint64_t value = 0; int index = (int)arg2; if (index >= BXE_NUM_ETH_STATS) { BLOGE(sc, "bxe_eth_stats index out of range (%d)\n", index); return (-1); } offset = (eth_stats + bxe_eth_stats_arr[index].offset); switch (bxe_eth_stats_arr[index].size) { case 4: value = (uint64_t)*offset; break; case 8: value = HILO_U64(*offset, *(offset + 1)); break; default: BLOGE(sc, "Invalid bxe_eth_stats size (index=%d size=%d)\n", index, bxe_eth_stats_arr[index].size); return (-1); } return (sysctl_handle_64(oidp, &value, 0, req)); } static int bxe_sysctl_eth_q_stat(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t *eth_stats; uint32_t *offset; uint64_t value = 0; uint32_t q_stat = (uint32_t)arg2; uint32_t fp_index = ((q_stat >> 16) & 0xffff); uint32_t index = (q_stat & 0xffff); eth_stats = (uint32_t *)&sc->fp[fp_index].eth_q_stats; if (index >= BXE_NUM_ETH_Q_STATS) { BLOGE(sc, "bxe_eth_q_stats index out of range (%d)\n", index); return (-1); } offset = (eth_stats + bxe_eth_q_stats_arr[index].offset); switch (bxe_eth_q_stats_arr[index].size) { case 4: value = (uint64_t)*offset; break; case 8: value = HILO_U64(*offset, *(offset + 1)); break; default: BLOGE(sc, "Invalid bxe_eth_q_stats size (index=%d size=%d)\n", index, bxe_eth_q_stats_arr[index].size); return (-1); } return (sysctl_handle_64(oidp, &value, 0, req)); } static void bxe_add_sysctls(struct bxe_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *queue_top, *queue; struct sysctl_oid_list *queue_top_children, *queue_children; char queue_num_buf[32]; uint32_t q_stat; int i, j; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 
"version", CTLFLAG_RD, BXE_DRIVER_VERSION, 0, "version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bc_version", - CTLFLAG_RD, &sc->devinfo.bc_ver_str, 0, + CTLFLAG_RD, sc->devinfo.bc_ver_str, 0, "bootcode version"); snprintf(sc->fw_ver_str, sizeof(sc->fw_ver_str), "%d.%d.%d.%d", BCM_5710_FW_MAJOR_VERSION, BCM_5710_FW_MINOR_VERSION, BCM_5710_FW_REVISION_VERSION, BCM_5710_FW_ENGINEERING_VERSION); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "fw_version", - CTLFLAG_RD, &sc->fw_ver_str, 0, + CTLFLAG_RD, sc->fw_ver_str, 0, "firmware version"); snprintf(sc->mf_mode_str, sizeof(sc->mf_mode_str), "%s", ((sc->devinfo.mf_info.mf_mode == SINGLE_FUNCTION) ? "Single" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SD) ? "MF-SD" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SI) ? "MF-SI" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_AFEX) ? "MF-AFEX" : "Unknown")); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mf_mode", - CTLFLAG_RD, &sc->mf_mode_str, 0, + CTLFLAG_RD, sc->mf_mode_str, 0, "multifunction mode"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "mf_vnics", CTLFLAG_RD, &sc->devinfo.mf_info.vnics_per_port, 0, "multifunction vnics per port"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mac_addr", - CTLFLAG_RD, &sc->mac_addr_str, 0, + CTLFLAG_RD, sc->mac_addr_str, 0, "mac address"); snprintf(sc->pci_link_str, sizeof(sc->pci_link_str), "%s x%d", ((sc->devinfo.pcie_link_speed == 1) ? "2.5GT/s" : (sc->devinfo.pcie_link_speed == 2) ? "5.0GT/s" : (sc->devinfo.pcie_link_speed == 4) ? 
"8.0GT/s" : "???GT/s"), sc->devinfo.pcie_link_width); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pci_link", - CTLFLAG_RD, &sc->pci_link_str, 0, + CTLFLAG_RD, sc->pci_link_str, 0, "pci link status"); sc->debug = bxe_debug; - SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "debug", - CTLFLAG_RW, &sc->debug, 0, + SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "debug", + CTLFLAG_RW, &sc->debug, "debug logging mode"); sc->rx_budget = bxe_rx_budget; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_budget", CTLFLAG_RW, &sc->rx_budget, 0, "rx processing budget"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "state", CTLTYPE_UINT | CTLFLAG_RW, sc, 0, bxe_sysctl_state, "IU", "dump driver state"); for (i = 0; i < BXE_NUM_ETH_STATS; i++) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, bxe_eth_stats_arr[i].string, CTLTYPE_U64 | CTLFLAG_RD, sc, i, bxe_sysctl_eth_stat, "LU", bxe_eth_stats_arr[i].string); } /* add a new parent node for all queues "dev.bxe.#.queue" */ queue_top = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "queue", CTLFLAG_RD, NULL, "queue"); queue_top_children = SYSCTL_CHILDREN(queue_top); for (i = 0; i < sc->num_queues; i++) { /* add a new parent node for a single queue "dev.bxe.#.queue.#" */ snprintf(queue_num_buf, sizeof(queue_num_buf), "%d", i); queue = SYSCTL_ADD_NODE(ctx, queue_top_children, OID_AUTO, queue_num_buf, CTLFLAG_RD, NULL, "single queue"); queue_children = SYSCTL_CHILDREN(queue); for (j = 0; j < BXE_NUM_ETH_Q_STATS; j++) { q_stat = ((i << 16) | j); SYSCTL_ADD_PROC(ctx, queue_children, OID_AUTO, bxe_eth_q_stats_arr[j].string, CTLTYPE_U64 | CTLFLAG_RD, sc, q_stat, bxe_sysctl_eth_q_stat, "LU", bxe_eth_q_stats_arr[j].string); } } } /* * Device attach function. * * Allocates device resources, performs secondary chip identification, and * initializes driver instance variables. This function is called from driver * load after a successful probe. 
* * Returns: * 0 = Success, >0 = Failure */ static int bxe_attach(device_t dev) { struct bxe_softc *sc; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting attach...\n"); sc->state = BXE_STATE_CLOSED; sc->dev = dev; sc->unit = device_get_unit(dev); BLOGD(sc, DBG_LOAD, "softc = %p\n", sc); sc->pcie_bus = pci_get_bus(dev); sc->pcie_device = pci_get_slot(dev); sc->pcie_func = pci_get_function(dev); /* enable bus master capability */ pci_enable_busmaster(dev); /* get the BARs */ if (bxe_allocate_bars(sc) != 0) { return (ENXIO); } /* initialize the mutexes */ bxe_init_mutexes(sc); /* prepare the periodic callout */ callout_init(&sc->periodic_callout, 0); /* prepare the chip taskqueue */ sc->chip_tq_flags = CHIP_TQ_NONE; snprintf(sc->chip_tq_name, sizeof(sc->chip_tq_name), "bxe%d_chip_tq", sc->unit); TASK_INIT(&sc->chip_tq_task, 0, bxe_handle_chip_tq, sc); sc->chip_tq = taskqueue_create(sc->chip_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->chip_tq); taskqueue_start_threads(&sc->chip_tq, 1, PWAIT, /* lower priority */ "%s", sc->chip_tq_name); /* get device info and set params */ if (bxe_get_device_info(sc) != 0) { BLOGE(sc, "getting device info\n"); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* get final misc params */ bxe_get_params(sc); /* set the default MTU (changed via ifconfig) */ sc->mtu = ETHERMTU; bxe_set_modes_bitmap(sc); /* XXX * If in AFEX mode and the function is configured for FCoE * then bail... no L2 allowed. 
*/ /* get phy settings from shmem and 'and' against admin settings */ bxe_get_phy_info(sc); /* initialize the FreeBSD ifnet interface */ if (bxe_init_ifnet(sc) != 0) { bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate device interrupts */ if (bxe_interrupt_alloc(sc) != 0) { if (sc->ifnet != NULL) { ether_ifdetach(sc->ifnet); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate ilt */ if (bxe_alloc_ilt_mem(sc) != 0) { bxe_interrupt_free(sc); if (sc->ifnet != NULL) { ether_ifdetach(sc->ifnet); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate the host hardware/software hsi structures */ if (bxe_alloc_hsi_mem(sc) != 0) { bxe_free_ilt_mem(sc); bxe_interrupt_free(sc); if (sc->ifnet != NULL) { ether_ifdetach(sc->ifnet); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* need to reset chip if UNDI was active */ if (IS_PF(sc) && !BXE_NOMCP(sc)) { /* init fw_seq */ sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); BLOGD(sc, DBG_LOAD, "prev unload fw_seq 0x%04x\n", sc->fw_seq); bxe_prev_unload(sc); } #if 1 /* XXX */ bxe_dcbx_set_state(sc, FALSE, BXE_DCBX_ENABLED_OFF); #else if (SHMEM2_HAS(sc, dcbx_lldp_params_offset) && SHMEM2_HAS(sc, dcbx_lldp_dcbx_stat_offset) && SHMEM2_RD(sc, dcbx_lldp_params_offset) && SHMEM2_RD(sc, dcbx_lldp_dcbx_stat_offset)) { bxe_dcbx_set_state(sc, TRUE, BXE_DCBX_ENABLED_ON_NEG_ON); bxe_dcbx_init_params(sc); } else { bxe_dcbx_set_state(sc, FALSE, BXE_DCBX_ENABLED_OFF); } #endif /* calculate qm_cid_count */ sc->qm_cid_count = bxe_set_qm_cid_count(sc); BLOGD(sc, DBG_LOAD, "qm_cid_count=%d\n", sc->qm_cid_count); sc->max_cos = 1; bxe_init_multi_cos(sc); bxe_add_sysctls(sc); return (0); } /* 
* Device detach function.
 *
 * Refuses to detach while VLANs are configured on the interface. Otherwise
 * stops the periodic callout and the chip taskqueue, unloads the NIC if it
 * is not already closed, and then releases the interface, host memory,
 * interrupts, mutexes and BARs.
 *
 * Returns:
 *   0 = Success, >0 = Failure
 */
static int
bxe_detach(device_t dev)
{
    struct bxe_softc *sc = device_get_softc(dev);
    struct ifnet *ifp;

    BLOGD(sc, DBG_LOAD, "Starting detach...\n");

    ifp = sc->ifnet;
    if (ifp != NULL) {
        if (ifp->if_vlantrunk != NULL) {
            BLOGE(sc, "Cannot detach while VLANs are in use.\n");
            return(EBUSY);
        }
    }

    /* no more periodic work from here on */
    bxe_periodic_stop(sc);

    /* quiesce and destroy the chip taskqueue */
    atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_NONE);
    if (sc->chip_tq) {
        taskqueue_drain(sc->chip_tq, &sc->chip_tq_task);
        taskqueue_free(sc->chip_tq);
        sc->chip_tq = NULL;
    }

    /* a controller that was opened must be stopped and reset first */
    if (sc->state != BXE_STATE_CLOSED) {
        BXE_CORE_LOCK(sc);
        bxe_nic_unload(sc, UNLOAD_CLOSE, TRUE);
        BXE_CORE_UNLOCK(sc);
    }

    /* detach from the network stack */
    if (ifp != NULL) {
        ether_ifdetach(ifp);
    }
    ifmedia_removeall(&sc->ifmedia);

    /* XXX do the following based on driver state... */

    /* host hardware/software hsi structures */
    bxe_free_hsi_mem(sc);

    /* ilt memory */
    bxe_free_ilt_mem(sc);

    /* interrupts */
    bxe_interrupt_free(sc);

    /* mutexes */
    bxe_release_mutexes(sc);

    /* PCIe BAR mapped memory */
    bxe_deallocate_bars(sc);

    /* finally hand the ifnet back to the stack */
    if (sc->ifnet != NULL) {
        if_free(sc->ifnet);
    }

    pci_disable_busmaster(dev);

    return (0);
}

/*
 * Device shutdown function.
 *
 * Stops and resets the controller.
* * Returns: * Nothing */ static int bxe_shutdown(device_t dev) { struct bxe_softc *sc; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting shutdown...\n"); /* stop the periodic callout */ bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_nic_unload(sc, UNLOAD_NORMAL, FALSE); BXE_CORE_UNLOCK(sc); return (0); } void bxe_igu_ack_sb(struct bxe_softc *sc, uint8_t igu_sb_id, uint8_t segment, uint16_t index, uint8_t op, uint8_t update) { uint32_t igu_addr = sc->igu_base_addr; igu_addr += (IGU_CMD_INT_ACK_BASE + igu_sb_id)*8; bxe_igu_ack_sb_gen(sc, igu_sb_id, segment, index, op, update, igu_addr); } static void bxe_igu_clear_sb_gen(struct bxe_softc *sc, uint8_t func, uint8_t idu_sb_id, uint8_t is_pf) { uint32_t data, ctl, cnt = 100; uint32_t igu_addr_data = IGU_REG_COMMAND_REG_32LSB_DATA; uint32_t igu_addr_ctl = IGU_REG_COMMAND_REG_CTRL; uint32_t igu_addr_ack = IGU_REG_CSTORM_TYPE_0_SB_CLEANUP + (idu_sb_id/32)*4; uint32_t sb_bit = 1 << (idu_sb_id%32); uint32_t func_encode = func | (is_pf ? 1 : 0) << IGU_FID_ENCODE_IS_PF_SHIFT; uint32_t addr_encode = IGU_CMD_E2_PROD_UPD_BASE + idu_sb_id; /* Not supported in BC mode */ if (CHIP_INT_MODE_IS_BC(sc)) { return; } data = ((IGU_USE_REGISTER_cstorm_type_0_sb_cleanup << IGU_REGULAR_CLEANUP_TYPE_SHIFT) | IGU_REGULAR_CLEANUP_SET | IGU_REGULAR_BCLEANUP); ctl = ((addr_encode << IGU_CTRL_REG_ADDRESS_SHIFT) | (func_encode << IGU_CTRL_REG_FID_SHIFT) | (IGU_CTRL_CMD_TYPE_WR << IGU_CTRL_REG_TYPE_SHIFT)); BLOGD(sc, DBG_LOAD, "write 0x%08x to IGU(via GRC) addr 0x%x\n", data, igu_addr_data); REG_WR(sc, igu_addr_data, data); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); BLOGD(sc, DBG_LOAD, "write 0x%08x to IGU(via GRC) addr 0x%x\n", ctl, igu_addr_ctl); REG_WR(sc, igu_addr_ctl, ctl); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); /* wait for clean up to finish */ while (!(REG_RD(sc, igu_addr_ack) & sb_bit) && --cnt) { DELAY(20000); } if 
(!(REG_RD(sc, igu_addr_ack) & sb_bit)) { BLOGD(sc, DBG_LOAD, "Unable to finish IGU cleanup: " "idu_sb_id %d offset %d bit %d (cnt %d)\n", idu_sb_id, idu_sb_id/32, idu_sb_id%32, cnt); } } static void bxe_igu_clear_sb(struct bxe_softc *sc, uint8_t idu_sb_id) { bxe_igu_clear_sb_gen(sc, SC_FUNC(sc), idu_sb_id, TRUE /*PF*/); } /*******************/ /* ECORE CALLBACKS */ /*******************/ static void bxe_reset_common(struct bxe_softc *sc) { uint32_t val = 0x1400; /* reset_common */ REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR), 0xd3ffff7f); if (CHIP_IS_E3(sc)) { val |= MISC_REGISTERS_RESET_REG_2_MSTAT0; val |= MISC_REGISTERS_RESET_REG_2_MSTAT1; } REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR), val); } static void bxe_common_init_phy(struct bxe_softc *sc) { uint32_t shmem_base[2]; uint32_t shmem2_base[2]; /* Avoid common init in case MFW supports LFA */ if (SHMEM2_RD(sc, size) > (uint32_t)offsetof(struct shmem2_region, lfa_host_addr[SC_PORT(sc)])) { return; } shmem_base[0] = sc->devinfo.shmem_base; shmem2_base[0] = sc->devinfo.shmem2_base; if (!CHIP_IS_E1x(sc)) { shmem_base[1] = SHMEM2_RD(sc, other_shmem_base_addr); shmem2_base[1] = SHMEM2_RD(sc, other_shmem2_base_addr); } BXE_PHY_LOCK(sc); elink_common_init_phy(sc, shmem_base, shmem2_base, sc->devinfo.chip_id, 0); BXE_PHY_UNLOCK(sc); } static void bxe_pf_disable(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); val &= ~IGU_PF_CONF_FUNC_EN; REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); REG_WR(sc, CFC_REG_WEAK_ENABLE_PF, 0); } static void bxe_init_pxp(struct bxe_softc *sc) { uint16_t devctl; int r_order, w_order; devctl = bxe_pcie_capability_read(sc, PCIR_EXPRESS_DEVICE_CTL, 2); BLOGD(sc, DBG_LOAD, "read 0x%08x from devctl\n", devctl); w_order = ((devctl & PCIM_EXP_CTL_MAX_PAYLOAD) >> 5); if (sc->mrrs == -1) { r_order = ((devctl & PCIM_EXP_CTL_MAX_READ_REQUEST) >> 12); } else { BLOGD(sc, DBG_LOAD, "forcing read 
order to %d\n", sc->mrrs); r_order = sc->mrrs; } ecore_init_pxp_arb(sc, r_order, w_order); } static uint32_t bxe_get_pretend_reg(struct bxe_softc *sc) { uint32_t base = PXP2_REG_PGL_PRETEND_FUNC_F0; uint32_t stride = (PXP2_REG_PGL_PRETEND_FUNC_F1 - base); return (base + (SC_ABS_FUNC(sc)) * stride); } /* * Called only on E1H or E2. * When pretending to be PF, the pretend value is the function number 0..7. * When pretending to be VF, the pretend val is the PF-num:VF-valid:ABS-VFID * combination. */ static int bxe_pretend_func(struct bxe_softc *sc, uint16_t pretend_func_val) { uint32_t pretend_reg; if (CHIP_IS_E1H(sc) && (pretend_func_val > E1H_FUNC_MAX)) { return (-1); } /* get my own pretend register */ pretend_reg = bxe_get_pretend_reg(sc); REG_WR(sc, pretend_reg, pretend_func_val); REG_RD(sc, pretend_reg); return (0); } static void bxe_iov_init_dmae(struct bxe_softc *sc) { return; #if 0 BLOGD(sc, DBG_LOAD, "SRIOV is %s\n", IS_SRIOV(sc) ? "ON" : "OFF"); if (!IS_SRIOV(sc)) { return; } REG_WR(sc, DMAE_REG_BACKWARD_COMP_EN, 0); #endif } #if 0 static int bxe_iov_init_ilt(struct bxe_softc *sc, uint16_t line) { return (line); #if 0 int i; struct ecore_ilt* ilt = sc->ilt; if (!IS_SRIOV(sc)) { return (line); } /* set vfs ilt lines */ for (i = 0; i < BXE_VF_CIDS/ILT_PAGE_CIDS ; i++) { struct hw_dma *hw_cxt = SC_VF_CXT_PAGE(sc,i); ilt->lines[line+i].page = hw_cxt->addr; ilt->lines[line+i].page_mapping = hw_cxt->mapping; ilt->lines[line+i].size = hw_cxt->size; /* doesn't matter */ } return (line+i); #endif } #endif static void bxe_iov_init_dq(struct bxe_softc *sc) { return; #if 0 if (!IS_SRIOV(sc)) { return; } /* Set the DQ such that the CID reflect the abs_vfid */ REG_WR(sc, DORQ_REG_VF_NORM_VF_BASE, 0); REG_WR(sc, DORQ_REG_MAX_RVFID_SIZE, ilog2(BNX2X_MAX_NUM_OF_VFS)); /* * Set VFs starting CID. 
If its > 0 the preceding CIDs are belong to * the PF L2 queues */ REG_WR(sc, DORQ_REG_VF_NORM_CID_BASE, BNX2X_FIRST_VF_CID); /* The VF window size is the log2 of the max number of CIDs per VF */ REG_WR(sc, DORQ_REG_VF_NORM_CID_WND_SIZE, BNX2X_VF_CID_WND); /* * The VF doorbell size 0 - *B, 4 - 128B. We set it here to match * the Pf doorbell size although the 2 are independent. */ REG_WR(sc, DORQ_REG_VF_NORM_CID_OFST, BNX2X_DB_SHIFT - BNX2X_DB_MIN_SHIFT); /* * No security checks for now - * configure single rule (out of 16) mask = 0x1, value = 0x0, * CID range 0 - 0x1ffff */ REG_WR(sc, DORQ_REG_VF_TYPE_MASK_0, 1); REG_WR(sc, DORQ_REG_VF_TYPE_VALUE_0, 0); REG_WR(sc, DORQ_REG_VF_TYPE_MIN_MCID_0, 0); REG_WR(sc, DORQ_REG_VF_TYPE_MAX_MCID_0, 0x1ffff); /* set the number of VF alllowed doorbells to the full DQ range */ REG_WR(sc, DORQ_REG_VF_NORM_MAX_CID_COUNT, 0x20000); /* set the VF doorbell threshold */ REG_WR(sc, DORQ_REG_VF_USAGE_CT_LIMIT, 4); #endif } /* send a NIG loopback debug packet */ static void bxe_lb_pckt(struct bxe_softc *sc) { uint32_t wb_write[3]; /* Ethernet source and destination addresses */ wb_write[0] = 0x55555555; wb_write[1] = 0x55555555; wb_write[2] = 0x20; /* SOP */ REG_WR_DMAE(sc, NIG_REG_DEBUG_PACKET_LB, wb_write, 3); /* NON-IP protocol */ wb_write[0] = 0x09000000; wb_write[1] = 0x55555555; wb_write[2] = 0x10; /* EOP, eop_bvalid = 0 */ REG_WR_DMAE(sc, NIG_REG_DEBUG_PACKET_LB, wb_write, 3); } /* * Some of the internal memories are not directly readable from the driver. * To test them we send debug packets. 
*/ static int bxe_int_mem_test(struct bxe_softc *sc) { int factor; int count, i; uint32_t val = 0; if (CHIP_REV_IS_FPGA(sc)) { factor = 120; } else if (CHIP_REV_IS_EMUL(sc)) { factor = 200; } else { factor = 1; } /* disable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x0); REG_WR(sc, TCM_REG_PRS_IFEN, 0x0); REG_WR(sc, CFC_REG_DEBUG0, 0x1); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x0); /* write 0 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x0); /* send Ethernet packet */ bxe_lb_pckt(sc); /* TODO do i reset NIG statistic? */ /* Wait until NIG register shows 1 packet of size 0x10 */ count = 1000 * factor; while (count) { bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); if (val == 0x10) { break; } DELAY(10000); count--; } if (val != 0x10) { BLOGE(sc, "NIG timeout val=0x%x\n", val); return (-1); } /* wait until PRS register shows 1 packet */ count = (1000 * factor); while (count) { val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val == 1) { break; } DELAY(10000); count--; } if (val != 0x1) { BLOGE(sc, "PRS timeout val=0x%x\n", val); return (-2); } /* Reset and init BRB, PRS */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x03); DELAY(50000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03); DELAY(50000); ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); /* Disable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x0); REG_WR(sc, TCM_REG_PRS_IFEN, 0x0); REG_WR(sc, CFC_REG_DEBUG0, 0x1); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x0); /* Write 0 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x0); /* send 10 Ethernet packets */ for (i = 0; i < 10; i++) { bxe_lb_pckt(sc); } /* Wait until NIG register shows 10+1 packets of size 11*0x10 = 0xb0 */ count = (1000 * factor); while (count) { bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, 
wb_data[0]); if (val == 0xb0) { break; } DELAY(10000); count--; } if (val != 0xb0) { BLOGE(sc, "NIG timeout val=0x%x\n", val); return (-3); } /* Wait until PRS register shows 2 packets */ val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val != 2) { BLOGE(sc, "PRS timeout val=0x%x\n", val); } /* Write 1 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x1); /* Wait until PRS register shows 3 packets */ DELAY(10000 * factor); /* Wait until NIG register shows 1 packet of size 0x10 */ val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val != 3) { BLOGE(sc, "PRS timeout val=0x%x\n", val); } /* clear NIG EOP FIFO */ for (i = 0; i < 11; i++) { REG_RD(sc, NIG_REG_INGRESS_EOP_LB_FIFO); } val = REG_RD(sc, NIG_REG_INGRESS_EOP_LB_EMPTY); if (val != 1) { BLOGE(sc, "clear of NIG failed\n"); return (-4); } /* Reset and init BRB, PRS, NIG */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x03); DELAY(50000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03); DELAY(50000); ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); if (!CNIC_SUPPORT(sc)) { /* set NIC mode */ REG_WR(sc, PRS_REG_NIC_MODE, 1); } /* Enable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x7fffffff); REG_WR(sc, TCM_REG_PRS_IFEN, 0x1); REG_WR(sc, CFC_REG_DEBUG0, 0x0); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x1); return (0); } static void bxe_setup_fan_failure_detection(struct bxe_softc *sc) { int is_required; uint32_t val; int port; is_required = 0; val = (SHMEM_RD(sc, dev_info.shared_hw_config.config2) & SHARED_HW_CFG_FAN_FAILURE_MASK); if (val == SHARED_HW_CFG_FAN_FAILURE_ENABLED) { is_required = 1; } /* * The fan failure mechanism is usually related to the PHY type since * the power consumption of the board is affected by the PHY. Currently, * fan is required for most designs with SFX7101, BCM8727 and BCM8481. 
*/ else if (val == SHARED_HW_CFG_FAN_FAILURE_PHY_TYPE) { for (port = PORT_0; port < PORT_MAX; port++) { is_required |= elink_fan_failure_det_req(sc, sc->devinfo.shmem_base, sc->devinfo.shmem2_base, port); } } BLOGD(sc, DBG_LOAD, "fan detection setting: %d\n", is_required); if (is_required == 0) { return; } /* Fan failure is indicated by SPIO 5 */ bxe_set_spio(sc, MISC_SPIO_SPIO5, MISC_SPIO_INPUT_HI_Z); /* set to active low mode */ val = REG_RD(sc, MISC_REG_SPIO_INT); val |= (MISC_SPIO_SPIO5 << MISC_SPIO_INT_OLD_SET_POS); REG_WR(sc, MISC_REG_SPIO_INT, val); /* enable interrupt to signal the IGU */ val = REG_RD(sc, MISC_REG_SPIO_EVENT_EN); val |= MISC_SPIO_SPIO5; REG_WR(sc, MISC_REG_SPIO_EVENT_EN, val); } static void bxe_enable_blocks_attention(struct bxe_softc *sc) { uint32_t val; REG_WR(sc, PXP_REG_PXP_INT_MASK_0, 0); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PXP_REG_PXP_INT_MASK_1, 0x40); } else { REG_WR(sc, PXP_REG_PXP_INT_MASK_1, 0); } REG_WR(sc, DORQ_REG_DORQ_INT_MASK, 0); REG_WR(sc, CFC_REG_CFC_INT_MASK, 0); /* * mask read length error interrupts in brb for parser * (parsing unit and 'checksum and crc' unit) * these errors are legal (PU reads fixed length and CAC can cause * read length error on truncated packets) */ REG_WR(sc, BRB1_REG_BRB1_INT_MASK, 0xFC00); REG_WR(sc, QM_REG_QM_INT_MASK, 0); REG_WR(sc, TM_REG_TM_INT_MASK, 0); REG_WR(sc, XSDM_REG_XSDM_INT_MASK_0, 0); REG_WR(sc, XSDM_REG_XSDM_INT_MASK_1, 0); REG_WR(sc, XCM_REG_XCM_INT_MASK, 0); /* REG_WR(sc, XSEM_REG_XSEM_INT_MASK_0, 0); */ /* REG_WR(sc, XSEM_REG_XSEM_INT_MASK_1, 0); */ REG_WR(sc, USDM_REG_USDM_INT_MASK_0, 0); REG_WR(sc, USDM_REG_USDM_INT_MASK_1, 0); REG_WR(sc, UCM_REG_UCM_INT_MASK, 0); /* REG_WR(sc, USEM_REG_USEM_INT_MASK_0, 0); */ /* REG_WR(sc, USEM_REG_USEM_INT_MASK_1, 0); */ REG_WR(sc, GRCBASE_UPB + PB_REG_PB_INT_MASK, 0); REG_WR(sc, CSDM_REG_CSDM_INT_MASK_0, 0); REG_WR(sc, CSDM_REG_CSDM_INT_MASK_1, 0); REG_WR(sc, CCM_REG_CCM_INT_MASK, 0); /* REG_WR(sc, CSEM_REG_CSEM_INT_MASK_0, 0); */ /* 
REG_WR(sc, CSEM_REG_CSEM_INT_MASK_1, 0); */ val = (PXP2_PXP2_INT_MASK_0_REG_PGL_CPL_AFT | PXP2_PXP2_INT_MASK_0_REG_PGL_CPL_OF | PXP2_PXP2_INT_MASK_0_REG_PGL_PCIE_ATTN); if (!CHIP_IS_E1x(sc)) { val |= (PXP2_PXP2_INT_MASK_0_REG_PGL_READ_BLOCKED | PXP2_PXP2_INT_MASK_0_REG_PGL_WRITE_BLOCKED); } REG_WR(sc, PXP2_REG_PXP2_INT_MASK_0, val); REG_WR(sc, TSDM_REG_TSDM_INT_MASK_0, 0); REG_WR(sc, TSDM_REG_TSDM_INT_MASK_1, 0); REG_WR(sc, TCM_REG_TCM_INT_MASK, 0); /* REG_WR(sc, TSEM_REG_TSEM_INT_MASK_0, 0); */ if (!CHIP_IS_E1x(sc)) { /* enable VFC attentions: bits 11 and 12, bits 31:13 reserved */ REG_WR(sc, TSEM_REG_TSEM_INT_MASK_1, 0x07ff); } REG_WR(sc, CDU_REG_CDU_INT_MASK, 0); REG_WR(sc, DMAE_REG_DMAE_INT_MASK, 0); /* REG_WR(sc, MISC_REG_MISC_INT_MASK, 0); */ REG_WR(sc, PBF_REG_PBF_INT_MASK, 0x18); /* bit 3,4 masked */ } /** * bxe_init_hw_common - initialize the HW at the COMMON phase. * * @sc: driver handle */ static int bxe_init_hw_common(struct bxe_softc *sc) { uint8_t abs_func_id; uint32_t val; BLOGD(sc, DBG_LOAD, "starting common init for func %d\n", SC_ABS_FUNC(sc)); /* * take the RESET lock to protect undi_unload flow from accessing * registers while we are resetting the chip */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET); bxe_reset_common(sc); REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET), 0xffffffff); val = 0xfffc; if (CHIP_IS_E3(sc)) { val |= MISC_REGISTERS_RESET_REG_2_MSTAT0; val |= MISC_REGISTERS_RESET_REG_2_MSTAT1; } REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET), val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); ecore_init_block(sc, BLOCK_MISC, PHASE_COMMON); BLOGD(sc, DBG_LOAD, "after misc block init\n"); if (!CHIP_IS_E1x(sc)) { /* * 4-port mode or 2-port mode we need to turn off master-enable for * everyone. After that we turn it back on for self. 
So, we disregard * multi-function, and always disable all functions on the given path, * this means 0,2,4,6 for path 0 and 1,3,5,7 for path 1 */ for (abs_func_id = SC_PATH(sc); abs_func_id < (E2_FUNC_MAX * 2); abs_func_id += 2) { if (abs_func_id == SC_ABS_FUNC(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); continue; } bxe_pretend_func(sc, abs_func_id); /* clear pf enable */ bxe_pf_disable(sc); bxe_pretend_func(sc, SC_ABS_FUNC(sc)); } } BLOGD(sc, DBG_LOAD, "after pf disable\n"); ecore_init_block(sc, BLOCK_PXP, PHASE_COMMON); if (CHIP_IS_E1(sc)) { /* * enable HW interrupt from PXP on USDM overflow * bit 16 on INT_MASK_0 */ REG_WR(sc, PXP_REG_PXP_INT_MASK_0, 0); } ecore_init_block(sc, BLOCK_PXP2, PHASE_COMMON); bxe_init_pxp(sc); #ifdef __BIG_ENDIAN REG_WR(sc, PXP2_REG_RQ_QM_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_TM_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_SRC_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_CDU_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_DBG_ENDIAN_M, 1); /* make sure this value is 0 */ REG_WR(sc, PXP2_REG_RQ_HC_ENDIAN_M, 0); //REG_WR(sc, PXP2_REG_RD_PBF_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_QM_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_TM_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_SRC_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_CDURD_SWAP_MODE, 1); #endif ecore_ilt_init_page_size(sc, INITOP_SET); if (CHIP_REV_IS_FPGA(sc) && CHIP_IS_E1H(sc)) { REG_WR(sc, PXP2_REG_PGL_TAGS_LIMIT, 0x1); } /* let the HW do it's magic... */ DELAY(100000); /* finish PXP init */ val = REG_RD(sc, PXP2_REG_RQ_CFG_DONE); if (val != 1) { BLOGE(sc, "PXP2 CFG failed\n"); return (-1); } val = REG_RD(sc, PXP2_REG_RD_INIT_DONE); if (val != 1) { BLOGE(sc, "PXP2 RD_INIT failed\n"); return (-1); } BLOGD(sc, DBG_LOAD, "after pxp init\n"); /* * Timer bug workaround for E2 only. We need to set the entire ILT to have * entries with value "0" and valid bit on. This needs to be done by the * first PF that is loaded in a path (i.e. 
common phase) */ if (!CHIP_IS_E1x(sc)) { /* * In E2 there is a bug in the timers block that can cause function 6 / 7 * (i.e. vnic3) to start even if it is marked as "scan-off". * This occurs when a different function (func2,3) is being marked * as "scan-off". Real-life scenario for example: if a driver is being * load-unloaded while func6,7 are down. This will cause the timer to access * the ilt, translate to a logical address and send a request to read/write. * Since the ilt for the function that is down is not valid, this will cause * a translation error which is unrecoverable. * The Workaround is intended to make sure that when this happens nothing * fatal will occur. The workaround: * 1. First PF driver which loads on a path will: * a. After taking the chip out of reset, by using pretend, * it will write "0" to the following registers of * the other vnics. * REG_WR(pdev, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); * REG_WR(pdev, CFC_REG_WEAK_ENABLE_PF,0); * REG_WR(pdev, CFC_REG_STRONG_ENABLE_PF,0); * And for itself it will write '1' to * PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER to enable * dmae-operations (writing to pram for example.) * note: can be done for only function 6,7 but cleaner this * way. * b. Write zero+valid to the entire ILT. * c. Init the first_timers_ilt_entry, last_timers_ilt_entry of * VNIC3 (of that port). The range allocated will be the * entire ILT. This is needed to prevent ILT range error. * 2. Any PF driver load flow: * a. ILT update with the physical addresses of the allocated * logical pages. * b. Wait 20msec. - note that this timeout is needed to make * sure there are no requests in one of the PXP internal * queues with "old" ILT addresses. * c. PF enable in the PGLC. * d. Clear the was_error of the PF in the PGLC. (could have * occurred while driver was down) * e. PF enable in the CFC (WEAK + STRONG) * f. Timers scan enable * 3. PF driver unload flow: * a. Clear the Timers scan_en. * b. Polling for scan_on=0 for that PF. * c. 
Clear the PF enable bit in the PXP. * d. Clear the PF enable in the CFC (WEAK + STRONG) * e. Write zero+valid to all ILT entries (The valid bit must * stay set) * f. If this is VNIC 3 of a port then also init * first_timers_ilt_entry to zero and last_timers_ilt_entry * to the last enrty in the ILT. * * Notes: * Currently the PF error in the PGLC is non recoverable. * In the future the there will be a recovery routine for this error. * Currently attention is masked. * Having an MCP lock on the load/unload process does not guarantee that * there is no Timer disable during Func6/7 enable. This is because the * Timers scan is currently being cleared by the MCP on FLR. * Step 2.d can be done only for PF6/7 and the driver can also check if * there is error before clearing it. But the flow above is simpler and * more general. * All ILT entries are written by zero+valid and not just PF6/7 * ILT entries since in the future the ILT entries allocation for * PF-s might be dynamic. */ struct ilt_client_info ilt_cli; struct ecore_ilt ilt; memset(&ilt_cli, 0, sizeof(struct ilt_client_info)); memset(&ilt, 0, sizeof(struct ecore_ilt)); /* initialize dummy TM client */ ilt_cli.start = 0; ilt_cli.end = ILT_NUM_PAGE_ENTRIES - 1; ilt_cli.client_num = ILT_CLIENT_TM; /* * Step 1: set zeroes to all ilt page entries with valid bit on * Step 2: set the timers first/last ilt entry to point * to the entire range to prevent ILT range error for 3rd/4th * vnic (this code assumes existence of the vnic) * * both steps performed by call to ecore_ilt_client_init_op() * with dummy TM client * * we must use pretend since PXP2_REG_RQ_##blk##_FIRST_ILT * and his brother are split registers */ bxe_pretend_func(sc, (SC_PATH(sc) + 6)); ecore_ilt_client_init_op_ilt(sc, &ilt, &ilt_cli, INITOP_CLEAR); bxe_pretend_func(sc, SC_ABS_FUNC(sc)); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN, BXE_PXP_DRAM_ALIGN); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN_RD, BXE_PXP_DRAM_ALIGN); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN_SEL, 1); } REG_WR(sc, 
PXP2_REG_RQ_DISABLE_INPUTS, 0); REG_WR(sc, PXP2_REG_RD_DISABLE_INPUTS, 0); if (!CHIP_IS_E1x(sc)) { int factor = CHIP_REV_IS_EMUL(sc) ? 1000 : (CHIP_REV_IS_FPGA(sc) ? 400 : 0); ecore_init_block(sc, BLOCK_PGLUE_B, PHASE_COMMON); ecore_init_block(sc, BLOCK_ATC, PHASE_COMMON); /* let the HW do it's magic... */ do { DELAY(200000); val = REG_RD(sc, ATC_REG_ATC_INIT_DONE); } while (factor-- && (val != 1)); if (val != 1) { BLOGE(sc, "ATC_INIT failed\n"); return (-1); } } BLOGD(sc, DBG_LOAD, "after pglue and atc init\n"); ecore_init_block(sc, BLOCK_DMAE, PHASE_COMMON); bxe_iov_init_dmae(sc); /* clean the DMAE memory */ sc->dmae_ready = 1; ecore_init_fill(sc, TSEM_REG_PRAM, 0, 8, 1); ecore_init_block(sc, BLOCK_TCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_UCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XCM, PHASE_COMMON); bxe_read_dmae(sc, XSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, CSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, TSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, USEM_REG_PASSIVE_BUFFER, 3); ecore_init_block(sc, BLOCK_QM, PHASE_COMMON); /* QM queues pointers table */ ecore_qm_init_ptr_table(sc, sc->qm_cid_count, INITOP_SET); /* soft reset pulse */ REG_WR(sc, QM_REG_SOFT_RESET, 1); REG_WR(sc, QM_REG_SOFT_RESET, 0); if (CNIC_SUPPORT(sc)) ecore_init_block(sc, BLOCK_TM, PHASE_COMMON); ecore_init_block(sc, BLOCK_DORQ, PHASE_COMMON); REG_WR(sc, DORQ_REG_DPM_CID_OFST, BXE_DB_SHIFT); if (!CHIP_REV_IS_SLOW(sc)) { /* enable hw interrupt from doorbell Q */ REG_WR(sc, DORQ_REG_DORQ_INT_MASK, 0); } ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); REG_WR(sc, PRS_REG_A_PRSU_20, 0xf); if (!CHIP_IS_E1(sc)) { REG_WR(sc, PRS_REG_E1HOV_MODE, sc->devinfo.mf_info.path_has_ovlan); } if (!CHIP_IS_E1x(sc) && !CHIP_IS_E3B0(sc)) { if (IS_MF_AFEX(sc)) { /* * configure that AFEX and VLAN headers must be * received in AFEX mode */ REG_WR(sc, PRS_REG_HDRS_AFTER_BASIC, 0xE); REG_WR(sc, 
PRS_REG_MUST_HAVE_HDRS, 0xA); REG_WR(sc, PRS_REG_HDRS_AFTER_TAG_0, 0x6); REG_WR(sc, PRS_REG_TAG_ETHERTYPE_0, 0x8926); REG_WR(sc, PRS_REG_TAG_LEN_0, 0x4); } else { /* * Bit-map indicating which L2 hdrs may appear * after the basic Ethernet header */ REG_WR(sc, PRS_REG_HDRS_AFTER_BASIC, sc->devinfo.mf_info.path_has_ovlan ? 7 : 6); } } ecore_init_block(sc, BLOCK_TSDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CSDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_USDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XSDM, PHASE_COMMON); if (!CHIP_IS_E1x(sc)) { /* reset VFC memories */ REG_WR(sc, TSEM_REG_FAST_MEMORY + VFC_REG_MEMORIES_RST, VFC_MEMORIES_RST_REG_CAM_RST | VFC_MEMORIES_RST_REG_RAM_RST); REG_WR(sc, XSEM_REG_FAST_MEMORY + VFC_REG_MEMORIES_RST, VFC_MEMORIES_RST_REG_CAM_RST | VFC_MEMORIES_RST_REG_RAM_RST); DELAY(20000); } ecore_init_block(sc, BLOCK_TSEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_USEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CSEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XSEM, PHASE_COMMON); /* sync semi rtc */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x80000000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x80000000); ecore_init_block(sc, BLOCK_UPB, PHASE_COMMON); ecore_init_block(sc, BLOCK_XPB, PHASE_COMMON); ecore_init_block(sc, BLOCK_PBF, PHASE_COMMON); if (!CHIP_IS_E1x(sc)) { if (IS_MF_AFEX(sc)) { /* * configure that AFEX and VLAN headers must be * sent in AFEX mode */ REG_WR(sc, PBF_REG_HDRS_AFTER_BASIC, 0xE); REG_WR(sc, PBF_REG_MUST_HAVE_HDRS, 0xA); REG_WR(sc, PBF_REG_HDRS_AFTER_TAG_0, 0x6); REG_WR(sc, PBF_REG_TAG_ETHERTYPE_0, 0x8926); REG_WR(sc, PBF_REG_TAG_LEN_0, 0x4); } else { REG_WR(sc, PBF_REG_HDRS_AFTER_BASIC, sc->devinfo.mf_info.path_has_ovlan ? 
7 : 6); } } REG_WR(sc, SRC_REG_SOFT_RST, 1); ecore_init_block(sc, BLOCK_SRC, PHASE_COMMON); if (CNIC_SUPPORT(sc)) { REG_WR(sc, SRC_REG_KEYSEARCH_0, 0x63285672); REG_WR(sc, SRC_REG_KEYSEARCH_1, 0x24b8f2cc); REG_WR(sc, SRC_REG_KEYSEARCH_2, 0x223aef9b); REG_WR(sc, SRC_REG_KEYSEARCH_3, 0x26001e3a); REG_WR(sc, SRC_REG_KEYSEARCH_4, 0x7ae91116); REG_WR(sc, SRC_REG_KEYSEARCH_5, 0x5ce5230b); REG_WR(sc, SRC_REG_KEYSEARCH_6, 0x298d8adf); REG_WR(sc, SRC_REG_KEYSEARCH_7, 0x6eb0ff09); REG_WR(sc, SRC_REG_KEYSEARCH_8, 0x1830f82f); REG_WR(sc, SRC_REG_KEYSEARCH_9, 0x01e46be7); } REG_WR(sc, SRC_REG_SOFT_RST, 0); if (sizeof(union cdu_context) != 1024) { /* we currently assume that a context is 1024 bytes */ BLOGE(sc, "please adjust the size of cdu_context(%ld)\n", (long)sizeof(union cdu_context)); } ecore_init_block(sc, BLOCK_CDU, PHASE_COMMON); val = (4 << 24) + (0 << 12) + 1024; REG_WR(sc, CDU_REG_CDU_GLOBAL_PARAMS, val); ecore_init_block(sc, BLOCK_CFC, PHASE_COMMON); REG_WR(sc, CFC_REG_INIT_REG, 0x7FF); /* enable context validation interrupt from CFC */ REG_WR(sc, CFC_REG_CFC_INT_MASK, 0); /* set the thresholds to prevent CFC/CDU race */ REG_WR(sc, CFC_REG_DEBUG0, 0x20020000); ecore_init_block(sc, BLOCK_HC, PHASE_COMMON); if (!CHIP_IS_E1x(sc) && BXE_NOMCP(sc)) { REG_WR(sc, IGU_REG_RESET_MEMORIES, 0x36); } ecore_init_block(sc, BLOCK_IGU, PHASE_COMMON); ecore_init_block(sc, BLOCK_MISC_AEU, PHASE_COMMON); /* Reset PCIE errors for debug */ REG_WR(sc, 0x2814, 0xffffffff); REG_WR(sc, 0x3820, 0xffffffff); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_CONTROL_5, (PXPCS_TL_CONTROL_5_ERR_UNSPPORT1 | PXPCS_TL_CONTROL_5_ERR_UNSPPORT)); REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_FUNC345_STAT, (PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT4 | PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT3 | PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT2)); REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_FUNC678_STAT, (PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT7 | PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT6 | PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT5)); } 
/**
 * bxe_init_hw_common_chip - init HW at the COMMON_CHIP phase.
 *
 * @sc: driver handle
 *
 * Runs bxe_init_hw_common() and, if that succeeds and BXE_NOMCP(sc) is
 * false, also runs bxe_common_init_phy().
 *
 * Returns the non-zero status of bxe_init_hw_common() on failure,
 * otherwise 0.
 */
static int
bxe_init_hw_common_chip(struct bxe_softc *sc)
{
    int status;

    status = bxe_init_hw_common(sc);
    if (status != 0) {
        return (status);
    }

    /* Without an MCP there is no common PHY initialization to perform. */
    if (BXE_NOMCP(sc)) {
        return (0);
    }

    /* In E2 2-PORT mode, same ext phy is used for the two paths */
    bxe_common_init_phy(sc);

    return (0);
}
PHASE_PORT1 : PHASE_PORT0; uint32_t low, high; uint32_t val; BLOGD(sc, DBG_LOAD, "starting port init for port %d\n", port); REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0); ecore_init_block(sc, BLOCK_MISC, init_phase); ecore_init_block(sc, BLOCK_PXP, init_phase); ecore_init_block(sc, BLOCK_PXP2, init_phase); /* * Timers bug workaround: disables the pf_master bit in pglue at * common phase, we need to enable it here before any dmae access are * attempted. Therefore we manually added the enable-master to the * port phase (it also happens in the function phase) */ if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); } ecore_init_block(sc, BLOCK_ATC, init_phase); ecore_init_block(sc, BLOCK_DMAE, init_phase); ecore_init_block(sc, BLOCK_PGLUE_B, init_phase); ecore_init_block(sc, BLOCK_QM, init_phase); ecore_init_block(sc, BLOCK_TCM, init_phase); ecore_init_block(sc, BLOCK_UCM, init_phase); ecore_init_block(sc, BLOCK_CCM, init_phase); ecore_init_block(sc, BLOCK_XCM, init_phase); /* QM cid (connection) count */ ecore_qm_init_cid_count(sc, sc->qm_cid_count, INITOP_SET); if (CNIC_SUPPORT(sc)) { ecore_init_block(sc, BLOCK_TM, init_phase); REG_WR(sc, TM_REG_LIN0_SCAN_TIME + port*4, 20); REG_WR(sc, TM_REG_LIN0_MAX_ACTIVE_CID + port*4, 31); } ecore_init_block(sc, BLOCK_DORQ, init_phase); ecore_init_block(sc, BLOCK_BRB1, init_phase); if (CHIP_IS_E1(sc) || CHIP_IS_E1H(sc)) { if (IS_MF(sc)) { low = (BXE_ONE_PORT(sc) ? 160 : 246); } else if (sc->mtu > 4096) { if (BXE_ONE_PORT(sc)) { low = 160; } else { val = sc->mtu; /* (24*1024 + val*4)/256 */ low = (96 + (val / 64) + ((val % 64) ? 1 : 0)); } } else { low = (BXE_ONE_PORT(sc) ? 80 : 160); } high = (low + 56); /* 14*1024/256 */ REG_WR(sc, BRB1_REG_PAUSE_LOW_THRESHOLD_0 + port*4, low); REG_WR(sc, BRB1_REG_PAUSE_HIGH_THRESHOLD_0 + port*4, high); } if (CHIP_IS_MODE_4_PORT(sc)) { REG_WR(sc, SC_PORT(sc) ? 
BRB1_REG_MAC_GUARANTIED_1 : BRB1_REG_MAC_GUARANTIED_0, 40); } ecore_init_block(sc, BLOCK_PRS, init_phase); if (CHIP_IS_E3B0(sc)) { if (IS_MF_AFEX(sc)) { /* configure headers for AFEX mode */ REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_BASIC_PORT_1 : PRS_REG_HDRS_AFTER_BASIC_PORT_0, 0xE); REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_TAG_0_PORT_1 : PRS_REG_HDRS_AFTER_TAG_0_PORT_0, 0x6); REG_WR(sc, SC_PORT(sc) ? PRS_REG_MUST_HAVE_HDRS_PORT_1 : PRS_REG_MUST_HAVE_HDRS_PORT_0, 0xA); } else { /* Ovlan exists only if we are in multi-function + * switch-dependent mode, in switch-independent there * is no ovlan headers */ REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_BASIC_PORT_1 : PRS_REG_HDRS_AFTER_BASIC_PORT_0, (sc->devinfo.mf_info.path_has_ovlan ? 7 : 6)); } } ecore_init_block(sc, BLOCK_TSDM, init_phase); ecore_init_block(sc, BLOCK_CSDM, init_phase); ecore_init_block(sc, BLOCK_USDM, init_phase); ecore_init_block(sc, BLOCK_XSDM, init_phase); ecore_init_block(sc, BLOCK_TSEM, init_phase); ecore_init_block(sc, BLOCK_USEM, init_phase); ecore_init_block(sc, BLOCK_CSEM, init_phase); ecore_init_block(sc, BLOCK_XSEM, init_phase); ecore_init_block(sc, BLOCK_UPB, init_phase); ecore_init_block(sc, BLOCK_XPB, init_phase); ecore_init_block(sc, BLOCK_PBF, init_phase); if (CHIP_IS_E1x(sc)) { /* configure PBF to work without PAUSE mtu 9000 */ REG_WR(sc, PBF_REG_P0_PAUSE_ENABLE + port*4, 0); /* update threshold */ REG_WR(sc, PBF_REG_P0_ARB_THRSH + port*4, (9040/16)); /* update init credit */ REG_WR(sc, PBF_REG_P0_INIT_CRD + port*4, (9040/16) + 553 - 22); /* probe changes */ REG_WR(sc, PBF_REG_INIT_P0 + port*4, 1); DELAY(50); REG_WR(sc, PBF_REG_INIT_P0 + port*4, 0); } if (CNIC_SUPPORT(sc)) { ecore_init_block(sc, BLOCK_SRC, init_phase); } ecore_init_block(sc, BLOCK_CDU, init_phase); ecore_init_block(sc, BLOCK_CFC, init_phase); if (CHIP_IS_E1(sc)) { REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } ecore_init_block(sc, BLOCK_HC, init_phase); 
ecore_init_block(sc, BLOCK_IGU, init_phase); ecore_init_block(sc, BLOCK_MISC_AEU, init_phase); /* init aeu_mask_attn_func_0/1: * - SF mode: bits 3-7 are masked. only bits 0-2 are in use * - MF mode: bit 3 is masked. bits 0-2 are in use as in SF * bits 4-7 are used for "per vn group attention" */ val = IS_MF(sc) ? 0xF7 : 0x7; /* Enable DCBX attention for all but E1 */ val |= CHIP_IS_E1(sc) ? 0 : 0x10; REG_WR(sc, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4, val); ecore_init_block(sc, BLOCK_NIG, init_phase); if (!CHIP_IS_E1x(sc)) { /* Bit-map indicating which L2 hdrs may appear after the * basic Ethernet header */ if (IS_MF_AFEX(sc)) { REG_WR(sc, SC_PORT(sc) ? NIG_REG_P1_HDRS_AFTER_BASIC : NIG_REG_P0_HDRS_AFTER_BASIC, 0xE); } else { REG_WR(sc, SC_PORT(sc) ? NIG_REG_P1_HDRS_AFTER_BASIC : NIG_REG_P0_HDRS_AFTER_BASIC, IS_MF_SD(sc) ? 7 : 6); } if (CHIP_IS_E3(sc)) { REG_WR(sc, SC_PORT(sc) ? NIG_REG_LLH1_MF_MODE : NIG_REG_LLH_MF_MODE, IS_MF(sc)); } } if (!CHIP_IS_E3(sc)) { REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port*4, 1); } if (!CHIP_IS_E1(sc)) { /* 0x2 disable mf_ov, 0x1 enable */ REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK_MF + port*4, (IS_MF_SD(sc) ? 0x1 : 0x2)); if (!CHIP_IS_E1x(sc)) { val = 0; switch (sc->devinfo.mf_info.mf_mode) { case MULTI_FUNCTION_SD: val = 1; break; case MULTI_FUNCTION_SI: case MULTI_FUNCTION_AFEX: val = 2; break; } REG_WR(sc, (SC_PORT(sc) ? NIG_REG_LLH1_CLS_TYPE : NIG_REG_LLH0_CLS_TYPE), val); } REG_WR(sc, NIG_REG_LLFC_ENABLE_0 + port*4, 0); REG_WR(sc, NIG_REG_LLFC_OUT_EN_0 + port*4, 0); REG_WR(sc, NIG_REG_PAUSE_ENABLE_0 + port*4, 1); } /* If SPIO5 is set to generate interrupts, enable it for this port */ val = REG_RD(sc, MISC_REG_SPIO_EVENT_EN); if (val & MISC_SPIO_SPIO5) { uint32_t reg_addr = (port ? 
/*
 * Poll a hardware usage counter until it drains to zero.
 *
 * @sc:       driver handle
 * @reg:      register holding the usage counter
 * @msg:      prefix for the error message logged on timeout
 * @poll_cnt: polling budget handed to bxe_flr_clnup_reg_poll()
 *
 * Returns 0 when the counter read back as zero, or 1 (after logging the
 * last value read) when it was still non-zero once polling stopped.
 */
static int
bxe_flr_clnup_poll_hw_counter(struct bxe_softc *sc,
                              uint32_t         reg,
                              char             *msg,
                              uint32_t         poll_cnt)
{
    uint32_t val = bxe_flr_clnup_reg_poll(sc, reg, 0, poll_cnt);

    if (val != 0) {
        /* val is unsigned, so print it with %u rather than %d */
        BLOGE(sc, "%s usage count=%u\n", msg, val);
        return (1);
    }

    return (0);
}
DMAE PF usage counter to zero */ if (bxe_flr_clnup_poll_hw_counter(sc, dmae_reg_go_c[INIT_DMAE_C(sc)], "DMAE dommand register timed out", poll_cnt)) { return (1); } return (0); } #define OP_GEN_PARAM(param) \ (((param) << SDM_OP_GEN_COMP_PARAM_SHIFT) & SDM_OP_GEN_COMP_PARAM) #define OP_GEN_TYPE(type) \ (((type) << SDM_OP_GEN_COMP_TYPE_SHIFT) & SDM_OP_GEN_COMP_TYPE) #define OP_GEN_AGG_VECT(index) \ (((index) << SDM_OP_GEN_AGG_VECT_IDX_SHIFT) & SDM_OP_GEN_AGG_VECT_IDX) static int bxe_send_final_clnup(struct bxe_softc *sc, uint8_t clnup_func, uint32_t poll_cnt) { uint32_t op_gen_command = 0; uint32_t comp_addr = (BAR_CSTRORM_INTMEM + CSTORM_FINAL_CLEANUP_COMPLETE_OFFSET(clnup_func)); int ret = 0; if (REG_RD(sc, comp_addr)) { BLOGE(sc, "Cleanup complete was not 0 before sending\n"); return (1); } op_gen_command |= OP_GEN_PARAM(XSTORM_AGG_INT_FINAL_CLEANUP_INDEX); op_gen_command |= OP_GEN_TYPE(XSTORM_AGG_INT_FINAL_CLEANUP_COMP_TYPE); op_gen_command |= OP_GEN_AGG_VECT(clnup_func); op_gen_command |= 1 << SDM_OP_GEN_AGG_VECT_IDX_VALID_SHIFT; BLOGD(sc, DBG_LOAD, "sending FW Final cleanup\n"); REG_WR(sc, XSDM_REG_OPERATION_GEN, op_gen_command); if (bxe_flr_clnup_reg_poll(sc, comp_addr, 1, poll_cnt) != 1) { BLOGE(sc, "FW final cleanup did not succeed\n"); BLOGD(sc, DBG_LOAD, "At timeout completion address contained %x\n", (REG_RD(sc, comp_addr))); bxe_panic(sc, ("FLR cleanup failed\n")); return (1); } /* Zero completion for nxt FLR */ REG_WR(sc, comp_addr, 0); return (ret); } static void bxe_pbf_pN_buf_flushed(struct bxe_softc *sc, struct pbf_pN_buf_regs *regs, uint32_t poll_count) { uint32_t init_crd, crd, crd_start, crd_freed, crd_freed_start; uint32_t cur_cnt = poll_count; crd_freed = crd_freed_start = REG_RD(sc, regs->crd_freed); crd = crd_start = REG_RD(sc, regs->crd); init_crd = REG_RD(sc, regs->init_crd); BLOGD(sc, DBG_LOAD, "INIT CREDIT[%d] : %x\n", regs->pN, init_crd); BLOGD(sc, DBG_LOAD, "CREDIT[%d] : s:%x\n", regs->pN, crd); BLOGD(sc, DBG_LOAD, "CREDIT_FREED[%d]: 
s:%x\n", regs->pN, crd_freed); while ((crd != init_crd) && ((uint32_t)((int32_t)crd_freed - (int32_t)crd_freed_start) < (init_crd - crd_start))) { if (cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); crd = REG_RD(sc, regs->crd); crd_freed = REG_RD(sc, regs->crd_freed); } else { BLOGD(sc, DBG_LOAD, "PBF tx buffer[%d] timed out\n", regs->pN); BLOGD(sc, DBG_LOAD, "CREDIT[%d] : c:%x\n", regs->pN, crd); BLOGD(sc, DBG_LOAD, "CREDIT_FREED[%d]: c:%x\n", regs->pN, crd_freed); break; } } BLOGD(sc, DBG_LOAD, "Waited %d*%d usec for PBF tx buffer[%d]\n", poll_count-cur_cnt, FLR_WAIT_INTERVAL, regs->pN); } static void bxe_pbf_pN_cmd_flushed(struct bxe_softc *sc, struct pbf_pN_cmd_regs *regs, uint32_t poll_count) { uint32_t occup, to_free, freed, freed_start; uint32_t cur_cnt = poll_count; occup = to_free = REG_RD(sc, regs->lines_occup); freed = freed_start = REG_RD(sc, regs->lines_freed); BLOGD(sc, DBG_LOAD, "OCCUPANCY[%d] : s:%x\n", regs->pN, occup); BLOGD(sc, DBG_LOAD, "LINES_FREED[%d] : s:%x\n", regs->pN, freed); while (occup && ((uint32_t)((int32_t)freed - (int32_t)freed_start) < to_free)) { if (cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); occup = REG_RD(sc, regs->lines_occup); freed = REG_RD(sc, regs->lines_freed); } else { BLOGD(sc, DBG_LOAD, "PBF cmd queue[%d] timed out\n", regs->pN); BLOGD(sc, DBG_LOAD, "OCCUPANCY[%d] : s:%x\n", regs->pN, occup); BLOGD(sc, DBG_LOAD, "LINES_FREED[%d] : s:%x\n", regs->pN, freed); break; } } BLOGD(sc, DBG_LOAD, "Waited %d*%d usec for PBF cmd queue[%d]\n", poll_count - cur_cnt, FLR_WAIT_INTERVAL, regs->pN); } static void bxe_tx_hw_flushed(struct bxe_softc *sc, uint32_t poll_count) { struct pbf_pN_cmd_regs cmd_regs[] = { {0, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_Q0 : PBF_REG_P0_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_Q0 : PBF_REG_P0_TQ_LINES_FREED_CNT}, {1, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_Q1 : PBF_REG_P1_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? 
PBF_REG_TQ_LINES_FREED_CNT_Q1 : PBF_REG_P1_TQ_LINES_FREED_CNT}, {4, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_LB_Q : PBF_REG_P4_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_LB_Q : PBF_REG_P4_TQ_LINES_FREED_CNT} }; struct pbf_pN_buf_regs buf_regs[] = { {0, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_Q0 : PBF_REG_P0_INIT_CRD , (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_Q0 : PBF_REG_P0_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_Q0 : PBF_REG_P0_INTERNAL_CRD_FREED_CNT}, {1, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_Q1 : PBF_REG_P1_INIT_CRD, (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_Q1 : PBF_REG_P1_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_Q1 : PBF_REG_P1_INTERNAL_CRD_FREED_CNT}, {4, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_LB_Q : PBF_REG_P4_INIT_CRD, (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_LB_Q : PBF_REG_P4_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_LB_Q : PBF_REG_P4_INTERNAL_CRD_FREED_CNT}, }; int i; /* Verify the command queues are flushed P0, P1, P4 */ for (i = 0; i < ARRAY_SIZE(cmd_regs); i++) { bxe_pbf_pN_cmd_flushed(sc, &cmd_regs[i], poll_count); } /* Verify the transmission buffers are flushed P0, P1, P4 */ for (i = 0; i < ARRAY_SIZE(buf_regs); i++) { bxe_pbf_pN_buf_flushed(sc, &buf_regs[i], poll_count); } } static void bxe_hw_enable_status(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, CFC_REG_WEAK_ENABLE_PF); BLOGD(sc, DBG_LOAD, "CFC_REG_WEAK_ENABLE_PF is 0x%x\n", val); val = REG_RD(sc, PBF_REG_DISABLE_PF); BLOGD(sc, DBG_LOAD, "PBF_REG_DISABLE_PF is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSI_EN); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSI_EN is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_EN); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSIX_EN is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_FUNC_MASK); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSIX_FUNC_MASK is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR is 0x%x\n", val); val = 
REG_RD(sc, PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER is 0x%x\n", val); } static int bxe_pf_flr_clnup(struct bxe_softc *sc) { uint32_t poll_cnt = bxe_flr_clnup_poll_count(sc); BLOGD(sc, DBG_LOAD, "Cleanup after FLR PF[%d]\n", SC_ABS_FUNC(sc)); /* Re-enable PF target read access */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); /* Poll HW usage counters */ BLOGD(sc, DBG_LOAD, "Polling usage counters\n"); if (bxe_poll_hw_usage_counters(sc, poll_cnt)) { return (-1); } /* Zero the igu 'trailing edge' and 'leading edge' */ /* Send the FW cleanup command */ if (bxe_send_final_clnup(sc, (uint8_t)SC_FUNC(sc), poll_cnt)) { return (-1); } /* ATC cleanup */ /* Verify TX hw is flushed */ bxe_tx_hw_flushed(sc, poll_cnt); /* Wait 100ms (not adjusted according to platform) */ DELAY(100000); /* Verify no pending pci transactions */ if (bxe_is_pcie_pending(sc)) { BLOGE(sc, "PCIE Transactions still pending\n"); } /* Debug */ bxe_hw_enable_status(sc); /* * Master enable - Due to WB DMAE writes performed before this * register is re-initialized as part of the regular function init */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); return (0); } #if 0 static void bxe_init_searcher(struct bxe_softc *sc) { int port = SC_PORT(sc); ecore_src_init_t2(sc, sc->t2, sc->t2_mapping, SRC_CONN_NUM); /* T1 hash bits value determines the T1 number of entries */ REG_WR(sc, SRC_REG_NUMBER_HASH_BITS0 + port*4, SRC_HASH_BITS); } #endif static int bxe_init_hw_func(struct bxe_softc *sc) { int port = SC_PORT(sc); int func = SC_FUNC(sc); int init_phase = PHASE_PF0 + func; struct ecore_ilt *ilt = sc->ilt; uint16_t cdu_ilt_start; uint32_t addr, val; uint32_t main_mem_base, main_mem_size, main_mem_prty_clr; int i, main_mem_width, rc; BLOGD(sc, DBG_LOAD, "starting func init for func %d\n", func); /* FLR 
cleanup */ if (!CHIP_IS_E1x(sc)) { rc = bxe_pf_flr_clnup(sc); if (rc) { BLOGE(sc, "FLR cleanup failed!\n"); // XXX bxe_fw_dump(sc); // XXX bxe_idle_chk(sc); return (rc); } } /* set MSI reconfigure capability */ if (sc->devinfo.int_block == INT_BLOCK_HC) { addr = (port ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0); val = REG_RD(sc, addr); val |= HC_CONFIG_0_REG_MSI_ATTN_EN_0; REG_WR(sc, addr, val); } ecore_init_block(sc, BLOCK_PXP, init_phase); ecore_init_block(sc, BLOCK_PXP2, init_phase); ilt = sc->ilt; cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start; #if 0 if (IS_SRIOV(sc)) { cdu_ilt_start += BXE_FIRST_VF_CID/ILT_PAGE_CIDS; } cdu_ilt_start = bxe_iov_init_ilt(sc, cdu_ilt_start); #if (BXE_FIRST_VF_CID > 0) /* * If BXE_FIRST_VF_CID > 0 then the PF L2 cids precedes * those of the VFs, so start line should be reset */ cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start; #endif #endif for (i = 0; i < L2_ILT_LINES(sc); i++) { ilt->lines[cdu_ilt_start + i].page = sc->context[i].vcxt; ilt->lines[cdu_ilt_start + i].page_mapping = sc->context[i].vcxt_dma.paddr; ilt->lines[cdu_ilt_start + i].size = sc->context[i].size; } ecore_ilt_init_op(sc, INITOP_SET); #if 0 if (!CONFIGURE_NIC_MODE(sc)) { bxe_init_searcher(sc); REG_WR(sc, PRS_REG_NIC_MODE, 0); BLOGD(sc, DBG_LOAD, "NIC MODE disabled\n"); } else #endif { /* Set NIC mode */ REG_WR(sc, PRS_REG_NIC_MODE, 1); BLOGD(sc, DBG_LOAD, "NIC MODE configured\n"); } if (!CHIP_IS_E1x(sc)) { uint32_t pf_conf = IGU_PF_CONF_FUNC_EN; /* Turn on a single ISR mode in IGU if driver is going to use * INT#x or MSI */ if (sc->interrupt_mode != INTR_MODE_MSIX) { pf_conf |= IGU_PF_CONF_SINGLE_ISR_EN; } /* * Timers workaround bug: function init part. 
* Need to wait 20msec after initializing ILT, * needed to make sure there are no requests in * one of the PXP internal queues with "old" ILT addresses */ DELAY(20000); /* * Master enable - Due to WB DMAE writes performed before this * register is re-initialized as part of the regular function * init */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); /* Enable the function in IGU */ REG_WR(sc, IGU_REG_PF_CONFIGURATION, pf_conf); } sc->dmae_ready = 1; ecore_init_block(sc, BLOCK_PGLUE_B, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, func); ecore_init_block(sc, BLOCK_ATC, init_phase); ecore_init_block(sc, BLOCK_DMAE, init_phase); ecore_init_block(sc, BLOCK_NIG, init_phase); ecore_init_block(sc, BLOCK_SRC, init_phase); ecore_init_block(sc, BLOCK_MISC, init_phase); ecore_init_block(sc, BLOCK_TCM, init_phase); ecore_init_block(sc, BLOCK_UCM, init_phase); ecore_init_block(sc, BLOCK_CCM, init_phase); ecore_init_block(sc, BLOCK_XCM, init_phase); ecore_init_block(sc, BLOCK_TSEM, init_phase); ecore_init_block(sc, BLOCK_USEM, init_phase); ecore_init_block(sc, BLOCK_CSEM, init_phase); ecore_init_block(sc, BLOCK_XSEM, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, QM_REG_PF_EN, 1); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, TSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, USEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, CSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, XSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); } ecore_init_block(sc, BLOCK_QM, init_phase); ecore_init_block(sc, BLOCK_TM, init_phase); ecore_init_block(sc, BLOCK_DORQ, init_phase); bxe_iov_init_dq(sc); ecore_init_block(sc, BLOCK_BRB1, init_phase); ecore_init_block(sc, BLOCK_PRS, init_phase); ecore_init_block(sc, BLOCK_TSDM, init_phase); ecore_init_block(sc, BLOCK_CSDM, init_phase); ecore_init_block(sc, BLOCK_USDM, init_phase); ecore_init_block(sc, BLOCK_XSDM, init_phase); ecore_init_block(sc, BLOCK_UPB, init_phase); ecore_init_block(sc, 
BLOCK_XPB, init_phase); ecore_init_block(sc, BLOCK_PBF, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, PBF_REG_DISABLE_PF, 0); ecore_init_block(sc, BLOCK_CDU, init_phase); ecore_init_block(sc, BLOCK_CFC, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, CFC_REG_WEAK_ENABLE_PF, 1); if (IS_MF(sc)) { REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 1); REG_WR(sc, NIG_REG_LLH0_FUNC_VLAN_ID + port*8, OVLAN(sc)); } ecore_init_block(sc, BLOCK_MISC_AEU, init_phase); /* HC init per function */ if (sc->devinfo.int_block == INT_BLOCK_HC) { if (CHIP_IS_E1H(sc)) { REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } ecore_init_block(sc, BLOCK_HC, init_phase); } else { int num_segs, sb_idx, prod_offset; REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, 0); REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, 0); } ecore_init_block(sc, BLOCK_IGU, init_phase); if (!CHIP_IS_E1x(sc)) { int dsb_idx = 0; /** * Producer memory: * E2 mode: address 0-135 match to the mapping memory; * 136 - PF0 default prod; 137 - PF1 default prod; * 138 - PF2 default prod; 139 - PF3 default prod; * 140 - PF0 attn prod; 141 - PF1 attn prod; * 142 - PF2 attn prod; 143 - PF3 attn prod; * 144-147 reserved. * * E1.5 mode - In backward compatible mode; * for non default SB; each even line in the memory * holds the U producer and each odd line hold * the C producer. The first 128 producers are for * NDSB (PF0 - 0-31; PF1 - 32-63 and so on). The last 20 * producers are for the DSB for each PF. * Each PF has five segments: (the order inside each * segment is PF0; PF1; PF2; PF3) - 128-131 U prods; * 132-135 C prods; 136-139 X prods; 140-143 T prods; * 144-147 attn prods; */ /* non-default-status-blocks */ num_segs = CHIP_INT_MODE_IS_BC(sc) ? 
IGU_BC_NDSB_NUM_SEGS : IGU_NORM_NDSB_NUM_SEGS; for (sb_idx = 0; sb_idx < sc->igu_sb_cnt; sb_idx++) { prod_offset = (sc->igu_base_sb + sb_idx) * num_segs; for (i = 0; i < num_segs; i++) { addr = IGU_REG_PROD_CONS_MEMORY + (prod_offset + i) * 4; REG_WR(sc, addr, 0); } /* send consumer update with value 0 */ bxe_ack_sb(sc, sc->igu_base_sb + sb_idx, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_igu_clear_sb(sc, sc->igu_base_sb + sb_idx); } /* default-status-blocks */ num_segs = CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_DSB_NUM_SEGS : IGU_NORM_DSB_NUM_SEGS; if (CHIP_IS_MODE_4_PORT(sc)) dsb_idx = SC_FUNC(sc); else dsb_idx = SC_VN(sc); prod_offset = (CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_BASE_DSB_PROD + dsb_idx : IGU_NORM_BASE_DSB_PROD + dsb_idx); /* * igu prods come in chunks of E1HVN_MAX (4) - * does not matters what is the current chip mode */ for (i = 0; i < (num_segs * E1HVN_MAX); i += E1HVN_MAX) { addr = IGU_REG_PROD_CONS_MEMORY + (prod_offset + i)*4; REG_WR(sc, addr, 0); } /* send consumer update with 0 */ if (CHIP_INT_MODE_IS_BC(sc)) { bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, CSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, XSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, TSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, 0, IGU_INT_NOP, 1); } else { bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, 0, IGU_INT_NOP, 1); } bxe_igu_clear_sb(sc, sc->igu_dsb_id); /* !!! 
these should become driver const once rf-tool supports split-68 const */ REG_WR(sc, IGU_REG_SB_INT_BEFORE_MASK_LSB, 0); REG_WR(sc, IGU_REG_SB_INT_BEFORE_MASK_MSB, 0); REG_WR(sc, IGU_REG_SB_MASK_LSB, 0); REG_WR(sc, IGU_REG_SB_MASK_MSB, 0); REG_WR(sc, IGU_REG_PBA_STATUS_LSB, 0); REG_WR(sc, IGU_REG_PBA_STATUS_MSB, 0); } } /* Reset PCIE errors for debug */ REG_WR(sc, 0x2114, 0xffffffff); REG_WR(sc, 0x2120, 0xffffffff); if (CHIP_IS_E1x(sc)) { main_mem_size = HC_REG_MAIN_MEMORY_SIZE / 2; /*dwords*/ main_mem_base = HC_REG_MAIN_MEMORY + SC_PORT(sc) * (main_mem_size * 4); main_mem_prty_clr = HC_REG_HC_PRTY_STS_CLR; main_mem_width = 8; val = REG_RD(sc, main_mem_prty_clr); if (val) { BLOGD(sc, DBG_LOAD, "Parity errors in HC block during function init (0x%x)!\n", val); } /* Clear "false" parity errors in MSI-X table */ for (i = main_mem_base; i < main_mem_base + main_mem_size * 4; i += main_mem_width) { bxe_read_dmae(sc, i, main_mem_width / 4); bxe_write_dmae(sc, BXE_SP_MAPPING(sc, wb_data), i, main_mem_width / 4); } /* Clear HC parity attention */ REG_RD(sc, main_mem_prty_clr); } #if 1 /* Enable STORMs SP logging */ REG_WR8(sc, BAR_USTRORM_INTMEM + USTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_TSTRORM_INTMEM + TSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_XSTRORM_INTMEM + XSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); #endif elink_phy_probe(&sc->link_params); return (0); } static void bxe_link_reset(struct bxe_softc *sc) { if (!BXE_NOMCP(sc)) { BXE_PHY_LOCK(sc); elink_lfa_reset(&sc->link_params, &sc->link_vars); BXE_PHY_UNLOCK(sc); } else { if (!CHIP_REV_IS_SLOW(sc)) { BLOGW(sc, "Bootcode is missing - cannot reset link\n"); } } } static void bxe_reset_port(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t val; /* reset physical Link */ bxe_link_reset(sc); REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0); /* Do not rcv packets to BRB */ 
REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK + port*4, 0x0); /* Do not direct rcv packets that are not for MCP to the BRB */ REG_WR(sc, (port ? NIG_REG_LLH1_BRB1_NOT_MCP : NIG_REG_LLH0_BRB1_NOT_MCP), 0x0); /* Configure AEU */ REG_WR(sc, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4, 0); DELAY(100000); /* Check for BRB port occupancy */ val = REG_RD(sc, BRB1_REG_PORT_NUM_OCC_BLOCKS_0 + port*4); if (val) { BLOGD(sc, DBG_LOAD, "BRB1 is not empty, %d blocks are occupied\n", val); } /* TODO: Close Doorbell port? */ } static void bxe_ilt_wr(struct bxe_softc *sc, uint32_t index, bus_addr_t addr) { int reg; uint32_t wb_write[2]; if (CHIP_IS_E1(sc)) { reg = PXP2_REG_RQ_ONCHIP_AT + index*8; } else { reg = PXP2_REG_RQ_ONCHIP_AT_B0 + index*8; } wb_write[0] = ONCHIP_ADDR1(addr); wb_write[1] = ONCHIP_ADDR2(addr); REG_WR_DMAE(sc, reg, wb_write, 2); } static void bxe_clear_func_ilt(struct bxe_softc *sc, uint32_t func) { uint32_t i, base = FUNC_ILT_BASE(func); for (i = base; i < base + ILT_PER_FUNC; i++) { bxe_ilt_wr(sc, i, 0); } } static void bxe_reset_func(struct bxe_softc *sc) { struct bxe_fastpath *fp; int port = SC_PORT(sc); int func = SC_FUNC(sc); int i; /* Disable the function in the FW */ REG_WR8(sc, BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(func), 0); /* FP SBs */ FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_STATE_OFFSET(fp->fw_sb_id), SB_DISABLED); } #if 0 if (CNIC_LOADED(sc)) { /* CNIC SB */ REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_STATE_OFFSET (bxe_cnic_fw_sb_id(sc)), SB_DISABLED); } #endif /* SP SB */ REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_DATA_STATE_OFFSET(func), SB_DISABLED); for (i = 0; i < XSTORM_SPQ_DATA_SIZE / 4; i++) { REG_WR(sc, BAR_XSTRORM_INTMEM + XSTORM_SPQ_DATA_OFFSET(func), 0); } /* 
Configure IGU */ if (sc->devinfo.int_block == INT_BLOCK_HC) { REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } else { REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, 0); REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, 0); } if (CNIC_LOADED(sc)) { /* Disable Timer scan */ REG_WR(sc, TM_REG_EN_LINEAR0_TIMER + port*4, 0); /* * Wait for at least 10ms and up to 2 second for the timers * scan to complete */ for (i = 0; i < 200; i++) { DELAY(10000); if (!REG_RD(sc, TM_REG_LIN0_SCAN_ON + port*4)) break; } } /* Clear ILT */ bxe_clear_func_ilt(sc, func); /* * Timers workaround bug for E2: if this is vnic-3, * we need to set the entire ilt range for this timers. */ if (!CHIP_IS_E1x(sc) && SC_VN(sc) == 3) { struct ilt_client_info ilt_cli; /* use dummy TM client */ memset(&ilt_cli, 0, sizeof(struct ilt_client_info)); ilt_cli.start = 0; ilt_cli.end = ILT_NUM_PAGE_ENTRIES - 1; ilt_cli.client_num = ILT_CLIENT_TM; ecore_ilt_boundry_init_op(sc, &ilt_cli, 0, INITOP_CLEAR); } /* this assumes that reset_port() called before reset_func()*/ if (!CHIP_IS_E1x(sc)) { bxe_pf_disable(sc); } sc->dmae_ready = 0; } static int bxe_gunzip_init(struct bxe_softc *sc) { return (0); } static void bxe_gunzip_end(struct bxe_softc *sc) { return; } static int bxe_init_firmware(struct bxe_softc *sc) { if (CHIP_IS_E1(sc)) { ecore_init_e1_firmware(sc); sc->iro_array = e1_iro_arr; } else if (CHIP_IS_E1H(sc)) { ecore_init_e1h_firmware(sc); sc->iro_array = e1h_iro_arr; } else if (!CHIP_IS_E1x(sc)) { ecore_init_e2_firmware(sc); sc->iro_array = e2_iro_arr; } else { BLOGE(sc, "Unsupported chip revision\n"); return (-1); } return (0); } static void bxe_release_firmware(struct bxe_softc *sc) { /* Do nothing */ return; } static int ecore_gunzip(struct bxe_softc *sc, const uint8_t *zbuf, int len) { /* XXX : Implement... 
*/ BLOGD(sc, DBG_LOAD, "ECORE_GUNZIP NOT IMPLEMENTED\n"); return (FALSE); } static void ecore_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val) { bxe_reg_wr_ind(sc, addr, val); } static void ecore_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { bxe_write_dmae_phys_len(sc, phys_addr, addr, len); } void ecore_storm_memset_struct(struct bxe_softc *sc, uint32_t addr, size_t size, uint32_t *data) { uint8_t i; for (i = 0; i < size/4; i++) { REG_WR(sc, addr + (i * 4), data[i]); } } Index: stable/9/sys/dev/cxgb/cxgb_sge.c =================================================================== --- stable/9/sys/dev/cxgb/cxgb_sge.c (revision 273911) +++ stable/9/sys/dev/cxgb/cxgb_sge.c (revision 273912) @@ -1,3726 +1,3726 @@ /************************************************************************** Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int txq_fills = 0; int multiq_tx_enable = 1; #ifdef TCP_OFFLOAD CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS); #endif extern struct sysctl_oid_list sysctl__hw_cxgb_children; int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, "size of per-queue mbuf ring"); static int cxgb_tx_coalesce_force = 0; TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force); SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW, &cxgb_tx_coalesce_force, 0, "coalesce small packets into a single work request regardless of ring state"); #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 static int 
cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start", &cxgb_tx_coalesce_enable_start); SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW, &cxgb_tx_coalesce_enable_start, 0, "coalesce enable threshold"); static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop); SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW, &cxgb_tx_coalesce_enable_stop, 0, "coalesce disable threshold"); static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold); SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW, &cxgb_tx_reclaim_threshold, 0, "tx cleaning minimum threshold"); /* * XXX don't re-enable this until TOE stops assuming * we have an m_ext */ static int recycle_enable = 0; extern int cxgb_use_16k_clusters; extern int nmbjumbop; extern int nmbjumbo9; extern int nmbjumbo16; #define USE_GTS 0 #define SGE_RX_SM_BUF_SIZE 1536 #define SGE_RX_DROP_THRES 16 #define SGE_RX_COPY_THRES 128 /* * Period of the Tx buffer reclaim timer. This timer does not need to run * frequently as Tx buffers are usually reclaimed by new Tx packets. 
*/ #define TX_RECLAIM_PERIOD (hz >> 1) /* * Values for sge_txq.flags */ enum { TXQ_RUNNING = 1 << 0, /* fetch engine is running */ TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ }; struct tx_desc { uint64_t flit[TX_DESC_FLITS]; } __packed; struct rx_desc { uint32_t addr_lo; uint32_t len_gen; uint32_t gen2; uint32_t addr_hi; } __packed; struct rsp_desc { /* response queue descriptor */ struct rss_header rss_hdr; uint32_t flags; uint32_t len_cq; uint8_t imm_data[47]; uint8_t intr_gen; } __packed; #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) struct tx_sw_desc { /* SW state per Tx descriptor */ struct mbuf *m; bus_dmamap_t map; int flags; }; struct rx_sw_desc { /* SW state per Rx descriptor */ caddr_t rxsd_cl; struct mbuf *m; bus_dmamap_t map; int flags; }; struct txq_state { unsigned int compl; unsigned int gen; unsigned int pidx; }; struct refill_fl_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; /* * Maps a number of flits to the number of Tx descriptors that can hold them. * The formula is * * desc = 1 + (flits - 2) / (WR_FLITS - 1). * * HW allows up to 4 descriptors to be combined into a WR. 
*/ static uint8_t flit_desc_map[] = { 0, #if SGE_NUM_GENBITS == 1 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 #elif SGE_NUM_GENBITS == 2 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, #else # error "SGE_NUM_GENBITS must be 1 or 2" #endif }; #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_NEEDS_ENQUEUE(qs) \ drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) #define TXQ_RING_DEQUEUE(qs) \ drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) int cxgb_debug = 0; static void sge_timer_cb(void *arg); static void sge_timer_reclaim(void *arg, int ncount); static void sge_txq_reclaim_handler(void *arg, int ncount); static void cxgb_start_locked(struct sge_qset *qs); /* * XXX need to cope with bursty scheduling by looking at a wider * window than we are now for determining the need for coalescing * */ static __inline uint64_t check_pkt_coalesce(struct sge_qset *qs) { struct adapter *sc; struct sge_txq *txq; uint8_t *fill; if (__predict_false(cxgb_tx_coalesce_force)) return (1); txq = &qs->txq[TXQ_ETH]; sc = qs->port->adapter; fill = &sc->tunq_fill[qs->idx]; if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX) cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) cxgb_tx_coalesce_enable_start = 
COALESCE_STOP_MIN; /* * if the hardware transmit queue is more than 1/8 full * we mark it as coalescing - we drop back from coalescing * when we go below 1/32 full and there are no packets enqueued, * this provides us with some degree of hysteresis */ if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) *fill = 0; else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) *fill = 1; return (sc->tunq_coalesce); } #ifdef __LP64__ static void set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) { uint64_t wr_hilo; #if _BYTE_ORDER == _LITTLE_ENDIAN wr_hilo = wr_hi; wr_hilo |= (((uint64_t)wr_lo)<<32); #else wr_hilo = wr_lo; wr_hilo |= (((uint64_t)wr_hi)<<32); #endif wrp->wrh_hilo = wr_hilo; } #else static void set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) { wrp->wrh_hi = wr_hi; wmb(); wrp->wrh_lo = wr_lo; } #endif struct coalesce_info { int count; int nbytes; }; static int coalesce_check(struct mbuf *m, void *arg) { struct coalesce_info *ci = arg; int *count = &ci->count; int *nbytes = &ci->nbytes; if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && (*count < 7) && (m->m_next == NULL))) { *count += 1; *nbytes += m->m_len; return (1); } return (0); } static struct mbuf * cxgb_dequeue(struct sge_qset *qs) { struct mbuf *m, *m_head, *m_tail; struct coalesce_info ci; if (check_pkt_coalesce(qs) == 0) return TXQ_RING_DEQUEUE(qs); m_head = m_tail = NULL; ci.count = ci.nbytes = 0; do { m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); if (m_head == NULL) { m_tail = m_head = m; } else if (m != NULL) { m_tail->m_nextpkt = m; m_tail = m; } } while (m != NULL); if (ci.count > 7) panic("trying to coalesce %d packets in to one WR", ci.count); return (m_head); } /** * reclaim_completed_tx - reclaims completed Tx descriptors * @adapter: the adapter * @q: the Tx queue to reclaim completed descriptors from * * Reclaims Tx descriptors that the SGE has indicated it 
has processed, * and frees the associated buffers if possible. Called with the Tx * queue's lock held. */ static __inline int reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) { struct sge_txq *q = &qs->txq[queue]; int reclaim = desc_reclaimable(q); if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; if (reclaim < reclaim_min) return (0); mtx_assert(&qs->lock, MA_OWNED); if (reclaim > 0) { t3_free_tx_desc(qs, reclaim, queue); q->cleaned += reclaim; q->in_use -= reclaim; } if (isset(&qs->txq_stopped, TXQ_ETH)) clrbit(&qs->txq_stopped, TXQ_ETH); return (reclaim); } /** * should_restart_tx - are there enough resources to restart a Tx queue? * @q: the Tx queue * * Checks if there are enough descriptors to restart a suspended Tx queue. */ static __inline int should_restart_tx(const struct sge_txq *q) { unsigned int r = q->processed - q->cleaned; return q->in_use - r < (q->size >> 1); } /** * t3_sge_init - initialize SGE * @adap: the adapter * @p: the SGE parameters * * Performs SGE initialization needed every time after a chip reset. * We do not initialize any of the queue sets here, instead the driver * top-level must request those individually. We also do not enable DMA * here, that should be done after the queues have been set up. */ void t3_sge_init(adapter_t *adap, struct sge_params *p) { u_int ctrl, ups; ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | V_USERSPACESIZE(ups ? 
ups - 1 : 0) | F_ISCSICOALESCING; #if SGE_NUM_GENBITS == 1 ctrl |= F_EGRGENCTRL; #endif if (adap->params.rev > 0) { if (!(adap->flags & (USING_MSIX | USING_MSI))) ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; } t3_write_reg(adap, A_SG_CONTROL, ctrl); t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | V_LORCQDRBTHRSH(512)); t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | V_TIMEOUT(200 * core_ticks_per_usec(adap))); t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, adap->params.rev < T3_REV_C ? 1000 : 500); t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); } /** * sgl_len - calculates the size of an SGL of the given capacity * @n: the number of SGL entries * * Calculates the number of flits needed for a scatter/gather list that * can hold the given number of entries. */ static __inline unsigned int sgl_len(unsigned int n) { return ((3 * n) / 2 + (n & 1)); } /** * get_imm_packet - return the next ingress packet buffer from a response * @resp: the response descriptor containing the packet data * * Return a packet containing the immediate data of the given response. 
*/ static int get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) { if (resp->rss_hdr.opcode == CPL_RX_DATA) { const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0]; m->m_len = sizeof(*cpl) + ntohs(cpl->len); } else if (resp->rss_hdr.opcode == CPL_RX_PKT) { const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0]; m->m_len = sizeof(*cpl) + ntohs(cpl->len); } else m->m_len = IMMED_PKT_SIZE; m->m_ext.ext_buf = NULL; m->m_ext.ext_type = 0; memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); return (0); } static __inline u_int flits_to_desc(u_int n) { return (flit_desc_map[n]); } #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ F_HIRCQPARITYERROR) #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ F_RSPQDISABLED) /** * t3_sge_err_intr_handler - SGE async event interrupt handler * @adapter: the adapter * * Interrupt handler for SGE asynchronous (non-data) events. 
*/ void t3_sge_err_intr_handler(adapter_t *adapter) { unsigned int v, status; status = t3_read_reg(adapter, A_SG_INT_CAUSE); if (status & SGE_PARERR) CH_ALERT(adapter, "SGE parity error (0x%x)\n", status & SGE_PARERR); if (status & SGE_FRAMINGERR) CH_ALERT(adapter, "SGE framing error (0x%x)\n", status & SGE_FRAMINGERR); if (status & F_RSPQCREDITOVERFOW) CH_ALERT(adapter, "SGE response queue credit overflow\n"); if (status & F_RSPQDISABLED) { v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); CH_ALERT(adapter, "packet delivered to disabled response queue (0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff); } t3_write_reg(adapter, A_SG_INT_CAUSE, status); if (status & SGE_FATALERR) t3_fatal_err(adapter); } void t3_sge_prep(adapter_t *adap, struct sge_params *p) { int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); nqsets *= adap->params.nports; fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); while (!powerof2(fl_q_size)) fl_q_size--; use_16k = cxgb_use_16k_clusters != -1 ? 
cxgb_use_16k_clusters : is_offload(adap); #if __FreeBSD_version >= 700111 if (use_16k) { jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUM16BYTES; } else { jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUM9BYTES; } #else jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUMPAGESIZE; #endif while (!powerof2(jumbo_q_size)) jumbo_q_size--; if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) device_printf(adap->dev, "Insufficient clusters and/or jumbo buffers.\n"); p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); for (i = 0; i < SGE_QSETS; ++i) { struct qset_params *q = p->qset + i; if (adap->params.nports > 2) { q->coalesce_usecs = 50; } else { #ifdef INVARIANTS q->coalesce_usecs = 10; #else q->coalesce_usecs = 5; #endif } q->polling = 0; q->rspq_size = RSPQ_Q_SIZE; q->fl_size = fl_q_size; q->jumbo_size = jumbo_q_size; q->jumbo_buf_size = jumbo_buf_size; q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; q->cong_thres = 0; } } int t3_sge_alloc(adapter_t *sc) { /* The parent tag. */ if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ BUS_SPACE_UNRESTRICTED, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lock, lockarg */ &sc->parent_dmat)) { device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); return (ENOMEM); } /* * DMA tag for normal sized RX frames */ if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); return (ENOMEM); } /* * DMA tag for jumbo sized RX frames. 
*/ if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); return (ENOMEM); } /* * DMA tag for TX frames. */ if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->tx_dmat)) { device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); return (ENOMEM); } return (0); } int t3_sge_free(struct adapter * sc) { if (sc->tx_dmat != NULL) bus_dma_tag_destroy(sc->tx_dmat); if (sc->rx_jumbo_dmat != NULL) bus_dma_tag_destroy(sc->rx_jumbo_dmat); if (sc->rx_dmat != NULL) bus_dma_tag_destroy(sc->rx_dmat); if (sc->parent_dmat != NULL) bus_dma_tag_destroy(sc->parent_dmat); return (0); } void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) { qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); qs->rspq.polling = 0 /* p->polling */; } #if !defined(__i386__) && !defined(__amd64__) static void refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct refill_fl_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #endif /** * refill_fl - refill an SGE free-buffer list * @sc: the controller softc * @q: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an SGE free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void refill_fl(adapter_t *sc, struct sge_fl *q, int n) { struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; struct refill_fl_cb_arg cb_arg; struct mbuf *m; caddr_t cl; int err; cb_arg.error = 0; while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. 
*/ if (q->zone == zone_pack) { if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) break; cl = m->m_ext.ext_buf; } else { if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) break; if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { uma_zfree(q->zone, cl); break; } } if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); uma_zfree(q->zone, cl); goto done; } sd->flags |= RX_SW_DESC_MAP_CREATED; } #if !defined(__i386__) && !defined(__amd64__) err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, refill_fl_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { if (q->zone == zone_pack) uma_zfree(q->zone, cl); m_free(m); goto done; } #else cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); #endif sd->flags |= RX_SW_DESC_INUSE; sd->rxsd_cl = cl; sd->m = m; d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); d->gen2 = htobe32(V_FLD_GEN2(q->gen)); d++; sd++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; sd = q->sdesc; d = q->desc; } q->credits++; q->db_pending++; } done: if (q->db_pending >= 32) { q->db_pending = 0; t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } } /** * free_rx_bufs - free the Rx buffers on an SGE free list * @sc: the controle softc * @q: the SGE free list to clean up * * Release the buffers on an SGE free-buffer Rx queue. HW fetching from * this queue should be stopped before calling this function. 
*/ static void free_rx_bufs(adapter_t *sc, struct sge_fl *q) { u_int cidx = q->cidx; while (q->credits--) { struct rx_sw_desc *d = &q->sdesc[cidx]; if (d->flags & RX_SW_DESC_INUSE) { bus_dmamap_unload(q->entry_tag, d->map); bus_dmamap_destroy(q->entry_tag, d->map); if (q->zone == zone_pack) { m_init(d->m, zone_pack, MCLBYTES, M_NOWAIT, MT_DATA, M_EXT); uma_zfree(zone_pack, d->m); } else { m_init(d->m, zone_mbuf, MLEN, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, d->m); uma_zfree(q->zone, d->rxsd_cl); } } d->rxsd_cl = NULL; d->m = NULL; if (++cidx == q->size) cidx = 0; } } static __inline void __refill_fl(adapter_t *adap, struct sge_fl *fl) { refill_fl(adap, fl, min(16U, fl->size - fl->credits)); } static __inline void __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) { uint32_t reclaimable = fl->size - fl->credits; if (reclaimable > 0) refill_fl(adap, fl, min(max, reclaimable)); } /** * recycle_rx_buf - recycle a receive buffer * @adapter: the adapter * @q: the SGE free list * @idx: index of buffer to recycle * * Recycles the specified buffer on the given free list by adding it at * the next available slot on the list. */ static void recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) { struct rx_desc *from = &q->desc[idx]; struct rx_desc *to = &q->desc[q->pidx]; q->sdesc[q->pidx] = q->sdesc[idx]; to->addr_lo = from->addr_lo; // already big endian to->addr_hi = from->addr_hi; // likewise wmb(); /* necessary ? 
*/ to->len_gen = htobe32(V_FLD_GEN1(q->gen)); to->gen2 = htobe32(V_FLD_GEN2(q->gen)); q->credits++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; } t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } static void alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { uint32_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) { size_t len = nelem * elem_size; void *s = NULL; void *p = NULL; int err; if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor tag\n"); return (ENOMEM); } if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, map)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor memory\n"); return (ENOMEM); } bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); bzero(p, len); *(void **)desc = p; if (sw_size) { len = nelem * sw_size; s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); *(void **)sdesc = s; } if (parent_entry_tag == NULL) return (0); if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, entry_tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); return (ENOMEM); } return (0); } static void sge_slow_intr_handler(void *arg, int ncount) { adapter_t *sc = arg; t3_slow_intr_handler(sc); t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); (void) t3_read_reg(sc, A_PL_INT_ENABLE0); } /** * sge_timer_cb - perform periodic maintenance of an SGE qset * @data: the SGE queue set to maintain * * Runs periodically from a timer to perform maintenance of an SGE queue * set. 
It performs two tasks: * * a) Cleans up any completed Tx descriptors that may still be pending. * Normal descriptor cleanup happens when new packets are added to a Tx * queue so this timer is relatively infrequent and does any cleanup only * if the Tx queue has not seen any new packets in a while. We make a * best effort attempt to reclaim descriptors, in that we don't wait * around if we cannot get a queue's lock (which most likely is because * someone else is queueing new packets and so will also handle the clean * up). Since control queues use immediate data exclusively we don't * bother cleaning them up here. * * b) Replenishes Rx queues that have run out due to memory shortage. * Normally new Rx buffers are added when existing ones are consumed but * when out of memory a queue can become empty. We try to add only a few * buffers here, the queue will be replenished fully as these new buffers * are used up if memory shortage has subsided. * * c) Return coalesced response queue credits in case a response queue is * starved. * * d) Ring doorbells for T304 tunnel queues since we have seen doorbell * fifo overflows and the FW doesn't implement any recovery scheme yet. 
*/ static void sge_timer_cb(void *arg) { adapter_t *sc = arg; if ((sc->flags & USING_MSIX) == 0) { struct port_info *pi; struct sge_qset *qs; struct sge_txq *txq; int i, j; int reclaim_ofl, refill_rx; if (sc->open_device_map == 0) return; for (i = 0; i < sc->params.nports; i++) { pi = &sc->port[i]; for (j = 0; j < pi->nqsets; j++) { qs = &sc->sge.qs[pi->first_qset + j]; txq = &qs->txq[0]; reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || (qs->fl[1].credits < qs->fl[1].size)); if (reclaim_ofl || refill_rx) { taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); break; } } } } if (sc->params.nports > 2) { int i; for_each_port(sc, i) { struct port_info *pi = &sc->port[i]; t3_write_reg(sc, A_SG_KDOORBELL, F_SELEGRCNTX | (FW_TUNNEL_SGEEC_START + pi->first_qset)); } } if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && sc->open_device_map != 0) callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); } /* * This is meant to be a catch-all function to keep sge state private * to sge.c * */ int t3_sge_init_adapter(adapter_t *sc) { callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); return (0); } int t3_sge_reset_adapter(adapter_t *sc) { callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); return (0); } int t3_sge_init_port(struct port_info *pi) { TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); return (0); } /** * refill_rspq - replenish an SGE response queue * @adapter: the adapter * @q: the response queue to replenish * @credits: how many new responses to make available * * Replenishes a response queue by making the supplied number of responses * available to HW. */ static __inline void refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) { /* mbufs are allocated on demand when a rspq entry is processed. 
*/ t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); } static void sge_txq_reclaim_handler(void *arg, int ncount) { struct sge_qset *qs = arg; int i; for (i = 0; i < 3; i++) reclaim_completed_tx(qs, 16, i); } static void sge_timer_reclaim(void *arg, int ncount) { struct port_info *pi = arg; int i, nqsets = pi->nqsets; adapter_t *sc = pi->adapter; struct sge_qset *qs; struct mtx *lock; KASSERT((sc->flags & USING_MSIX) == 0, ("can't call timer reclaim for msi-x")); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[pi->first_qset + i]; reclaim_completed_tx(qs, 16, TXQ_OFLD); lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; if (mtx_trylock(lock)) { /* XXX currently assume that we are *NOT* polling */ uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); if (qs->fl[0].credits < qs->fl[0].size - 16) __refill_fl(sc, &qs->fl[0]); if (qs->fl[1].credits < qs->fl[1].size - 16) __refill_fl(sc, &qs->fl[1]); if (status & (1 << qs->rspq.cntxt_id)) { if (qs->rspq.credits) { refill_rspq(sc, &qs->rspq, 1); qs->rspq.credits--; t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1 << qs->rspq.cntxt_id); } } mtx_unlock(lock); } } } /** * init_qset_cntxt - initialize an SGE queue set context info * @qs: the queue set * @id: the queue set id * * Initializes the TIDs and context ids for the queues of a queue set. 
*/ static void init_qset_cntxt(struct sge_qset *qs, u_int id) { qs->rspq.cntxt_id = id; qs->fl[0].cntxt_id = 2 * id; qs->fl[1].cntxt_id = 2 * id + 1; qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; mbufq_init(&qs->txq[TXQ_ETH].sendq); mbufq_init(&qs->txq[TXQ_OFLD].sendq); mbufq_init(&qs->txq[TXQ_CTRL].sendq); } static void txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) { txq->in_use += ndesc; /* * XXX we don't handle stopping of queue * presumably start handles this when we bump against the end */ txqs->gen = txq->gen; txq->unacked += ndesc; txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); txq->unacked &= 31; txqs->pidx = txq->pidx; txq->pidx += ndesc; #ifdef INVARIANTS if (((txqs->pidx > txq->cidx) && (txq->pidx < txqs->pidx) && (txq->pidx >= txq->cidx)) || ((txqs->pidx < txq->cidx) && (txq->pidx >= txq-> cidx)) || ((txqs->pidx < txq->cidx) && (txq->cidx < txqs->pidx))) panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", txqs->pidx, txq->pidx, txq->cidx); #endif if (txq->pidx >= txq->size) { txq->pidx -= txq->size; txq->gen ^= 1; } } /** * calc_tx_descs - calculate the number of Tx descriptors for a packet * @m: the packet mbufs * @nsegs: the number of segments * * Returns the number of Tx descriptors needed for the given Ethernet * packet. Ethernet packets require addition of WR and CPL headers. 
*/
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	/* Packets of at most PIO_LEN bytes always take a single descriptor. */
	if (m->m_pkthdr.len <= PIO_LEN)
		return 1;

	flits = sgl_len(nsegs) + 2;
	/* TSO packets need one more flit. */
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;

	return flits_to_desc(flits);
}

/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet.
 *	Zero-length segments are skipped and the remaining ones are packed two
 *	per sg_ent, starting with the first entry of @sgp.  If an odd number
 *	of segments is written, the unused second slot of the last entry is
 *	zeroed.  Nothing is returned; the caller must size the SGL
 *	appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx, nwritten;

	for (idx = 0, i = 0, nwritten = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		/*
		 * Advance to the next sg_ent only after something has been
		 * written to the current one.  Keying this on the segment
		 * index instead would skip the first sg_ent, leaving it
		 * uninitialized, whenever the leading segment is empty.
		 */
		if (nwritten && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
		nwritten++;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}

/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 *	where the HW is going to sleep just after we checked, however,
 *	then the interrupt handler will detect the outstanding TX packet
 *	and ring the doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
*/
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
{
#if USE_GTS
	/*
	 * GTS mode: only ring the doorbell when this call is the one that
	 * flips TXQ_RUNNING from clear to set; TXQ_LAST_PKT_DB records that
	 * the most recent packet triggered a doorbell.  mustring is unused.
	 */
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	/*
	 * Non-GTS mode: batch doorbells.  Each call counts one pending
	 * request; the register is written when the caller insists or when
	 * the count reaches 32, and the count is then reset.
	 */
	if (mustring || ++q->db_pending >= 32) {
		wmb();            /* write descriptors before telling HW */
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
		q->db_pending = 0;
	}
#endif
}

/*
 * Store the generation value in the last flit of a Tx descriptor.  This is
 * only compiled in when the SGE is configured for two generation bits;
 * otherwise the function is a no-op.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}

/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
*/ static void write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) { struct work_request_hdr *wrp = (struct work_request_hdr *)txd; struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; if (__predict_true(ndesc == 1)) { set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi, htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | wr_lo); wr_gen2(txd, txqs->gen); } else { unsigned int ogen = txqs->gen; const uint64_t *fp = (const uint64_t *)sgl; struct work_request_hdr *wp = wrp; wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi; while (sgl_flits) { unsigned int avail = WR_FLITS - flits; if (avail > sgl_flits) avail = sgl_flits; memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); sgl_flits -= avail; ndesc--; if (!sgl_flits) break; fp += avail; txd++; txsd++; if (++txqs->pidx == txq->size) { txqs->pidx = 0; txqs->gen ^= 1; txd = txq->desc; txsd = txq->sdesc; } /* * when the head of the mbuf chain * is freed all clusters will be freed * with it */ wrp = (struct work_request_hdr *)txd; wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | V_WR_SGLSFLT(1)) | wr_hi; wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, sgl_flits + 1)) | V_WR_GEN(txqs->gen)) | wr_lo; wr_gen2(txd, txqs->gen); flits = 1; } wrp->wrh_hi |= htonl(F_WR_EOP); wmb(); wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; wr_gen2((struct tx_desc *)wp, ogen); } } /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) #define GET_VTAG(cntrl, m) \ do { \ if ((m)->m_flags & M_VLANTAG) \ cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ } while (0) static int t3_encap(struct sge_qset *qs, struct mbuf **m) { adapter_t *sc; struct mbuf *m0; struct sge_txq *txq; struct txq_state txqs; struct port_info *pi; unsigned int ndesc, flits, 
cntrl, mlen; int err, nsegs, tso_info = 0; struct work_request_hdr *wrp; struct tx_sw_desc *txsd; struct sg_ent *sgp, *sgl; uint32_t wr_hi, wr_lo, sgl_flits; bus_dma_segment_t segs[TX_MAX_SEGS]; struct tx_desc *txd; pi = qs->port; sc = pi->adapter; txq = &qs->txq[TXQ_ETH]; txd = &txq->desc[txq->pidx]; txsd = &txq->sdesc[txq->pidx]; sgl = txq->txq_sgl; prefetch(txd); m0 = *m; mtx_assert(&qs->lock, MA_OWNED); cntrl = V_TXPKT_INTF(pi->txpkt_intf); KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); if (m0->m_nextpkt == NULL && m0->m_next != NULL && m0->m_pkthdr.csum_flags & (CSUM_TSO)) tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); if (m0->m_nextpkt != NULL) { busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); ndesc = 1; mlen = 0; } else { if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, &m0, segs, &nsegs))) { if (cxgb_debug) printf("failed ... err=%d\n", err); return (err); } mlen = m0->m_pkthdr.len; ndesc = calc_tx_descs(m0, nsegs); } txq_prod(txq, ndesc, &txqs); KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); txsd->m = m0; if (m0->m_nextpkt != NULL) { struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; int i, fidx; if (nsegs > 7) panic("trying to coalesce %d packets in to one WR", nsegs); txq->txq_coalesced += nsegs; wrp = (struct work_request_hdr *)txd; flits = nsegs*2 + 1; for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { struct cpl_tx_pkt_batch_entry *cbe; uint64_t flit; uint32_t *hflit = (uint32_t *)&flit; int cflags = m0->m_pkthdr.csum_flags; cntrl = V_TXPKT_INTF(pi->txpkt_intf); GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(cflags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) cntrl |= F_TXPKT_L4CSUM_DIS; hflit[0] = htonl(cntrl); hflit[1] = htonl(segs[i].ds_len | 0x80000000); flit |= htobe64(1 << 24); cbe = &cpl_batch->pkt_entry[i]; cbe->cntrl = hflit[0]; cbe->len = hflit[1]; 
cbe->addr = htobe64(segs[i].ds_addr); } wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); set_wr_hdr(wrp, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); return (0); } else if (tso_info) { uint16_t eth_type; struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; struct ether_header *eh; void *l3hdr; struct tcphdr *tcp; txd->flit[2] = 0; GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); hdr->cntrl = htonl(cntrl); hdr->len = htonl(mlen | 0x80000000); if (__predict_false(mlen < TCPPKTHDRSIZE)) { printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags); panic("tx tso packet too small"); } /* Make sure that ether, ip, tcp headers are all in m0 */ if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { m0 = m_pullup(m0, TCPPKTHDRSIZE); if (__predict_false(m0 == NULL)) { /* XXX panic probably an overreaction */ panic("couldn't fit header into mbuf"); } } eh = mtod(m0, struct ether_header *); eth_type = eh->ether_type; if (eth_type == htons(ETHERTYPE_VLAN)) { struct ether_vlan_header *evh = (void *)eh; tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); l3hdr = evh + 1; eth_type = evh->evl_proto; } else { tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); l3hdr = eh + 1; } if (eth_type == htons(ETHERTYPE_IP)) { struct ip *ip = l3hdr; tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); tcp = (struct tcphdr *)(ip + 1); } else if (eth_type == htons(ETHERTYPE_IPV6)) { struct ip6_hdr *ip6 = l3hdr; KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO with ip6_nxt %d", __func__, ip6->ip6_nxt)); tso_info |= F_LSO_IPV6; tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); tcp = (struct tcphdr *)(ip6 + 1); } else panic("%s: CSUM_TSO but neither ip nor ip6", __func__); tso_info |= 
V_LSO_TCPHDR_WORDS(tcp->th_off); hdr->lso_info = htonl(tso_info); if (__predict_false(mlen <= PIO_LEN)) { /* * pkt not undersized but fits in PIO_LEN * Indicates a TSO bug at the higher levels. */ txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); flits = (mlen + 7) / 8 + 3; wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); set_wr_hdr(&hdr->wr, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } flits = 3; } else { struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) cntrl |= F_TXPKT_L4CSUM_DIS; cpl->cntrl = htonl(cntrl); cpl->len = htonl(mlen | 0x80000000); if (mlen <= PIO_LEN) { txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); flits = (mlen + 7) / 8 + 2; wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); set_wr_hdr(&cpl->wr, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } flits = 2; } wrp = (struct work_request_hdr *)txd; sgp = (ndesc == 1) ? 
(struct sg_ent *)&txd->flit[flits] : sgl; make_sgl(sgp, segs, nsegs); sgl_flits = sgl_len(nsegs); ETHER_BPF_MTAP(pi->ifp, m0); KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_TID(txq->token)); write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); check_ring_tx_db(sc, txq, 0); return (0); } void cxgb_tx_watchdog(void *arg) { struct sge_qset *qs = arg; struct sge_txq *txq = &qs->txq[TXQ_ETH]; if (qs->coalescing != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && TXQ_RING_EMPTY(qs)) qs->coalescing = 0; else if (qs->coalescing == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) qs->coalescing = 1; if (TXQ_TRYLOCK(qs)) { qs->qs_flags |= QS_FLUSHING; cxgb_start_locked(qs); qs->qs_flags &= ~QS_FLUSHING; TXQ_UNLOCK(qs); } if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, qs, txq->txq_watchdog.c_cpu); } static void cxgb_tx_timeout(void *arg) { struct sge_qset *qs = arg; struct sge_txq *txq = &qs->txq[TXQ_ETH]; if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) qs->coalescing = 1; if (TXQ_TRYLOCK(qs)) { qs->qs_flags |= QS_TIMEOUT; cxgb_start_locked(qs); qs->qs_flags &= ~QS_TIMEOUT; TXQ_UNLOCK(qs); } } static void cxgb_start_locked(struct sge_qset *qs) { struct mbuf *m_head = NULL; struct sge_txq *txq = &qs->txq[TXQ_ETH]; struct port_info *pi = qs->port; struct ifnet *ifp = pi->ifp; if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) reclaim_completed_tx(qs, 0, TXQ_ETH); if (!pi->link_config.link_ok) { TXQ_RING_FLUSH(qs); return; } TXQ_LOCK_ASSERT(qs); while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && pi->link_config.link_ok) { reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); if (txq->size - txq->in_use <= TX_MAX_DESC) break; if ((m_head = cxgb_dequeue(qs)) == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. 
In that event, we can't requeue. */ if (t3_encap(qs, &m_head) || m_head == NULL) break; m_head = NULL; } if (txq->db_pending) check_ring_tx_db(pi->adapter, txq, 1); if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && pi->link_config.link_ok) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, qs, txq->txq_timer.c_cpu); if (m_head != NULL) m_freem(m_head); } static int cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) { struct port_info *pi = qs->port; struct sge_txq *txq = &qs->txq[TXQ_ETH]; struct buf_ring *br = txq->txq_mr; int error, avail; avail = txq->size - txq->in_use; TXQ_LOCK_ASSERT(qs); /* * We can only do a direct transmit if the following are true: * - we aren't coalescing (ring < 3/4 full) * - the link is up -- checked in caller * - there are no packets enqueued already * - there is space in hardware transmit queue */ if (check_pkt_coalesce(qs) == 0 && !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { if (t3_encap(qs, &m)) { if (m != NULL && (error = drbr_enqueue(ifp, br, m)) != 0) return (error); } else { if (txq->db_pending) check_ring_tx_db(pi->adapter, txq, 1); /* * We've bypassed the buf ring so we need to update * the stats directly */ txq->txq_direct_packets++; txq->txq_direct_bytes += m->m_pkthdr.len; } } else if ((error = drbr_enqueue(ifp, br, m)) != 0) return (error); reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) cxgb_start_locked(qs); else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, qs, txq->txq_timer.c_cpu); return (0); } int cxgb_transmit(struct ifnet *ifp, struct mbuf *m) { struct sge_qset *qs; struct port_info *pi = ifp->if_softc; int error, qidx = pi->first_qset; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||(!pi->link_config.link_ok)) { m_freem(m); return (0); } if (m->m_flags & M_FLOWID) 
qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; qs = &pi->adapter->sge.qs[qidx]; if (TXQ_TRYLOCK(qs)) { /* XXX running */ error = cxgb_transmit_locked(ifp, qs, m); TXQ_UNLOCK(qs); } else error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); return (error); } void cxgb_qflush(struct ifnet *ifp) { /* * flush any enqueued mbufs in the buf_rings * and in the transmit queues * no-op for now */ return; } /** * write_imm - write a packet into a Tx descriptor as immediate data * @d: the Tx descriptor to write * @m: the packet * @len: the length of packet data to write as immediate data * @gen: the generation bit value to write * * Writes a packet as immediate data into a Tx descriptor. The packet * contains a work request at its beginning. We must write the packet * carefully so the SGE doesn't read accidentally before it's written in * its entirety. */ static __inline void write_imm(struct tx_desc *d, caddr_t src, unsigned int len, unsigned int gen) { struct work_request_hdr *from = (struct work_request_hdr *)src; struct work_request_hdr *to = (struct work_request_hdr *)d; uint32_t wr_hi, wr_lo; KASSERT(len <= WR_LEN && len >= sizeof(*from), ("%s: invalid len %d", __func__, len)); memcpy(&to[1], &from[1], len - sizeof(*from)); wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | V_WR_BCNTLFLT(len & 7)); wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); set_wr_hdr(to, wr_hi, wr_lo); wmb(); wr_gen2(d, gen); } /** * check_desc_avail - check descriptor availability on a send queue * @adap: the adapter * @q: the TX queue * @m: the packet needing the descriptors * @ndesc: the number of Tx descriptors needed * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) * * Checks if the requested number of Tx descriptors is available on an * SGE send queue. If the queue is already suspended or not enough * descriptors are available the packet is queued for later transmission. * Must be called with the Tx queue locked. 
* * Returns 0 if enough descriptors are available, 1 if there aren't * enough descriptors and the packet has been queued, and 2 if the caller * needs to retry because there weren't enough descriptors at the * beginning of the call but some freed up in the mean time. */ static __inline int check_desc_avail(adapter_t *adap, struct sge_txq *q, struct mbuf *m, unsigned int ndesc, unsigned int qid) { /* * XXX We currently only use this for checking the control queue * the control queue is only used for binding qsets which happens * at init time so we are guaranteed enough descriptors */ if (__predict_false(!mbufq_empty(&q->sendq))) { addq_exit: mbufq_tail(&q->sendq, m); return 1; } if (__predict_false(q->size - q->in_use < ndesc)) { struct sge_qset *qs = txq_to_qset(q, qid); setbit(&qs->txq_stopped, qid); if (should_restart_tx(q) && test_and_clear_bit(qid, &qs->txq_stopped)) return 2; q->stops++; goto addq_exit; } return 0; } /** * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs * @q: the SGE control Tx queue * * This is a variant of reclaim_completed_tx() that is used for Tx queues * that send only immediate data (presently just the control queues) and * thus do not have any mbufs */ static __inline void reclaim_completed_tx_imm(struct sge_txq *q) { unsigned int reclaim = q->processed - q->cleaned; q->in_use -= reclaim; q->cleaned += reclaim; } /** * ctrl_xmit - send a packet through an SGE control Tx queue * @adap: the adapter * @q: the control queue * @m: the packet * * Send a packet through an SGE control Tx queue. Packets sent through * a control queue must fit entirely as immediate data in a single Tx * descriptor and have no page fragments. 
*/
static int
ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
	struct sge_txq *q = &qs->txq[TXQ_CTRL];

	KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));

	/* Stamp the WR header in the mbuf with SOP/EOP and the queue token. */
	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wrh_lo = htonl(V_WR_TID(q->token));

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			/*
			 * check_desc_avail() has already placed m on the
			 * queue's sendq in this case, so the mbuf is not
			 * freed here even though an error is returned.
			 */
			TXQ_UNLOCK(qs);
			return (ENOSPC);
		}
		/* ret == 2: descriptors freed up meanwhile, try again. */
		goto again;
	}
	write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);

	q->in_use++;
	/* Advance the producer index, flipping the generation on wrap. */
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	TXQ_UNLOCK(qs);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

	/* The data was copied into the descriptor by write_imm(). */
	m_free(m);
	return (0);
}


/**
 *	restart_ctrlq - restart a suspended control queue
 *	@data: the queue set containing the control queue
 *	@npending: not used by this function
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	/* Push queued packets out for as long as descriptors remain. */
	while (q->in_use < q->size &&
	       (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
		m_free(m);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		/* Still backlogged: mark stopped, but retry if space appeared. */
		setbit(&qs->txq_stopped, TXQ_CTRL);

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	TXQ_UNLOCK(qs);
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0], m);
}

/**
 *	free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with
an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 *
 *	The queue set lock is unlocked and destroyed by this function, so it
 *	is expected to be held on entry.  The queue set structure is zeroed
 *	before returning.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	/* Release any Ethernet Tx buffers still owned by the ring. */
	reclaim_completed_tx(q, 0, TXQ_ETH);
	if (q->txq[TXQ_ETH].txq_mr != NULL)
		buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
	if (q->txq[TXQ_ETH].txq_ifq != NULL) {
		ifq_delete(q->txq[TXQ_ETH].txq_ifq);
		free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
	}

	/* Free lists: disable the HW context, then free ring and buffers. */
	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
					q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	/* Drop and destroy the queue set lock. */
	mtx_unlock(&q->lock);
	MTX_DESTROY(&q->lock);
	/* Tx queues: disable the egress context, then free ring memory. */
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
					q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
					q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	/* Response queue: disable the context, free the ring and its lock. */
	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
			        q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

#if defined(INET6) || defined(INET)
	tcp_lro_free(&q->lro.ctrl);
#endif

	/* Leave no stale pointers behind. */
	bzero(q, sizeof(*q));
}

/**
 *	t3_free_sge_resources - free SGE resources
 *	@sc: the adapter softc
 *	@nqsets: the number of queue sets to free
 *
 *	Frees resources used by the SGE queue sets.
*/ void t3_free_sge_resources(adapter_t *sc, int nqsets) { int i; for (i = 0; i < nqsets; ++i) { TXQ_LOCK(&sc->sge.qs[i]); t3_free_qset(sc, &sc->sge.qs[i]); } } /** * t3_sge_start - enable SGE * @sc: the controller softc * * Enables the SGE for DMAs. This is the last step in starting packet * transfers. */ void t3_sge_start(adapter_t *sc) { t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); } /** * t3_sge_stop - disable SGE operation * @sc: the adapter * * Disables the DMA engine. This can be called in emeregencies (e.g., * from error interrupts) or from normal process context. In the latter * case it also disables any pending queue restart tasklets. Note that * if it is called in interrupt context it cannot disable the restart * tasklets as it cannot wait, however the tasklets will have no effect * since the doorbells are disabled and the driver will call this again * later from process context, at which time the tasklets will be stopped * if they are still running. */ void t3_sge_stop(adapter_t *sc) { int i, nqsets; t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); if (sc->tq == NULL) return; for (nqsets = i = 0; i < (sc)->params.nports; i++) nqsets += sc->port[i].nqsets; #ifdef notyet /* * * XXX */ for (i = 0; i < nqsets; ++i) { struct sge_qset *qs = &sc->sge.qs[i]; taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } #endif } /** * t3_free_tx_desc - reclaims Tx descriptors and their buffers * @adapter: the adapter * @q: the Tx queue to reclaim descriptors from * @reclaimable: the number of descriptors to reclaim * @m_vec_size: maximum number of buffers to reclaim * @desc_reclaimed: returns the number of descriptors reclaimed * * Reclaims Tx descriptors from an SGE Tx queue and frees the associated * Tx buffers. Called with the Tx queue lock held. 
* * Returns number of buffers of reclaimed */ void t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) { struct tx_sw_desc *txsd; unsigned int cidx, mask; struct sge_txq *q = &qs->txq[queue]; #ifdef T3_TRACE T3_TRACE2(sc->tb[q->cntxt_id & 7], "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); #endif cidx = q->cidx; mask = q->size - 1; txsd = &q->sdesc[cidx]; mtx_assert(&qs->lock, MA_OWNED); while (reclaimable--) { prefetch(q->sdesc[(cidx + 1) & mask].m); prefetch(q->sdesc[(cidx + 2) & mask].m); if (txsd->m != NULL) { if (txsd->flags & TX_SW_DESC_MAPPED) { bus_dmamap_unload(q->entry_tag, txsd->map); txsd->flags &= ~TX_SW_DESC_MAPPED; } m_freem_list(txsd->m); txsd->m = NULL; } else q->txq_skipped++; ++txsd; if (++cidx == q->size) { cidx = 0; txsd = q->sdesc; } } q->cidx = cidx; } /** * is_new_response - check if a response is newly written * @r: the response descriptor * @q: the response queue * * Returns true if a response descriptor contains a yet unprocessed * response. */ static __inline int is_new_response(const struct rsp_desc *r, const struct sge_rspq *q) { return (r->intr_gen & F_RSPD_GEN2) == q->gen; } #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ #define NOMEM_INTR_DELAY 2500 #ifdef TCP_OFFLOAD /** * write_ofld_wr - write an offload work request * @adap: the adapter * @m: the packet to send * @q: the Tx queue * @pidx: index of the first Tx descriptor to write * @gen: the generation value to use * @ndesc: number of descriptors the packet will occupy * * Write an offload work request to send the supplied packet. The packet * data already carry the work request with most fields populated. 
*/
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
    unsigned int pidx, unsigned int gen, unsigned int ndesc)
{
	unsigned int sgl_flits, flits;
	int i, idx, nsegs, wrlen;
	struct work_request_hdr *from;
	struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
	struct tx_desc *d = &q->desc[pidx];
	struct txq_state txqs;
	struct sglist_seg *segs;
	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
	struct sglist *sgl;

	from = (void *)(oh + 1);	/* Start of WR within mbuf */
	wrlen = m->m_len - sizeof(*oh);

	/* No SGL attached: the whole WR goes out as immediate data. */
	if (!(oh->flags & F_HDR_SGL)) {
		write_imm(d, (caddr_t)from, wrlen, gen);

		/*
		 * mbuf with "real" immediate tx data will be enqueue_wr'd by
		 * t3_push_frames and freed in wr_ack.  Others, like those sent
		 * down by close_conn, t3_send_reset, etc. should be freed here.
		 */
		if (!(oh->flags & F_HDR_DF))
			m_free(m);
		return;
	}

	/* Copy the WR body after the header; the header is written last. */
	memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));

	sgl = oh->sgl;
	flits = wrlen / 8;
	/* Build the SGL in place if it fits in one descriptor, else on stack. */
	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;

	nsegs = sgl->sg_nseg;
	segs = sgl->sg_segs;
	/* Two length/address pairs per sg_ent; idx selects the slot. */
	for (idx = 0, i = 0; i < nsegs; i++) {
		KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
		if (i && idx == 0)
			++sgp;
		sgp->len[idx] = htobe32(segs[i].ss_len);
		sgp->addr[idx] = htobe64(segs[i].ss_paddr);
		idx ^= 1;
	}
	/* Zero the unused second slot of the final entry. */
	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}

	sgl_flits = sgl_len(nsegs);
	txqs.gen = gen;
	txqs.pidx = pidx;
	txqs.compl = 0;

	write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
	    from->wrh_hi, from->wrh_lo);
}

/**
 *	ofld_xmit - send a packet through an offload queue
 *	@adap: the adapter
 *	@qs: the queue set whose offload queue is used
 *	@m: the packet
 *
 *	Send an offload packet through an SGE offload queue.
*/
static int
ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	unsigned int ndesc;
	unsigned int pidx, gen;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);

	/* The descriptor count is carried in the offload header flags. */
	ndesc = G_HDR_NDESC(oh->flags);

	TXQ_LOCK(qs);
again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			/* The packet was queued on sendq by check_desc_avail(). */
			TXQ_UNLOCK(qs);
			return (EINTR);
		}
		goto again;
	}

	/* Reserve ndesc descriptors, remembering where the WR starts. */
	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}

	write_ofld_wr(adap, m, q, pidx, gen, ndesc);
	check_ring_tx_db(adap, q, 1);
	TXQ_UNLOCK(qs);

	return (0);
}

/**
 *	restart_offloadq - restart a suspended offload queue
 *	@data: the queue set containing the offload queue
 *	@npending: not used by this function
 *
 *	Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	int cleaned;

	TXQ_LOCK(qs);
	/* NOTE: cleaned is assigned here but never read afterwards. */
again:	cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);

	while ((m = mbufq_peek(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
		unsigned int ndesc = G_HDR_NDESC(oh->flags);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			/* Out of room: mark stopped, retry if space appeared. */
			setbit(&qs->txq_stopped, TXQ_OFLD);
			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		/* Descriptors are reserved; write the WR without the lock. */
		TXQ_UNLOCK(qs);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc);
		TXQ_LOCK(qs);
	}
#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	TXQ_UNLOCK(qs);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}

/**
 *	t3_offload_tx - send an offload packet
 *	@sc: the adapter
 *	@m: the packet
 *
 *	Sends an offload
packet. We use the packet priority to select the * appropriate Tx queue as follows: bit 0 indicates whether the packet * should be sent as regular or control, bits 1-3 select the queue set. */ int t3_offload_tx(struct adapter *sc, struct mbuf *m) { struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)]; if (oh->flags & F_HDR_CTRL) { m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */ return (ctrl_xmit(sc, qs, m)); } else return (ofld_xmit(sc, qs, m)); } #endif static void restart_tx(struct sge_qset *qs) { struct adapter *sc = qs->port->adapter; if (isset(&qs->txq_stopped, TXQ_OFLD) && should_restart_tx(&qs->txq[TXQ_OFLD]) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { qs->txq[TXQ_OFLD].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); } if (isset(&qs->txq_stopped, TXQ_CTRL) && should_restart_tx(&qs->txq[TXQ_CTRL]) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { qs->txq[TXQ_CTRL].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } } /** * t3_sge_alloc_qset - initialize an SGE queue set * @sc: the controller softc * @id: the queue set id * @nports: how many Ethernet ports will be using this queue set * @irq_vec_idx: the IRQ vector index for response queue interrupts * @p: configuration parameters for this queue set * @ntxq: number of Tx queues for the queue set * @pi: port info for queue set * * Allocate resources and initialize an SGE queue set. A queue set * comprises a response queue, two Rx free-buffer queues, and up to 3 * Tx queues. The Tx queues are assigned roles in the order Ethernet * queue, offload queue, and control queue. 
*/ int t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, const struct qset_params *p, int ntxq, struct port_info *pi) { struct sge_qset *q = &sc->sge.qs[id]; int i, ret = 0; MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); q->port = pi; q->adap = sc; if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { device_printf(sc->dev, "failed to allocate mbuf ring\n"); goto err; } if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) { device_printf(sc->dev, "failed to allocate ifq\n"); goto err; } ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); callout_init(&q->txq[TXQ_ETH].txq_timer, 1); callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; init_qset_cntxt(q, id); q->idx = id; if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, &q->fl[0].desc, &q->fl[0].sdesc, &q->fl[0].desc_tag, &q->fl[0].desc_map, sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { printf("error %d from alloc ring fl0\n", ret); goto err; } if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, &q->fl[1].desc, &q->fl[1].sdesc, &q->fl[1].desc_tag, &q->fl[1].desc_map, sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { printf("error %d from alloc ring fl1\n", ret); goto err; } if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, &q->rspq.phys_addr, &q->rspq.desc, NULL, &q->rspq.desc_tag, &q->rspq.desc_map, NULL, NULL)) != 0) { printf("error %d from alloc ring rspq\n", ret); goto err; } snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", device_get_unit(sc->dev), irq_vec_idx); MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); for (i = 0; i < ntxq; ++i) { size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); if ((ret = alloc_ring(sc, p->txq_size[i], sizeof(struct tx_desc), sz, &q->txq[i].phys_addr, &q->txq[i].desc, &q->txq[i].sdesc, &q->txq[i].desc_tag, &q->txq[i].desc_map, sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { printf("error %d from alloc ring tx %i\n", ret, i); goto err; } mbufq_init(&q->txq[i].sendq); q->txq[i].gen = 1; q->txq[i].size = p->txq_size[i]; } #ifdef TCP_OFFLOAD TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); #endif TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; q->fl[1].size = p->jumbo_size; q->rspq.gen = 1; q->rspq.cidx = 0; q->rspq.size = p->rspq_size; q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); q->fl[0].buf_size = MCLBYTES; q->fl[0].zone = zone_pack; q->fl[0].type = EXT_PACKET; if (p->jumbo_buf_size == MJUM16BYTES) { q->fl[1].zone = zone_jumbo16; q->fl[1].type = EXT_JUMBO16; } else if (p->jumbo_buf_size == MJUM9BYTES) { q->fl[1].zone = zone_jumbo9; q->fl[1].type = EXT_JUMBO9; } else if (p->jumbo_buf_size == MJUMPAGESIZE) { q->fl[1].zone = zone_jumbop; q->fl[1].type = EXT_JUMBOP; } else { KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); ret = EDOOFUS; goto err; } q->fl[1].buf_size = p->jumbo_buf_size; /* Allocate and setup the lro_ctrl structure */ q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); #if defined(INET6) || defined(INET) ret = tcp_lro_init(&q->lro.ctrl); if (ret) { printf("error %d from tcp_lro_init\n", ret); goto err; } #endif q->lro.ctrl.ifp = pi->ifp; mtx_lock_spin(&sc->sge.reg_lock); ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, q->rspq.phys_addr, q->rspq.size, q->fl[0].buf_size, 1, 0); if (ret) { printf("error %d from t3_sge_init_rspcntxt\n", ret); goto err_unlock; } for (i = 
0; i < SGE_RXQ_PER_SET; ++i) { ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, q->fl[i].phys_addr, q->fl[i].size, q->fl[i].buf_size, p->cong_thres, 1, 0); if (ret) { printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); goto err_unlock; } } ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } if (ntxq > 1) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, USE_GTS, SGE_CNTXT_OFLD, id, q->txq[TXQ_OFLD].phys_addr, q->txq[TXQ_OFLD].size, 0, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } if (ntxq > 2) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, SGE_CNTXT_CTRL, id, q->txq[TXQ_CTRL].phys_addr, q->txq[TXQ_CTRL].size, q->txq[TXQ_CTRL].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } mtx_unlock_spin(&sc->sge.reg_lock); t3_update_qset_coalesce(q, p); refill_fl(sc, &q->fl[0], q->fl[0].size); refill_fl(sc, &q->fl[1], q->fl[1].size); refill_rspq(sc, &q->rspq, q->rspq.size - 1); t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | V_NEWTIMER(q->rspq.holdoff_tmr)); return (0); err_unlock: mtx_unlock_spin(&sc->sge.reg_lock); err: TXQ_LOCK(q); t3_free_qset(sc, q); return (ret); } /* * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with * ethernet data. Hardware assistance with various checksums and any vlan tag * will also be taken into account here. 
*/
void
t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
{
	/*
	 * The CPL_RX_PKT header sits 'ethpad' bytes into the mbuf data; the
	 * ethernet frame starts right after it.
	 */
	uint8_t *const base = mtod(m, uint8_t *);
	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(base + ethpad);
	struct ifnet *ifp = adap->port[adap->rxpkt_map[cpl->iff]].ifp;
	const size_t strip = sizeof(*cpl) + ethpad;
	struct ether_header *eh;
	uint16_t eh_type;

	/* Carry a hardware-extracted VLAN tag over into the packet header. */
	if (cpl->vlan_valid) {
		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
		m->m_flags |= M_VLANTAG;
	}

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.header = base + sizeof(*cpl) + ethpad;

	/* Trim the pad and the CPL header off the front of the mbuf. */
	m->m_pkthdr.len -= strip;
	m->m_len -= strip;
	m->m_data += strip;

	/*
	 * Checksum flags are only set for unfragmented frames whose CPL
	 * header reports a valid checksum equal to 0xffff.
	 */
	if (cpl->fragment || !cpl->csum_valid || cpl->csum != 0xffff)
		return;

	/* Look through a VLAN encapsulation to find the real ethertype. */
	eh = mtod(m, void *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN))
		eh_type = ((struct ether_vlan_header *)eh)->evl_proto;
	else
		eh_type = eh->ether_type;

	if ((ifp->if_capenable & IFCAP_RXCSUM) &&
	    eh_type == htons(ETHERTYPE_IP)) {
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
		    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
	} else if ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) &&
	    eh_type == htons(ETHERTYPE_IPV6)) {
		m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
		    CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
	}
}

/**
 *	get_packet - return the next ingress packet buffer from a free list
 *	@adap: the adapter that received the packet
 *	@drop_thres: # of remaining buffers before we start dropping packets
 *	@qs: the qset that the SGE free list holding the packet belongs to
 *	@mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 *	@r: response descriptor
 *
 *	Get the next packet from a free list and complete setup of the
 *	sk_buff.  If the packet is small we make a copy and recycle the
 *	original buffer, otherwise we use the original buffer itself.
If a
 *	positive drop threshold is supplied packets are dropped and their
 *	buffers recycled if (a) the number of remaining buffers is under the
 *	threshold and the packet is too big to copy, or (b) the packet should
 *	be copied but there is no memory for the copy.
 */
/*
 * Returns 1 when the descriptor carries the end of a packet (RSPQ_SOP_EOP or
 * RSPQ_EOP) and 0 otherwise.  The free list's consumer index is advanced by
 * one entry on every path before returning.
 *
 * NOTE(review): drop_thres is not referenced anywhere in this body.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
{

	unsigned int len_cq = ntohl(r->len_cq);
	/* F_RSPD_FLQ selects which of the qset's two free lists is used. */
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	int mask, cidx = fl->cidx;
	struct rx_sw_desc *sd = &fl->sdesc[cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = M_EXT;
	uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
	caddr_t cl;
	struct mbuf *m;
	int ret = 0;

	/* Wrap-around mask; presumably fl->size is a power of two - confirm. */
	mask = fl->size - 1;
	prefetch(fl->sdesc[(cidx + 1) & mask].m);
	prefetch(fl->sdesc[(cidx + 2) & mask].m);
	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
	    sopeop == RSPQ_SOP_EOP) {
		/*
		 * Small single-descriptor packet: copy it into a fresh header
		 * mbuf and recycle the receive buffer.  If no mbuf can be
		 * allocated, jump into the no-copy path of the else branch.
		 */
		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
			goto skip_recycle;
		cl = mtod(m, void *);
		memcpy(cl, sd->rxsd_cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
		m->m_pkthdr.len = m->m_len = len;
		m->m_flags = 0;
		mh->mh_head = mh->mh_tail = m;
		ret = 1;
		goto done;
	} else {
	skip_recycle:
		/* Use the receive buffer itself as the packet's storage. */
		bus_dmamap_unload(fl->entry_tag, sd->map);
		cl = sd->rxsd_cl;
		m = sd->m;

		/* Only the first descriptor of a packet gets a packet header. */
		if ((sopeop == RSPQ_SOP_EOP) ||
		    (sopeop == RSPQ_SOP))
			flags |= M_PKTHDR;
		m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
		if (fl->zone == zone_pack) {
			/*
			 * restore clobbered data pointer
			 */
			m->m_data = m->m_ext.ext_buf;
		} else {
			m_cljset(m, cl, fl->type);
		}
		m->m_len = len;
	}
	/* Link the mbuf into the chain tracked by mh according to SOP/EOP. */
	switch(sopeop) {
	case RSPQ_SOP_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_SOP:
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		break;
	case RSPQ_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_NSOP_NEOP:
		if (mh->mh_tail == NULL) {
			log(LOG_ERR, "discarding intermediate descriptor entry\n");
			m_freem(m);
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		break;
	}
	/*
	 * NOTE(review): on the discard path above, m has already been handed
	 * to m_freem() when this debug printf dereferences it.
	 */
	if (cxgb_debug)
		printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
done:
	/* Advance the free-list consumer index, wrapping at fl->size. */
	if (++fl->cidx == fl->size)
		fl->cidx = 0;
	return (ret);
}

/**
 *	handle_rsp_cntrl_info - handles control information in a response
 *	@qs: the queue set corresponding to the response
 *	@flags: the response control flags
 *
 *	Handles the control information of an SGE response, such as GTS
 *	indications and completion credits for the queue set's Tx queues.
 *	HW coalesces credits, we don't do any extra SW coalescing.
 */
static __inline void
handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
{
	unsigned int credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ0_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
#endif
	/* TXQ0 credits are accounted to the ETH Tx queue. */
	credits = G_RSPD_TXQ0_CR(flags);
	if (credits)
		qs->txq[TXQ_ETH].processed += credits;

	/* TXQ2 credits are accounted to the CTRL Tx queue. */
	credits = G_RSPD_TXQ2_CR(flags);
	if (credits)
		qs->txq[TXQ_CTRL].processed += credits;

# if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
# endif
	/* TXQ1 credits are accounted to the OFLD Tx queue. */
	credits = G_RSPD_TXQ1_CR(flags);
	if (credits)
		qs->txq[TXQ_OFLD].processed += credits;

}

/* Intentionally empty: this implementation performs no doorbell check. */
static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	;
}

/**
 *	process_responses - process responses from an SGE response queue
 *	@adap: the adapter
 *	@qs: the queue set to which the response queue belongs
 *	@budget: how many responses can be processed in this round
 *
 *	Process responses from an SGE response queue up to the supplied budget.
 *	Responses include received packets as well as credits and other events
 *	for the queues that belong to the response queue's queue set.
 *	A negative budget is effectively unlimited.
 *
 *	Additionally choose the interrupt holdoff time for the next interrupt
 *	on this queue.  If the system is under memory shortage use a fairly
 *	long delay to help recovery.
*/
/*
 * Returns budget - budget_left, i.e. the number of loop iterations charged
 * against the budget (an allocation failure is charged as one).
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
#if defined(INET6) || defined(INET)
	int lro_enabled = qs->lro.enabled;
	int skip_lro;
	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
#endif
	struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
#ifdef DEBUG
	static int last_holdoff = 0;
	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	/* Default holdoff; replaced by NOMEM_INTR_DELAY on mbuf shortage. */
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
		uint8_t opcode = r->rss_hdr.opcode;

		eth = (opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			struct mbuf *m;

			if (cxgb_debug)
				printf("async notification\n");

			if (mh->mh_head == NULL) {
				mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
				m = mh->mh_head;
			} else {
				m = m_gethdr(M_NOWAIT, MT_DATA);
			}
			if (m == NULL)
				goto no_mem;

			/*
			 * Copy AN_PKT_SIZE bytes of the descriptor into the
			 * mbuf and overwrite its first byte with the
			 * CPL_ASYNC_NOTIF opcode.
			 */
			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
			*mtod(m, char *) = CPL_ASYNC_NOTIF;
			opcode = CPL_ASYNC_NOTIF;
			eop = 1;
			rspq->async_notif++;
			goto skip;
		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);

			if (m == NULL) {
		/*
		 * Shared out-of-memory exit, also reached by goto from the
		 * async-notification branch: request the long holdoff, charge
		 * one unit of budget and leave the loop without advancing the
		 * response ring.
		 */
		no_mem:
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			/* Append the immediate-data mbuf to the pending chain. */
			if (mh->mh_head == NULL)
				mh->mh_head = m;
			else
				mh->mh_tail->m_next = m;
			mh->mh_tail = m;

			get_imm_packet(adap, r, m);
			mh->mh_head->m_pkthdr.len += m->m_len;
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			eop = get_packet(adap, drop_thresh, qs, mh, r);
			if (eop) {
				/* The flow id is stored regardless of M_FLOWID. */
				if (r->rss_hdr.hash_type && !adap->timestamp)
					mh->mh_head->m_flags |= M_FLOWID;
				mh->mh_head->m_pkthdr.flowid = rss_hash;
			}

			/* Free-list packets are passed to t3_rx_eth with a pad of 2. */
			ethpad = 2;
		} else {
			rspq->pure_rsps++;
		}
	skip:
		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		if (!eth && eop) {
			rspq->offload_pkts++;
#ifdef TCP_OFFLOAD
			adap->cpl_handler[opcode](qs, r, mh->mh_head);
#else
			m_freem(mh->mh_head);
#endif
			mh->mh_head = NULL;
		} else if (eth && eop) {
			struct mbuf *m = mh->mh_head;

			t3_rx_eth(adap, m, ethpad);

			/*
			 * The T304 sends incoming packets on any qset.  If LRO
			 * is also enabled, we could end up sending packet up
			 * lro_ctrl->ifp's input.  That is incorrect.
			 *
			 * The mbuf's rcvif was derived from the cpl header and
			 * is accurate.  Skip LRO and just use that.
			 */
#if defined(INET6) || defined(INET)
			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);

			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
			    && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
			    ) {
				/* successfully queue'd for LRO */
			} else
#endif
			{
				/*
				 * LRO not enabled, packet unsuitable for LRO,
				 * or unable to queue.  Pass it up right now in
				 * either case.
				 */
				struct ifnet *ifp = m->m_pkthdr.rcvif;
				(*ifp->if_input)(ifp, m);
			}
			mh->mh_head = NULL;

		}

		/* Advance to the next descriptor; flip the generation on wrap. */
		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		/* Return response-queue credits in batches of 64. */
		if (++rspq->credits >= 64) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}
		__refill_fl_lt(adap, &qs->fl[0], 32);
		__refill_fl_lt(adap, &qs->fl[1], 32);
		--budget_left;
	}

#if defined(INET6) || defined(INET)
	/* Flush LRO */
	while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
		struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
		SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
		tcp_lro_flush(lro_ctrl, queued);
	}
#endif

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	mb();  /* commit Tx queue processed updates */
	if (__predict_false(qs->txq_stopped > 1))
		restart_tx(qs);

	__refill_fl_lt(adap, &qs->fl[0], 512);
	__refill_fl_lt(adap, &qs->fl[1], 512);
	budget -= budget_left;
	return (budget);
}

/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	/* A budget of -1 means "no limit" for process_responses(). */
	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}
	/* Write the chosen holdoff timer and the new consumer index to A_SG_GTS. */
	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));

	return (work);
}


/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
*/ void t3b_intr(void *data) { uint32_t i, map; adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; t3_write_reg(adap, A_PL_CLI, 0); map = t3_read_reg(adap, A_SG_DATA_INTR); if (!map) return; if (__predict_false(map & F_ERRINTR)) { t3_write_reg(adap, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(adap, A_PL_INT_ENABLE0); taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } mtx_lock(&q0->lock); for_each_port(adap, i) if (map & (1 << i)) process_responses_gts(adap, &adap->sge.qs[i].rspq); mtx_unlock(&q0->lock); } /* * The MSI interrupt handler. This needs to handle data events from SGE * response queues as well as error and other async events as they all use * the same MSI vector. We use one SGE response queue per port in this mode * and protect all response queues with queue 0's lock. */ void t3_intr_msi(void *data) { adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; int i, new_packets = 0; mtx_lock(&q0->lock); for_each_port(adap, i) if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) new_packets = 1; mtx_unlock(&q0->lock); if (new_packets == 0) { t3_write_reg(adap, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(adap, A_PL_INT_ENABLE0); taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } } void t3_intr_msix(void *data) { struct sge_qset *qs = data; adapter_t *adap = qs->port->adapter; struct sge_rspq *rspq = &qs->rspq; if (process_responses_gts(adap, rspq) == 0) rspq->unhandled_irqs++; } #define QDUMP_SBUF_SIZE 32 * 400 static int t3_dump_rspq(SYSCTL_HANDLER_ARGS) { struct sge_rspq *rspq; struct sge_qset *qs; int i, err, dump_end, idx; struct sbuf *sb; struct rsp_desc *rspd; uint32_t data[4]; rspq = arg1; qs = rspq_to_qset(rspq); if (rspq->rspq_dump_count == 0) return (0); if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { log(LOG_WARNING, "dump count is too large %d\n", rspq->rspq_dump_count); rspq->rspq_dump_count = 0; return (EINVAL); } if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { log(LOG_WARNING, "dump start of %d is greater than queue 
size\n", rspq->rspq_dump_start); rspq->rspq_dump_start = 0; return (EINVAL); } err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); if (err) return (err); err = sysctl_wire_old_buffer(req, 0); if (err) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; for (i = rspq->rspq_dump_start; i < dump_end; i++) { idx = i & (RSPQ_Q_SIZE-1); rspd = &rspq->desc[idx]; sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), be32toh(rspd->len_cq), rspd->intr_gen); } err = sbuf_finish(sb); /* Output a trailing NUL. 
*/ if (err == 0) err = SYSCTL_OUT(req, "", 1); sbuf_delete(sb); return (err); } static int t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; int i, j, err, dump_end; struct sbuf *sb; struct tx_desc *txd; uint32_t *WR, wr_hi, wr_lo, gen; uint32_t data[4]; txq = arg1; qs = txq_to_qset(txq, TXQ_ETH); if (txq->txq_dump_count == 0) { return (0); } if (txq->txq_dump_count > TX_ETH_Q_SIZE) { log(LOG_WARNING, "dump count is too large %d\n", txq->txq_dump_count); txq->txq_dump_count = 1; return (EINVAL); } if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", txq->txq_dump_start); txq->txq_dump_start = 0; return (EINVAL); } err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); if (err) return (err); err = sysctl_wire_old_buffer(req, 0); if (err) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, txq->txq_dump_start, (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); dump_end = txq->txq_dump_start + txq->txq_dump_count; for (i = txq->txq_dump_start; i < dump_end; i++) { txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; WR = (uint32_t *)txd->flit; wr_hi = ntohl(WR[0]); wr_lo = ntohl(WR[1]); gen = G_WR_GEN(wr_lo); sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", wr_hi, wr_lo, gen); for (j = 2; j < 30; j += 4) sbuf_printf(sb, "\t%08x %08x %08x %08x \n", WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); } err = sbuf_finish(sb); /* Output a trailing NUL. 
*/ if (err == 0) err = SYSCTL_OUT(req, "", 1); sbuf_delete(sb); return (err); } static int t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; int i, j, err, dump_end; struct sbuf *sb; struct tx_desc *txd; uint32_t *WR, wr_hi, wr_lo, gen; txq = arg1; qs = txq_to_qset(txq, TXQ_CTRL); if (txq->txq_dump_count == 0) { return (0); } if (txq->txq_dump_count > 256) { log(LOG_WARNING, "dump count is too large %d\n", txq->txq_dump_count); txq->txq_dump_count = 1; return (EINVAL); } if (txq->txq_dump_start > 255) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", txq->txq_dump_start); txq->txq_dump_start = 0; return (EINVAL); } err = sysctl_wire_old_buffer(req, 0); if (err != 0) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, txq->txq_dump_start, (txq->txq_dump_start + txq->txq_dump_count) & 255); dump_end = txq->txq_dump_start + txq->txq_dump_count; for (i = txq->txq_dump_start; i < dump_end; i++) { txd = &txq->desc[i & (255)]; WR = (uint32_t *)txd->flit; wr_hi = ntohl(WR[0]); wr_lo = ntohl(WR[1]); gen = G_WR_GEN(wr_lo); sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", wr_hi, wr_lo, gen); for (j = 2; j < 30; j += 4) sbuf_printf(sb, "\t%08x %08x %08x %08x \n", WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); } err = sbuf_finish(sb); /* Output a trailing NUL. 
*/ if (err == 0) err = SYSCTL_OUT(req, "", 1); sbuf_delete(sb); return (err); } static int t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; struct qset_params *qsp = &sc->params.sge.qset[0]; int coalesce_usecs; struct sge_qset *qs; int i, j, err, nqsets = 0; struct mtx *lock; if ((sc->flags & FULL_INIT_DONE) == 0) return (ENXIO); coalesce_usecs = qsp->coalesce_usecs; err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); if (err != 0) { return (err); } if (coalesce_usecs == qsp->coalesce_usecs) return (0); for (i = 0; i < sc->params.nports; i++) for (j = 0; j < sc->port[i].nqsets; j++) nqsets++; coalesce_usecs = max(1, coalesce_usecs); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[i]; qsp = &sc->params.sge.qset[i]; qsp->coalesce_usecs = coalesce_usecs; lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; mtx_lock(lock); t3_update_qset_coalesce(qs, qsp); t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | V_NEWTIMER(qs->rspq.holdoff_tmr)); mtx_unlock(lock); } return (0); } static int t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; int rc, timestamp; if ((sc->flags & FULL_INIT_DONE) == 0) return (ENXIO); timestamp = sc->timestamp; rc = sysctl_handle_int(oidp, ×tamp, arg2, req); if (rc != 0) return (rc); if (timestamp != sc->timestamp) { t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); sc->timestamp = timestamp; } return (0); } void t3_add_attach_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); /* random information */ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", - CTLFLAG_RD, &sc->fw_version, + CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, &sc->params.rev, 0, "chip model"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "port_types", - CTLFLAG_RD, &sc->port_types, + CTLFLAG_RD, sc->port_types, 0, "type of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "enable_debug", CTLFLAG_RW, &cxgb_debug, 0, "enable verbose debugging output"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", CTLFLAG_RD, &sc->tunq_coalesce, "#tunneled packets freed"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "txq_overrun", CTLFLAG_RD, &txq_fills, 0, "#times txq overrun"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, &sc->params.vpd.cclk, 0, "core clock frequency (in KHz)"); } static const char *rspq_name = "rspq"; static const char *txq_names[] = { "txq_eth", "txq_ofld", "txq_ctrl" }; static int sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) { struct port_info *p = arg1; uint64_t *parg; if (!p) return (EINVAL); parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); PORT_LOCK(p); t3_mac_update_stats(&p->mac); PORT_UNLOCK(p); return (sysctl_handle_64(oidp, parg, 0, req)); } void t3_add_configured_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int i, j; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal", CTLTYPE_INT|CTLFLAG_RW, sc, 0, t3_set_coalesce_usecs, "I", "interrupt coalescing timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pkt_timestamp", 
CTLTYPE_INT | CTLFLAG_RW, sc,
	    0, t3_pkt_timestamp,
	    "I", "provide packet timestamp instead of connection hash");

	/* One "port%d" node per port, with one "qs%d" node per queue set. */
	for (i = 0; i < sc->params.nports; i++) {
		struct port_info *pi = &sc->port[i];
		struct sysctl_oid *poid;
		struct sysctl_oid_list *poidlist;
		struct mac_stats *mstats = &pi->mac.stats;

		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
		poidlist = SYSCTL_CHILDREN(poid);
		SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
		    "nqsets", CTLFLAG_RD, &pi->nqsets,
		    0, "#queue sets");

		for (j = 0; j < pi->nqsets; j++) {
			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
					  *ctrlqpoid, *lropoid;
			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
					       *txqpoidlist, *ctrlqpoidlist,
					       *lropoidlist;
			struct sge_txq *txq = &qs->txq[TXQ_ETH];

			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);

			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
			qspoidlist = SYSCTL_CHILDREN(qspoid);

			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
					CTLFLAG_RD, &qs->fl[0].empty, 0,
					"freelist #0 empty");
			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
					CTLFLAG_RD, &qs->fl[1].empty, 0,
					"freelist #1 empty");

			/* Child nodes: rspq, txq_eth, txq_ctrl and lro_stats. */
			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);

			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
			txqpoidlist = SYSCTL_CHILDREN(txqpoid);

			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);

			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
			    "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
			lropoidlist = SYSCTL_CHILDREN(lropoid);

			/* Response queue state and dump controls. */
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
			    CTLFLAG_RD, &qs->rspq.size,
			    0, "#entries in response queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
			    CTLFLAG_RD, &qs->rspq.cidx,
			    0, "consumer index");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
			    CTLFLAG_RD, &qs->rspq.credits,
			    0, "#credits");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
			    CTLFLAG_RD, &qs->rspq.starved,
			    0, "#times starved");
			/*
			 * NOTE(review): the '-'/'+' line pairs in this function
			 * are unified-diff markers (r273911 -> r273912) carried
			 * by this file; they are kept verbatim.
			 */
-			SYSCTL_ADD_ULONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
+			SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &qs->rspq.phys_addr,
			    "physical_address_of the queue");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
			    0, "start rspq dump entry");
			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
			    0, "#rspq entries to dump");
			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
			    0, t3_dump_rspq, "A", "dump of the response queue");

			/* ETH Tx queue counters, state and dump controls. */
			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
			    "#tunneled packets dropped");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
			    0, "#tunneled packets waiting to be sent");
#if 0
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
			    0, "#tunneled packets queue producer index");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
			    0, "#tunneled packets queue consumer index");
#endif
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
			    0, "#tunneled packets processed by the card");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
			    CTLFLAG_RD, &txq->cleaned,
			    0, "#tunneled packets cleaned");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
			    CTLFLAG_RD, &txq->in_use,
			    0, "#tunneled packet slots in use");
-			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
+			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
			    CTLFLAG_RD, &txq->txq_frees,
			    "#tunneled packets freed");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
			    CTLFLAG_RD, &txq->txq_skipped,
			    0, "#tunneled packet descriptors skipped");
			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
			    CTLFLAG_RD, &txq->txq_coalesced,
			    "#tunneled packets coalesced");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
			    CTLFLAG_RD, &txq->txq_enqueued,
			    0, "#tunneled packets enqueued to hardware");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
			    CTLFLAG_RD, &qs->txq_stopped,
			    0, "tx queues stopped");
-			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
+			SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &txq->phys_addr,
			    "physical_address_of the queue");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
			    0, "txq generation");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
			    CTLFLAG_RD, &txq->cidx,
			    0, "hardware queue cidx");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
			    CTLFLAG_RD, &txq->pidx,
			    0, "hardware queue pidx");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
			    0, "txq start idx for dump");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
			    0, "txq #entries to dump");
			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");

			/* CTRL Tx queue dump controls. */
			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
			    0, "ctrlq start idx for dump");
			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
			    0, "ctrl #entries to dump");
			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
			    0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");

			/* LRO counters taken from the qset's lro_ctrl. */
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
		}

		/* Now add a node for mac stats. */
		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
		    CTLFLAG_RD, NULL, "MAC statistics");
		poidlist = SYSCTL_CHILDREN(poid);

		/*
		 * We (ab)use the length argument (arg2) to pass on the offset
		 * of the data that we are interested in.  This is only required
		 * for the quad counters that are updated from the hardware (we
		 * make sure that we return the latest value).
		 * sysctl_handle_macstat first updates *all* the counters from
		 * the hardware, and then returns the latest value of the
		 * requested counter.  Best would be to update only the
		 * requested counter from hardware, but t3_mac_update_stats()
		 * hides all the register details and we don't want to dive into
		 * all that here.
*/ #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ sysctl_handle_macstat, "QU", 0) CXGB_SYSCTL_ADD_QUAD(tx_octets); CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); CXGB_SYSCTL_ADD_QUAD(tx_frames); CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); CXGB_SYSCTL_ADD_QUAD(tx_pause); CXGB_SYSCTL_ADD_QUAD(tx_deferred); CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); CXGB_SYSCTL_ADD_QUAD(tx_underrun); CXGB_SYSCTL_ADD_QUAD(tx_len_errs); CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); CXGB_SYSCTL_ADD_QUAD(tx_frames_64); CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); CXGB_SYSCTL_ADD_QUAD(rx_octets); CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); CXGB_SYSCTL_ADD_QUAD(rx_frames); CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); CXGB_SYSCTL_ADD_QUAD(rx_pause); CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); CXGB_SYSCTL_ADD_QUAD(rx_align_errs); CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); CXGB_SYSCTL_ADD_QUAD(rx_data_errs); CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); CXGB_SYSCTL_ADD_QUAD(rx_runt); CXGB_SYSCTL_ADD_QUAD(rx_jabber); CXGB_SYSCTL_ADD_QUAD(rx_short); CXGB_SYSCTL_ADD_QUAD(rx_too_long); CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); CXGB_SYSCTL_ADD_QUAD(rx_frames_64); CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); #undef CXGB_SYSCTL_ADD_QUAD #define CXGB_SYSCTL_ADD_ULONG(a) 
SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ CTLFLAG_RD, &mstats->a, 0) CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); CXGB_SYSCTL_ADD_ULONG(num_toggled); CXGB_SYSCTL_ADD_ULONG(num_resets); CXGB_SYSCTL_ADD_ULONG(link_faults); #undef CXGB_SYSCTL_ADD_ULONG } } /** * t3_get_desc - dump an SGE descriptor for debugging purposes * @qs: the queue set * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) * @idx: the descriptor index in the queue * @data: where to dump the descriptor contents * * Dumps the contents of a HW descriptor of an SGE queue. Returns the * size of the descriptor. */ int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data) { if (qnum >= 6) return (EINVAL); if (qnum < 3) { if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) return -EINVAL; memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); return sizeof(struct tx_desc); } if (qnum == 3) { if (!qs->rspq.desc || idx >= qs->rspq.size) return (EINVAL); memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); return sizeof(struct rsp_desc); } qnum -= 4; if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) return (EINVAL); memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); return sizeof(struct rx_desc); } Index: stable/9/sys/dev/cxgbe/t4_main.c =================================================================== --- stable/9/sys/dev/cxgbe/t4_main.c (revision 273911) +++ stable/9/sys/dev/cxgbe/t4_main.c (revision 273912) @@ -1,8119 +1,8119 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. 
* Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) || defined(__amd64__) #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "t4_ioctl.h" #include "t4_l2t.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); static device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; static d_ioctl_t t4_ioctl; static d_open_t t4_open; static d_close_t t4_close; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = t4_open, .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; static struct cdevsw t5_cdevsw = { .d_version = D_VERSION, 
.d_flags = 0, .d_open = t4_open, .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t5nex", }; /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. */ static struct mtx t4_list_lock; static SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct mtx t4_uld_list_lock; static SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -1 as an indication to tweak_tunables() that it should * provide a reasonable default when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, 10G and 1G, NIC and offload. 
*/ #define NTXQ_10G 16 static int t4_ntxq10g = -1; TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g); #define NRXQ_10G 8 static int t4_nrxq10g = -1; TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g); #define NTXQ_1G 4 static int t4_ntxq1g = -1; TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g); #define NRXQ_1G 2 static int t4_nrxq1g = -1; TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #ifdef TCP_OFFLOAD #define NOFLDTXQ_10G 8 static int t4_nofldtxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g); #define NOFLDRXQ_10G 2 static int t4_nofldrxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g); #define NOFLDTXQ_1G 2 static int t4_nofldtxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g); #define NOFLDRXQ_1G 1 static int t4_nofldrxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g); #endif /* * Holdoff parameters for 10G and 1G ports. */ #define TMR_IDX_10G 1 static int t4_tmr_idx_10g = TMR_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g); #define PKTC_IDX_10G (-1) static int t4_pktc_idx_10g = PKTC_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g); #define TMR_IDX_1G 1 static int t4_tmr_idx_1g = TMR_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g); #define PKTC_IDX_1G (-1) static int t4_pktc_idx_1g = PKTC_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g); /* * Size (# of entries) of each tx and rx queue. */ static unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); static unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. 
*/ #define DEFAULT_CF "default" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = 0; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_iscsicaps_allowed = 0; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 0; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); struct intrs_and_queues { int intr_type; /* INTx, MSI, or MSI-X */ int nirq; /* Number of vectors */ int intr_flags; int ntxq10g; /* # of NIC txq's for each 10G port */ int nrxq10g; /* # of NIC rxq's for each 10G port */ int ntxq1g; /* # of NIC txq's for each 1G port */ int nrxq1g; /* # of NIC rxq's for each 1G port */ int rsrv_noflowq; /* Flag whether to reserve queue 0 */ #ifdef TCP_OFFLOAD int nofldtxq10g; /* # of TOE txq's for each 10G port */ int nofldrxq10g; /* # of TOE rxq's for each 10G port */ int nofldtxq1g; /* # of TOE txq's for each 1G port */ int nofldrxq1g; /* # of TOE rxq's for each 1G port */ #endif }; struct filter_entry { uint32_t valid:1; /* filter allocated 
and valid */ uint32_t locked:1; /* filter is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; enum { XGMAC_MTU = (1 << 0), XGMAC_PROMISC = (1 << 1), XGMAC_ALLMULTI = (1 << 2), XGMAC_VLANEX = (1 << 3), XGMAC_UCADDR = (1 << 4), XGMAC_MCADDRS = (1 << 5), XGMAC_ALL = 0xffff }; static int map_bars_0_and_4(struct adapter *); static int map_bar_2(struct adapter *); static void setup_memwin(struct adapter *); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static void memwin_info(struct adapter *, int, uint32_t *, uint32_t *); static uint32_t position_memwin(struct adapter *, int, uint32_t); static int cfg_itype_and_nqueues(struct adapter *, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *); static int update_mac_settings(struct port_info *, int); static int cxgbe_init_synchronized(struct port_info *); static int cxgbe_uninit_synchronized(struct port_info *); static int setup_intr_handlers(struct adapter *); static int adapter_full_init(struct adapter *); static int adapter_full_uninit(struct adapter *); static int port_full_init(struct port_info *); static int port_full_uninit(struct port_info *); static void quiesce_eq(struct adapter *, struct sge_eq *); static void quiesce_iq(struct adapter *, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int 
t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void reg_block_dump(struct adapter *, uint8_t *, unsigned int, unsigned int); static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static int cpl_not_handled(struct sge_iq *, const struct rss_header *, struct mbuf *); static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *); static int fw_msg_not_handled(struct adapter *, const __be64 *); static int t4_sysctls(struct adapter *); static int cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int 
sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); #endif static inline void txq_start(struct ifnet *, struct sge_txq *); static uint32_t fconf_to_mode(uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t fspec_to_fconf(struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); static inline uint64_t get_filter_hits(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); static int set_sched_class(struct adapter *, struct t4_sched_params *); static int set_sched_queue(struct adapter *, struct t4_sched_queue *); #ifdef TCP_OFFLOAD static int toe_capability(struct port_info *, int); #endif static int mod_event(module_t, int, void *); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio 
T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { {0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, {0x5406, "Chelsio T540-CH"}, {0x5408, "Chelsio T520-CX"}, {0x540b, "Chelsio B520-SR"}, {0x540c, "Chelsio B504-BT"}, {0x540f, "Chelsio Amsterdam"}, {0x5413, "Chelsio T580-CHR"}, #endif }; #ifdef TCP_OFFLOAD /* * service_iq() has an iq and needs the fl. Offset of fl from the iq should be * exactly the same for both rxq and ofld_rxq. 
*/ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif /* No easy way to include t4_msg.h before adapter.h so we check this way */ CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS); CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES); CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, n10g, n1g, rqidx, tqidx; struct intrs_and_queues iaq; struct sge *s; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif sc = device_get_softc(dev); sc->dev = dev; pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); mtx_lock(&t4_list_lock); 
SLIST_INSERT_HEAD(&t4_list, sc, link); mtx_unlock(&t4_list_lock); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init(&sc->sfl_callout, CALLOUT_MPSAFE); rc = map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a * different PF# depending on the passthrough configuration. We need to * use the real PF# in all our communication with the firmware. */ sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI)); sc->mbox = sc->pf; memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); sc->an_handler = an_not_handled; for (i = 0; i < nitems(sc->cpl_handler); i++) sc->cpl_handler[i] = cpl_not_handled; for (i = 0; i < nitems(sc->fw_msg_handler); i++) sc->fw_msg_handler[i] = fw_msg_not_handled; t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl); t4_init_sge_cpl_handlers(sc); /* Prepare the adapter for operation */ rc = -t4_prep_adapter(sc); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". */ setup_memwin(sc); sc->cdev = make_dev(is_t4(sc) ? &t4_cdevsw : &t5_cdevsw, device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s", device_get_nameunit(dev)); if (sc->cdev == NULL) device_printf(dev, "failed to create nexus char device.\n"); else sc->cdev->si_drv1 = sc; /* Go no further if recovery mode has been requested. 
*/ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. We also figure * out whether a port is 10G or 1G and use that information when * calculating how many interrupts to attempt to allocate. */ n10g = n1g = 0; for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* Allocate the vi and initialize parameters like mac addr */ rc = -t4_port_init(pi, sc->mbox, sc->pf, 0); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); if (is_10G_port(pi) || is_40G_port(pi)) { n10g++; pi->tmr_idx = t4_tmr_idx_10g; pi->pktc_idx = t4_pktc_idx_10g; } else { n1g++; pi->tmr_idx = t4_tmr_idx_1g; pi->pktc_idx = t4_pktc_idx_1g; } pi->xact_addr_filt = -1; pi->linkdnrc = -1; pi->qsize_rxq = t4_qsize_rxq; pi->qsize_txq = t4_qsize_txq; pi->dev = device_add_child(dev, is_t4(sc) ? 
"cxgbe" : "cxl", -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. */ rc = cfg_itype_and_nqueues(sc, n10g, n1g, &iaq); if (rc != 0) goto done; /* error message displayed already */ sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; sc->flags |= iaq.intr_flags; s = &sc->sge; s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g; s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g; s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. 
*/ rqidx = tqidx = 0; #ifdef TCP_OFFLOAD ofld_rqidx = ofld_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; if (pi == NULL) continue; pi->first_rxq = rqidx; pi->first_txq = tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { pi->nrxq = iaq.nrxq10g; pi->ntxq = iaq.ntxq10g; } else { pi->nrxq = iaq.nrxq1g; pi->ntxq = iaq.ntxq1g; } if (pi->ntxq > 1) pi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0; else pi->rsrv_noflowq = 0; rqidx += pi->nrxq; tqidx += pi->ntxq; #ifdef TCP_OFFLOAD if (is_offload(sc)) { pi->first_ofld_rxq = ofld_rqidx; pi->first_ofld_txq = ofld_tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { pi->nofldrxq = iaq.nofldrxq10g; pi->nofldtxq = iaq.nofldtxq10g; } else { pi->nofldrxq = iaq.nofldrxq1g; pi->nofldtxq = iaq.nofldtxq1g; } ofld_rqidx += pi->nofldrxq; ofld_tqidx += pi->nofldtxq; } #endif } rc = setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. 
*/ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach(dev); else t4_sysctls(sc); return (rc); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); if (sc->flags & FULL_INIT_DONE) t4_intr_disable(sc); if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(pi->adapter, sc->mbox, sc->pf, 0, pi->viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi, M_CXGBE); } } if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); if (sc->flags & FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); #ifdef TCP_OFFLOAD free(sc->sge.ofld_rxq, M_CXGBE); free(sc->sge.ofld_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { mtx_lock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); mtx_unlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } if (mtx_initialized(&sc->tids.ftid_lock)) mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { 
char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct ifnet *ifp; /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } pi->ifp = ifp; ifp->if_softc = pi; callout_init(&pi->tick, CALLOUT_MPSAFE); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (is_offload(pi->adapter)) ifp->if_capabilities |= IFCAP_TOE; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; /* Initialize ifmedia for this port */ ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); build_medialist(pi); pi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, pi->hw_addr); #ifdef TCP_OFFLOAD if (is_offload(pi->adapter)) { device_printf(dev, "%d txq, %d rxq (NIC); %d txq, %d rxq (TOE)\n", pi->ntxq, pi->nrxq, pi->nofldtxq, pi->nofldrxq); } else #endif device_printf(dev, "%d txq, %d rxq\n", pi->ntxq, pi->nrxq); cxgbe_sysctls(pi); return (0); } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; /* Tell if_ioctl and if_init that the port is going away */ ADAPTER_LOCK(sc); 
SET_DOOMED(pi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; #endif ADAPTER_UNLOCK(sc); if (pi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, pi->vlan_c); PORT_LOCK(pi); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; callout_stop(&pi->tick); PORT_UNLOCK(pi); callout_drain(&pi->tick); /* Let detach proceed even if these fail. */ cxgbe_uninit_synchronized(pi); port_full_uninit(pi); ifmedia_removeall(&pi->media); ether_ifdetach(pi->ifp); if_free(pi->ifp); ADAPTER_LOCK(sc); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); return (0); } static void cxgbe_init(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; if (begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(pi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags; struct port_info *pi = ifp->if_softc; struct adapter *sc = pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) return (EINVAL); rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (pi->flags & PORT_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(pi, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = pi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { rc = update_mac_settings(pi, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else rc = cxgbe_init_synchronized(pi); pi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = 
cxgbe_uninit_synchronized(pi); end_synchronized_op(sc, 0); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ rc = begin_synchronized_op(sc, pi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(pi, XGMAC_MCADDRS); end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. 
*/
		if (mask & IFCAP_TSO4) {
			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum first.\n");
				rc = EAGAIN;
				goto fail;
			}
			ifp->if_capenable ^= IFCAP_TSO4;
		}
		if (mask & IFCAP_TSO6) {
			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum6 first.\n");
				rc = EAGAIN;
				goto fail;
			}
			ifp->if_capenable ^= IFCAP_TSO6;
		}
		if (mask & IFCAP_LRO) {
#if defined(INET) || defined(INET6)
			int i;
			struct sge_rxq *rxq;

			ifp->if_capenable ^= IFCAP_LRO;
			for_each_rxq(pi, i, rxq) {
				if (ifp->if_capenable & IFCAP_LRO)
					rxq->iq.flags |= IQ_LRO_ENABLED;
				else
					rxq->iq.flags &= ~IQ_LRO_ENABLED;
			}
#endif
		}
#ifdef TCP_OFFLOAD
		if (mask & IFCAP_TOE) {
			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;

			rc = toe_capability(pi, enable);
			if (rc != 0)
				goto fail;

			ifp->if_capenable ^= mask;
		}
#endif
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rc = update_mac_settings(pi, XGMAC_VLANEX);
		}
		if (mask & IFCAP_VLAN_MTU) {
			ifp->if_capenable ^= IFCAP_VLAN_MTU;

			/* Need to find out how to disable auto-mtu-inflation */
		}
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		if (mask & IFCAP_VLAN_HWCSUM)
			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;

#ifdef VLAN_CAPABILITIES
		VLAN_CAPABILITIES(ifp);
#endif
fail:
		end_synchronized_op(sc, 0);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
		break;

	default:
		rc = ether_ioctl(ifp, cmd, data);
	}

	return (rc);
}

/*
 * Transmit entry point for a port: hand mbuf m to one of the port's tx queues.
 *
 * The mbuf is dropped with ENETDOWN when the link is down.  Otherwise the
 * queue is chosen from the port's txq range (by flowid when M_FLOWID is set)
 * and the mbuf is either queued on that txq's buf_ring or transmitted
 * directly, depending on whether the txq lock could be taken and whether
 * anything is already waiting.
 *
 * Returns 0 if the mbuf was accepted (sent, held or queued), otherwise the
 * error from drbr_enqueue()/t4_eth_tx().
 */
static int
cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct port_info *pi = ifp->if_softc;
	struct adapter *sc = pi->adapter;
	struct sge_txq *txq = &sc->sge.txq[pi->first_txq];
	struct buf_ring *br;
	int rc;

	M_ASSERTPKTHDR(m);

	if (__predict_false(pi->link_cfg.link_ok == 0)) {
		m_freem(m);
		return (ENETDOWN);
	}

	/*
	 * Packets with a flowid are spread over the queues that follow the
	 * first rsrv_noflowq queues; packets without one stay on first_txq.
	 */
	if (m->m_flags & M_FLOWID)
		txq += ((m->m_pkthdr.flowid % (pi->ntxq - pi->rsrv_noflowq))
		    + pi->rsrv_noflowq);
	br = txq->br;

	/* Lock not acquired: just queue the mbuf and arrange for a later tx. */
	if (TXQ_TRYLOCK(txq) == 0) {
		struct sge_eq *eq = &txq->eq;

		/*
		 * It is possible that t4_eth_tx finishes up and releases the
		 * lock between the TRYLOCK above and the drbr_enqueue here.  We
		 * need to make sure that this mbuf doesn't just sit there in
		 * the drbr.
		 */

		rc = drbr_enqueue(ifp, br, m);
		if (rc == 0 && callout_pending(&eq->tx_callout) == 0 &&
		    !(eq->flags & EQ_DOOMED))
			callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
		return (rc);
	}

	/*
	 * txq->m is the mbuf that is held up due to a temporary shortage of
	 * resources and it should be put on the wire first.  Then what's in
	 * drbr and finally the mbuf that was just passed in to us.
	 *
	 * Return code should indicate the fate of the mbuf that was passed in
	 * this time.
	 */

	TXQ_LOCK_ASSERT_OWNED(txq);
	if (drbr_needs_enqueue(ifp, br) || txq->m) {

		/* Queued for transmission. */

		rc = drbr_enqueue(ifp, br, m);
		/* Resume with the held mbuf if any, else the head of the ring. */
		m = txq->m ? txq->m : drbr_dequeue(ifp, br);
		(void) t4_eth_tx(ifp, txq, m);
		TXQ_UNLOCK(txq);
		return (rc);
	}

	/* Direct transmission. */
	rc = t4_eth_tx(ifp, txq, m);
	if (rc != 0 && txq->m)
		rc = 0;	/* held, will be transmitted soon (hopefully) */

	TXQ_UNLOCK(txq);
	return (rc);
}

/*
 * Discard everything waiting for transmission on the port: the held mbuf and
 * the buf_ring contents of every tx queue, followed by the generic
 * if_qflush().
 */
static void
cxgbe_qflush(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_txq *txq;
	int i;
	struct mbuf *m;

	/* queues do not exist if !PORT_INIT_DONE. */
	if (pi->flags & PORT_INIT_DONE) {
		for_each_txq(pi, i, txq) {
			TXQ_LOCK(txq);
			m_freem(txq->m);
			txq->m = NULL;
			while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
				m_freem(m);
			TXQ_UNLOCK(txq);
		}
	}
	if_qflush(ifp);
}

/*
 * Media change request.  Not implemented: logs a message and returns
 * EOPNOTSUPP.
 */
static int
cxgbe_media_change(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;

	device_printf(pi->dev, "%s unimplemented.\n", __func__);

	return (EOPNOTSUPP);
}

/*
 * Report media status in ifmr.  The media list is rebuilt first if the
 * port/module type encoded in the current entry no longer matches the port.
 * ifm_active is filled in only when the link is up and the current media is
 * autoselect.
 */
static void
cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *pi = ifp->if_softc;
	struct ifmedia_entry *cur = pi->media.ifm_cur;
	int speed = pi->link_cfg.speed;
	int data = (pi->port_type << 8) | pi->mod_type;

	/* ifm_data carries the same port_type/mod_type encoding as 'data'. */
	if (cur->ifm_data != data) {
		build_medialist(pi);
		cur = pi->media.ifm_cur;
	}

	ifmr->ifm_status = IFM_AVALID;
	if (!pi->link_cfg.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	/* active and current will differ iff current media is autoselect. */
	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
		return;

	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
	if (speed == SPEED_10000)
		ifmr->ifm_active |= IFM_10G_T;
	else if (speed == SPEED_1000)
		ifmr->ifm_active |= IFM_1000_T;
	else if (speed == SPEED_100)
		ifmr->ifm_active |= IFM_100_TX;
	else if (speed == SPEED_10)
		ifmr->ifm_active |= IFM_10_T;
	else
		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
			    speed));
}

/*
 * Fatal error handler: clears F_GLOBALENABLE in A_SGE_CONTROL, disables
 * interrupts and logs an emergency message naming the device.
 */
void
t4_fatal_err(struct adapter *sc)
{
	t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
	t4_intr_disable(sc);
	log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
	    device_get_nameunit(sc->dev));
}

/*
 * Map BAR0 (device registers) and BAR4 (MSI-X).  Returns 0 on success or
 * ENXIO if either resource cannot be allocated.
 * NOTE(review): resources already allocated are not released here on
 * failure; presumably the caller's detach path does that -- confirm.
 */
static int
map_bars_0_and_4(struct adapter *sc)
{
	sc->regs_rid = PCIR_BAR(0);
	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE);
	if (sc->regs_res == NULL) {
		device_printf(sc->dev, "cannot map registers.\n");
		return (ENXIO);
	}
	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);
	setbit(&sc->doorbells, DOORBELL_KDB);

	sc->msix_rid = PCIR_BAR(4);
	sc->msix_res = bus_alloc_resource_any(sc->dev,
SYS_RES_MEMORY,
	    &sc->msix_rid, RF_ACTIVE);
	if (sc->msix_res == NULL) {
		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
		return (ENXIO);
	}

	return (0);
}

/*
 * Map BAR2 (userspace doorbells) when it is needed and record which doorbell
 * mechanisms are usable in sc->doorbells.  On T5, if t5_write_combine is set
 * (i386/amd64 only), an attempt is made to switch the BAR to write-combining.
 * Returns 0 on success or when the BAR is not needed, ENXIO if it cannot be
 * mapped.
 */
static int
map_bar_2(struct adapter *sc)
{

	/*
	 * T4: only iWARP driver uses the userspace doorbells.  There is no need
	 * to map it if RDMA is disabled.
	 */
	if (is_t4(sc) && sc->rdmacaps == 0)
		return (0);

	sc->udbs_rid = PCIR_BAR(2);
	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->udbs_rid, RF_ACTIVE);
	if (sc->udbs_res == NULL) {
		device_printf(sc->dev, "cannot map doorbell BAR.\n");
		return (ENXIO);
	}
	sc->udbs_base = rman_get_virtual(sc->udbs_res);

	if (is_t5(sc)) {
		setbit(&sc->doorbells, DOORBELL_UDB);
#if defined(__i386__) || defined(__amd64__)
		if (t5_write_combine) {
			int rc;

			/*
			 * Enable write combining on BAR2.  This is the
			 * userspace doorbell BAR and is split into 128B
			 * (UDBS_SEG_SIZE) doorbell regions, each associated
			 * with an egress queue.  The first 64B has the doorbell
			 * and the second 64B can be used to submit a tx work
			 * request with an implicit doorbell.
			 */

			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
			if (rc == 0) {
				/* WC variants replace the plain UDB doorbell. */
				clrbit(&sc->doorbells, DOORBELL_UDB);
				setbit(&sc->doorbells, DOORBELL_WCWR);
				setbit(&sc->doorbells, DOORBELL_UDBWC);
			} else {
				device_printf(sc->dev,
				    "couldn't enable write combining: %d\n",
				    rc);
			}

			/* Written whether or not write combining was enabled. */
			t4_write_reg(sc, A_SGE_STAT_CFG,
			    V_STATSOURCE_T5(7) | V_STATMODE(0));
		}
#endif
	}

	return (0);
}

/* Base/aperture of each PCIe memory window, T4 flavor. */
static const struct memwin t4_memwin[] = {
	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
	{ MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
};

/* Base/aperture of each PCIe memory window, T5 flavor (window 2 differs). */
static const struct memwin t5_memwin[] = {
	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
	{ MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
};

/*
 * Program the PCIe memory window base registers from the chip-appropriate
 * memwin table, then read one of them back to flush the writes.
 */
static void
setup_memwin(struct adapter *sc)
{
	const struct memwin *mw;
	int i, n;
	uint32_t bar0;

	if (is_t4(sc)) {
		/*
		 * Read low 32b of bar0 indirectly via the hardware backdoor
		 * mechanism.  Works from within PCI passthrough environments
		 * too, where rman_get_start() can return a different value.  We
		 * need to program the T4 memory window decoders with the actual
		 * addresses that will be coming across the PCIe link.
		 */
		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;

		mw = &t4_memwin[0];
		n = nitems(t4_memwin);
	} else {
		/* T5 uses the relative offset inside the PCIe BAR */
		bar0 = 0;

		mw = &t5_memwin[0];
		n = nitems(t5_memwin);
	}

	for (i = 0; i < n; i++, mw++) {
		/* The window size field is encoded as log2(aperture) - 10. */
		t4_write_reg(sc,
		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
		    (mw->base + bar0) | V_BIR(0) |
		    V_WINDOW(ilog2(mw->aperture) - 10));
	}

	/* flush */
	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
}

/*
 * Verify that the memory range specified by the addr/len pair is valid and lies
 * entirely within a single region (EDCx or MCx).
 *
 * Returns 0 if the range fits inside one enabled memory, EINVAL if addr or len
 * is not a non-zero multiple of 4, and EFAULT if no enabled memory contains
 * the range.
 * NOTE(review): len is a signed int and addr + len is evaluated in 32 bits;
 * negative or wrapping values do not appear to be rejected -- confirm that
 * callers cannot pass them.
 */
static int
validate_mem_range(struct adapter *sc, uint32_t addr, int len)
{
	uint32_t em, addr_len, maddr, mlen;

	/* Memory can only be accessed in naturally aligned 4 byte units */
	if (addr & 3 || len & 3 || len == 0)
		return (EINVAL);

	/* Enabled memories */
	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
	if (em & F_EDRAM0_ENABLE) {
		/* Base and size fields are shifted by 20 (MB units) to bytes. */
		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
		maddr = G_EDRAM0_BASE(addr_len) << 20;
		mlen = G_EDRAM0_SIZE(addr_len) << 20;
		if (mlen > 0 && addr >= maddr && addr < maddr + mlen &&
		    addr + len <= maddr + mlen)
			return (0);
	}
	if (em & F_EDRAM1_ENABLE) {
		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
		maddr = G_EDRAM1_BASE(addr_len) << 20;
		mlen = G_EDRAM1_SIZE(addr_len) << 20;
		if (mlen > 0 && addr >= maddr && addr < maddr + mlen &&
		    addr + len <= maddr + mlen)
			return (0);
	}
	if (em & F_EXT_MEM_ENABLE) {
		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
		maddr = G_EXT_MEM_BASE(addr_len) << 20;
		mlen = G_EXT_MEM_SIZE(addr_len) << 20;
		if (mlen > 0 && addr >= maddr && addr < maddr + mlen &&
		    addr + len <= maddr + mlen)
			return (0);
	}
	/* The second external memory is only looked at on non-T4 chips. */
	if (!is_t4(sc) && em & F_EXT_MEM1_ENABLE) {
		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
		mlen = G_EXT_MEM1_SIZE(addr_len) << 20;
		if (mlen > 0 && addr >= maddr && addr < maddr + mlen &&
		    addr + len <= maddr + mlen)
			return (0);
	}

	return (EFAULT);
}

/*
 * Translate a firmware memory type (FW_MEMTYPE_*) into the driver's MEM_*
 * constant.  Panics on a value it does not know.
 */
static int
fwmtype_to_hwmtype(int mtype)
{

	switch (mtype) {
	case FW_MEMTYPE_EDC0:
		return (MEM_EDC0);
	case FW_MEMTYPE_EDC1:
		return (MEM_EDC1);
	case FW_MEMTYPE_EXTMEM:
		return (MEM_MC0);
	case FW_MEMTYPE_EXTMEM1:
		return (MEM_MC1);
	default:
		panic("%s: cannot translate fw mtype %d.", __func__, mtype);
	}
}

/*
 * Verify that the memory range specified by the memtype/offset/len pair is
 * valid and lies entirely within the memtype specified.  The global address of
 * the start of the range is returned in addr.
 */
static int
validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len,
    uint32_t *addr)
{
	uint32_t em, addr_len, maddr, mlen;

	/* Memory can only be accessed in naturally aligned 4 byte units */
	if (off & 3 || len & 3 || len == 0)
		return (EINVAL);

	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
	switch (fwmtype_to_hwmtype(mtype)) {
	case MEM_EDC0:
		if (!(em & F_EDRAM0_ENABLE))
			return (EINVAL);
		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
		maddr = G_EDRAM0_BASE(addr_len) << 20;
		mlen = G_EDRAM0_SIZE(addr_len) << 20;
		break;
	case MEM_EDC1:
		if (!(em & F_EDRAM1_ENABLE))
			return (EINVAL);
		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
		maddr = G_EDRAM1_BASE(addr_len) << 20;
		mlen = G_EDRAM1_SIZE(addr_len) << 20;
		break;
	case MEM_MC:
		if (!(em & F_EXT_MEM_ENABLE))
			return (EINVAL);
		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
		maddr = G_EXT_MEM_BASE(addr_len) << 20;
		mlen = G_EXT_MEM_SIZE(addr_len) << 20;
		break;
	case MEM_MC1:
		if (is_t4(sc) || !(em & F_EXT_MEM1_ENABLE))
			return (EINVAL);
		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
		mlen = G_EXT_MEM1_SIZE(addr_len) << 20;
		break;
	default:
		return (EINVAL);
	}

	if (mlen > 0 && off < mlen && off + len <= mlen) {
		*addr = maddr + off;	/* global address */
		return (0);
	}

	return
(EFAULT); } static void memwin_info(struct adapter *sc, int win, uint32_t *base, uint32_t *aperture) { const struct memwin *mw; if (is_t4(sc)) { KASSERT(win >= 0 && win < nitems(t4_memwin), ("%s: incorrect memwin# (%d)", __func__, win)); mw = &t4_memwin[win]; } else { KASSERT(win >= 0 && win < nitems(t5_memwin), ("%s: incorrect memwin# (%d)", __func__, win)); mw = &t5_memwin[win]; } if (base != NULL) *base = mw->base; if (aperture != NULL) *aperture = mw->aperture; } /* * Positions the memory window such that it can be used to access the specified * address in the chip's address space. The return value is the offset of addr * from the start of the window. */ static uint32_t position_memwin(struct adapter *sc, int n, uint32_t addr) { uint32_t start, pf; uint32_t reg; KASSERT(n >= 0 && n <= 3, ("%s: invalid window %d.", __func__, n)); KASSERT((addr & 3) == 0, ("%s: addr (0x%x) is not at a 4B boundary.", __func__, addr)); if (is_t4(sc)) { pf = 0; start = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); start = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, n); t4_write_reg(sc, reg, start | pf); t4_read_reg(sc, reg); return (addr - start); } static int cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, struct intrs_and_queues *iaq) { int rc, itype, navail, nrxq10g, nrxq1g, n; int nofldrxq10g = 0, nofldrxq1g = 0; bzero(iaq, sizeof(*iaq)); iaq->ntxq10g = t4_ntxq10g; iaq->ntxq1g = t4_ntxq1g; iaq->nrxq10g = nrxq10g = t4_nrxq10g; iaq->nrxq1g = nrxq1g = t4_nrxq1g; iaq->rsrv_noflowq = t4_rsrv_noflowq; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq10g = t4_nofldtxq10g; iaq->nofldtxq1g = t4_nofldtxq1g; iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g; iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g; } #endif for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if 
(itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; iaq->intr_type = itype; iaq->intr_flags = 0; /* * Best option: an interrupt vector for errors, one for the * firmware event queue, and one each for each rxq (NIC as well * as offload). */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g * (nrxq10g + nofldrxq10g); iaq->nirq += n1g * (nrxq1g + nofldrxq1g); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { iaq->intr_flags |= INTR_DIRECT; goto allocate; } /* * Second best option: an interrupt vector for errors, one for * the firmware event queue, and one each for either NIC or * offload rxq's. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g * max(nrxq10g, nofldrxq10g); iaq->nirq += n1g * max(nrxq1g, nofldrxq1g); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) goto allocate; /* * Next best option: an interrupt vector for errors, one for the * firmware event queue, and at least one per port. At this * point we know we'll have to downsize nrxq or nofldrxq to fit * what's available to us. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g + n1g; if (iaq->nirq <= navail) { int leftover = navail - iaq->nirq; if (n10g > 0) { int target = max(nrxq10g, nofldrxq10g); n = 1; while (n < target && leftover >= n10g) { leftover -= n10g; iaq->nirq += n10g; n++; } iaq->nrxq10g = min(n, nrxq10g); #ifdef TCP_OFFLOAD if (is_offload(sc)) iaq->nofldrxq10g = min(n, nofldrxq10g); #endif } if (n1g > 0) { int target = max(nrxq1g, nofldrxq1g); n = 1; while (n < target && leftover >= n1g) { leftover -= n1g; iaq->nirq += n1g; n++; } iaq->nrxq1g = min(n, nrxq1g); #ifdef TCP_OFFLOAD if (is_offload(sc)) iaq->nofldrxq1g = min(n, nofldrxq1g); #endif } if (itype != INTR_MSI || powerof2(iaq->nirq)) goto allocate; } /* * Least desirable option: one interrupt vector for everything. 
*/ iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1; #ifdef TCP_OFFLOAD if (is_offload(sc)) iaq->nofldrxq10g = iaq->nofldrxq1g = 1; #endif allocate: navail = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &navail); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &navail); if (rc == 0) { if (navail == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate (it's in navail). */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, navail); pci_release_msi(sc->dev); goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, navail); } device_printf(sc->dev, "failed to find a usable interrupt type. " "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32_const(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32_const(FW_VERSION(T5)), 
.intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. 
*/ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* Contact firmware. */ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); return (rc); } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. 
It's a bug if someone else hasn't * configured it already. */ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); return (EDOOFUS); } /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. */ default_cfg = firmware_get(fw_info->kld_name); /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. 
*/ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* We're using whatever's on the card and it's known to be good. */ sc->params.fw_vers = ntohl(card_fw->fw_ver); snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); t4_get_tp_version(sc, &sc->params.tp_vers); /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. 
*/ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. */ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } /* * We need to load another module if the profile is anything except * "default" or "flash". 
*/ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen, i, n; const uint32_t *cfdata; uint32_t param, val, addr, off, mw_base, mw_aperture; KASSERT(cfg != NULL || default_cfg != NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. 
*/ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d. " "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } memwin_info(sc, 2, &mw_base, &mw_aperture); while (cflen) { off = position_memwin(sc, 2, addr); n = min(cflen, mw_aperture - off); for (i = 0; i < n; i += 4) t4_write_reg(sc, mw_base + off + i, *cfdata++); cflen -= n; addr += n; } } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x).\n", rc, mtype, moff); goto done; } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. 
*/ LIMIT_CAPS(linkcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. */ static int get_params__pre_init(struct adapter *sc) { int rc; uint32_t param[2], val[2]; struct fw_devlog_cmd cmd; struct devlog_params *dlog = &sc->params.devlog; param[0] = FW_PARAM_DEV(PORTVEC); param[1] = FW_PARAM_DEV(CCLK); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (pre_init): %d.\n", rc); return (rc); } sc->params.portvec = val[0]; sc->params.nports = bitcount32(val[0]); sc->params.vpd.cclk = val[1]; /* Read device log parameters. */ bzero(&cmd, sizeof(cmd)); cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); cmd.retval_len16 = htobe32(FW_LEN16(cmd)); rc = -t4_wr_mbox(sc, sc->mbox, &cmd, sizeof(cmd), &cmd); if (rc != 0) { device_printf(sc->dev, "failed to get devlog parameters: %d.\n", rc); bzero(dlog, sizeof (*dlog)); rc = 0; /* devlog isn't critical for device operation */ } else { val[0] = be32toh(cmd.memtype_devlog_memaddr16_devlog); dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(val[0]); dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(val[0]) << 4; dlog->size = be32toh(cmd.memsize_devlog); } return (rc); } /* * Retrieve various parameters that are of interest to the driver. The device * has been initialized by the firmware at this point. 
*/ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; sc->params.ftid_min = val[2]; sc->params.ftid_max = val[3]; sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(linkcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } sc->tids.etid_base = val[0]; sc->params.etid_min = val[0]; sc->tids.netids = val[1] - val[0] + 1; sc->params.netids = sc->tids.netids; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } if (sc->toecaps) 
{ /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = 
FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. */ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, " "P/N:%s, E/C:%s", p->vpd.id, is_offload(sc) ? "R" : "", chip_rev(sc), p->vpd.sn, p->vpd.pn, p->vpd.ec); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi) { struct ifmedia *media = &pi->media; int data, m; PORT_LOCK(pi); ifmedia_removeall(media); m = IFM_ETHER | IFM_FDX; data = (pi->port_type << 8) | pi->mod_type; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: ifmedia_add(media, m | IFM_10G_T, data, NULL); break; case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, data, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, data, NULL); ifmedia_add(media, m | IFM_100_TX, data, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, data, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, data, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, data, NULL); ifmedia_set(media, m | IFM_10G_LR); 
break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, data, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, data, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, data, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, data, NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, data, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_40G_LR4, data, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_40G_SR4, data, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_40G_CR4, data, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, data, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, data, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, data, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } PORT_UNLOCK(pi); } #define FW_MAC_EXACT_CHUNK 7 /* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). 
*/ static int update_mac_settings(struct port_info *pi, int flags) { int rc; struct ifnet *ifp = pi->ifp; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; rc = -t4_set_rxmode(sc, sc->mbox, pi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, pi->viid, pi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { pi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i++] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, pi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, pi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", 
mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, pi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } int begin_synchronized_op(struct adapter *sc, struct port_info *pi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? */ if (flags & SLEEP_OK) pause("t4slptst", 1); #endif if (INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (pi && IS_DOOMED(pi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct port_info *pi) { struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; int rc = 0; ASSERT_SYNCHRONIZED_OP(sc); if (isset(&sc->open_device_map, pi->port_id)) { KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING, ("mismatch between open_device_map and if_drv_flags")); return (0); /* already running */ } if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(pi->flags & PORT_INIT_DONE) && ((rc = port_full_init(pi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(pi, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = 
-t4_link_start(sc, sc->mbox, pi->tx_chan, &pi->link_cfg); if (rc != 0) { if_printf(ifp, "start_link failed: %d\n", rc); goto done; } rc = -t4_enable_vi(sc, sc->mbox, pi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* all ok */ setbit(&sc->open_device_map, pi->port_id); PORT_LOCK(pi); ifp->if_drv_flags |= IFF_DRV_RUNNING; PORT_UNLOCK(pi); callout_reset(&pi->tick, hz, cxgbe_tick, pi); done: if (rc != 0) cxgbe_uninit_synchronized(pi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct port_info *pi) { struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; int rc; ASSERT_SYNCHRONIZED_OP(sc); /* * Disable the VI so that all its data in either direction is discarded * by the MPS. Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. */ rc = -t4_enable_vi(sc, sc->mbox, pi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } clrbit(&sc->open_device_map, pi->port_id); PORT_LOCK(pi); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->linkdnrc = -1; t4_os_link_changed(sc, pi->port_id, 0, -1); return (0); } /* * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ static int setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q; char s[8]; struct irq *irq; struct port_info *pi; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 
0 : 1; if (sc->intr_count == 1) { KASSERT(!(sc->flags & INTR_DIRECT), ("%s: single interrupt && INTR_DIRECT?", __func__)); rc = t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"); if (rc != 0) return (rc); } else { /* Multiple interrupts. */ KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; /* The second one is always the firmware event queue */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; /* * Note that if INTR_DIRECT is not set then either the NIC rx * queues or (exclusive or) the TOE rx queueus will be taking * direct interrupts. * * There is no need to check for is_offload(sc) as nofldrxq * will be 0 if offload is disabled. */ for_each_port(sc, p) { pi = sc->port[p]; #ifdef TCP_OFFLOAD /* * Skip over the NIC queues if they aren't taking direct * interrupts. */ if (!(sc->flags & INTR_DIRECT) && pi->nofldrxq > pi->nrxq) goto ofld_queues; #endif rxq = &sc->sge.rxq[pi->first_rxq]; for (q = 0; q < pi->nrxq; q++, rxq++) { snprintf(s, sizeof(s), "%d.%d", p, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) return (rc); irq++; rid++; } #ifdef TCP_OFFLOAD /* * Skip over the offload queues if they aren't taking * direct interrupts. */ if (!(sc->flags & INTR_DIRECT)) continue; ofld_queues: ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq]; for (q = 0; q < pi->nofldrxq; q++, ofld_rxq++) { snprintf(s, sizeof(s), "%d,%d", p, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; } #endif } } return (0); } static int adapter_full_init(struct adapter *sc) { int rc, i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). 
*/ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } static int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } static int port_full_init(struct port_info *pi) { struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j; ASSERT_SYNCHRONIZED_OP(sc); KASSERT((pi->flags & PORT_INIT_DONE) == 0, ("%s: PORT_INIT_DONE already", __func__)); sysctl_ctx_init(&pi->ctx); pi->flags |= PORT_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this port. */ rc = t4_setup_port_queues(pi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this port. Save a copy of the RSS table for later use. */ rss = malloc(pi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < pi->rss_size;) { for_each_rxq(pi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == pi->rss_size) break; } } rc = -t4_config_rss_range(sc, sc->mbox, pi->viid, 0, pi->rss_size, rss, pi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } pi->rss = rss; pi->flags |= PORT_INIT_DONE; done: if (rc != 0) port_full_uninit(pi); return (rc); } /* * Idempotent. 
*/
/*
 * Undo port_full_init().  When PORT_INIT_DONE is set the port's tx and rx
 * queues (and, with TCP_OFFLOAD, its offload queues) are quiesced and the
 * saved RSS table is freed.  The port queues are then torn down and
 * PORT_INIT_DONE is cleared.  Always returns 0.
 */
static int
port_full_uninit(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int i;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif

	if (pi->flags & PORT_INIT_DONE) {

		/* Need to quiesce queues.  XXX: ctrl queues? */

		for_each_txq(pi, i, txq) {
			quiesce_eq(sc, &txq->eq);
		}

#ifdef TCP_OFFLOAD
		for_each_ofld_txq(pi, i, ofld_txq) {
			quiesce_eq(sc, &ofld_txq->eq);
		}
#endif

		for_each_rxq(pi, i, rxq) {
			quiesce_iq(sc, &rxq->iq);
			quiesce_fl(sc, &rxq->fl);
		}

#ifdef TCP_OFFLOAD
		for_each_ofld_rxq(pi, i, ofld_rxq) {
			quiesce_iq(sc, &ofld_rxq->iq);
			quiesce_fl(sc, &ofld_rxq->fl);
		}
#endif
		free(pi->rss, M_CXGBE);
	}

	t4_teardown_port_queues(pi);
	pi->flags &= ~PORT_INIT_DONE;

	return (0);
}

/*
 * Quiesce an egress queue: mark it EQ_DOOMED, sleep while EQ_CRFLUSHED is
 * set, then drain its tx callout and its tx task.
 */
static void
quiesce_eq(struct adapter *sc, struct sge_eq *eq)
{
	EQ_LOCK(eq);
	eq->flags |= EQ_DOOMED;

	/*
	 * Wait for the response to a credit flush if one's
	 * pending.
	 */
	while (eq->flags & EQ_CRFLUSHED)
		mtx_sleep(eq, &eq->eq_lock, 0, "crflush", 0);
	EQ_UNLOCK(eq);

	callout_drain(&eq->tx_callout);	/* XXX: iffy */
	pause("callout", 10);		/* Still iffy */

	taskqueue_drain(sc->tq[eq->tx_chan], &eq->tx_task);
}

/*
 * Quiesce an ingress queue: retry, pausing between attempts, until its state
 * can be switched atomically from IQS_IDLE to IQS_DISABLED.
 */
static void
quiesce_iq(struct adapter *sc, struct sge_iq *iq)
{
	(void) sc;	/* unused */

	/* Synchronize with the interrupt handler */
	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
		pause("iqfree", 1);
}

/*
 * Quiesce a free list: set FL_DOOMED while holding both sfl_lock and the
 * free list's own lock, then drain the adapter's sfl callout.
 */
static void
quiesce_fl(struct adapter *sc, struct sge_fl *fl)
{
	mtx_lock(&sc->sfl_lock);
	FL_LOCK(fl);
	fl->flags |= FL_DOOMED;
	FL_UNLOCK(fl);
	mtx_unlock(&sc->sfl_lock);

	callout_drain(&sc->sfl_callout);
	KASSERT((fl->flags & FL_STARVING) == 0,
	    ("%s: still starving", __func__));
}

/*
 * Allocate the IRQ resource for rid and attach handler(arg) to it, recording
 * the rid, resource and tag in *irq.  The interrupt is described with name
 * when name is not NULL.
 *
 * Returns 0 on success, ENOMEM if the resource cannot be allocated, or the
 * error from bus_setup_intr().  In the last case irq->res is left allocated;
 * it is not released here.
 */
static int
t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
    driver_intr_t *handler, void *arg, char *name)
{
	int rc;

	irq->rid = rid;
	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
	    RF_SHAREABLE | RF_ACTIVE);
	if (irq->res == NULL) {
		device_printf(sc->dev,
		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
		return (ENOMEM);
	}

	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
	    NULL, handler, arg, &irq->tag);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to setup interrupt for rid %d, name %s: %d\n",
		    rid, name, rc);
	} else if (name)
		bus_describe_intr(sc->dev, irq->res, irq->tag, name);

	return (rc);
}

/*
 * Tear down the handler and release the resource recorded in *irq (each only
 * if present), then zero *irq.  Always returns 0.
 */
static int
t4_free_irq(struct adapter *sc, struct irq *irq)
{
	if (irq->tag)
		bus_teardown_intr(sc->dev, irq->res, irq->tag);
	if (irq->res)
		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);

	bzero(irq, sizeof(*irq));

	return (0);
}

/*
 * Read the 32-bit registers at addresses start..end (inclusive, stepping by
 * 4) and store them in buf beginning at byte offset start, so each value
 * lands at the offset equal to its register address.
 */
static void
reg_block_dump(struct adapter *sc, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *p = (uint32_t *)(buf + start);

	for ( ; start <= end; start += sizeof(uint32_t))
		*p++ = t4_read_reg(sc, start);
}

/*
 * Dump the chip's registers into buf and set regs->version from the chip id
 * and revision.  Each table below is consumed two entries at a time as the
 * (first, last) addresses of a block handed to reg_block_dump(); is_t4()
 * selects which table is used.
 * NOTE(review): nothing here checks the size of buf against the highest
 * address in the tables; presumably the caller sizes it -- confirm.
 */
static void
t4_get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
{
	int i, n;
	const unsigned int *reg_ranges;
	static const unsigned int t4_reg_ranges[] = {
		0x1008, 0x1108, 0x1180, 0x11b4, 0x11fc, 0x123c, 0x1300, 0x173c,
		0x1800, 0x18fc, 0x3000, 0x30d8, 0x30e0, 0x5924, 0x5960, 0x59d4,
		0x5a00, 0x5af8, 0x6000, 0x6098, 0x6100, 0x6150, 0x6200, 0x6208,
		0x6240, 0x6248, 0x6280, 0x6338, 0x6370, 0x638c, 0x6400, 0x643c,
		0x6500, 0x6524, 0x6a00, 0x6a38, 0x6a60, 0x6a78, 0x6b00, 0x6b84,
		0x6bf0, 0x6c84, 0x6cf0, 0x6d84, 0x6df0, 0x6e84, 0x6ef0, 0x6f84,
		0x6ff0, 0x7084, 0x70f0, 0x7184, 0x71f0, 0x7284, 0x72f0, 0x7384,
		0x73f0, 0x7450, 0x7500, 0x7530, 0x7600, 0x761c, 0x7680, 0x76cc,
		0x7700, 0x7798, 0x77c0, 0x77fc, 0x7900, 0x79fc, 0x7b00, 0x7c38,
		0x7d00, 0x7efc, 0x8dc0, 0x8e1c, 0x8e30, 0x8e78, 0x8ea0, 0x8f6c,
		0x8fc0, 0x9074, 0x90fc, 0x90fc, 0x9400, 0x9458, 0x9600, 0x96bc,
		0x9800, 0x9808, 0x9820, 0x983c, 0x9850, 0x9864, 0x9c00, 0x9c6c,
		0x9c80, 0x9cec, 0x9d00, 0x9d6c, 0x9d80, 0x9dec, 0x9e00, 0x9e6c,
		0x9e80, 0x9eec, 0x9f00, 0x9f6c, 0x9f80, 0x9fec, 0xd004, 0xd03c,
		0xdfc0, 0xdfe0, 0xe000, 0xea7c, 0xf000, 0x11110, 0x11118, 0x11190,
		0x19040, 0x1906c, 0x19078, 0x19080, 0x1908c, 0x19124, 0x19150, 0x191b0,
		0x191d0, 0x191e8, 0x19238, 0x1924c, 0x193f8, 0x19474, 0x19490, 0x194f8,
		0x19800, 0x19f30, 0x1a000, 0x1a06c, 0x1a0b0, 0x1a120, 0x1a128, 0x1a138,
		0x1a190, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e040, 0x1e04c, 0x1e284, 0x1e28c,
		0x1e2c0, 0x1e2c0, 0x1e2e0, 0x1e2e0, 0x1e300, 0x1e384, 0x1e3c0, 0x1e3c8,
		0x1e440, 0x1e44c, 0x1e684, 0x1e68c, 0x1e6c0, 0x1e6c0, 0x1e6e0, 0x1e6e0,
		0x1e700, 0x1e784, 0x1e7c0, 0x1e7c8, 0x1e840, 0x1e84c, 0x1ea84, 0x1ea8c,
		0x1eac0, 0x1eac0, 0x1eae0, 0x1eae0, 0x1eb00, 0x1eb84, 0x1ebc0, 0x1ebc8,
		0x1ec40, 0x1ec4c, 0x1ee84, 0x1ee8c, 0x1eec0, 0x1eec0, 0x1eee0, 0x1eee0,
		0x1ef00, 0x1ef84, 0x1efc0, 0x1efc8, 0x1f040, 0x1f04c, 0x1f284, 0x1f28c,
		0x1f2c0, 0x1f2c0, 0x1f2e0, 0x1f2e0, 0x1f300, 0x1f384, 0x1f3c0, 0x1f3c8,
		0x1f440, 0x1f44c, 0x1f684, 0x1f68c, 0x1f6c0, 0x1f6c0, 0x1f6e0, 0x1f6e0,
		0x1f700, 0x1f784, 0x1f7c0, 0x1f7c8, 0x1f840, 0x1f84c, 0x1fa84, 0x1fa8c,
		0x1fac0, 0x1fac0, 0x1fae0, 0x1fae0, 0x1fb00, 0x1fb84, 0x1fbc0, 0x1fbc8,
		0x1fc40, 0x1fc4c, 0x1fe84, 0x1fe8c, 0x1fec0, 0x1fec0, 0x1fee0, 0x1fee0,
		0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x20000, 0x2002c, 0x20100, 0x2013c,
		0x20190, 0x201c8, 0x20200, 0x20318, 0x20400, 0x20528, 0x20540, 0x20614,
		0x21000, 0x21040, 0x2104c, 0x21060, 0x210c0, 0x210ec, 0x21200, 0x21268,
		0x21270, 0x21284, 0x212fc, 0x21388, 0x21400, 0x21404, 0x21500, 0x21518,
		0x2152c, 0x2153c, 0x21550, 0x21554, 0x21600, 0x21600, 0x21608, 0x21628,
		0x21630, 0x2163c, 0x21700, 0x2171c, 0x21780, 0x2178c, 0x21800, 0x21c38,
		0x21c80, 0x21d7c, 0x21e00, 0x21e04, 0x22000, 0x2202c, 0x22100, 0x2213c,
		0x22190, 0x221c8, 0x22200, 0x22318, 0x22400, 0x22528, 0x22540, 0x22614,
		0x23000, 0x23040, 0x2304c, 0x23060, 0x230c0, 0x230ec, 0x23200, 0x23268,
		0x23270, 0x23284, 0x232fc, 0x23388, 0x23400, 0x23404, 0x23500, 0x23518,
		0x2352c, 0x2353c, 0x23550, 0x23554, 0x23600, 0x23600, 0x23608, 0x23628,
		0x23630, 0x2363c, 0x23700, 0x2371c, 0x23780, 0x2378c, 0x23800, 0x23c38,
		0x23c80, 0x23d7c, 0x23e00, 0x23e04, 0x24000, 0x2402c, 0x24100, 0x2413c,
		0x24190, 0x241c8, 0x24200, 0x24318, 0x24400, 0x24528, 0x24540, 0x24614,
		0x25000, 0x25040, 0x2504c, 0x25060, 0x250c0, 0x250ec, 0x25200, 0x25268,
		0x25270, 0x25284, 0x252fc, 0x25388, 0x25400, 0x25404, 0x25500, 0x25518,
		0x2552c, 0x2553c, 0x25550, 0x25554, 0x25600, 0x25600, 0x25608, 0x25628,
		0x25630, 0x2563c, 0x25700, 0x2571c, 0x25780, 0x2578c, 0x25800, 0x25c38,
		0x25c80, 0x25d7c, 0x25e00, 0x25e04, 0x26000, 0x2602c, 0x26100, 0x2613c,
		0x26190, 0x261c8, 0x26200, 0x26318, 0x26400, 0x26528, 0x26540, 0x26614,
		0x27000, 0x27040, 0x2704c, 0x27060, 0x270c0, 0x270ec, 0x27200, 0x27268,
		0x27270, 0x27284, 0x272fc, 0x27388, 0x27400, 0x27404, 0x27500, 0x27518,
		0x2752c, 0x2753c, 0x27550, 0x27554, 0x27600, 0x27600, 0x27608, 0x27628,
		0x27630, 0x2763c, 0x27700, 0x2771c, 0x27780, 0x2778c, 0x27800, 0x27c38,
		0x27c80, 0x27d7c, 0x27e00, 0x27e04
	};
	static const unsigned int t5_reg_ranges[] = {
		0x1008, 0x1148, 0x1180, 0x11b4, 0x11fc, 0x123c, 0x1280, 0x173c,
		0x1800, 0x18fc, 0x3000, 0x3028, 0x3060, 0x30d8, 0x30e0, 0x30fc,
		0x3140, 0x357c, 0x35a8, 0x35cc, 0x35ec, 0x35ec, 0x3600, 0x5624,
		0x56cc, 0x575c, 0x580c, 0x5814, 0x5890, 0x58bc, 0x5940, 0x59dc,
		0x59fc, 0x5a18, 0x5a60, 0x5a9c, 0x5b94, 0x5bfc, 0x6000, 0x6040,
		0x6058, 0x614c, 0x7700, 0x7798, 0x77c0, 0x78fc, 0x7b00, 0x7c54,
		0x7d00, 0x7efc, 0x8dc0, 0x8de0, 0x8df8, 0x8e84, 0x8ea0, 0x8f84,
		0x8fc0, 0x90f8, 0x9400, 0x9470, 0x9600, 0x96f4, 0x9800, 0x9808,
		0x9820, 0x983c, 0x9850, 0x9864, 0x9c00, 0x9c6c, 0x9c80, 0x9cec,
		0x9d00, 0x9d6c, 0x9d80, 0x9dec, 0x9e00, 0x9e6c, 0x9e80, 0x9eec,
		0x9f00, 0x9f6c, 0x9f80, 0xa020, 0xd004, 0xd03c, 0xdfc0, 0xdfe0,
		0xe000, 0x11088, 0x1109c, 0x11110, 0x11118, 0x1117c, 0x11190, 0x11204,
		0x19040, 0x1906c, 0x19078, 0x19080, 0x1908c, 0x19124, 0x19150, 0x191b0,
		0x191d0, 0x191e8, 0x19238, 0x19290, 0x193f8, 0x19474, 0x19490, 0x194cc,
		0x194f0, 0x194f8, 0x19c00, 0x19c60, 0x19c94, 0x19e10, 0x19e50, 0x19f34,
		0x19f40, 0x19f50, 0x19f90, 0x19fe4, 0x1a000, 0x1a06c, 0x1a0b0, 0x1a120,
		0x1a128, 0x1a138, 0x1a190, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e008, 0x1e00c,
		0x1e040, 0x1e04c, 0x1e284, 0x1e290, 0x1e2c0, 0x1e2c0, 0x1e2e0, 0x1e2e0,
		0x1e300, 0x1e384, 0x1e3c0, 0x1e3c8, 0x1e408, 0x1e40c, 0x1e440, 0x1e44c,
		0x1e684, 0x1e690, 0x1e6c0, 0x1e6c0, 0x1e6e0, 0x1e6e0, 0x1e700, 0x1e784,
		0x1e7c0, 0x1e7c8, 0x1e808, 0x1e80c, 0x1e840, 0x1e84c, 0x1ea84, 0x1ea90,
		0x1eac0, 0x1eac0, 0x1eae0, 0x1eae0, 0x1eb00, 0x1eb84, 0x1ebc0, 0x1ebc8,
		0x1ec08, 0x1ec0c, 0x1ec40, 0x1ec4c, 0x1ee84, 0x1ee90, 0x1eec0, 0x1eec0,
		0x1eee0, 0x1eee0, 0x1ef00, 0x1ef84, 0x1efc0, 0x1efc8, 0x1f008, 0x1f00c,
		0x1f040, 0x1f04c, 0x1f284, 0x1f290, 0x1f2c0, 0x1f2c0, 0x1f2e0, 0x1f2e0,
		0x1f300, 0x1f384, 0x1f3c0, 0x1f3c8, 0x1f408, 0x1f40c, 0x1f440, 0x1f44c,
		0x1f684, 0x1f690, 0x1f6c0, 0x1f6c0, 0x1f6e0, 0x1f6e0, 0x1f700, 0x1f784,
		0x1f7c0, 0x1f7c8, 0x1f808, 0x1f80c, 0x1f840, 0x1f84c, 0x1fa84, 0x1fa90,
		0x1fac0, 0x1fac0, 0x1fae0, 0x1fae0, 0x1fb00, 0x1fb84, 0x1fbc0, 0x1fbc8,
		0x1fc08, 0x1fc0c, 0x1fc40, 0x1fc4c, 0x1fe84, 0x1fe90, 0x1fec0, 0x1fec0,
		0x1fee0, 0x1fee0, 0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x30000, 0x30030,
		0x30100, 0x30144, 0x30190, 0x301d0, 0x30200, 0x30318, 0x30400, 0x3052c,
		0x30540, 0x3061c, 0x30800, 0x30834, 0x308c0, 0x30908, 0x30910, 0x309ac,
		0x30a00, 0x30a2c, 0x30a44, 0x30a50, 0x30a74, 0x30c24, 0x30d00, 0x30d00,
		0x30d08, 0x30d14, 0x30d1c, 0x30d20, 0x30d3c, 0x30d50, 0x31200, 0x3120c,
		0x31220, 0x31220, 0x31240, 0x31240, 0x31600, 0x3160c, 0x31a00, 0x31a1c,
		0x31e00, 0x31e20, 0x31e38, 0x31e3c, 0x31e80, 0x31e80, 0x31e88, 0x31ea8,
		0x31eb0, 0x31eb4, 0x31ec8, 0x31ed4, 0x31fb8, 0x32004, 0x32200, 0x32200,
		0x32208, 0x32240, 0x32248, 0x32280, 0x32288, 0x322c0, 0x322c8, 0x322fc,
		0x32600, 0x32630, 0x32a00, 0x32abc, 0x32b00, 0x32b70, 0x33000, 0x33048,
		0x33060, 0x3309c, 0x330f0, 0x33148, 0x33160, 0x3319c, 0x331f0, 0x332e4,
		0x332f8, 0x333e4, 0x333f8, 0x33448, 0x33460, 0x3349c, 0x334f0, 0x33548,
		0x33560, 0x3359c, 0x335f0, 0x336e4, 0x336f8, 0x337e4, 0x337f8, 0x337fc,
		0x33814, 0x33814, 0x3382c, 0x3382c, 0x33880, 0x3388c, 0x338e8, 0x338ec,
		0x33900, 0x33948, 0x33960, 0x3399c, 0x339f0, 0x33ae4, 0x33af8, 0x33b10,
		0x33b28, 0x33b28, 0x33b3c, 0x33b50, 0x33bf0, 0x33c10, 0x33c28, 0x33c28,
		0x33c3c, 0x33c50, 0x33cf0, 0x33cfc, 0x34000, 0x34030, 0x34100, 0x34144,
		0x34190, 0x341d0, 0x34200, 0x34318, 0x34400, 0x3452c, 0x34540, 0x3461c,
		0x34800, 0x34834, 0x348c0, 0x34908, 0x34910, 0x349ac, 0x34a00, 0x34a2c,
		0x34a44, 0x34a50, 0x34a74, 0x34c24, 0x34d00, 0x34d00, 0x34d08, 0x34d14,
		0x34d1c, 0x34d20, 0x34d3c, 0x34d50, 0x35200, 0x3520c, 0x35220, 0x35220,
		0x35240, 0x35240, 0x35600, 0x3560c, 0x35a00, 0x35a1c, 0x35e00, 0x35e20,
		0x35e38, 0x35e3c, 0x35e80, 0x35e80, 0x35e88, 0x35ea8, 0x35eb0, 0x35eb4,
		0x35ec8, 0x35ed4, 0x35fb8, 0x36004, 0x36200, 0x36200, 0x36208, 0x36240,
		0x36248, 0x36280, 0x36288, 0x362c0, 0x362c8, 0x362fc, 0x36600, 0x36630,
		0x36a00, 0x36abc, 0x36b00, 0x36b70, 0x37000, 0x37048, 0x37060, 0x3709c,
		0x370f0, 0x37148, 0x37160, 0x3719c, 0x371f0, 0x372e4, 0x372f8, 0x373e4,
		0x373f8, 0x37448, 0x37460, 0x3749c, 0x374f0, 0x37548, 0x37560, 0x3759c,
		0x375f0, 0x376e4, 0x376f8, 0x377e4, 0x377f8, 0x377fc, 0x37814, 0x37814,
		0x3782c, 0x3782c, 0x37880, 0x3788c, 0x378e8, 0x378ec, 0x37900, 0x37948,
		0x37960, 0x3799c, 0x379f0, 0x37ae4, 0x37af8, 0x37b10, 0x37b28, 0x37b28,
		0x37b3c, 0x37b50, 0x37bf0, 0x37c10, 0x37c28, 0x37c28, 0x37c3c, 0x37c50,
		0x37cf0, 0x37cfc, 0x38000, 0x38030, 0x38100, 0x38144, 0x38190, 0x381d0,
		0x38200, 0x38318, 0x38400, 0x3852c, 0x38540, 0x3861c, 0x38800, 0x38834,
		0x388c0, 0x38908, 0x38910, 0x389ac, 0x38a00, 0x38a2c, 0x38a44, 0x38a50,
		0x38a74, 0x38c24, 0x38d00, 0x38d00, 0x38d08, 0x38d14, 0x38d1c, 0x38d20,
		0x38d3c, 0x38d50, 0x39200, 0x3920c, 0x39220, 0x39220, 0x39240, 0x39240,
		0x39600, 0x3960c, 0x39a00, 0x39a1c, 0x39e00, 0x39e20, 0x39e38, 0x39e3c,
		0x39e80, 0x39e80, 0x39e88, 0x39ea8, 0x39eb0, 0x39eb4, 0x39ec8, 0x39ed4,
		0x39fb8, 0x3a004, 0x3a200, 0x3a200, 0x3a208, 0x3a240, 0x3a248, 0x3a280,
		0x3a288, 0x3a2c0, 0x3a2c8, 0x3a2fc, 0x3a600, 0x3a630, 0x3aa00, 0x3aabc,
		0x3ab00, 0x3ab70, 0x3b000, 0x3b048, 0x3b060, 0x3b09c, 0x3b0f0, 0x3b148,
		0x3b160, 0x3b19c, 0x3b1f0, 0x3b2e4, 0x3b2f8, 0x3b3e4, 0x3b3f8, 0x3b448,
		0x3b460, 0x3b49c, 0x3b4f0, 0x3b548, 0x3b560, 0x3b59c, 0x3b5f0, 0x3b6e4,
		0x3b6f8, 0x3b7e4, 0x3b7f8, 0x3b7fc, 0x3b814, 0x3b814, 0x3b82c, 0x3b82c,
		0x3b880, 0x3b88c, 0x3b8e8, 0x3b8ec, 0x3b900, 0x3b948, 0x3b960, 0x3b99c,
		0x3b9f0, 0x3bae4, 0x3baf8, 0x3bb10, 0x3bb28, 0x3bb28, 0x3bb3c, 0x3bb50,
		0x3bbf0, 0x3bc10, 0x3bc28, 0x3bc28, 0x3bc3c, 0x3bc50, 0x3bcf0, 0x3bcfc,
		0x3c000, 0x3c030, 0x3c100, 0x3c144, 0x3c190, 0x3c1d0, 0x3c200, 0x3c318,
		0x3c400, 0x3c52c, 0x3c540, 0x3c61c, 0x3c800, 0x3c834, 0x3c8c0, 0x3c908,
		0x3c910, 0x3c9ac, 0x3ca00, 0x3ca2c, 0x3ca44, 0x3ca50, 0x3ca74, 0x3cc24,
		0x3cd00, 0x3cd00, 0x3cd08, 0x3cd14, 0x3cd1c, 0x3cd20, 0x3cd3c, 0x3cd50,
		0x3d200, 0x3d20c, 0x3d220, 0x3d220, 0x3d240, 0x3d240, 0x3d600, 0x3d60c,
		0x3da00, 0x3da1c, 0x3de00, 0x3de20, 0x3de38, 0x3de3c, 0x3de80, 0x3de80,
		0x3de88, 0x3dea8, 0x3deb0, 0x3deb4, 0x3dec8, 0x3ded4, 0x3dfb8, 0x3e004,
		0x3e200, 0x3e200, 0x3e208, 0x3e240, 0x3e248, 0x3e280, 0x3e288, 0x3e2c0,
		0x3e2c8, 0x3e2fc, 0x3e600, 0x3e630, 0x3ea00, 0x3eabc, 0x3eb00, 0x3eb70,
		0x3f000, 0x3f048, 0x3f060, 0x3f09c, 0x3f0f0, 0x3f148, 0x3f160, 0x3f19c,
		0x3f1f0, 0x3f2e4, 0x3f2f8, 0x3f3e4, 0x3f3f8, 0x3f448, 0x3f460, 0x3f49c,
		0x3f4f0, 0x3f548, 0x3f560, 0x3f59c, 0x3f5f0, 0x3f6e4, 0x3f6f8, 0x3f7e4,
		0x3f7f8, 0x3f7fc, 0x3f814, 0x3f814, 0x3f82c, 0x3f82c, 0x3f880, 0x3f88c,
		0x3f8e8, 0x3f8ec, 0x3f900, 0x3f948, 0x3f960, 0x3f99c, 0x3f9f0, 0x3fae4,
		0x3faf8, 0x3fb10, 0x3fb28, 0x3fb28, 0x3fb3c, 0x3fb50, 0x3fbf0, 0x3fc10,
		0x3fc28, 0x3fc28, 0x3fc3c, 0x3fc50, 0x3fcf0, 0x3fcfc, 0x40000, 0x4000c,
		0x40040, 0x40068, 0x4007c, 0x40144, 0x40180, 0x4018c, 0x40200, 0x40298,
		0x402ac, 0x4033c, 0x403f8, 0x403fc, 0x41304, 0x413c4, 0x41400, 0x4141c,
		0x41480, 0x414d0, 0x44000, 0x44078, 0x440c0, 0x44278, 0x442c0, 0x44478,
		0x444c0, 0x44678, 0x446c0, 0x44878, 0x448c0, 0x449fc, 0x45000, 0x45068,
		0x45080, 0x45084, 0x450a0, 0x450b0, 0x45200, 0x45268, 0x45280, 0x45284,
		0x452a0, 0x452b0, 0x460c0, 0x460e4, 0x47000, 0x4708c, 0x47200, 0x47250,
		0x47400, 0x47420, 0x47600, 0x47618, 0x47800, 0x47814, 0x48000, 0x4800c,
		0x48040, 0x48068, 0x4807c, 0x48144, 0x48180, 0x4818c, 0x48200, 0x48298,
		0x482ac, 0x4833c, 0x483f8, 0x483fc, 0x49304, 0x493c4, 0x49400, 0x4941c,
		0x49480, 0x494d0, 0x4c000, 0x4c078, 0x4c0c0, 0x4c278, 0x4c2c0, 0x4c478,
		0x4c4c0, 0x4c678, 0x4c6c0, 0x4c878, 0x4c8c0, 0x4c9fc, 0x4d000, 0x4d068,
		0x4d080, 0x4d084, 0x4d0a0, 0x4d0b0, 0x4d200, 0x4d268, 0x4d280, 0x4d284,
		0x4d2a0, 0x4d2b0, 0x4e0c0, 0x4e0e4, 0x4f000, 0x4f08c, 0x4f200, 0x4f250,
		0x4f400, 0x4f420, 0x4f600, 0x4f618, 0x4f800, 0x4f814, 0x50000, 0x500cc,
		0x50400, 0x50400, 0x50800, 0x508cc, 0x50c00, 0x50c00, 0x51000, 0x5101c,
		0x51300, 0x51308,
	};

	if (is_t4(sc)) {
		reg_ranges = &t4_reg_ranges[0];
		n = nitems(t4_reg_ranges);
	} else {
		reg_ranges = &t5_reg_ranges[0];
		n = nitems(t5_reg_ranges);
	}

	regs->version = chip_id(sc) | chip_rev(sc) << 10;
	for (i = 0; i < n; i += 2)
		reg_block_dump(sc, buf, reg_ranges[i], reg_ranges[i + 1]);
}

/*
 * Periodic callout for a port.  Refreshes the ifnet counters from the port's
 * hardware statistics and reschedules itself hz ticks later.  It returns
 * without rescheduling once IFF_DRV_RUNNING is no longer set.  Runs with the
 * port lock held.
 */
static void
cxgbe_tick(void *arg)
{
	struct port_info *pi = arg;
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = pi->ifp;
	struct sge_txq *txq;
	int i, drops;
	struct port_stats *s = &pi->stats;

	PORT_LOCK(pi);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		PORT_UNLOCK(pi);
		return;	/* without scheduling another callout */
	}

	t4_get_port_stats(sc, pi->tx_chan, s);

	/* Pause frames are subtracted from the frame and octet counters. */
	ifp->if_opackets = s->tx_frames - s->tx_pause;
	ifp->if_ipackets = s->rx_frames - s->rx_pause;
	ifp->if_obytes = s->tx_octets - s->tx_pause * 64;
	ifp->if_ibytes = s->rx_octets - s->rx_pause * 64;
	ifp->if_omcasts = s->tx_mcast_frames - s->tx_pause;
	ifp->if_imcasts = s->rx_mcast_frames - s->rx_pause;
	ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
	    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
	    s->rx_trunc3;
	/* Add the TP drop counter of every channel in rx_chan_map. */
	for (i = 0; i < 4; i++) {
		if (pi->rx_chan_map & (1 << i)) {
			uint32_t v;

			/*
			 * XXX: indirect reads from the same ADDR/DATA pair can
			 * race with each other.
			 */
			t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
			    1, A_TP_MIB_TNL_CNG_DROP_0 + i);
			ifp->if_iqdrops += v;
		}
	}

	/* Output drops: the port's tx_drop plus each tx queue's br_drops. */
	drops = s->tx_drop;
	for_each_txq(pi, i, txq)
		drops += txq->br->br_drops;
	ifp->if_snd.ifq_drops = drops;

	ifp->if_oerrors = s->tx_error_frames;
	ifp->if_ierrors = s->rx_jabber + s->rx_runt + s->rx_too_long +
	    s->rx_fcs_err + s->rx_len_err;

	callout_schedule(&pi->tick, hz);
	PORT_UNLOCK(pi);
}

/*
 * VLAN configuration hook.  Ignored unless arg is this ifnet and the ifnet
 * is of type IFT_ETHER; otherwise the parent ifnet is stored as the cookie
 * of the vlan interface found for vid.
 */
static void
cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
{
	struct ifnet *vlan;

	if (arg != ifp || ifp->if_type != IFT_ETHER)
		return;

	vlan = VLAN_DEVAT(ifp, vid);
	VLAN_SETCOOKIE(vlan, ifp);
}

/*
 * Default CPL handler.  Panics under INVARIANTS; otherwise logs the opcode
 * and frees the payload mbuf.  Returns EDOOFUS.
 */
static int
cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{

#ifdef INVARIANTS
	panic("%s: opcode 0x%02x on iq %p with payload %p",
	    __func__, rss->opcode, iq, m);
#else
	log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n",
	    __func__, rss->opcode, iq, m);
	m_freem(m);
#endif
	return (EDOOFUS);
}

/*
 * Install h as the handler for CPL opcode; a NULL h installs
 * cpl_not_handled.  The slot is written with a release store.  Returns
 * EINVAL if opcode is outside sc->cpl_handler, 0 otherwise.
 */
int
t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
{
	uintptr_t *loc, new;

	if (opcode >= nitems(sc->cpl_handler))
		return (EINVAL);

	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
	loc = (uintptr_t *) &sc->cpl_handler[opcode];
	atomic_store_rel_ptr(loc, new);

	return (0);
}

/*
 * Default async notification handler.  Panics under INVARIANTS; otherwise
 * logs the event.  Returns EDOOFUS.
 */
static int
an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl)
{

#ifdef INVARIANTS
	panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
#else
	log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n",
	    __func__, iq, ctrl);
#endif
	return (EDOOFUS);
}

/*
 * Install h as the adapter's async notification handler; a NULL h installs
 * an_not_handled.  Always returns 0.
 */
int
t4_register_an_handler(struct adapter *sc, an_handler_t h)
{
	uintptr_t *loc, new;

	new = h ? (uintptr_t)h : (uintptr_t)an_not_handled;
	loc = (uintptr_t *) &sc->an_handler;
	atomic_store_rel_ptr(loc, new);

	return (0);
}

/*
 * Default firmware message handler.  Panics under INVARIANTS; otherwise logs
 * the message type.  Returns EDOOFUS.
 */
static int
fw_msg_not_handled(struct adapter *sc, const __be64 *rpl)
{
	/* rpl points at data[0] of the enclosing cpl_fw6_msg. */
	const struct cpl_fw6_msg *cpl =
	    __containerof(rpl, struct cpl_fw6_msg, data[0]);

#ifdef INVARIANTS
	panic("%s: fw_msg type %d", __func__, cpl->type);
#else
	log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type);
#endif
	return (EDOOFUS);
}

/*
 * Install h as the handler for firmware messages of the given type; a NULL h
 * installs fw_msg_not_handled.  Returns EINVAL if type is outside
 * sc->fw_msg_handler or is one of the RSSCPL subtypes, 0 otherwise.
 */
int
t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h)
{
	uintptr_t *loc, new;

	if (type >= nitems(sc->fw_msg_handler))
		return (EINVAL);

	/*
	 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL
	 * handler dispatch table.  Reject any attempt to install a handler for
	 * this subtype.
	 */
	if (type == FW_TYPE_RSSCPL || type == FW6_TYPE_RSSCPL)
		return (EINVAL);

	new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled;
	loc = (uintptr_t *) &sc->fw_msg_handler[type];
	atomic_store_rel_ptr(loc, new);

	return (0);
}

static int
t4_sysctls(struct adapter *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children, *c0;
	static char *caps[] = {
		"\20\1PPP\2QFC\3DCBX",			/* caps[0] linkcaps */
		"\20\1NIC\2VM\3IDS\4UM\5UM_ISGL"	/* caps[1] niccaps */
		    "\6HASHFILTER\7ETHOFLD",
		"\20\1TOE",				/* caps[2] toecaps */
		"\20\1RDDP\2RDMAC",			/* caps[3] rdmacaps */
		"\20\1INITIATOR_PDU\2TARGET_PDU"	/* caps[4] iscsicaps */
		    "\3INITIATOR_CNXOFLD\4TARGET_CNXOFLD"
		    "\5INITIATOR_SSNOFLD\6TARGET_SSNOFLD",
		"\20\1INITIATOR\2TARGET\3CTRL_OFLD"	/* caps[5] fcoecaps */
		    "\4PO_INITIAOR\5PO_TARGET"
	};
	static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};

	ctx = device_get_sysctl_ctx(sc->dev);

	/*
	 * dev.t4nex.X.
*/ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", - CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); + CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", - CTLFLAG_RD, &sc->cfg_file, 0, "configuration file"); + CTLFLAG_RD, sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells, sysctl_bitfield, "A", "available doorbells"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkcaps", CTLTYPE_STRING | CTLFLAG_RD, caps[0], sc->linkcaps, sysctl_bitfield, "A", "available link capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "niccaps", CTLTYPE_STRING | CTLFLAG_RD, caps[1], sc->niccaps, sysctl_bitfield, "A", "available NIC capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "toecaps", CTLTYPE_STRING | CTLFLAG_RD, caps[2], sc->toecaps, sysctl_bitfield, "A", "available TCP offload capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdmacaps", CTLTYPE_STRING | CTLFLAG_RD, caps[3], sc->rdmacaps, sysctl_bitfield, "A", "available RDMA capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "iscsicaps", CTLTYPE_STRING | CTLFLAG_RD, caps[4], sc->iscsicaps, sysctl_bitfield, "A", "available iSCSI capabilities"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoecaps", CTLTYPE_STRING | CTLFLAG_RD, caps[5], sc->fcoecaps, sysctl_bitfield, "A", "available FCoE capabilities"); 
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->sge.timer_val, sizeof(sc->sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->sge.counter_val, sizeof(sc->sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); t4_sge_sysctls(sc, ctx, children); #ifdef SBUF_DRAIN /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, 
sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_la, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (is_t5(sc)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); 
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", "DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_mps_tcam, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, 
OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (is_t5(sc)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { /* * dev.t4nex.X.toe. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.indsz = G_INDICATESIZE(t4_read_reg(sc, A_TP_PARA_REG5)); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW, &sc->tt.indsz, 0, "DDP max indicate size allowed"); sc->tt.ddp_thres = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW, &sc->tt.ddp_thres, 0, "DDP threshold"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); } #endif return (0); } static int cxgbe_sysctls(struct port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; struct adapter *sc = pi->adapter; ctx = device_get_sysctl_ctx(pi->dev); /* * dev.cxgbe.X. 
*/ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &pi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &pi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &pi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &pi->first_txq, 0, "index of first tx queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_noflowq, "IU", "Reserve queue 0 for non-flowid packets"); #ifdef TCP_OFFLOAD if (is_offload(sc)) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, &pi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, &pi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", CTLFLAG_RD, &pi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", CTLFLAG_RD, &pi->first_ofld_txq, 0, "index of first TOE tx queue"); } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, 
OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_qsize_txq, "I", "tx queue size"); /* * dev.cxgbe.X.stats. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, 
"tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, 
A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, 
A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and they may be stale by upto 1s */ SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT return (0); } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i; 
struct sbuf sb; sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); for (i = arg1; arg2; arg2 -= sizeof(int), i++) sbuf_printf(&sb, "%d ", *i); sbuf_trim(&sb); sbuf_finish(&sb); rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); sbuf_delete(&sb); return (rc); } static int sysctl_bitfield(SYSCTL_HANDLER_ARGS) { int rc; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_btphy(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int op = arg2; struct adapter *sc = pi->adapter; u_int v; int rc; rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int rc, val; val = pi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (pi->ntxq > 1)) pi->rsrv_noflowq = 1; else pi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; int idx, rc, i; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif uint8_t v; idx = pi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1); for_each_rxq(pi, i, rxq) { #ifdef atomic_store_rel_8 
atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } #ifdef TCP_OFFLOAD for_each_ofld_rxq(pi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } #endif pi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; int idx, rc; idx = pi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (pi->flags & PORT_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else pi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; int qsize, rc; qsize = pi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (pi->flags & PORT_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else pi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; int qsize, rc; qsize = pi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); /* bufring size must be powerof2 */ if (qsize < 128 || !powerof2(qsize)) return (EINVAL); rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (pi->flags & PORT_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues 
are created */ else pi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; uint64_t val; val = t4_read_reg64(sc, reg); return (sysctl_handle_64(oidp, &val, 0, req)); } static int sysctl_temperature(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int rc, t; uint32_t param, val; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp"); if (rc) return (rc); param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); end_synchronized_op(sc, 0); if (rc) return (rc); /* unknown is returned as 0 but we display -1 in that case */ t = val == 0 ? -1 : val; rc = sysctl_handle_int(oidp, &t, 0, req); return (rc); } #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t incr[NMTUS][NCCTRL_WIN]; static const char *dec_fac[] = { "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875", "0.9375" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); t4_read_cong_tbl(sc, incr); for (i = 0; i < NCCTRL_WIN; ++i) { sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i, incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i], incr[5][i], incr[6][i], incr[7][i]); sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n", incr[8][i], incr[9][i], incr[10][i], incr[11][i], incr[12][i], incr[13][i], incr[14][i], incr[15][i], sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ "SGE0-RX", "SGE1-RX" /* additional obq's 
(T5 onwards) */ }; static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n, qid = arg2; uint32_t *buf, *p; char *qtype; u_int cim_num_obq = is_t4(sc) ? CIM_NUM_OBQ : CIM_NUM_OBQ_T5; KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq, ("%s: bad qid %d\n", __func__, qid)); if (qid < CIM_NUM_IBQ) { /* inbound queue */ qtype = "IBQ"; n = 4 * CIM_IBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_ibq(sc, qid, buf, n); } else { /* outbound queue */ qtype = "OBQ"; qid -= CIM_NUM_IBQ; n = 4 * cim_num_obq * CIM_OBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_obq(sc, qid, buf, n); } if (rc < 0) { rc = -rc; goto done; } n = rc * sizeof(uint32_t); /* rc has # of words actually read */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? 
"" : " LS0Stat LS0Addr LS0Data"); KASSERT((sc->params.cim_la_size & 7) == 0, ("%s: p will walk off the end of buf", __func__)); for (p = buf; p < &buf[sc->params.cim_la_size]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = 
malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; if (is_t4(sc)) { cim_num_obq = CIM_NUM_OBQ; ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { cim_num_obq = CIM_NUM_OBQ_T5; ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) 
sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_tp_get_cpl_stats(sc, &stats); sbuf_printf(sb, " channel 0 channel 1 channel 2 " "channel 3\n"); sbuf_printf(sb, "CPL requests: %10u %10u %10u %10u\n", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "CPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } const char *devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; const char *devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", 
[FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE" }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int i, j, rc, nentries, first = 0, m; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { dparams->memtype = FW_MEMTYPE_EDC0; dparams->start = 0x84000; dparams->size = 32768; } nentries = dparams->size / sizeof(struct fw_devlog_e); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); m = fwmtype_to_hwmtype(dparams->memtype); rc = -t4_mem_read(sc, m, dparams->start, dparams->size, (void *)buf); if (rc != 0) goto done; for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? 
devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[4]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_fcoe_stats(sc, 0, &stats[0]); t4_get_fcoe_stats(sc, 1, &stats[1]); t4_get_fcoe_stats(sc, 2, &stats[2]); t4_get_fcoe_stats(sc, 3, &stats[3]); sbuf_printf(sb, " channel 0 channel 1 " "channel 2 channel 3\n"); sbuf_printf(sb, "octetsDDP: %16ju %16ju %16ju %16ju\n", stats[0].octetsDDP, stats[1].octetsDDP, stats[2].octetsDDP, stats[3].octetsDDP); sbuf_printf(sb, "framesDDP: %16u %16u %16u %16u\n", stats[0].framesDDP, stats[1].framesDDP, stats[2].framesDDP, stats[3].framesDDP); sbuf_printf(sb, "framesDrop: %16u %16u %16u %16u", stats[0].framesDrop, stats[1].framesDrop, stats[2].framesDrop, stats[3].framesDrop); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg); 
sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? "flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < 4; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? 
"" : "\n", i, i + 1); for (j = 0; j < nitems(stat_name); j++) sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], *p0++, *p1++); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS) { int rc = 0; struct port_info *pi = arg1; struct sbuf *sb; static const char *linkdnreasons[] = { "non-specific", "remote fault", "autoneg failed", "reserved3", "PHY overheated", "unknown", "rx los", "reserved7" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 64, req); if (sb == NULL) return (ENOMEM); if (pi->linkdnrc < 0) sbuf_printf(sb, "n/a"); else if (pi->linkdnrc < nitems(linkdnreasons)) sbuf_printf(sb, "%s", linkdnreasons[pi->linkdnrc]); else sbuf_printf(sb, "%d", pi->linkdnrc); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct mem_desc { unsigned int base; unsigned int limit; unsigned int idx; }; static int mem_desc_cmp(const void *a, const void *b) { return ((const struct mem_desc *)a)->base - ((const struct mem_desc *)b)->base; } static void mem_region_show(struct sbuf *sb, const char *name, unsigned int from, unsigned int to) { unsigned int size; size = to - from + 1; if (size == 0) return; /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); } static int sysctl_meminfo(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; uint32_t lo, hi, used, alloc; static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"}; static const char *region[] = { "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", "TDDP region:", "TPT region:", "STAG region:", "RQ region:", "RQUDP region:", "PBL region:", "TXPBL region:", "DBVFIFO region:", "ULPRX state:", "ULPTX state:", "On-chip queues:" }; struct mem_desc avail[4]; struct mem_desc 
mem[nitems(region) + 3]; /* up to 3 holes */ struct mem_desc *md = mem; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (lo & F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t4(sc) ? 2 : 3; /* Call it MC for T4 */ i++; } if (!is_t4(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * 
G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { hi = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4; md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); md->limit = (sc->tids.ntids - hi) * 16 + md->base - 1; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc) && t4_read_reg(sc, A_SGE_CONTROL2) & F_VFIFO_ENABLE) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (G_DBVFIFO_SIZE((t4_read_reg(sc, A_SGE_DBVFIFO_SIZE))) << 2) - 1; } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = md->base + sc->tids.ntids - 1; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = md->base + sc->tids.ntids - 1; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, "\n"); for (i = 0; i < n; i++) { if (mem[i].idx >= nitems(region)) continue; /* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? 
mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t4(sc)) { used = G_USED(lo); alloc = G_ALLOC(lo); } else { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < 4; i++) { lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t4(sc)) { used = G_USED(lo); alloc = G_ALLOC(lo); } else { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx 
Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); n = is_t4(sc) ? NUM_MPS_CLS_SRAM_L_INSTANCES : NUM_MPS_T5_CLS_SRAM_L_INSTANCES; for (i = 0; i < n; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.fid_ctl = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_CTL(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, " ------------ error %3u ------------", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc127_96), be32toh(ldst_cmd.u.mps.rplc95_64), be32toh(ldst_cmd.u.mps.rplc63_32), be32toh(ldst_cmd.u.mps.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; 
struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t cnt[PM_NSTATS]; uint64_t cyc[PM_NSTATS]; static const char *rx_stats[] = { "Read:", "Write bypass:", "Write mem:", "Flush:" }; static const char *tx_stats[] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, cnt, cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < ARRAY_SIZE(tx_stats); i++) sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], cnt[i], cyc[i]); t4_pmrx_get_stats(sc, cnt, cyc); sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < ARRAY_SIZE(rx_stats); i++) sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], cnt[i], cyc[i]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_tp_get_rdma_stats(sc, &stats); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct 
tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_tp_get_tcp_stats(sc, &v4, &v6); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcpOutRsts, v6.tcpOutRsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcpInSegs, v6.tcpInSegs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcpOutSegs, v6.tcpOutSegs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcpRetransSegs, v6.tcpRetransSegs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->ntids) { if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; if (b) { sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1, t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } else { sbuf_printf(sb, "TID range: %u-%u", t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } } else sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, t->ftid_base + t->nftids - 1); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base, t->etid_base + t->netids - 1); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int 
sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_tp_get_err_stats(sc, &stats); sbuf_printf(sb, " channel 0 channel 1 channel 2 " "channel 3\n"); sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.macInErrs[0], stats.macInErrs[1], stats.macInErrs[2], stats.macInErrs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdrInErrs[0], stats.hdrInErrs[1], stats.hdrInErrs[2], stats.hdrInErrs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcpInErrs[0], stats.tcpInErrs[1], stats.tcpInErrs[2], stats.tcpInErrs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6InErrs[0], stats.tcp6InErrs[1], stats.tcp6InErrs[2], stats.tcp6InErrs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnlCongDrops[0], stats.tnlCongDrops[1], stats.tnlCongDrops[2], stats.tnlCongDrops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnlTxDrops[0], stats.tnlTxDrops[1], stats.tnlTxDrops[2], stats.tnlTxDrops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofldVlanDrops[0], stats.ofldVlanDrops[1], stats.ofldVlanDrops[2], stats.ofldVlanDrops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofldChanDrops[0], stats.ofldChanDrops[1], stats.ofldChanDrops[2], stats.ofldChanDrops[3]); sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofldNoNeigh, stats.ofldCongDefer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + 
len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { "FlushInputMsg", 45, 1 }, { "FlushInputCpl", 44, 1 }, { "RssUpBit", 43, 1 }, { "RssFilterHit", 42, 1 }, { "Tid", 32, 10 }, { "InitTcb", 31, 1 }, { "LineNumber", 24, 7 }, { "Emsg", 23, 1 }, { "EdataOut", 22, 1 }, { "Cmsg", 21, 1 }, { "CdataOut", 20, 1 }, { "EreadPdu", 19, 1 }, { "CreadPdu", 18, 1 }, { "TunnelPkt", 17, 1 }, { "RcfPeerFin", 16, 1 }, { "RcfReasonOut", 12, 4 }, { "TxCchannel", 10, 2 }, { "RcfTxChannel", 8, 2 }, { "RxEchannel", 6, 2 }, { "RcfRxChannel", 5, 1 }, { "RcfDataOutSrdy", 4, 1 }, { "RxDvld", 3, 1 }, { "RxOoDvld", 2, 1 }, { "RxCongestion", 1, 1 }, { "TxCongestion", 0, 1 }, { NULL } }; static struct field_desc tp_la1[] = { { "CplCmdIn", 56, 8 }, { "CplCmdOut", 48, 8 }, { "ESynOut", 47, 1 }, { "EAckOut", 46, 1 }, { "EFinOut", 45, 1 }, { "ERstOut", 44, 1 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { 
"ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static struct field_desc tp_la2[] = { { "CplCmdIn", 56, 8 }, { "MpsVfVld", 55, 1 }, { "MpsPf", 52, 3 }, { "MpsVf", 44, 8 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? 
tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[NCHAN], orate[NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); sbuf_printf(sb, " channel 0 channel 1 channel 2 " "channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], 
p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { if (G_STATMODE(v) == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (G_STATMODE(v) == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #endif static inline void txq_start(struct ifnet *ifp, struct sge_txq *txq) { struct buf_ring *br; struct mbuf *m; TXQ_LOCK_ASSERT_OWNED(txq); br = txq->br; m = txq->m ? txq->m : drbr_dequeue(ifp, br); if (m) t4_eth_tx(ifp, txq, m); } void t4_tx_callout(void *arg) { struct sge_eq *eq = arg; struct adapter *sc; if (EQ_TRYLOCK(eq) == 0) goto reschedule; if (eq->flags & EQ_STALLED && !can_resume_tx(eq)) { EQ_UNLOCK(eq); reschedule: if (__predict_true(!(eq->flags && EQ_DOOMED))) callout_schedule(&eq->tx_callout, 1); return; } EQ_LOCK_ASSERT_OWNED(eq); if (__predict_true((eq->flags & EQ_DOOMED) == 0)) { if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) { struct sge_txq *txq = arg; struct port_info *pi = txq->ifp->if_softc; sc = pi->adapter; } else { struct sge_wrq *wrq = arg; sc = wrq->adapter; } taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task); } EQ_UNLOCK(eq); } void t4_tx_task(void *arg, int count) { struct sge_eq *eq = arg; EQ_LOCK(eq); if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) { struct sge_txq *txq = arg; txq_start(txq->ifp, txq); } else { struct sge_wrq *wrq = arg; t4_wrq_tx_locked(wrq->adapter, wrq, NULL); } EQ_UNLOCK(eq); } static uint32_t fconf_to_mode(uint32_t fconf) { uint32_t mode; mode = T4_FILTER_IPv4 | 
T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) mode |= T4_FILTER_VNIC; if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t fspec_to_fconf(struct t4_filter_specification *fs) { uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.vnic_vld || fs->mask.vnic_vld) fconf |= F_VNIC_ID; if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; return (fconf); } static int get_filter_mode(struct adapter *sc, uint32_t *mode) { int rc; uint32_t fconf; rc = 
begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getfm"); if (rc) return (rc); t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1, A_TP_VLAN_PRI_MAP); if (sc->params.tp.vlan_pri_map != fconf) { log(LOG_WARNING, "%s: cached filter mode out of sync %x %x.\n", device_get_nameunit(sc->dev), sc->params.tp.vlan_pri_map, fconf); sc->params.tp.vlan_pri_map = fconf; } *mode = fconf_to_mode(sc->params.tp.vlan_pri_map); end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter_mode(struct adapter *sc, uint32_t mode) { uint32_t fconf; int rc; fconf = mode_to_fconf(mode); rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (sc->offload_map) { rc = EBUSY; goto done; } #endif #ifdef notyet rc = -t4_set_filter_mode(sc, fconf); if (rc == 0) sc->filter_mode = fconf; #else rc = ENOTSUP; #endif done: end_synchronized_op(sc, LOCK_HELD); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t fid) { uint32_t mw_base, off, tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); uint64_t hits; memwin_info(sc, 0, &mw_base, NULL); off = position_memwin(sc, 0, tcb_base + (fid + sc->tids.ftid_base) * TCB_SIZE); if (is_t4(sc)) { hits = t4_read_reg64(sc, mw_base + off + 16); hits = be64toh(hits); } else { hits = t4_read_reg(sc, mw_base + off + 24); hits = be32toh(hits); } return (hits); } static int get_filter(struct adapter *sc, struct t4_filter *t) { int i, rc, nfilters = sc->tids.nftids; struct filter_entry *f; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getf"); if (rc) return (rc); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; goto done; } f = &sc->tids.ftid_tab[t->idx]; for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; t->l2tidx = f->l2t ? 
f->l2t->idx : 0; t->smtidx = f->smtidx; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, t->idx); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters, nports; struct filter_entry *f; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); nfilters = sc->tids.nftids; nports = sc->params.nports; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } if (t->idx >= nfilters) { rc = EINVAL; goto done; } /* Validate against the global filter mode */ if ((sc->params.tp.vlan_pri_map | fspec_to_fconf(&t->fs)) != sc->params.tp.vlan_pri_map) { rc = E2BIG; goto done; } if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { rc = EINVAL; goto done; } if (t->fs.val.iport >= nports) { rc = EINVAL; goto done; } /* Can't specify an iq if not steering to it */ if (!t->fs.dirsteer && t->fs.iq) { rc = EINVAL; goto done; } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { rc = EINVAL; goto done; } if (sc->tids.ftid_tab == NULL) { KASSERT(sc->tids.ftids_in_use == 0, ("%s: no memory allocated but filters_in_use > 0", __func__)); sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * nfilters, M_CXGBE, M_NOWAIT | M_ZERO); if (sc->tids.ftid_tab == NULL) { rc = ENOMEM; goto done; } mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); } for (i = 0; i < 4; i++) { f = &sc->tids.ftid_tab[t->idx + i]; if (f->pending || f->valid) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (t->fs.type == 0) break; } f = &sc->tids.ftid_tab[t->idx]; f->fs = t->fs; rc = set_filter_wr(sc, t->idx); done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 
0 : EIO; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4setfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static int del_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters; struct filter_entry *f; int rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); if (rc) return (rc); nfilters = sc->tids.nftids; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || t->idx >= nfilters) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } f = &sc->tids.ftid_tab[t->idx]; if (f->pending) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (f->valid) { t->fs = f->fs; /* extra info for the caller */ rc = del_filter_wr(sc, t->idx); } done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? EIO : 0; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4delfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static void clear_filter(struct filter_entry *f) { if (f->l2t) t4_l2t_release(f->l2t); bzero(f, sizeof (*f)); } static int set_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct wrqe *wr; struct fw_filter_wr *fwr; unsigned int ftid; ASSERT_SYNCHRONIZED_OP(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. 
*/ f->l2t = t4_l2t_alloc_switching(sc->l2t); if (f->l2t == NULL) return (EAGAIN); if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { t4_l2t_release(f->l2t); f->l2t = NULL; return (ENOMEM); } } ftid = sc->tids.ftid_base + fidx; wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq); if (wr == NULL) return (ENOMEM); fwr = wrtod(wr); bzero(fwr, sizeof (*fwr)); fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(ftid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.vnic_vld)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); if (f->fs.newsmac) bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); f->pending = 1; sc->tids.ftids_in_use++; t4_wrq_tx(sc, wr); return (0); } static int del_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct wrqe *wr; struct fw_filter_wr *fwr; unsigned int ftid; ftid = sc->tids.ftid_base + fidx; wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq); if (wr == NULL) return (ENOMEM); fwr = wrtod(wr); bzero(fwr, sizeof 
(*fwr)); t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); f->pending = 1; t4_wrq_tx(sc, wr); return (0); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); unsigned int idx = GET_TID(rpl); unsigned int rc; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); if (is_ftid(sc, idx)) { idx -= sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); if (rc == FW_FILTER_WR_FLT_ADDED) { KASSERT(f->pending, ("%s: filter[%u] isn't pending.", __func__, idx)); f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; } else { if (rc != FW_FILTER_WR_FLT_DELETED) { /* Add or delete failed, display an error */ log(LOG_ERR, "filter %u setup failed with error %u\n", idx, rc); } clear_filter(f); sc->tids.ftids_in_use--; } wakeup(&sc->tids.ftid_tab); mtx_unlock(&sc->tids.ftid_lock); } return (0); } static int get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via firmware failed or wasn't even attempted. Read directly via * the backdoor. 
*/ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); if (sc->flags & FULL_INIT_DONE) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t addr, off, remaining, i, n; uint32_t *buf, *b; uint32_t mw_base, mw_aperture; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); memwin_info(sc, win, &mw_base, &mw_aperture); buf = b = malloc(min(mr->len, mw_aperture), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { off = position_memwin(sc, win, addr); /* number of bytes that we'll copy in the inner loop */ n = min(remaining, mw_aperture - off); for (i = 0; i < n; i += 4) *b++ = t4_read_reg(sc, mw_base + off + i); rc = copyout(buf, dst, n); if (rc != 0) break; b = buf; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } static int in_range(int val, int lo, int hi) { return (val < 0 || (val <= hi && val >= lo)); } static int set_sched_class(struct adapter *sc, struct 
t4_sched_params *p)
{
	int fw_subcmd, fw_type, rc;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsc");
	if (rc)
		return (rc);

	/* Scheduling can only be configured on a fully initialized adapter. */
	if (!(sc->flags & FULL_INIT_DONE)) {
		rc = EAGAIN;
		goto done;
	}

	/*
	 * Translate the cxgbetool parameters into T4 firmware parameters.  (The
	 * sub-command and type are in common locations.)
	 */
	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
		fw_subcmd = FW_SCHED_SC_CONFIG;
	else if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
		fw_subcmd = FW_SCHED_SC_PARAMS;
	else {
		rc = EINVAL;
		goto done;
	}
	/* Packet scheduling is the only type accepted here. */
	if (p->type == SCHED_CLASS_TYPE_PACKET)
		fw_type = FW_SCHED_TYPE_PKTSCHED;
	else {
		rc = EINVAL;
		goto done;
	}

	if (fw_subcmd == FW_SCHED_SC_CONFIG) {
		/* Vet our parameters ..*/
		if (p->u.config.minmax < 0) {
			rc = EINVAL;
			goto done;
		}

		/* And pass the request to the firmware ...*/
		rc = -t4_sched_config(sc, fw_type, p->u.config.minmax);
		goto done;
	}

	if (fw_subcmd == FW_SCHED_SC_PARAMS) {
		int fw_level;
		int fw_mode;
		int fw_rateunit;
		int fw_ratemode;

		/* Each enumerated parameter must map to a firmware value. */
		if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL)
			fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
		else if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR)
			fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
		else if (p->u.params.level == SCHED_CLASS_LEVEL_CH_RL)
			fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
		else {
			rc = EINVAL;
			goto done;
		}

		if (p->u.params.mode == SCHED_CLASS_MODE_CLASS)
			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
		else if (p->u.params.mode == SCHED_CLASS_MODE_FLOW)
			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
		else {
			rc = EINVAL;
			goto done;
		}

		if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_BITS)
			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
		else if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_PKTS)
			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
		else {
			rc = EINVAL;
			goto done;
		}

		if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_REL)
			fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
		else if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_ABS)
			fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
		else {
			rc = EINVAL;
			goto done;
		}

		/*
		 * Vet our parameters ...  Note that in_range() also accepts
		 * negative values; those are dealt with as "unset" below.
		 */
		if (!in_range(p->u.params.channel, 0, 3) ||
		    !in_range(p->u.params.cl, 0, is_t4(sc) ? 15 : 16) ||
		    !in_range(p->u.params.minrate, 0, 10000000) ||
		    !in_range(p->u.params.maxrate, 0, 10000000) ||
		    !in_range(p->u.params.weight, 0, 100)) {
			rc = ERANGE;
			goto done;
		}

		/*
		 * Translate any unset parameters into the firmware's
		 * nomenclature and/or fail the call if the parameters
		 * are required ...
		 */
		if (p->u.params.rateunit < 0 || p->u.params.ratemode < 0 ||
		    p->u.params.channel < 0 || p->u.params.cl < 0) {
			rc = EINVAL;
			goto done;
		}
		if (p->u.params.minrate < 0)
			p->u.params.minrate = 0;
		/* maxrate is mandatory for the two rate-limiting levels. */
		if (p->u.params.maxrate < 0) {
			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
			    p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) {
				rc = EINVAL;
				goto done;
			} else
				p->u.params.maxrate = 0;
		}
		/* weight is mandatory for the CL_WRR level. */
		if (p->u.params.weight < 0) {
			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR) {
				rc = EINVAL;
				goto done;
			} else
				p->u.params.weight = 0;
		}
		/* pktsize is mandatory for the two rate-limiting levels. */
		if (p->u.params.pktsize < 0) {
			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
			    p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) {
				rc = EINVAL;
				goto done;
			} else
				p->u.params.pktsize = 0;
		}

		/* See what the firmware thinks of the request ... */
		rc = -t4_sched_params(sc, fw_type, fw_level, fw_mode,
		    fw_rateunit, fw_ratemode, p->u.params.channel,
		    p->u.params.cl, p->u.params.minrate, p->u.params.maxrate,
		    p->u.params.weight, p->u.params.pktsize);
		goto done;
	}

	rc = EINVAL;
done:
	end_synchronized_op(sc, 0);
	return (rc);
}

/*
 * Bind one TX queue of a port (p->queue >= 0) or all of the port's TX
 * queues (p->queue < 0) to scheduling class p->cl.  A negative p->cl is
 * sent to the firmware as 0xffffffff.
 */
static int
set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
{
	struct port_info *pi = NULL;
	struct sge_txq *txq;
	uint32_t fw_mnem, fw_queue, fw_class;
	int i, rc;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq");
	if (rc)
		return (rc);

	if (!(sc->flags & FULL_INIT_DONE)) {
		rc = EAGAIN;
		goto done;
	}

	if (p->port >= sc->params.nports) {
		rc = EINVAL;
		goto done;
	}

	pi = sc->port[p->port];

	/* in_range() lets negative ("all queues" / "no class") values pass. */
	if (!in_range(p->queue, 0, pi->ntxq - 1) || !in_range(p->cl, 0, 7)) {
		rc = EINVAL;
		goto done;
	}

	/*
	 * Create a template for the FW_PARAMS_CMD mnemonic and value (TX
	 * Scheduling Class in this case).
	 */
	fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
	fw_class = p->cl < 0 ? 0xffffffff : p->cl;

	/*
	 * If op.queue is non-negative, then we're only changing the scheduling
	 * on a single specified TX queue.
	 */
	if (p->queue >= 0) {
		txq = &sc->sge.txq[pi->first_txq + p->queue];
		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
		    &fw_class);
		goto done;
	}

	/*
	 * Change the scheduling on all the TX queues for the
	 * interface.
	 */
	for_each_txq(pi, i, txq) {
		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
		    &fw_class);
		/* Stop at the first queue the firmware rejects. */
		if (rc)
			goto done;
	}

	rc = 0;
done:
	end_synchronized_op(sc, 0);
	return (rc);
}

int
t4_os_find_pci_capability(struct adapter *sc, int cap)
{
	int i;

	return (pci_find_cap(sc->dev, cap, &i) == 0 ?
i : 0);
}

/* Save the device's PCI configuration state.  Always returns 0. */
int
t4_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

/* Restore the device's PCI configuration state.  Always returns 0. */
int
t4_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/*
 * Log a message describing the transceiver module now present (or absent)
 * on port 'idx', based on the port's mod_type.
 */
void
t4_os_portmod_changed(const struct adapter *sc, int idx)
{
	struct port_info *pi = sc->port[idx];
	/* Indexed by mod_type; slot 0 is never printed (see the > 0 check). */
	static const char *mod_str[] = {
		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
	};

	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
		if_printf(pi->ifp, "transceiver unplugged.\n");
	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
		if_printf(pi->ifp, "unknown transceiver inserted.\n");
	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
		if_printf(pi->ifp, "unsupported transceiver inserted.\n");
	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
		if_printf(pi->ifp, "%s transceiver inserted.\n",
		    mod_str[pi->mod_type]);
	} else {
		if_printf(pi->ifp, "transceiver (type %d) inserted.\n",
		    pi->mod_type);
	}
}

/*
 * Propagate a link state change on port 'idx' to the network stack.  On
 * link up the interface baudrate is refreshed and linkdnrc is reset to -1;
 * on link down a non-negative 'reason' is recorded in linkdnrc.
 */
void
t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason)
{
	struct port_info *pi = sc->port[idx];
	struct ifnet *ifp = pi->ifp;

	if (link_stat) {
		pi->linkdnrc = -1;
		ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed);
		if_link_state_change(ifp, LINK_STATE_UP);
	} else {
		if (reason >= 0)
			pi->linkdnrc = reason;
		if_link_state_change(ifp, LINK_STATE_DOWN);
	}
}

/*
 * Call func(sc, arg) for every adapter on t4_list, with t4_list_lock held
 * for the whole walk.
 */
void
t4_iterate(void (*func)(struct adapter *, void *), void *arg)
{
	struct adapter *sc;

	mtx_lock(&t4_list_lock);
	SLIST_FOREACH(sc, &t4_list, link) {
		/*
		 * func should not make any assumptions about what state sc is
		 * in - the only guarantee is that sc->sc_lock is a valid lock.
		 */
		func(sc, arg);
	}
	mtx_unlock(&t4_list_lock);
}

/* Character device open: nothing to do. */
static int
t4_open(struct cdev *dev, int flags, int type, struct thread *td)
{
	return (0);
}

/* Character device close: nothing to do. */
static int
t4_close(struct cdev *dev, int flags, int type, struct thread *td)
{
	return (0);
}

/*
 * ioctl entry point of the nexus character device.  Every command requires
 * PRIV_DRIVER; most commands are dispatched to a helper that works on the
 * argument structure in 'data'.  Unknown commands return EINVAL.
 */
static int
t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int rc;
	struct adapter *sc = dev->si_drv1;

	rc = priv_check(td, PRIV_DRIVER);
	if (rc != 0)
		return (rc);

	switch (cmd) {
	case CHELSIO_T4_GETREG: {
		struct t4_reg *edata = (struct t4_reg *)data;

		/* The address must be 4-byte aligned and inside the BAR. */
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);

		if (edata->size == 4)
			edata->val = t4_read_reg(sc, edata->addr);
		else if (edata->size == 8)
			edata->val = t4_read_reg64(sc, edata->addr);
		else
			return (EINVAL);

		break;
	}
	case CHELSIO_T4_SETREG: {
		struct t4_reg *edata = (struct t4_reg *)data;

		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);

		if (edata->size == 4) {
			/* A 32-bit write must not carry bits above bit 31. */
			if (edata->val & 0xffffffff00000000)
				return (EINVAL);
			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
		} else if (edata->size == 8)
			t4_write_reg64(sc, edata->addr, edata->val);
		else
			return (EINVAL);
		break;
	}
	case CHELSIO_T4_REGDUMP: {
		struct t4_regdump *regs = (struct t4_regdump *)data;
		int reglen = is_t4(sc) ? T4_REGDUMP_SIZE : T5_REGDUMP_SIZE;
		uint8_t *buf;

		if (regs->len < reglen) {
			regs->len = reglen; /* hint to the caller */
			return (ENOBUFS);
		}

		regs->len = reglen;
		/* Dump into a zeroed kernel buffer, then copy it out. */
		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
		t4_get_regs(sc, regs, buf);
		rc = copyout(buf, regs->data, reglen);
		free(buf, M_CXGBE);
		break;
	}
	case CHELSIO_T4_GET_FILTER_MODE:
		rc = get_filter_mode(sc, (uint32_t *)data);
		break;
	case CHELSIO_T4_SET_FILTER_MODE:
		rc = set_filter_mode(sc, *(uint32_t *)data);
		break;
	case CHELSIO_T4_GET_FILTER:
		rc = get_filter(sc, (struct t4_filter *)data);
		break;
	case CHELSIO_T4_SET_FILTER:
		rc = set_filter(sc, (struct t4_filter *)data);
		break;
	case CHELSIO_T4_DEL_FILTER:
		rc = del_filter(sc, (struct t4_filter *)data);
		break;
	case CHELSIO_T4_GET_SGE_CONTEXT:
		rc = get_sge_context(sc, (struct t4_sge_context *)data);
		break;
	case CHELSIO_T4_LOAD_FW:
		rc = load_fw(sc, (struct t4_data *)data);
		break;
	case CHELSIO_T4_GET_MEM:
		/* Card memory is read through memory window 2. */
		rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
		break;
	case CHELSIO_T4_GET_I2C:
		rc = read_i2c(sc, (struct t4_i2c_data *)data);
		break;
	case CHELSIO_T4_CLEAR_STATS: {
		int i;
		u_int port_id = *(uint32_t *)data;
		struct port_info *pi;

		if (port_id >= sc->params.nports)
			return (EINVAL);
		pi = sc->port[port_id];

		/* MAC stats */
		t4_clr_port_stats(sc, pi->tx_chan);

		/* Per-queue software counters are cleared only if PORT_INIT_DONE. */
		if (pi->flags & PORT_INIT_DONE) {
			struct sge_rxq *rxq;
			struct sge_txq *txq;
			struct sge_wrq *wrq;

			for_each_rxq(pi, i, rxq) {
#if defined(INET) || defined(INET6)
				rxq->lro.lro_queued = 0;
				rxq->lro.lro_flushed = 0;
#endif
				rxq->rxcsum = 0;
				rxq->vlan_extraction = 0;
			}

			for_each_txq(pi, i, txq) {
				txq->txcsum = 0;
				txq->tso_wrs = 0;
				txq->vlan_insertion = 0;
				txq->imm_wrs = 0;
				txq->sgl_wrs = 0;
				txq->txpkt_wrs = 0;
				txq->txpkts_wrs = 0;
				txq->txpkts_pkts = 0;
				txq->br->br_drops = 0;
				txq->no_dmamap = 0;
				txq->no_desc = 0;
			}

#ifdef TCP_OFFLOAD
			/* nothing to clear for each ofld_rxq */

			for_each_ofld_txq(pi, i, wrq) {
				wrq->tx_wrs = 0;
				wrq->no_desc = 0;
			}
#endif
			/* The port's control queue counters. */
			wrq = &sc->sge.ctrlq[pi->port_id];
			wrq->tx_wrs = 0;
			wrq->no_desc = 0;
		}
		break;
	}
	case CHELSIO_T4_SCHED_CLASS:
		rc = set_sched_class(sc, (struct t4_sched_params *)data);
		break;
	case CHELSIO_T4_SCHED_QUEUE:
		rc = set_sched_queue(sc, (struct t4_sched_queue *)data);
		break;
	default:
		rc = EINVAL;
	}

	return (rc);
}

#ifdef TCP_OFFLOAD
/*
 * Enable or disable TOE on a port by setting or clearing the port's bit in
 * sc->offload_map.  Enabling initializes the adapter if FULL_INIT_DONE is
 * not set and activates the TOM upper layer driver if TOM_INIT_DONE is not
 * set.  Returns ENODEV if the adapter is not offload capable.
 */
static int
toe_capability(struct port_info *pi, int enable)
{
	int rc;
	struct adapter *sc = pi->adapter;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (!is_offload(sc))
		return (ENODEV);

	if (enable) {
		if (!(sc->flags & FULL_INIT_DONE)) {
			rc = cxgbe_init_synchronized(pi);
			if (rc)
				return (rc);
		}

		/* Already enabled on this port: nothing to do. */
		if (isset(&sc->offload_map, pi->port_id))
			return (0);

		if (!(sc->flags & TOM_INIT_DONE)) {
			rc = t4_activate_uld(sc, ULD_TOM);
			if (rc == EAGAIN) {
				log(LOG_WARNING,
				    "You must kldload t4_tom.ko before trying "
				    "to enable TOE on a cxgbe interface.\n");
			}
			if (rc != 0)
				return (rc);
			KASSERT(sc->tom_softc != NULL,
			    ("%s: TOM activated but softc NULL", __func__));
			KASSERT(sc->flags & TOM_INIT_DONE,
			    ("%s: TOM activated but flag not set", __func__));
		}

		setbit(&sc->offload_map, pi->port_id);
	} else {
		/* Already disabled on this port: nothing to do. */
		if (!isset(&sc->offload_map, pi->port_id))
			return (0);

		KASSERT(sc->flags & TOM_INIT_DONE,
		    ("%s: TOM never initialized?", __func__));
		clrbit(&sc->offload_map, pi->port_id);
	}

	return (0);
}

/*
 * Add an upper layer driver to the global list.
*/ int t4_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; mtx_lock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t4_uld_list, ui, link); ui->refcount = 0; done: mtx_unlock(&t4_uld_list_lock); return (rc); } int t4_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; mtx_lock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: mtx_unlock(&t4_uld_list_lock); return (rc); } int t4_activate_uld(struct adapter *sc, int id) { int rc = EAGAIN; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); mtx_lock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->activate(sc); if (rc == 0) ui->refcount++; goto done; } } done: mtx_unlock(&t4_uld_list_lock); return (rc); } int t4_deactivate_uld(struct adapter *sc, int id) { int rc = EINVAL; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); mtx_lock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) ui->refcount--; goto done; } } done: mtx_unlock(&t4_uld_list_lock); return (rc); } #endif /* * Come up with reasonable defaults for some of the tunables, provided they're * not set by the user (in which case we'll use the values as is). 
*/ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq10g < 1) t4_ntxq10g = min(nc, NTXQ_10G); if (t4_ntxq1g < 1) t4_ntxq1g = min(nc, NTXQ_1G); if (t4_nrxq10g < 1) t4_nrxq10g = min(nc, NRXQ_10G); if (t4_nrxq1g < 1) t4_nrxq1g = min(nc, NRXQ_1G); #ifdef TCP_OFFLOAD if (t4_nofldtxq10g < 1) t4_nofldtxq10g = min(nc, NOFLDTXQ_10G); if (t4_nofldtxq1g < 1) t4_nofldtxq1g = min(nc, NOFLDTXQ_1G); if (t4_nofldrxq10g < 1) t4_nofldrxq10g = min(nc, NOFLDRXQ_10G); if (t4_nofldrxq1g < 1) t4_nofldrxq1g = min(nc, NOFLDRXQ_1G); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; #endif if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS) t4_tmr_idx_10g = TMR_IDX_10G; if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS) t4_pktc_idx_10g = PKTC_IDX_10G; if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS) t4_tmr_idx_1g = TMR_IDX_1G; if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS) t4_pktc_idx_1g = PKTC_IDX_1G; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; } static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: if (atomic_fetchadd_int(&loaded, 1)) break; t4_sge_modload(); mtx_init(&t4_list_lock, "T4 adapters", 0, MTX_DEF); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD mtx_init(&t4_uld_list_lock, "T4 ULDs", 0, MTX_DEF); SLIST_INIT(&t4_uld_list); #endif tweak_tunables(); break; case MOD_UNLOAD: if (atomic_fetchadd_int(&loaded, -1) > 1) break; #ifdef TCP_OFFLOAD mtx_lock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; mtx_unlock(&t4_uld_list_lock); break; } mtx_unlock(&t4_uld_list_lock); mtx_destroy(&t4_uld_list_lock); #endif mtx_lock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; mtx_unlock(&t4_list_lock); 
break; } mtx_unlock(&t4_list_lock); mtx_destroy(&t4_list_lock); break; } return (rc); } static devclass_t t4_devclass, t5_devclass; static devclass_t cxgbe_devclass, cxl_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); Index: stable/9/sys/dev/e1000/if_em.c =================================================================== --- stable/9/sys/dev/e1000/if_em.c (revision 273911) +++ stable/9/sys/dev/e1000/if_em.c (revision 273912) @@ -1,5807 +1,5807 @@ /****************************************************************************** Copyright (c) 2001-2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "if_em.h" /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int em_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.4.2"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static em_vendor_info_t em_vendor_info_array[] = { /* 
Intel(R) PRO/1000 Network Connection */ { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, 
E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required 
last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *
 *  em_probe() picks an entry from this table using the 'index' field
 *  of the matching em_vendor_info_array row.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
/* newbus device methods, wired up in em_methods[] below */
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
/* Transmit entry points: the set compiled depends on EM_MULTIQUEUE */
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int 	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void     em_get_hw_control(struct adapter *);
static void     em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void     em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

/* Driver description: name, method table and per-device softc size. */
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/*
 * Conversions between delay-register ticks and microseconds.  The
 * arithmetic treats one tick as 1.024 usec and rounds to nearest.
 */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
/*
 * Default value handed to the "itr" sysctl in em_attach().
 * NOTE(review): the 256 divisor presumably reflects a 256 ns register
 * granularity -- confirm against the hardware documentation.
 */
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

/*
 * Each default below is exported twice: as a loader tunable
 * (TUNABLE_INT) and as a read-only sysctl under hw.em (SYSCTL_INT with
 * CTLFLAG_RDTUN).
 */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/* Requested ring sizes; em_attach() validates them before use. */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy efficient ethernet - default to OFF.
 * em_attach() copies this value into hw->dev_spec.ich8lan.eee_disable,
 * so a non-zero setting means EEE is disabled, despite the wording of
 * the sysctl description below.
 */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*
 * NOTE(review): the header name is missing from the #include below in
 * this copy of the file; restore it before building with DEV_NETMAP.
 */
#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include
#endif /* DEV_NETMAP */


/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
* * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int em_probe(device_t dev) { char adapter_name[60]; u16 pci_vendor_id = 0; u16 pci_device_id = 0; u16 pci_subvendor_id = 0; u16 pci_subdevice_id = 0; em_vendor_info_t *ent; INIT_DEBUGOUT("em_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != EM_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = em_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", em_strings[ent->index], em_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. 
* * return 0 on success, positive on failure *********************************************************************/ static int em_attach(device_t dev) { struct adapter *adapter; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_attach: begin"); if (resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; hw = &adapter->hw; EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ em_identify_hardware(adapter); /* Setup PCI resources */ if (em_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 
*)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); /* Sysctl for limiting the amount of work done in the taskqueue */ em_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, em_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. 
*/ if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", EM_DEFAULT_TXD, em_txd); adapter->num_tx_desc = EM_DEFAULT_TXD; } else adapter->num_tx_desc = em_txd; if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 || (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, em_rxd); adapter->num_rx_desc = EM_DEFAULT_RXD; } else adapter->num_rx_desc = em_rxd; hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* ** Get queue/ring memory */ if (em_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate multicast array memory. 
*/ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. 
*/ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* ** Do interrupt configuration */ if (adapter->msix > 1) /* Do MSIX */ error = em_allocate_msix(adapter); else /* MSI or Legacy */ error = em_allocate_legacy(adapter); if (error) goto err_late; /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(dev); /* Setup OS specific network interface */ if (em_setup_interface(dev, adapter) != 0) goto err_late; em_reset(adapter); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_update_link_status(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); /* Tell the stack that the interface is not active */ adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE; adapter->led_dev = led_create(em_led_func, adapter, device_get_nameunit(dev)); #ifdef DEV_NETMAP em_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("em_attach: end"); return (0); err_late: em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); if (adapter->ifp != NULL) if_free(adapter->ifp); err_pci: em_free_pci_resources(adapter); 
free(adapter->mta, M_DEVBUF); EM_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; INIT_DEBUGOUT("em_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); EM_CORE_LOCK(adapter); adapter->in_detach = 1; em_stop(adapter); EM_CORE_UNLOCK(adapter); EM_CORE_LOCK_DESTROY(adapter); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ em_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); free(adapter->mta, M_DEVBUF); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_shutdown(device_t dev) { return em_suspend(dev); } /* * Suspend/resume device methods. 
*/ static int em_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(dev); EM_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int em_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; struct ifnet *ifp = adapter->ifp; EM_CORE_LOCK(adapter); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_init_locked(adapter); em_init_manageability(adapter); if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } #ifdef EM_MULTIQUEUE /********************************************************************* * Multiqueue Transmit routines * * em_mq_start is called by the stack to initiate a transmit. * however, if busy the driver can queue the request rather * than do an immediate send. It is this that is an advantage * in this driver, rather than also having multiple tx queues. 
**********************************************************************/ static int em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || adapter->link_active == 0) { if (m != NULL) err = drbr_enqueue(ifp, txr->br, m); return (err); } enq = 0; if (m != NULL) { err = drbr_enqueue(ifp, txr->br, m); if (err) return (err); } /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = em_xmit(txr, &next)) != 0) { if (next == NULL) drbr_advance(ifp, txr->br); else drbr_putback(ifp, txr->br, next); break; } drbr_advance(ifp, txr->br); enq++; ifp->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) ifp->if_omcasts++; ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (enq > 0) { /* Set the watchdog */ txr->queue_status = EM_QUEUE_WORKING; txr->watchdog_time = ticks; } if (txr->tx_avail < EM_MAX_SCATTER) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) ifp->if_drv_flags |= IFF_DRV_OACTIVE; return (err); } /* ** Multiqueue capable stack interface */ static int em_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; int error; if (EM_TX_TRYLOCK(txr)) { error = em_mq_start_locked(ifp, txr, m); EM_TX_UNLOCK(txr); } else error = drbr_enqueue(ifp, txr->br, m); return (error); } /* ** Flush all ring buffers */ static void em_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); EM_TX_UNLOCK(txr); } if_qflush(ifp); } #else /* !EM_MULTIQUEUE */ static void em_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = 
ifp->if_softc; struct mbuf *m_head; EM_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (em_xmit(txr, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); /* Set timeout in case hardware has problems transmitting. */ txr->watchdog_time = ticks; txr->queue_status = EM_QUEUE_WORKING; } return; } static void em_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { EM_TX_LOCK(txr); em_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } return; } #endif /* EM_MULTIQUEUE */ /********************************************************************* * Ioctl entry point * * em_ioctl is called when the user wants to configure the * interface. 
* * return 0 on success, positive on failure **********************************************************************/ static int em_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) em_init(adapter); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); EM_CORE_LOCK(adapter); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; /* Adapters that do not support jumbo frames */ case e1000_ich8lan: max_frame_size = ETHER_MAX_LEN; break; default: max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { EM_CORE_UNLOCK(adapter); error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; adapter->hw.mac.max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; em_init_locked(adapter); EM_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); EM_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if 
((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { em_disable_promisc(adapter); em_set_promisc(adapter); } } else em_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) em_stop(adapter); adapter->if_flags = ifp->if_flags; EM_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { EM_CORE_LOCK(adapter); em_disable_intr(adapter); em_set_multi(adapter); #ifdef DEVICE_POLLING if (!(ifp->if_capenable & IFCAP_POLLING)) #endif em_enable_intr(adapter); EM_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ EM_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { EM_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } EM_CORE_UNLOCK(adapter); /* falls thru */ case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(em_poll, ifp); if (error) return (error); EM_CORE_LOCK(adapter); em_disable_intr(adapter); ifp->if_capenable |= IFCAP_POLLING; EM_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ EM_CORE_LOCK(adapter); em_enable_intr(adapter); ifp->if_capenable &= ~IFCAP_POLLING; EM_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; } if (mask & 
IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; reinit = 1; } if (mask & IFCAP_VLAN_HWTSO) { ifp->if_capenable ^= IFCAP_VLAN_HWTSO; reinit = 1; } if ((mask & IFCAP_WOL) && (ifp->if_capabilities & IFCAP_WOL) != 0) { if (mask & IFCAP_WOL_MCAST) ifp->if_capenable ^= IFCAP_WOL_MCAST; if (mask & IFCAP_WOL_MAGIC) ifp->if_capenable ^= IFCAP_WOL_MAGIC; } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) em_init(adapter); VLAN_CAPABILITIES(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("em_init: begin"); EM_CORE_LOCK_ASSERT(adapter); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. 
*/ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(adapter); em_update_link_status(adapter); /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_setup_transmit_structures(adapter); em_initialize_transmit_unit(adapter); /* Setup Multicast table */ em_set_multi(adapter); /* ** Figure out the desired mbuf ** pool for doing jumbos */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (em_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); em_stop(adapter); return; } em_initialize_receive_unit(adapter); /* Use real VLAN Filter support? 
*/ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_set_promisc(adapter); /* Set the interface as ACTIVE */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&adapter->timer, hz, em_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp; tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. 
*/ if (ifp->if_capenable & IFCAP_POLLING) em_disable_intr(adapter); else #endif /* DEVICE_POLLING */ em_enable_intr(adapter); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); } static void em_init(void *arg) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } #ifdef DEVICE_POLLING /********************************************************************* * * Legacy polling routine: note this only works with single queue * *********************************************************************/ static int em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 reg_icr; int rx_done; EM_CORE_LOCK(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { EM_CORE_UNLOCK(adapter); return (0); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); } } EM_CORE_UNLOCK(adapter); em_rxeof(rxr, count, &rx_done); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); return (rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ static int em_irq_fast(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp; u32 reg_icr; ifp = adapter->ifp; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? 
*/ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. */ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; em_disable_intr(adapter); taskqueue_enqueue(adapter->tq, &adapter->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; taskqueue_enqueue(taskqueue_fast, &adapter->link_task); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } /* Combined RX/TX handler, used by Legacy and MSI */ static void em_handle_que(void *context, int pending) { struct adapter *adapter = context; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(adapter->tq, &adapter->que_task); return; } } em_enable_intr(adapter); return; } /********************************************************************* * * MSIX Interrupt Service Routines * **********************************************************************/ static void em_msix_tx(void *arg) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; ++txr->tx_irq; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); 
return; } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static void em_msix_rx(void *arg) { struct rx_ring *rxr = arg; struct adapter *adapter = rxr->adapter; bool more; ++rxr->rx_irq; if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)) return; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); return; } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static void em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; em_handle_link(adapter, 0); } else E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); return; } static void em_handle_rx(void *context, int pending) { struct rx_ring *rxr = context; struct adapter *adapter = rxr->adapter; bool more; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } static void em_handle_tx(void *context, int pending) { struct tx_ring *txr = context; struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); } static void em_handle_link(void *context, int pending) { struct adapter *adapter = context; struct 
tx_ring *txr = adapter->tx_rings; struct ifnet *ifp = adapter->ifp; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; EM_CORE_LOCK(adapter); callout_stop(&adapter->timer); em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); if (adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct adapter *adapter = ifp->if_softc; u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_media_status: begin"); EM_CORE_LOCK(adapter); em_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { EM_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt 
option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifmedia	*media = &adapter->media;
	struct e1000_hw	*hw = &adapter->hw;
	int		full_duplex;

	INIT_DEBUGOUT("em_media_change: begin");

	/* Anything that is not an Ethernet media word is rejected up front. */
	if (IFM_TYPE(media->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);

	/* Duplex is only consulted by the forced 10/100 cases below. */
	full_duplex = ((media->ifm_media & IFM_GMASK) == IFM_FDX);

	switch (IFM_SUBTYPE(media->ifm_media)) {
	case IFM_AUTO:
		/* Autonegotiate, advertising the driver's default set. */
		hw->mac.autoneg = DO_AUTO_NEG;
		hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;

	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit: autonegotiate but advertise 1000/full only. */
		hw->mac.autoneg = DO_AUTO_NEG;
		hw->phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;

	case IFM_100_TX:
		/* Forced 100 Mbps, duplex taken from the media word. */
		hw->mac.autoneg = FALSE;
		hw->phy.autoneg_advertised = 0;
		hw->mac.forced_speed_duplex = full_duplex ?
		    ADVERTISE_100_FULL : ADVERTISE_100_HALF;
		break;

	case IFM_10_T:
		/* Forced 10 Mbps, duplex taken from the media word. */
		hw->mac.autoneg = FALSE;
		hw->phy.autoneg_advertised = 0;
		hw->mac.forced_speed_duplex = full_duplex ?
		    ADVERTISE_10_FULL : ADVERTISE_10_HALF;
		break;

	default:
		/*
		 * Only logged: the interface is still re-initialized
		 * below and 0 is still returned.
		 */
		device_printf(adapter->dev, "Unsupported media type\n");
		break;
	}

	/* Re-initialize (core lock held) so the new settings are applied. */
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
* * return 0 on success, positive on failure **********************************************************************/ static int em_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; bus_dma_segment_t segs[EM_MAX_SCATTER]; bus_dmamap_t map; struct em_buffer *tx_buffer, *tx_buffer_mapped; struct e1000_tx_desc *ctxd = NULL; struct mbuf *m_head; struct ether_header *eh; struct ip *ip = NULL; struct tcphdr *tp = NULL; u32 txd_upper, txd_lower, txd_used, txd_saved; int ip_off, poff; int nsegs, i, j, first, last = 0; int error, do_tso, tso_desc = 0, remap = 1; m_head = *m_headp; txd_upper = txd_lower = txd_used = txd_saved = 0; do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0); ip_off = poff = 0; /* * Intel recommends entire IP/TCP header length reside in a single * buffer. If multiple descriptors are used to describe the IP and * TCP header, each descriptor should describe one or more * complete headers; descriptors referencing only parts of headers * are not supported. If all layer headers are not coalesced into * a single buffer, each buffer should not cross a 4KB boundary, * or be larger than the maximum read request size. * Controller also requires modifing IP/TCP header to make TSO work * so we firstly get a writable mbuf chain then coalesce ethernet/ * IP/TCP header into a single buffer to meet the requirement of * controller. This also simplifies IP/TCP/UDP checksum offloading * which also has similiar restrictions. */ if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { if (do_tso || (m_head->m_next != NULL && m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { if (M_WRITABLE(*m_headp) == 0) { m_head = m_dup(*m_headp, M_NOWAIT); m_freem(*m_headp); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } *m_headp = m_head; } } /* * XXX * Assume IPv4, we don't have TSO/checksum offload support * for IPv6 yet. 
*/ ip_off = sizeof(struct ether_header); m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } eh = mtod(m_head, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m_head, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); if (do_tso) { m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } tp = (struct tcphdr *)(mtod(m_head, char *) + poff); /* * TSO workaround: * pull 4 more bytes of data into it. */ m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m_head, char *) + ip_off); ip->ip_len = 0; ip->ip_sum = 0; /* * The pseudo TCP checksum does not include TCP payload * length so driver should recompute the checksum here * what hardware expect to see. This is adherence of * Microsoft's Large Send specification. 
*/ tp = (struct tcphdr *)(mtod(m_head, char *) + poff); tp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } tp = (struct tcphdr *)(mtod(m_head, char *) + poff); m_head = m_pullup(m_head, poff + (tp->th_off << 2)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m_head, char *) + ip_off); tp = (struct tcphdr *)(mtod(m_head, char *) + poff); } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m_head, char *) + ip_off); } *m_headp = m_head; } /* * Map the packet for DMA * * Capture the first descriptor index, * this descriptor will have the index * of the EOP which is the only one that * now gets a DONE bit writeback. */ first = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); /* * There are two types of errors we can (try) to handle: * - EFBIG means the mbuf chain was too long and bus_dma ran * out of segments. Defragment the mbuf chain and try again. * - ENOMEM means bus_dma could not obtain enough bounce buffers * at this point in time. Defer sending and try again later. * All other errors, in particular EINVAL, are fatal and prevent the * mbuf chain from ever going through. Drop it and report error. 
*/ if (error == EFBIG && remap) { struct mbuf *m; m = m_defrag(*m_headp, M_NOWAIT); if (m == NULL) { adapter->mbuf_alloc_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again, but only once */ remap = 0; goto retry; } else if (error == ENOMEM) { adapter->no_tx_dma_setup++; return (error); } else if (error != 0) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. */ if ((do_tso == 0) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; txr->tx_tso = FALSE; } if (nsegs > (txr->tx_avail - 2)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* Do hardware assists */ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { em_tso_setup(txr, m_head, ip_off, ip, tp, &txd_upper, &txd_lower); /* we need to make a final sentinel transmit desc */ tso_desc = TRUE; } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) em_transmit_checksum_setup(txr, m_head, ip_off, ip, &txd_upper, &txd_lower); if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. 
*/ txd_upper |= (htole16(m_head->m_pkthdr.ether_vtag) << 16); /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } i = txr->next_avail_desc; /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; tx_buffer = &txr->tx_buffers[i]; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; /* ** TSO Workaround: ** If this is the last descriptor, we want to ** split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) { seg_len -= 4; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == adapter->num_tx_desc) i = 0; /* Now make the sentinel */ ++txd_used; /* using an extra txd */ ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | 4); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; } txr->next_avail_desc = i; txr->tx_avail -= nsegs; if (tso_desc) /* TSO used an extra for sentinel */ txr->tx_avail -= txd_used; tx_buffer->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. 
*/ tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); /* * Keep track in the first buffer which * descriptor will be written back */ tx_buffer = &txr->tx_buffers[first]; tx_buffer->next_eop = last; /* Update the watchdog time early and often */ txr->watchdog_time = ticks; /* * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 * that this frame is available to transmit. */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void em_set_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } } static void em_disable_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } /* Don't disable if in MAX 
groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_set_multi(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ifmultiaddr *ifma; u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & 
CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. * **********************************************************************/ static void em_local_timer(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 trigger; EM_CORE_LOCK_ASSERT(adapter); em_update_link_status(adapter); em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if ((adapter->hw.mac.type == e1000_82571) && e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* Mask to use in the irq trigger */ if (adapter->msix_mem) trigger = rxr->ims; else trigger = E1000_ICS_RXDMT0; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
*/ for (int i = 0; i < adapter->num_queues; i++, txr++) { if ((txr->queue_status == EM_QUEUE_HUNG) && (adapter->pause_frames == 0)) goto hung; /* Schedule a TX tasklet if needed */ if (txr->tx_avail <= EM_MAX_SCATTER) taskqueue_enqueue(txr->tq, &txr->tx_task); } adapter->pause_frames = 0; callout_reset(&adapter->timer, hz, em_local_timer, adapter); #ifndef DEVICE_POLLING /* Trigger an RX interrupt to guarantee mbuf refresh */ E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); #endif return; hung: /* Looks like we're hung */ device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(adapter->dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); device_printf(adapter->dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; adapter->pause_frames = 0; em_init_locked(adapter); } static void em_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, 
&adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; ifp->if_baudrate = adapter->link_speed * 1000000; if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { ifp->if_baudrate = adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* Link down, disable watchdog */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->queue_status = EM_QUEUE_IDLE; if_link_state_change(ifp, LINK_STATE_DOWN); } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_stop(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("em_stop: begin"); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* Unarm watchdog timer. 
*/ for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); txr->queue_status = EM_QUEUE_IDLE; EM_TX_UNLOCK(txr); } e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void em_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Default to a single queue */ adapter->num_queues = 1; /* * Setup MSI/X or MSI if PCI Express */ adapter->msix = em_setup_msix(adapter); adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI 
Interrupt handler * **********************************************************************/ int em_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); if (adapter->msix == 1) /* using MSI */ rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Allocate a fast interrupt and the associated * deferred processing contexts. */ TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); /* Use a TX only tasklet for local timer */ TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", device_get_nameunit(adapter->dev)); TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers * This is not really Multiqueue, rather * its just seperate interrupt vectors * for TX, RX, and Link. 
 *
 **********************************************************************/
/*
 * For every queue this allocates one IRQ resource for RX and one for
 * TX (resource id = vector number + 1), registers em_msix_rx /
 * em_msix_tx as the handler (the filter argument is NULL), creates a
 * per-ring taskqueue, and records the ring's IMS bit and its slot in
 * adapter->ivars.  One more vector is then set up for link events.
 *
 * Returns 0 on success, ENXIO when an IRQ resource cannot be
 * allocated, or the error from bus_setup_intr().
 *
 * NOTE(review): the early returns do not undo what was already set up
 * in this function; presumably the caller cleans up through
 * em_free_pci_resources() -- confirm.  The taskqueues created here are
 * not freed by that routine.
 * NOTE(review): taskqueue_create_fast() is called with M_NOWAIT and
 * its result is not checked before taskqueue_start_threads().
 * NOTE(review): the three "Failed to register ... handler" messages
 * have no trailing newline.
 */
int
em_allocate_msix(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct		tx_ring *txr = adapter->tx_rings;
	struct		rx_ring *rxr = adapter->rx_rings;
	int		error, rid, vector = 0;


	/* Make sure all interrupts are disabled */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/* First set up ring resources */
	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {

		/* RX ring */
		rid = vector + 1;

		rxr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (rxr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "RX MSIX Interrupt %d\n", i);
			return (ENXIO);
		}
		if ((error = bus_setup_intr(dev, rxr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
		    rxr, &rxr->tag)) != 0) {
			device_printf(dev, "Failed to register RX handler");
			return (error);
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
#endif
		rxr->msix = vector++; /* NOTE increment vector for TX */
		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
		    taskqueue_thread_enqueue, &rxr->tq);
		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
		    device_get_nameunit(adapter->dev));
		/*
		** Set the bit to enable interrupt
		** in E1000_IMS -- bits 20 and 21
		** are for RX0 and RX1, note this has
		** NOTHING to do with the MSIX vector
		*/
		rxr->ims = 1 << (20 + i);
		/*
		 * RX queue i occupies a 4-bit field at bit (i * 4) of
		 * ivars: the vector number OR'ed with 8 (presumably the
		 * "entry valid" flag -- confirm against the datasheet).
		 */
		adapter->ivars |= (8 | rxr->msix) << (i * 4);

		/* TX ring */
		rid = vector + 1;
		txr->res = bus_alloc_resource_any(dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (txr->res == NULL) {
			device_printf(dev,
			    "Unable to allocate bus resource: "
			    "TX MSIX Interrupt %d\n", i);
			return (ENXIO);
		}
		if ((error = bus_setup_intr(dev, txr->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
		    txr, &txr->tag)) != 0) {
			device_printf(dev, "Failed to register TX handler");
			return (error);
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
#endif
		txr->msix = vector++; /* Increment vector for next pass */
		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
		    taskqueue_thread_enqueue, &txr->tq);
		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
		    device_get_nameunit(adapter->dev));
		/*
		** Set the bit to enable interrupt
		** in E1000_IMS -- bits 22 and 23
		** are for TX0 and TX1, note this has
		** NOTHING to do with the MSIX vector
		*/
		txr->ims = 1 << (22 + i);
		/* TX fields start 8 bits above the RX fields in ivars. */
		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
	}

	/* Link interrupt */
	/*
	 * rid still holds the last TX resource id from the loop, so the
	 * increment yields vector + 1.  rid is only ever assigned inside
	 * the loop, so this relies on num_queues being at least 1.
	 */
	++rid;
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_ACTIVE);
	if (!adapter->res) {
		device_printf(dev,"Unable to allocate "
		    "bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
        }
	/* Set the link handler function */
	error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    em_msix_link, adapter, &adapter->tag);
	if (error) {
		/*
		 * NOTE(review): the resource pointer is dropped without
		 * bus_release_resource(); em_free_pci_resources() only
		 * releases adapter->res when it is non-NULL, so this
		 * IRQ resource appears to be leaked -- confirm.
		 */
		adapter->res = NULL;
		device_printf(dev, "Failed to register LINK handler");
		return (error);
	}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
	adapter->linkvec = vector;
	/* Link vector goes in the field at bit 16, flagged with 8 as above. */
	adapter->ivars |= (8 | vector) << 16;
	/* Top bit of ivars is set as well; its meaning is not visible here. */
	adapter->ivars |= 0x80000000;

	return (0);
}


/*
 * Tear down and release, in order: each queue's TX and RX interrupt
 * handler and IRQ resource, the link (MSI-X) or single (MSI/legacy)
 * interrupt, the MSI/MSI-X allocation itself, and finally the MSI-X
 * table, register and flash memory resources.  Every step is guarded
 * by a NULL / non-zero check on the corresponding adapter field.
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int		rid;


	/*
	** Release all the queue interrupt resources:
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		txr = &adapter->tx_rings[i];
		rxr = &adapter->rx_rings[i];
		/* an early abort? */
		/*
		 * NOTE(review): these are addresses of array elements,
		 * so they can only compare NULL when the ring array
		 * pointer itself is NULL and i == 0.
		 */
		if ((txr == NULL) || (rxr == NULL))
			break;
		/* Same "vector + 1" resource id used at allocation time. */
		rid = txr->msix +1;
		if (txr->tag != NULL) {
			bus_teardown_intr(dev, txr->res, txr->tag);
			txr->tag = NULL;
		}
		if (txr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ,
			    rid, txr->res);
		rid = rxr->msix +1;
		if (rxr->tag != NULL) {
			bus_teardown_intr(dev, rxr->res, rxr->tag);
			rxr->tag = NULL;
		}
		if (rxr->res != NULL)
			bus_release_resource(dev, SYS_RES_IRQ,
			    rid, rxr->res);
	}

	/*
	 * Pick the resource id of the remaining interrupt: linkvec + 1
	 * when a link vector was assigned, otherwise 1 if adapter->msix
	 * is non-zero and 0 if it is zero.
	 */
        if (adapter->linkvec) /* we are doing MSIX */
                rid = adapter->linkvec + 1;
        else
                (adapter->msix != 0) ? (rid = 1):(rid = 0);

	if (adapter->tag != NULL) {
		bus_teardown_intr(dev, adapter->res, adapter->tag);
		adapter->tag = NULL;
	}

	if (adapter->res != NULL)
		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);


	if (adapter->msix)
		pci_release_msi(dev);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

	if (adapter->memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->memory);

	if (adapter->flash != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    EM_FLASH, adapter->flash);
}

/*
 * Setup MSI or MSI/X
 */
static int
em_setup_msix(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int val;

	/*
	** Setup MSI/X for Hartwell: tests have shown
	** use of two queues to be unstable, and to
	** provide no great gain anyway, so we simply
	** separate the interrupts and use a single queue.
*/ if ((adapter->hw.mac.type == e1000_82574) && (em_enable_msix == TRUE)) { /* Map the MSIX BAR */ int rid = PCIR_BAR(EM_MSIX_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } val = pci_msix_count(dev); /* We only need/want 3 vectors */ if (val >= 3) val = 3; else { device_printf(adapter->dev, "MSIX: insufficient vectors, using MSI\n"); goto msi; } if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) { device_printf(adapter->dev, "Using MSIX interrupts " "with %d vectors\n", val); return (val); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); } msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } val = 1; if (pci_alloc_msi(dev, &val) == 0) { device_printf(adapter->dev,"Using an MSI interrupt\n"); return (val); } /* Should only happen due to manual configuration */ device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); return (0); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. * **********************************************************************/ static void em_reset(struct adapter *adapter) { device_t dev = adapter->dev; struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. 
*/ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: pba = E1000_PBA_26K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. 
*/ rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? */ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (ifp->if_mtu > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (ifp->if_mtu > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_ich9lan: case e1000_ich10lan: if (ifp->if_mtu > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* else fall thru */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); em_disable_aspm(adapter); /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ static int em_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("em_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = em_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = em_ioctl; #ifdef EM_MULTIQUEUE /* Multiqueue stack interface */ ifp->if_transmit = em_mq_start; ifp->if_qflush = em_qflush; #else ifp->if_start = em_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); #endif ether_ifattach(ifp, adapter->hw.mac.addr); ifp->if_capabilities = ifp->if_capenable = 0; ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_TSO4; /* * Tell the upper layer(s) we * support full VLAN capability */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* Enable only WOL MAGIC by default */ if (adapter->wol) { ifp->if_capabilities |= IFCAP_WOL; ifp->if_capenable |= IFCAP_WOL_MAGIC; } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, em_media_change, em_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. 
*/ static void em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (error); } static void em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_map != NULL) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_map = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * 
Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int em_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* Allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); if (em_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #if __FreeBSD_version >= 800000 /* Allocate a buf ring */ txr->br = buf_ring_alloc(4096, M_DEVBUF, 
M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (em_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) em_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) em_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: #if __FreeBSD_version >= 800000 buf_ring_free(txr->br, M_DEVBUF); #endif free(adapter->tx_rings, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int em_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct em_buffer *txbuf; int error, i; /* * Setup DMA descriptor areas. 
*/ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ EM_TSO_SIZE, /* maxsize */ EM_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct em_buffer *) malloc(sizeof(struct em_buffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ em_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void em_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_buffer *txbuf; int i; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ EM_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. 
*/ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); uint64_t paddr; void *addr; addr = PNMB(slot + si, &paddr); txr->tx_base[i].buffer_addr = htole64(paddr); /* reload the map for netmap mode */ netmap_load_map(txr->txtag, txbuf->map, addr); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->next_eop = -1; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; txr->queue_status = EM_QUEUE_IDLE; /* Clear checksum offload context. */ txr->last_hw_offload = 0; txr->last_hw_ipcss = 0; txr->last_hw_ipcso = 0; txr->last_hw_tucss = 0; txr->last_hw_tucso = 0; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); EM_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static void em_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) em_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. 
* **********************************************************************/ static void em_initialize_transmit_unit(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txr->queue_status = EM_QUEUE_IDLE; } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { tarc = 
E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void em_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); em_free_transmit_buffers(txr); em_dma_free(adapter, &txr->txdma); EM_TX_UNLOCK(txr); EM_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. 
*
 **********************************************************************/
/*
 * Unloads and destroys every per-descriptor DMA map, frees any mbuf
 * still attached, then releases the buf_ring, the tx_buffers array
 * and the TX DMA tag.  Each released pointer is cleared so that a
 * second call is harmless.  Does nothing if txr->tx_buffers is NULL.
 */
static void
em_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
	struct em_buffer	*txbuf;

	INIT_DEBUGOUT("free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	for (int i = 0; i < adapter->num_tx_desc; i++) {
		txbuf = &txr->tx_buffers[i];
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
			if (txbuf->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    txbuf->map);
				txbuf->map = NULL;
			}
		} else if (txbuf->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			    txbuf->map);
			bus_dmamap_destroy(txr->txtag,
			    txbuf->map);
			txbuf->map = NULL;
		}
	}
#if __FreeBSD_version >= 800000
	if (txr->br != NULL) {
		buf_ring_free(txr->br, M_DEVBUF);
		/* Clear it like the other pointers to avoid a double free. */
		txr->br = NULL;
	}
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}

/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. The occasion
 *  of a context change can be a performance detriment, and
 *  might be better just disabled. The reason arises in the way
 *  in which the controller supports pipelined requests from the
 *  Tx data DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets. However all
 *  requests for one packet are issued before a request is issued
 *  for a subsequent packet and if a request for the next packet
 *  requires a context change, that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA which
 *  in turn greatly slow down performance to send small sized
 *  frames.
**********************************************************************/
/*
 * Builds a checksum offload context descriptor at txr->next_avail_desc
 * for the packet in mp and sets the matching option bits in *txd_upper
 * and *txd_lower.  ip_off is the offset of the IP header in the frame.
 *
 * When the TCP/UDP context matches the one remembered in the ring's
 * last_hw_* fields the function returns early without consuming a
 * descriptor.  Otherwise one descriptor is used and next_avail_desc /
 * tx_avail are updated.
 */
static void
em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
{
	struct adapter			*adapter = txr->adapter;
	struct e1000_context_desc	*TXD = NULL;
	struct em_buffer		*tx_buffer;
	int				cur, hdr_len;
	u32				cmd = 0;
	u16				offload = 0;
	u8				ipcso, ipcss, tucso, tucss;

	ipcss = ipcso = tucss = tucso = 0;
	hdr_len = ip_off + (ip->ip_hl << 2);
	cur = txr->next_avail_desc;

	/* Setup of IP header checksum. */
	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		offload |= CSUM_IP;
		ipcss = ip_off;
		ipcso = ip_off + offsetof(struct ip, ip_sum);
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
		TXD->lower_setup.ip_fields.ipcss = ipcss;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
		TXD->lower_setup.ip_fields.ipcso = ipcso;
		cmd |= E1000_TXD_CMD_IP;
	}

	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		offload |= CSUM_TCP;
		tucss = hdr_len;
		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
		/*
		 * Setting up new checksum offload context for every frames
		 * takes a lot of processing time for hardware. This also
		 * reduces performance a lot for small sized frames so avoid
		 * it if driver can use previously configured checksum
		 * offload context.
		 */
		if (txr->last_hw_offload == offload) {
			if (offload & CSUM_IP) {
				if (txr->last_hw_ipcss == ipcss &&
				    txr->last_hw_ipcso == ipcso &&
				    txr->last_hw_tucss == tucss &&
				    txr->last_hw_tucso == tucso)
					return;
			} else {
				if (txr->last_hw_tucss == tucss &&
				    txr->last_hw_tucso == tucso)
					return;
			}
		}
		txr->last_hw_offload = offload;
		txr->last_hw_tucss = tucss;
		txr->last_hw_tucso = tucso;
		/*
		 * Start offset for payload checksum calculation.
		 * End offset for payload checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
		cmd |= E1000_TXD_CMD_TCP;
	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		tucss = hdr_len;
		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		/*
		 * Setting up new checksum offload context for every frames
		 * takes a lot of processing time for hardware. This also
		 * reduces performance a lot for small sized frames so avoid
		 * it if driver can use previously configured checksum
		 * offload context.
		 */
		if (txr->last_hw_offload == offload) {
			if (offload & CSUM_IP) {
				if (txr->last_hw_ipcss == ipcss &&
				    txr->last_hw_ipcso == ipcso &&
				    txr->last_hw_tucss == tucss &&
				    txr->last_hw_tucso == tucso)
					return;
			} else {
				if (txr->last_hw_tucss == tucss &&
				    txr->last_hw_tucso == tucso)
					return;
			}
		}
		txr->last_hw_offload = offload;
		txr->last_hw_tucss = tucss;
		txr->last_hw_tucso = tucso;
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
		TXD->upper_setup.tcp_fields.tucss = tucss;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	/*
	 * None of CSUM_IP/CSUM_TCP/CSUM_UDP was requested: no context
	 * descriptor was selected, so there is nothing to program and
	 * TXD must not be dereferenced.
	 */
	if (TXD == NULL)
		return;

	if (offload & CSUM_IP) {
		txr->last_hw_ipcss = ipcss;
		txr->last_hw_ipcso = ipcso;
	}

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
	tx_buffer = &txr->tx_buffers[cur];
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* The context descriptor consumed one ring slot. */
	if (++cur == adapter->num_tx_desc)
		cur = 0;

	txr->tx_avail--;
	txr->next_avail_desc = cur;
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO)
 *
 *  Always writes a new TSO context descriptor at next_avail_desc,
 *  consuming one ring slot, and sets txr->tx_tso.
 *
 **********************************************************************/
static void
em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
{
	struct adapter			*adapter = txr->adapter;
	struct e1000_context_desc	*TXD;
	struct em_buffer		*tx_buffer;
	int cur, hdr_len;

	/*
	 * In theory we can use the same TSO context if and only if
	 * frame is the same type(IP/TCP) and the same MSS. However
	 * checking whether a frame has the same IP/TCP structure is
	 * hard thing so just ignore that and always reestablish a
	 * new TSO context.
	 */
	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

	cur = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[cur];
	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];

	/*
	 * Start offset for header checksum calculation.
	 * End offset for header checksum calculation.
	 * Offset of place put the checksum.
	 */
	TXD->lower_setup.ip_fields.ipcss = ip_off;
	TXD->lower_setup.ip_fields.ipcse =
	    htole16(ip_off + (ip->ip_hl << 2) - 1);
	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
	/*
	 * Start offset for payload checksum calculation.
	 * End offset for payload checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	TXD->cmd_and_length = htole32(adapter->txd_cmd |
				E1000_TXD_CMD_DEXT |	/* Extended descr */
				E1000_TXD_CMD_TSE |	/* TSE context */
				E1000_TXD_CMD_IP |	/* Do IP csum */
				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
				(mp->m_pkthdr.len - (hdr_len))); /* Total len */

	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++cur == adapter->num_tx_desc)
		cur = 0;

	txr->tx_avail--;
	txr->next_avail_desc = cur;
	txr->tx_tso = TRUE;
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
*
 **********************************************************************/
/*
 * Must be called with the TX lock held (asserted below).  Walks the
 * ring from txr->next_to_clean one packet at a time: a packet is
 * reclaimed only when the descriptor recorded in next_eop has the DD
 * status bit set.  Every reclaimed descriptor is zeroed, its mbuf (if
 * any) is unloaded and freed, and tx_avail is incremented.  On exit
 * next_to_clean, queue_status and IFF_DRV_OACTIVE are updated.
 */
static void
em_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc   *tx_desc, *eop_desc;
	struct ifnet   *ifp = adapter->ifp;

	EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
	/* A non-zero return from netmap_tx_irq() ends processing here. */
	if (netmap_tx_irq(ifp, txr->me))
		return;
#endif /* DEV_NETMAP */

	/* No work, make sure watchdog is off */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = EM_QUEUE_IDLE;
		return;
	}

	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;
			/* Progress was made: restart the watchdog window. */
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		++ifp->if_opackets;
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Watchdog calculation, we know there's
	** work outstanding or the first return
	** would have been taken, so none processed
	** for too long indicates a hang. local timer
	** will examine this and do a reset if needed.
	*/
	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
		txr->queue_status = EM_QUEUE_HUNG;

	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
	 */
	if (txr->tx_avail >= EM_MAX_SCATTER)
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Disable watchdog if all clean */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = EM_QUEUE_IDLE;
	}
}


/*********************************************************************
 *
 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
 *
 *  Starting at rxr->next_to_refresh, every slot up to (but not
 *  including the one before) `limit' gets an mbuf cluster loaded
 *  into its DMA map and its bus address written to the descriptor.
 *  A slot that still owns an mbuf reuses it.  The loop stops early on
 *  an allocation or DMA-load failure; the hardware tail register is
 *  written only if at least one slot was refreshed.
 *
 **********************************************************************/
static void
em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	struct mbuf		*m;
	bus_dma_segment_t	segs[1];
	struct em_buffer	*rxbuf;
	int			i, j, error, nsegs;
	bool			cleaned = FALSE;

	i = j = rxr->next_to_refresh;
	/*
	** Get one descriptor beyond
	** our work mark to control
	** the loop.
	*/
	if (++j == adapter->num_rx_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head == NULL) {
			m = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, adapter->rx_mbuf_sz);
			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort refresh
			** for now, we will return to this point when
			** reinvoked from em_rxeof.
			*/
			if (m == NULL)
				goto update;
		} else
			m = rxbuf->m_head;

		/* Reset the mbuf to describe the whole, empty cluster. */
		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
		m->m_flags |= M_PKTHDR;
		m->m_data = m->m_ext.ext_buf;

		/* Use bus_dma machinery to setup the memory mapping  */
		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
		    m, segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(m);
			rxbuf->m_head = NULL;
			goto update;
		}
		rxbuf->m_head = m;
		bus_dmamap_sync(rxr->rxtag,
		    rxbuf->map, BUS_DMASYNC_PREREAD);
		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
		cleaned = TRUE;

		i = j; /* Next is precalculated for us */
		rxr->next_to_refresh = i;
		/* Calculate next controlling index */
		if (++j == adapter->num_rx_desc)
			j = 0;
	}
update:
	/*
	** Update the tail pointer only if,
	** and as far as we have refreshed.
	*/
	if (cleaned)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_RDT(rxr->me), rxr->next_to_refresh);

	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
*
 **********************************************************************/
/*
 * Three steps: allocate the zeroed em_buffer array, create the DMA tag
 * used for receive mbufs (single segment, up to MJUM9BYTES), then
 * create one DMA map per receive descriptor.
 *
 * Returns 0 on success.  ENOMEM is returned if the array cannot be
 * allocated; any later failure returns the bus_dma error after calling
 * em_free_receive_structures() on the whole adapter.
 */
static int
em_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	device_t	dev = adapter->dev;
	int		error;

	rxr->rx_buffers = malloc(
	    sizeof(struct em_buffer) * adapter->num_rx_desc,
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (rxr->rx_buffers == NULL) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		return (ENOMEM);
	}

	error = bus_dma_tag_create(
	    bus_get_dma_tag(dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MJUM9BYTES,			/* maxsize */
	    1,				/* nsegments */
	    MJUM9BYTES,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rxr->rxtag);
	if (error) {
		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
		    __func__, error);
		em_free_receive_structures(adapter);
		return (error);
	}

	/* One DMA map for every slot of the receive ring. */
	for (int slot = 0; slot < adapter->num_rx_desc; slot++) {
		error = bus_dmamap_create(rxr->rxtag, 0,
		    &rxr->rx_buffers[slot].map);
		if (error) {
			device_printf(dev,
			    "%s: bus_dmamap_create failed: %d\n",
			    __func__, error);
			em_free_receive_structures(adapter);
			return (error);
		}
	}

	return (0);
}


/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
* **********************************************************************/ static int em_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_buffer *rxbuf; bus_dma_segment_t seg[1]; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot; #endif /* Clear the ring contents */ EM_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, 0, 0); #endif /* ** Free current RX buffer structs and their mbufs */ for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; /* mark as freed */ } } /* Now replenish the mbufs */ for (int j = 0; j != adapter->num_rx_desc; ++j) { rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(slot + si, &paddr); netmap_load_map(rxr->rxtag, rxbuf->map, addr); /* Update descriptor */ rxr->rx_base[j].buffer_addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } rxbuf->m_head->m_len = adapter->rx_mbuf_sz; rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, rxbuf->m_head, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; goto fail; } bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr); } rxr->next_to_check = 0; 
rxr->next_to_refresh = 0; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); fail: EM_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int em_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int q; for (q = 0; q < adapter->num_queues; q++, rxr++) if (em_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ for (int i = 0; i < q; ++i) { rxr = &adapter->rx_rings[i]; for (int n = 0; n < adapter->num_rx_desc; n++) { struct em_buffer *rxbuf; rxbuf = &rxr->rx_buffers[n]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } rxr->next_to_check = 0; rxr->next_to_refresh = 0; } return (ENOBUFS); } /********************************************************************* * * Free all receive rings. 
* **********************************************************************/ static void em_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { em_free_receive_buffers(rxr); /* Free the ring memory as well */ em_dma_free(adapter, &rxr->rxdma); EM_RX_LOCK_DESTROY(rxr); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void em_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_buffer *rxbuf = NULL; INIT_DEBUGOUT("free_receive_buffers: begin"); if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->map != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); bus_dmamap_destroy(rxr->rxtag, rxbuf->map); } if (rxbuf->m_head != NULL) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; rxr->next_to_check = 0; rxr->next_to_refresh = 0; } if (rxr->rxtag != NULL) { bus_dma_tag_destroy(rxr->rxtag); rxr->rxtag = NULL; } return; } /********************************************************************* * * Enable receive unit. 
* **********************************************************************/ static void em_initialize_receive_unit(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u64 bus_addr; u32 rctl, rxcsum; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); /* ** When using MSIX interrupts we need to throttle ** using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS); } rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= E1000_RXCSUM_TUOFL; else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); /* ** XXX TEMPORARY WORKAROUND: on some systems with 82573 ** long latencies are observed, like Lenovo X60. This ** change eliminates the problem, but since having positive ** values in RDTR is a known source of problems on other ** platforms another solution is being sought. 
*/ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ u32 rdt = adapter->num_rx_desc - 1; /* default */ bus_addr = rxr->rxdma.dma_paddr; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); #ifdef DEV_NETMAP /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. */ if (ifp->if_capenable & IFCAP_NETMAP) rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]); #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } /* Set PTHRESH for improved jumbo performance */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (ifp->if_mtu > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } if (adapter->hw.mac.type >= e1000_pch2lan) { if (ifp->if_mtu > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Strip the CRC */ rctl |= E1000_RCTL_SECRC; /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; rctl &= ~E1000_RCTL_SBP; if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; if (ifp->if_mtu > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; 
/* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * For polling we also now return the number of cleaned packets *********************************************************************/ static bool em_rxeof(struct rx_ring *rxr, int count, int *done) { struct adapter *adapter = rxr->adapter; struct ifnet *ifp = adapter->ifp; struct mbuf *mp, *sendmp; u8 status = 0; u16 len; int i, processed, rxdone = 0; bool eop; struct e1000_rx_desc *cur; EM_RX_LOCK(rxr); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxr->me, &processed)) { EM_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check, processed = 0; count != 0;) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); cur = &rxr->rx_base[i]; status = cur->status; mp = sendmp = NULL; if ((status & E1000_RXD_STAT_DD) == 0) break; len = le16toh(cur->length); eop = (status & E1000_RXD_STAT_EOP) != 0; if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) || (rxr->discard == TRUE)) { adapter->dropped_pkts++; ++rxr->rx_discarded; if (!eop) /* Catch subsequent segs */ rxr->discard = TRUE; else rxr->discard = FALSE; em_rx_discard(rxr, i); goto next_desc; } bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); /* Assign correct length to the current fragment */ mp = rxr->rx_buffers[i].m_head; mp->m_len = len; /* Trigger for refresh */ rxr->rx_buffers[i].m_head = NULL; /* First segment? 
*/ if (rxr->fmp == NULL) { mp->m_pkthdr.len = len; rxr->fmp = rxr->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; rxr->lmp->m_next = mp; rxr->lmp = mp; rxr->fmp->m_pkthdr.len += len; } if (eop) { --count; sendmp = rxr->fmp; sendmp->m_pkthdr.rcvif = ifp; ifp->if_ipackets++; em_receive_checksum(cur, sendmp); #ifndef __NO_STRICT_ALIGNMENT if (adapter->hw.mac.max_frame_size > (MCLBYTES - ETHER_ALIGN) && em_fixup_rx(rxr) != 0) goto skip; #endif if (status & E1000_RXD_STAT_VP) { sendmp->m_pkthdr.ether_vtag = le16toh(cur->special); sendmp->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT skip: #endif rxr->fmp = rxr->lmp = NULL; } next_desc: /* Zero out the receive descriptors status. */ cur->status = 0; ++rxdone; /* cumulative for POLL */ ++processed; /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* Send to the stack */ if (sendmp != NULL) { rxr->next_to_check = i; EM_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, sendmp); EM_RX_LOCK(rxr); i = rxr->next_to_check; } /* Only refresh mbufs every 8 descriptors */ if (processed == 8) { em_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remaining refresh work */ if (e1000_rx_unrefreshed(rxr)) em_refresh_mbufs(rxr, i); rxr->next_to_check = i; if (done != NULL) *done = rxdone; EM_RX_UNLOCK(rxr); return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); } static __inline void em_rx_discard(struct rx_ring *rxr, int i) { struct em_buffer *rbuf; rbuf = &rxr->rx_buffers[i]; bus_dmamap_unload(rxr->rxtag, rbuf->map); /* Free any previous pieces */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* ** Free buffer and allow em_refresh_mbufs() ** to clean up and recharge buffer. */ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; } return; } #ifndef __NO_STRICT_ALIGNMENT /* * When jumbo frames are enabled we should realign entire payload on * architecures with strict alignment. 
This is serious design mistake of 8254x * as it nullifies DMA operations. 8254x just allows RX buffer size to be * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its * payload. On architecures without strict alignment restrictions 8254x still * performs unaligned memory access which would reduce the performance too. * To avoid copying over an entire frame to align, we allocate a new mbuf and * copy ethernet header to the new mbuf. The new mbuf is prepended into the * existing mbuf chain. * * Be aware, best performance of the 8254x is achived only when jumbo frame is * not used at all on architectures with strict alignment. */ static int em_fixup_rx(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct mbuf *m, *n; int error; error = 0; m = rxr->fmp; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n != NULL) { bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; rxr->fmp = n; } else { adapter->dropped_pkts++; m_freem(rxr->fmp); rxr->fmp = NULL; error = ENOMEM; } } return (error); } #endif /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp) { mp->m_pkthdr.csum_flags = 0; /* Ignore Checksum bit is set */ if (rx_desc->status & E1000_RXD_STAT_IXSM) return; if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) return; /* IP Checksum Good? 
*/ if (rx_desc->status & E1000_RXD_STAT_IPCS) mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); /* TCP or UDP checksum */ if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } /* * This routine is run via an vlan * config EVENT */ static void em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. 
*/ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_enable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); ims_mask |= EM_MSIX_MASK; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_disable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if (hw->mac.type == e1000_82574) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. 
*/ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); return; } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. 
*/
/*
 * Fills in adapter->has_manage, adapter->has_amt and adapter->wol.
 * The APME setting is read from NVM (or from the WUC register on
 * ICH/PCH parts); device-specific overrides are applied afterwards.
 */
static void
em_get_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	u16		eeprom_data = 0, device_id, apme_mask;

	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
	apme_mask = EM_EEPROM_APME;

	switch (adapter->hw.mac.type) {
	case e1000_82573:
	case e1000_82583:
		adapter->has_amt = TRUE;
		/* Falls thru */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		/* Each PCI function has its own NVM control word. */
		if (adapter->hw.bus.func == 1) {
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
			break;
		} else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	case e1000_ich8lan:
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
	case e1000_pch2lan:
		apme_mask = E1000_WUC_APME;
		adapter->has_amt = TRUE;
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
		break;
	default:
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	}
	if (eeprom_data & apme_mask)
		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
	return;
}


/*
 * Enable PCI Wake On Lan capability
 *
 * Does nothing if the device has no PCI power-management capability.
 * Otherwise programs the wake-up registers according to adapter->wol
 * and the interface's WOL capability flags, then sets or clears the
 * PME bits in the PCI power status register.
 */
static void
em_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16     	status;

	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
		return;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);

	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan))
		e1000_suspend_workarounds_ich8lan(&adapter->hw);

	/* Keep the laser running on Fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	/*
	** Determine type of Wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		/* Multicast wake-up: set the MPE bit in RCTL. */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	if ((adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan)) {
		/* Give up (PME is not requested) if PHY setup fails. */
		if (em_enable_phy_wakeup(adapter))
			return;
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}

	if (adapter->hw.phy.type == e1000_phy_igp_3)
		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);

	/* Request PME */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (ifp->if_capenable & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);

	return;
}

/*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the phy
**
** Returns 0 on success, or the non-zero result of the PHY
** acquire/read/write operation that failed.
*/
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	e1000_copy_rx_addrs_to_phy_ich8lan(hw);

	/* copy MAC MTA to PHY MTA */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		/* Each 32-bit MAC word is written as two 16-bit PHY writes. */
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register */
	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
				<< BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	/* The PHY was acquired above; release it on every path. */
	hw->phy.ops.release(hw);

	return ret;
}

/*
 * LED callback: `arg' is the adapter.  A non-zero `onoff' sets up and
 * turns on the LED; zero turns it off and restores the LED
 * configuration.  Runs under the core lock.
 */
static void
em_led_func(void *arg, int onoff)
{
	struct adapter	*adapter = arg;

	EM_CORE_LOCK(adapter);
	if (onoff) {
		e1000_setup_led(&adapter->hw);
		e1000_led_on(&adapter->hw);
	} else {
		e1000_led_off(&adapter->hw);
		e1000_cleanup_led(&adapter->hw);
	}
	EM_CORE_UNLOCK(adapter);
}

/*
** Disable the L0S and L1 LINK states
**
** Only done on 82573/82574/82583, and only when the device has a
** PCI Express capability whose link capabilities advertise ASPM.
*/
static void
em_disable_aspm(struct adapter *adapter)
{
	int		base, reg;
	u16		link_cap,link_ctrl;
	device_t	dev = adapter->dev;

	switch (adapter->hw.mac.type) {
		case e1000_82573:
		case e1000_82574:
		case e1000_82583:
			break;
		default:
			return;
	}
	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
		return;
	reg = base + PCIER_LINK_CAP;
	link_cap = pci_read_config(dev, reg, 2);
	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
		return;
	reg = base + PCIER_LINK_CTL;
	link_ctrl = pci_read_config(dev, reg, 2);
	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
	pci_write_config(dev, reg, link_ctrl, 2);
	return;
}

/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc +=
E1000_READ_REG(&adapter->hw, E1000_XONTXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); adapter->stats.xoffrxc += adapter->pause_frames; adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. 
*/ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, 
E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } ifp = adapter->ifp; ifp->if_collisions = adapter->stats.colc; /* Rx Errors */ ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr; /* Tx Errors */ ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events; } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. 
*/ /* Everything here is registered read-only (CTLFLAG_RD) under the device's sysctl tree: driver counters, views of the CTRL and RCTL registers, the flow-control watermarks, one "queue%d" node per queue, a "mac_stats" node and an "interrupts" node. */ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", CTLFLAG_RD, &adapter->mbuf_alloc_failed, "Std mbuf failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", CTLFLAG_RD, &adapter->mbuf_cluster_failed, "Std mbuf cluster failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); /* The two register views below are served by em_sysctl_reg_handler, with the register passed as the handler's second argument. */ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i 
< adapter->num_queues; i++, rxr++, txr++) { /* rxr and txr advance together with i, so each "queue%d" node describes one TX/RX ring pair. */ snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, 
stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); /* NOTE(review): the description string below is misspelled ("Recevied"); it is runtime text and is left untouched here. */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet 
Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); /* NOTE(review): the description below says "1023-1522" while the OID name and the transmit counterpart say 1024-1522; runtime text, left untouched here. */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, 
stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire 
Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * * Writing 1 to the sysctl triggers the dump; any other written value * is ignored, and a plain read reports -1. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. 
*/ if (result == 1) em_print_nvm_info(adapter); return (error); } /* Print the first 32 16-bit NVM words with printf, eight words per row, each row prefixed by an offset label. NOTE(review): the return value of e1000_read_nvm() is ignored and eeprom_data is never initialised, so a failed read presumably prints an indeterminate value -- confirm. */ static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* It's a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } /* Sysctl handler for one interrupt-delay register, described by the em_int_delay_info passed as arg1. A read reports info->value (microseconds). A write is rejected with EINVAL unless 0 <= usecs <= EM_TICKS_TO_USECS(65535); otherwise the value is stored, converted to ticks and written into the low 16 bits of the register at info->offset while holding the core lock. NOTE(review): for E1000_ITR the tick count is multiplied by 4 after the range check and then masked with 0xffff, so large values appear to be silently truncated -- confirm. */ static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; EM_CORE_LOCK(adapter); /* Read-modify-write: the upper 16 bits of the register are preserved. */ regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: /* A zero TIDV delay also clears the IDE bit in the cached descriptor command; a non-zero delay sets it. */ if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. 
*/ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); EM_CORE_UNLOCK(adapter); return (0); } /* Fill *info with the adapter, register offset and initial value, then register a read-write integer sysctl called name under the device's sysctl tree, serviced by em_sysctl_int_delay. The register itself is not written here. */ static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } /* Store value in *limit and expose *limit as a read-write integer sysctl called name under the device's sysctl tree. NOTE(review): the two OID_AUTO lines below are the removed (-) and added (+) sides of a diff hunk embedded in this text; the added side drops CTLTYPE_INT from the flags argument of SYSCTL_ADD_INT. */ static void em_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); + OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full ** ** A written value that matches none of the e1000_fc_* cases below is ** ignored. An accepted change is recorded in adapter->fc and ** hw.fc.requested_mode, copied to hw.fc.current_mode and forced onto ** the MAC with e1000_force_mac_fc(). ** ** NOTE(review): input is a function-scope static, so its value is ** shared by every adapter using this handler and a read reports the ** last value written through any of them -- confirm whether it should ** be seeded from adapter->fc instead. */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? 
*/ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled ** ** The sysctl mirrors hw.dev_spec.ich8lan.eee_disable: any non-zero ** write sets it, zero clears it, and the interface is then ** re-initialised with em_init_locked() under the core lock. */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); EM_CORE_LOCK(adapter); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } /* Debug trigger: writing 1 to this sysctl calls em_print_debug_info(); any other written value is ignored and a plain read reports -1. */ static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *)arg1; em_print_debug_info(adapter); } return (error); } /* ** This routine is meant to be fluid, add whatever is ** needed for debugging a problem. 
-jfv */ /* Prints the interface RUNNING and OACTIVE flags followed by a snapshot of ring state. Only index 0 is reported: the TDH/TDT/RDH/RDT registers are read with index 0 and txr/rxr point at the first tx_rings/rx_rings entry. */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); /* IFF_DRV_OACTIVE set is reported as "INACTIVE", clear as "ACTIVE". */ if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(0)), E1000_READ_REG(&adapter->hw, E1000_TDT(0))); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(0)), E1000_READ_REG(&adapter->hw, E1000_RDT(0))); device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status); device_printf(dev, "TX descriptors avail = %d\n", txr->tx_avail); device_printf(dev, "Tx Descriptors avail failure = %ld\n", txr->no_desc_avail); device_printf(dev, "RX discarded packets = %ld\n", rxr->rx_discarded); device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); } Index: stable/9/sys/dev/e1000/if_igb.c =================================================================== --- stable/9/sys/dev/e1000/if_igb.c (revision 273911) +++ stable/9/sys/dev/e1000/if_igb.c (revision 273912) @@ -1,6128 +1,6128 @@ /****************************************************************************** Copyright (c) 2001-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #include "opt_altq.h" #endif #include #include #ifndef IGB_LEGACY_TX #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82575.h" #include "if_igb.h" /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int igb_display_debug_stats = 0; /********************************************************************* * Driver version: 
*********************************************************************/ char igb_driver_version[] = "version - 2.4.0"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static igb_vendor_info_t igb_vendor_info_array[] = { { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_VF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_DH89XXCC_SFP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, PCI_ANY_ID, PCI_ANY_ID, 0}, 
{ 0x8086, E1000_DEV_ID_I350_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I350_VF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_COPPER_IT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I210_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I211_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_I354_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. 
*********************************************************************/ static char *igb_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int igb_probe(device_t); static int igb_attach(device_t); static int igb_detach(device_t); static int igb_shutdown(device_t); static int igb_suspend(device_t); static int igb_resume(device_t); #ifndef IGB_LEGACY_TX static int igb_mq_start(struct ifnet *, struct mbuf *); static int igb_mq_start_locked(struct ifnet *, struct tx_ring *); static void igb_qflush(struct ifnet *); static void igb_deferred_mq_start(void *, int); #else static void igb_start(struct ifnet *); static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); #endif static int igb_ioctl(struct ifnet *, u_long, caddr_t); static void igb_init(void *); static void igb_init_locked(struct adapter *); static void igb_stop(void *); static void igb_media_status(struct ifnet *, struct ifmediareq *); static int igb_media_change(struct ifnet *); static void igb_identify_hardware(struct adapter *); static int igb_allocate_pci_resources(struct adapter *); static int igb_allocate_msix(struct adapter *); static int igb_allocate_legacy(struct adapter *); static int igb_setup_msix(struct adapter *); static void igb_free_pci_resources(struct adapter *); static void igb_local_timer(void *); static void igb_reset(struct adapter *); static int igb_setup_interface(device_t, struct adapter *); static int igb_allocate_queues(struct adapter *); static void igb_configure_queues(struct adapter *); static int igb_allocate_transmit_buffers(struct tx_ring *); static void igb_setup_transmit_structures(struct adapter *); static void igb_setup_transmit_ring(struct tx_ring *); static void igb_initialize_transmit_units(struct adapter *); static void igb_free_transmit_structures(struct adapter *); static void 
igb_free_transmit_buffers(struct tx_ring *); static int igb_allocate_receive_buffers(struct rx_ring *); static int igb_setup_receive_structures(struct adapter *); static int igb_setup_receive_ring(struct rx_ring *); static void igb_initialize_receive_units(struct adapter *); static void igb_free_receive_structures(struct adapter *); static void igb_free_receive_buffers(struct rx_ring *); static void igb_free_receive_ring(struct rx_ring *); static void igb_enable_intr(struct adapter *); static void igb_disable_intr(struct adapter *); static void igb_update_stats_counters(struct adapter *); static bool igb_txeof(struct tx_ring *); static __inline void igb_rx_discard(struct rx_ring *, int); static __inline void igb_rx_input(struct rx_ring *, struct ifnet *, struct mbuf *, u32); static bool igb_rxeof(struct igb_queue *, int, int *); static void igb_rx_checksum(u32, struct mbuf *, u32); static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static int igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static void igb_set_promisc(struct adapter *); static void igb_disable_promisc(struct adapter *); static void igb_set_multi(struct adapter *); static void igb_update_link_status(struct adapter *); static void igb_refresh_mbufs(struct rx_ring *, int); static void igb_register_vlan(void *, struct ifnet *, u16); static void igb_unregister_vlan(void *, struct ifnet *, u16); static void igb_setup_vlan_hw_support(struct adapter *); static int igb_xmit(struct tx_ring *, struct mbuf **); static int igb_dma_malloc(struct adapter *, bus_size_t, struct igb_dma_alloc *, int); static void igb_dma_free(struct adapter *, struct igb_dma_alloc *); static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void igb_print_nvm_info(struct adapter *); static int igb_is_valid_ether_addr(u8 *); static void igb_add_hw_stats(struct adapter *); static void igb_vf_init_stats(struct adapter *); static void igb_update_vf_stats_counters(struct adapter *); /* Management 
and WOL Support */ static void igb_init_manageability(struct adapter *); static void igb_release_manageability(struct adapter *); static void igb_get_hw_control(struct adapter *); static void igb_release_hw_control(struct adapter *); static void igb_enable_wakeup(device_t); static void igb_led_func(void *, int); static int igb_irq_fast(void *); static void igb_msix_que(void *); static void igb_msix_link(void *); static void igb_handle_que(void *context, int pending); static void igb_handle_link(void *context, int pending); static void igb_handle_link_locked(struct adapter *); static void igb_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); #ifdef DEVICE_POLLING static poll_handler_t igb_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, igb_probe), DEVMETHOD(device_attach, igb_attach), DEVMETHOD(device_detach, igb_detach), DEVMETHOD(device_shutdown, igb_shutdown), DEVMETHOD(device_suspend, igb_suspend), DEVMETHOD(device_resume, igb_resume), DEVMETHOD_END }; static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); /********************************************************************* * Tunable default values. 
*********************************************************************/

/* Root of the driver-wide sysctl tree (hw.igb). */
static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits. But
** another plus is that small packets often fit
** into the header and thus use no cluster. It's
** a very workload dependent type feature.
**
** NOTE(review): the loader tunable is spelled "hw.igb.hdr_split"
** while the sysctl node is "header_split"; the two names differ.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
**
** NOTE(review): the initial value here is -1, i.e. "nothing bound yet";
** presumably igb_allocate_msix replaces it with CPU_FIRST on first use --
** confirm there.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
/* NOTE(review): the header name after #include is missing in this copy. */
#include
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
*
 *  return BUS_PROBE_DEFAULT on success, ENXIO when the device does not
 *  match any entry of igb_vendor_info_array
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	/* Cheap reject before walking the table. */
	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	/* The table is terminated by an entry with vendor_id == 0. */
	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			/*
			 * Bounded formatting: the description and version
			 * strings come from tables whose lengths are not
			 * checked against the 60-byte buffer, so truncate
			 * rather than overrun the stack.
			 */
			snprintf(adapter_name, sizeof(adapter_name),
			    "%s %s", igb_strings[ent->index],
			    igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
* * return 0 on success, positive on failure *********************************************************************/ static int igb_attach(device_t dev) { struct adapter *adapter; int error = 0; u16 eeprom_data; INIT_DEBUGOUT("igb_attach: begin"); if (resource_disabled("igb", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_nvm_info, "I", "NVM Information"); igb_set_sysctl_value(adapter, "enable_aim", "Interrupt Moderation", &adapter->enable_aim, igb_enable_aim); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ igb_identify_hardware(adapter); /* Setup PCI resources */ if (igb_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } e1000_get_bus_info(&adapter->hw); /* Sysctl for limiting the amount of work done in the taskqueue */ igb_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, igb_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. 
*/ if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", IGB_DEFAULT_TXD, igb_txd); adapter->num_tx_desc = IGB_DEFAULT_TXD; } else adapter->num_tx_desc = igb_txd; if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", IGB_DEFAULT_RXD, igb_rxd); adapter->num_rx_desc = IGB_DEFAULT_RXD; } else adapter->num_rx_desc = igb_rxd; adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_wait_to_complete = FALSE; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { adapter->hw.phy.mdix = AUTO_ALL_MODES; adapter->hw.phy.disable_polarity_correction = FALSE; adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* ** Allocate and Setup Queues */ if (igb_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate the appropriate stats memory */ if (adapter->vf_ifp) { adapter->stats = (struct e1000_vf_stats *)malloc(sizeof \ (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); igb_vf_init_stats(adapter); } else adapter->stats = (struct e1000_hw_stats *)malloc(sizeof \ (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); if (adapter->stats == NULL) { device_printf(dev, "Can not allocate stats memory\n"); error = ENOMEM; goto err_late; } /* Allocate multicast array memory. 
*/ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Some adapter-specific advanced features */ if (adapter->hw.mac.type >= e1000_i350) { SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); if (adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw); else e1000_set_eee_i350(&adapter->hw); } } /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (((adapter->hw.mac.type != e1000_i210) && (adapter->hw.mac.type != e1000_i211)) && (e1000_validate_nvm_checksum(&adapter->hw) < 0)) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. 
*/ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* ** Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(&adapter->hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } /* Check its sanity */ if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Setup OS specific network interface */ if (igb_setup_interface(dev, adapter) != 0) goto err_late; /* Now get a good starting state */ igb_reset(adapter); /* Initialize statistics */ igb_update_stats_counters(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); /* Indicate SOL/IDER usage */ if (e1000_check_reset_block(&adapter->hw)) device_printf(dev, "PHY reset is blocked due to SOL/IDER session.\n"); /* Determine if we have to control management hardware */ adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); /* * Setup Wake-on-Lan */ /* APME bit in EEPROM is mapped to WUC.APME */ eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; if (eeprom_data) adapter->wol = E1000_WUFC_MAG; /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); igb_add_hw_stats(adapter); /* Tell the stack that the interface is not active */ adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE; adapter->led_dev = led_create(igb_led_func, adapter, device_get_nameunit(dev)); /* ** Configure Interrupts */ if ((adapter->msix > 1) && (igb_enable_msix)) error = igb_allocate_msix(adapter); else /* MSI or Legacy */ error = igb_allocate_legacy(adapter); if (error) goto err_late; #ifdef DEV_NETMAP 
igb_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("igb_attach: end"); return (0); err_late: igb_detach(dev); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); igb_release_hw_control(adapter); err_pci: igb_free_pci_resources(adapter); if (adapter->ifp != NULL) if_free(adapter->ifp); free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int igb_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; INIT_DEBUGOUT("igb_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } ether_ifdetach(adapter->ifp); if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif IGB_CORE_LOCK(adapter); adapter->in_detach = 1; igb_stop(adapter); IGB_CORE_UNLOCK(adapter); e1000_phy_hw_reset(&adapter->hw); /* Give control back to firmware */ igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(adapter->ifp); #endif /* 
DEV_NETMAP */ igb_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); if (adapter->mta != NULL) free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int igb_shutdown(device_t dev) { return igb_suspend(dev); } /* * Suspend/resume device methods. */ static int igb_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); IGB_CORE_LOCK(adapter); igb_stop(adapter); igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } IGB_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int igb_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; struct ifnet *ifp = adapter->ifp; IGB_CORE_LOCK(adapter); igb_init_locked(adapter); igb_init_manageability(adapter); if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } } IGB_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } #ifdef IGB_LEGACY_TX /********************************************************************* * Transmit entry point * * igb_start is called by the stack to initiate a transmit. 
*  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*sc = ifp->if_softc;
	struct mbuf	*pkt;

	IGB_TX_LOCK_ASSERT(txr);

	/* Only transmit when running, not output-blocked and link is up. */
	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || !sc->link_active)
		return;

	/* Reclaim finished descriptors first when the ring is running low. */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	for (;;) {
		if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			break;

		/* Not enough descriptors left for another frame. */
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}

		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
		if (pkt == NULL)
			break;

		/*
		 * igb_xmit() may replace the mbuf pointer or set it to
		 * NULL on failure; only a surviving mbuf can go back on
		 * the send queue.
		 */
		if (igb_xmit(txr, &pkt) != 0) {
			if (pkt != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, pkt);
			if (txr->tx_avail <= IGB_MAX_SCATTER)
				txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}

		/* Hand a copy of the frame to any BPF listener. */
		ETHER_BPF_MTAP(ifp, pkt);

		/* Arm the watchdog for this ring. */
		txr->watchdog_time = ticks;
		txr->queue_status |= IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
* Should not be used with multiqueue tx */ static void igb_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IGB_TX_LOCK(txr); igb_start_locked(txr, ifp); IGB_TX_UNLOCK(txr); } return; } #else /* ~IGB_LEGACY_TX */ /* ** Multiqueue Transmit Entry: ** quick turnaround to the stack ** */ static int igb_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct igb_queue *que; struct tx_ring *txr; int i, err = 0; /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; else i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; err = drbr_enqueue(ifp, txr->br, m); if (err) return (err); if (IGB_TX_TRYLOCK(txr)) { igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } else taskqueue_enqueue(que->tq, &txr->txq_task); return (0); } static int igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; IGB_TX_LOCK_ASSERT(txr); if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) return (ENETDOWN); /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = igb_xmit(txr, &next)) != 0) { if (next == NULL) { /* It was freed, move forward */ drbr_advance(ifp, txr->br); } else { /* * Still have one left, it may not be * the same since the transmit function * may have changed it. 
*/ drbr_putback(ifp, txr->br, next); } break; } drbr_advance(ifp, txr->br); enq++; ifp->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) ifp->if_omcasts++; ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (enq > 0) { /* Set the watchdog */ txr->queue_status |= IGB_QUEUE_WORKING; txr->watchdog_time = ticks; } if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) igb_txeof(txr); if (txr->tx_avail <= IGB_MAX_SCATTER) txr->queue_status |= IGB_QUEUE_DEPLETED; return (err); } /* * Called from a taskqueue to drain queued transmit packets. */ static void igb_deferred_mq_start(void *arg, int pending) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; IGB_TX_LOCK(txr); if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } /* ** Flush all ring buffers */ static void igb_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IGB_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* ~IGB_LEGACY_TX */ /********************************************************************* * Ioctl entry point * * igb_ioctl is called when the user wants to configure the * interface. 
* * return 0 on success, positive on failure **********************************************************************/ static int igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_init(adapter); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); IGB_CORE_LOCK(adapter); max_frame_size = 9234; if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { IGB_CORE_UNLOCK(adapter); error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); IGB_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { igb_disable_promisc(adapter); igb_set_promisc(adapter); } } else igb_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) igb_stop(adapter); adapter->if_flags = ifp->if_flags; IGB_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & 
IFF_DRV_RUNNING) { IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); igb_set_multi(adapter); #ifdef DEVICE_POLLING if (!(ifp->if_capenable & IFCAP_POLLING)) #endif igb_enable_intr(adapter); IGB_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ IGB_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { IGB_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } IGB_CORE_UNLOCK(adapter); case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(igb_poll, ifp); if (error) return (error); IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); ifp->if_capenable |= IFCAP_POLLING; IGB_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ IGB_CORE_LOCK(adapter); igb_enable_intr(adapter); ifp->if_capenable &= ~IFCAP_POLLING; IGB_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; reinit = 1; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; reinit = 1; } if (mask & IFCAP_VLAN_HWTSO) { ifp->if_capenable ^= IFCAP_VLAN_HWTSO; reinit = 1; } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; reinit = 1; } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_init(adapter); VLAN_CAPABILITIES(ifp); break; } default: error = ether_ioctl(ifp, command, data); 
break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void igb_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("igb_init: begin"); IGB_CORE_LOCK_ASSERT(adapter); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); igb_reset(adapter); igb_update_link_status(adapter); E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 if (adapter->hw.mac.type == e1000_82576) ifp->if_hwassist |= CSUM_SCTP; #endif } if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; /* Configure for OS presence */ igb_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ igb_setup_transmit_structures(adapter); igb_initialize_transmit_units(adapter); /* Setup Multicast table */ igb_set_multi(adapter); /* ** Figure out the desired mbuf pool ** for doing jumbo/packetsplit */ if (adapter->max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (igb_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive 
structures\n"); return; } igb_initialize_receive_units(adapter); /* Enable VLAN support */ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) igb_setup_vlan_hw_support(adapter); /* Don't lose promiscuous settings */ igb_set_promisc(adapter); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); if (adapter->msix > 1) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. */ if (ifp->if_capenable & IFCAP_POLLING) igb_disable_intr(adapter); else #endif /* DEVICE_POLLING */ { igb_enable_intr(adapter); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); } /* Set Energy Efficient Ethernet */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw); else e1000_set_eee_i350(&adapter->hw); } } static void igb_init(void *arg) { struct adapter *adapter = arg; IGB_CORE_LOCK(adapter); igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_handle_que(void *context, int pending) { struct igb_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { bool more; more = igb_rxeof(que, adapter->rx_process_limit, NULL); IGB_TX_LOCK(txr); igb_txeof(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); /* Do we need another? 
*/ if (more) { taskqueue_enqueue(que->tq, &que->que_task); return; } } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) return; #endif /* Reenable this interrupt */ if (que->eims) E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); else igb_enable_intr(adapter); } /* Deal with link in a sleepable context */ static void igb_handle_link(void *context, int pending) { struct adapter *adapter = context; IGB_CORE_LOCK(adapter); igb_handle_link_locked(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_handle_link_locked(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct ifnet *ifp = adapter->ifp; IGB_CORE_LOCK_ASSERT(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } } } /********************************************************************* * * MSI/Legacy Deferred * Interrupt Service routine * *********************************************************************/ static int igb_irq_fast(void *arg) { struct adapter *adapter = arg; struct igb_queue *que = adapter->queues; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; /* * Mask interrupts until the taskqueue is finished running. This is * cheap, just assume that it is needed. This also works around the * MSI message reordering errata on certain systems. 
*/ igb_disable_intr(adapter); taskqueue_enqueue(que->tq, &que->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) taskqueue_enqueue(que->tq, &adapter->link_task); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } #ifdef DEVICE_POLLING #if __FreeBSD_version >= 800000 #define POLL_RETURN_COUNT(a) (a) static int #else #define POLL_RETURN_COUNT(a) static void #endif igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; struct igb_queue *que; struct tx_ring *txr; u32 reg_icr, rx_done = 0; u32 loop = IGB_MAX_LOOP; bool more; IGB_CORE_LOCK(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { IGB_CORE_UNLOCK(adapter); return POLL_RETURN_COUNT(rx_done); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) igb_handle_link_locked(adapter); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; } IGB_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; txr = que->txr; igb_rxeof(que, count, &rx_done); IGB_TX_LOCK(txr); do { more = igb_txeof(txr); } while (loop-- && more); #ifndef IGB_LEGACY_TX if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } return POLL_RETURN_COUNT(rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * MSIX Que Interrupt Service routine * **********************************************************************/ static void igb_msix_que(void *arg) { struct igb_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; u32 newitr = 0; bool more_rx; /* Ignore spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; 
E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); ++que->irqs; IGB_TX_LOCK(txr); igb_txeof(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); if (adapter->enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. */ if (que->eitr_setting) E1000_WRITE_REG(&adapter->hw, E1000_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; /* Used half Default if sub-gig */ if (adapter->link_speed != 1000) newitr = IGB_DEFAULT_ITR / 2; else { if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); } newitr &= 0x7FFC; /* Mask invalid bits */ if (adapter->hw.mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: /* Schedule a clean task if needed*/ if (more_rx) taskqueue_enqueue(que->tq, &que->que_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); return; } /********************************************************************* * * MSIX Link Interrupt Service routine * 
**********************************************************************/
/*
 * arg is the adapter.  Counts the interrupt, reads ICR and, when the
 * link-status-change bit (LSC) is set, calls igb_handle_link().  The
 * link interrupt is re-armed in both cases.
 */
static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32       	icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 *  Refreshes the link state under the core lock and fills in ifmr:
 *  ifm_status is always IFM_AVALID, plus IFM_ACTIVE when the link is
 *  up; ifm_active carries IFM_ETHER plus, when the link is up, the
 *  subtype for the current speed and the duplex flag.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	/* No link: report "valid but not active" and stop here. */
	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	/* Speeds not listed below add no subtype. */
	switch (adapter->link_speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;
	case 100:
		/*
		** Support for 100Mb SFP - these are Fiber
		** but the media type appears as serdes
		*/
		if (adapter->hw.phy.media_type ==
		    e1000_media_type_internal_serdes)
			ifmr->ifm_active |= IFM_100_FX;
		else
			ifmr->ifm_active |= IFM_100_TX;
		break;
	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	case 2500:
		ifmr->ifm_active |= IFM_2500_SX;
		break;
	}

	if (adapter->link_duplex == FULL_DUPLEX)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;

	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
*
**********************************************************************/
/*
 * Translate the media word stored in the adapter's ifmedia into the
 * autonegotiation / forced speed-duplex settings of the shared-code hw
 * structure, then re-initialize the adapter under the core lock.
 *
 * Returns EINVAL when the media word is not of type IFM_ETHER and 0
 * otherwise.  An unsupported subtype is only logged; the adapter is
 * still re-initialized and 0 is returned.
 */
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter	*sc = ifp->if_softc;
	struct ifmedia	*media = &sc->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	/* Only Ethernet media words are accepted. */
	if (IFM_TYPE(media->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(sc);

	switch (IFM_SUBTYPE(media->ifm_media)) {
	case IFM_10_T:
		/* Forced 10Mb, duplex taken from the media options. */
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		sc->hw.mac.forced_speed_duplex =
		    ((media->ifm_media & IFM_GMASK) == IFM_FDX) ?
		    ADVERTISE_10_FULL : ADVERTISE_10_HALF;
		break;
	case IFM_100_TX:
		/* Forced 100Mb, duplex taken from the media options. */
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		sc->hw.mac.forced_speed_duplex =
		    ((media->ifm_media & IFM_GMASK) == IFM_FDX) ?
		    ADVERTISE_100_FULL : ADVERTISE_100_HALF;
		break;
	case IFM_1000_T:
	case IFM_1000_SX:
	case IFM_1000_LX:
		/* Gigabit: autonegotiate, advertising 1000 full only. */
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_AUTO:
		/* Autonegotiate with the default advertisement mask. */
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	default:
		device_printf(sc->dev, "Unsupported media type\n");
	}

	igb_init_locked(sc);
	IGB_CORE_UNLOCK(sc);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors.
* **********************************************************************/ static int igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; u32 olinfo_status = 0, cmd_type_len; int i, j, error, nsegs; int first; bool remap = TRUE; struct mbuf *m_head; bus_dma_segment_t segs[IGB_MAX_SCATTER]; bus_dmamap_t map; struct igb_tx_buf *txbuf; union e1000_adv_tx_desc *txd = NULL; m_head = *m_headp; /* Basic descriptor defines */ cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); if (m_head->m_flags & M_VLANTAG) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; /* * Important to capture the first descriptor * used because it will contain the index of * the one we tell the hardware to report back */ first = txr->next_avail_desc; txbuf = &txr->tx_buffers[first]; map = txbuf->map; /* * Map the packet for DMA. */ retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(error)) { struct mbuf *m; switch (error) { case EFBIG: /* Try it again? 
- one try */ if (remap == TRUE) { remap = FALSE; m = m_defrag(*m_headp, M_NOWAIT); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; goto retry; } else return (error); case ENOMEM: txr->no_tx_dma_setup++; return (error); default: txr->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } /* Make certain there are enough descriptors */ if (nsegs > txr->tx_avail - 2) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* ** Set up the appropriate offload context ** this will consume the first descriptor */ error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); if (__predict_false(error)) { m_freem(*m_headp); *m_headp = NULL; return (error); } /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; i = txr->next_avail_desc; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txbuf = &txr->tx_buffers[i]; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | cmd_type_len | seglen); txd->read.olinfo_status = htole32(olinfo_status); if (++i == txr->num_desc) i = 0; } txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); txr->tx_avail -= nsegs; txr->next_avail_desc = i; txbuf->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. 
*/ txr->tx_buffers[first].map = txbuf->map; txbuf->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* Set the EOP descriptor that will be marked done */ txbuf = &txr->tx_buffers[first]; txbuf->eop = txd; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. */ ++txr->total_packets; E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void igb_set_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 reg; if (adapter->vf_ifp) { e1000_promisc_set_vf(hw, e1000_promisc_enabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); if (ifp->if_flags & IFF_PROMISC) { reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } else if (ifp->if_flags & IFF_ALLMULTI) { reg |= E1000_RCTL_MPE; reg &= ~E1000_RCTL_UPE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } } static void igb_disable_promisc(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 reg; int mcnt = 0; if (adapter->vf_ifp) { e1000_promisc_set_vf(hw, e1000_promisc_disabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); reg &= (~E1000_RCTL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg &= (~E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } /********************************************************************* * 
Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void igb_set_multi(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ifmultiaddr *ifma; u32 reg_rctl = 0; u8 *mta; int mcnt = 0; IOCTL_DEBUGOUT("igb_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); } /********************************************************************* * Timer routine: * This routine checks for link status, * updates statistics, and does the watchdog. 
* **********************************************************************/ static void igb_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct igb_queue *que = adapter->queues; int hung = 0, busy = 0; IGB_CORE_LOCK_ASSERT(adapter); igb_update_link_status(adapter); igb_update_stats_counters(adapter); /* ** Check the TX queues status ** - central locked handling of OACTIVE ** - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if ((txr->queue_status & IGB_QUEUE_HUNG) && (adapter->pause_frames == 0)) ++hung; if (txr->queue_status & IGB_QUEUE_DEPLETED) ++busy; if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) taskqueue_enqueue(que->tq, &que->que_task); } if (hung == adapter->num_queues) goto timeout; if (busy == adapter->num_queues) ifp->if_drv_flags |= IFF_DRV_OACTIVE; else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) && (busy < adapter->num_queues)) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; adapter->pause_frames = 0; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); #ifndef DEVICE_POLLING /* Schedule all queue interrupts - deadlock protection */ E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask); #endif return; timeout: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); device_printf(dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; igb_init_locked(adapter); } static void igb_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_fc_info *fc = &hw->fc; struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = 
adapter->tx_rings; u32 link_check, thstat, ctrl; char *flowctl = NULL; link_check = thstat = ctrl = 0; /* Get the cached link value or read for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* Fall thru */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Get the flow control for display */ switch (fc->current_mode) { case e1000_fc_rx_pause: flowctl = "RX"; break; case e1000_fc_tx_pause: flowctl = "TX"; break; case e1000_fc_full: flowctl = "Full"; break; case e1000_fc_none: default: flowctl = "None"; break; } /* Now we check if a transition has happened */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(&adapter->hw, &adapter->link_speed, &adapter->link_duplex); if (bootverbose) device_printf(dev, "Link is up %d Mbps %s," " Flow Control: %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex"), flowctl); adapter->link_active = 1; ifp->if_baudrate = adapter->link_speed * 1000000; if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. 
*/ if (hw->dev_spec._82575.media_changed) { hw->dev_spec._82575.media_changed = false; adapter->flags |= IGB_MEDIA_RESET; igb_reset(adapter); } /* This can sleep */ if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { ifp->if_baudrate = adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_PWR_DOWN)) device_printf(dev, "Link: thermal shutdown\n"); adapter->link_active = 0; /* This can sleep */ if_link_state_change(ifp, LINK_STATE_DOWN); /* Reset queue state */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->queue_status = IGB_QUEUE_IDLE; } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void igb_stop(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; IGB_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("igb_stop: begin"); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* Disarm watchdog timer. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); txr->queue_status = IGB_QUEUE_IDLE; IGB_TX_UNLOCK(txr); } e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. 
* **********************************************************************/ static void igb_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Set MAC type early for PCI setup */ e1000_set_mac_type(&adapter->hw); /* Are we a VF device? */ if ((adapter->hw.mac.type == e1000_vfadapt) || (adapter->hw.mac.type == e1000_vfadapt_i350)) adapter->vf_ifp = 1; else adapter->vf_ifp = 0; } static int igb_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->pci_mem == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; adapter->num_queues = 1; /* Defaults for Legacy or MSI */ /* This will setup either MSI/X or MSI */ adapter->msix = igb_setup_msix(adapter); adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ static int igb_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; #ifndef IGB_LEGACY_TX struct tx_ring *txr 
= adapter->tx_rings; #endif int error, rid = 0; /* Turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* MSI RID is 1 */ if (adapter->msix == 1) rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } #ifndef IGB_LEGACY_TX TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); #endif /* * Try allocating a fast interrupt and the associated deferred * processing contexts. */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); /* Make tasklet for deferred link handling */ TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(que->tq); que->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Queue Interrupt handlers: * **********************************************************************/ static int igb_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; int error, rid, vector = 0; /* Be sure to start with all interrupts disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(&adapter->hw); for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { rid = vector +1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Queue Interrupt\n"); return 
(ENXIO); } error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register Queue handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; if (adapter->hw.mac.type == e1000_82575) que->eims = E1000_EICR_TX_QUEUE0 << i; else que->eims = 1 << vector; /* ** Bind the msix vector, and thus the ** rings to the corresponding cpu. */ if (adapter->num_queues > 1) { if (igb_last_bind_cpu < 0) igb_last_bind_cpu = CPU_FIRST(); bus_bind_intr(dev, que->res, igb_last_bind_cpu); device_printf(dev, "Bound queue %d to cpu %d\n", i,igb_last_bind_cpu); igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); } #ifndef IGB_LEGACY_TX TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr); #endif /* Make tasklet for deferred handling */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); que->tq = taskqueue_create("igb_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* And Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Link Interrupt\n"); return (ENXIO); } if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register Link handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; return (0); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct igb_queue *que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, 
E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSIX */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= 
(que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, que->eims); adapter->que_mask |= que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (igb_max_interrupt_rate > 0) newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); } return; } static void igb_free_pci_resources(struct adapter *adapter) { struct igb_queue *que = adapter->queues; device_t dev = adapter->dev; int rid; /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. 
*/ if (adapter->res == NULL) goto mem; /* * First release all the interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); que = adapter->queues; if (adapter->tag != NULL) { taskqueue_drain(que->tq, &adapter->link_task); bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); for (int i = 0; i < adapter->num_queues; i++, que++) { if (que->tq != NULL) { #ifndef IGB_LEGACY_TX taskqueue_drain(que->tq, &que->txr->txq_task); #endif taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, adapter->memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); } /* * Setup Either MSI/X or MSI */ static int igb_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int bar, want, queues, msgs, maxqueues; /* tuneable override */ if (igb_enable_msix == 0) goto msi; /* First try MSI/X */ msgs = pci_msix_count(dev); if (msgs == 0) goto msi; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. 
*/ adapter->memrid = PCIR_BAR(IGB_MSIX_BAR); bar = pci_read_config(dev, adapter->memrid, 4); if (bar == 0) /* use next bar */ adapter->memrid += 4; adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; /* Manual override */ if (igb_num_queues != 0) queues = igb_num_queues; /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82575: maxqueues = 4; break; case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: maxqueues = 4; break; case e1000_i211: maxqueues = 2; break; default: /* VF interfaces */ maxqueues = 1; break; } if (queues > maxqueues) queues = maxqueues; /* Manual override */ if (igb_num_queues != 0) queues = igb_num_queues; /* ** One vector (RX/TX pair) per queue ** plus an additional for Link interrupt */ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSIX Configuration Problem, " "%d vectors configured, but %d queues wanted!\n", msgs, want); goto msi; } if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); adapter->num_queues = queues; return (msgs); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } msgs = 1; if (pci_alloc_msi(dev, &msgs) == 0) { device_printf(adapter->dev," Using an MSI interrupt\n"); return (msgs); } device_printf(adapter->dev," Using a Legacy interrupt\n"); return (0); } /********************************************************************* * * 
Initialize the DMA Coalescing feature
 *
 *  adapter: the device; adapter->dmac == 0 means "disable".
 *  pba:     packet buffer allocation value computed by igb_reset()
 *           (logged there with a "K" suffix).
 *
 *  Does nothing on i211.  On MAC types above 82580 it programs the
 *  coalescing thresholds and timers; on 82580 it only clears the
 *  LX_DECISION bit in PCIEMISC and zeroes DMACR.
 *
 **********************************************************************/
static void
igb_init_dmac(struct adapter *adapter, u32 pba)
{
	device_t	dev = adapter->dev;
	struct e1000_hw *hw = &adapter->hw;
	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
	u16		hwm;

	if (hw->mac.type == e1000_i211)
		return;

	if (hw->mac.type > e1000_82580) {

		if (adapter->dmac == 0) { /* Disabling it */
			E1000_WRITE_REG(hw, E1000_DMACR, reg);
			return;
		} else
			device_printf(dev, "DMA Coalescing enabled\n");

		/* Set starting threshold */
		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);

		/* Flow-control receive threshold used while coalescing. */
		hwm = 64 * pba - adapter->max_frame_size / 16;
		if (hwm < 64 * (pba - 6))
			hwm = 64 * (pba - 6);
		reg = E1000_READ_REG(hw, E1000_FCRTC);
		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
		    & E1000_FCRTC_RTH_COAL_MASK);
		E1000_WRITE_REG(hw, E1000_FCRTC, reg);


		dmac = pba - adapter->max_frame_size / 512;
		if (dmac < pba - 10)
			dmac = pba - 10;
		reg = E1000_READ_REG(hw, E1000_DMACR);
		reg &= ~E1000_DMACR_DMACTHR_MASK;
		/*
		 * NOTE(review): the plain assignment below discards the
		 * value just read and masked above, so every other DMACR
		 * bit starts from zero.  If a read-modify-write was
		 * intended this should be "|="; confirm against the
		 * datasheet before changing, since that would also retain
		 * the other bits currently being cleared.
		 */
		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
		    & E1000_DMACR_DMACTHR_MASK);

		/* transition to L0x or L1 if available..*/
		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

		/* Check if status is 2.5Gb backplane connection
		* before configuration of watchdog timer, which is
		* in msec values in 12.8usec intervals
		* watchdog timer= msec values in 32usec intervals
		* for non 2.5Gb connection
		*/
		if (hw->mac.type == e1000_i354) {
			int status = E1000_READ_REG(hw, E1000_STATUS);
			if ((status & E1000_STATUS_2P5_SKU) &&
			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
				reg |= ((adapter->dmac * 5) >> 6);
			else
				reg |= (adapter->dmac >> 5);
		} else {
			reg |= (adapter->dmac >> 5);
		}

		E1000_WRITE_REG(hw, E1000_DMACR, reg);

#ifdef I210_OBFF_SUPPORT
		/*
		 * Set the OBFF Rx threshold to DMA Coalescing Rx
		 * threshold - 2KB and enable the feature in the
		 * hardware for I210.
		 */
		if (hw->mac.type == e1000_i210) {
			int obff = dmac - 2;
			reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
			reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
			reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
			    | E1000_DOBFFCTL_EXIT_ACT_MASK;
			E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
		}
#endif
		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

		/* Set the interval before transition */
		reg = E1000_READ_REG(hw, E1000_DMCTLX);
		if (hw->mac.type == e1000_i350)
			reg |= IGB_DMCTLX_DCFLUSH_DIS;
		/*
		** in 2.5Gb connection, TTLX unit is 0.4 usec,
		** so 0xA (10) units keep the delay at 4 usec
		*/
		if (hw->mac.type == e1000_i354) {
			int status = E1000_READ_REG(hw, E1000_STATUS);
			if ((status & E1000_STATUS_2P5_SKU) &&
			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
				reg |= 0xA;
			else
				reg |= 0x4;
		} else {
			reg |= 0x4;
		}

		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

		/* free space in tx packet buffer to wake from DMA coal */
		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
		    (2 * adapter->max_frame_size)) >> 6);

		/* make low power state decision controlled by DMA coal */
		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		reg &= ~E1000_PCIEMISC_LX_DECISION;
		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);

	} else if (hw->mac.type == e1000_82580) {
		/* This inner reg shadows the function-scope one. */
		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		E1000_WRITE_REG(hw, E1000_PCIEMISC,
		    reg & ~E1000_PCIEMISC_LX_DECISION);
		E1000_WRITE_REG(hw, E1000_DMACR, 0);
	}
}


/*********************************************************************
 *
 *  Set up a fresh starting state
 *
 *  Computes the packet buffer allocation and flow-control water marks
 *  for the MAC type, issues a global hardware reset, re-runs the
 *  shared-code hardware init, configures DMA coalescing and the VLAN
 *  ethertype, and refreshes PHY info and link state.
 *
 **********************************************************************/
static void
igb_reset(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_fc_info *fc = &hw->fc;
	struct ifnet	*ifp = adapter->ifp;
	u32		pba = 0;
	u16		hwm;

	INIT_DEBUGOUT("igb_reset: begin");

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	case e1000_82575:
		pba = E1000_PBA_32K;
		break;
	case e1000_82576:
	case e1000_vfadapt:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82580:
	case e1000_i350:
	case e1000_i354:
	case e1000_vfadapt_i350:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba = e1000_rxpbs_adjust_82580(pba);
		break;
	case e1000_i210:
	case e1000_i211:
		pba = E1000_PBA_34K;
		/* Falls through to default, which only breaks. */
	default:
		break;
	}

	/* Special needs in case of Jumbo frames */
	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
		u32 tx_space, min_tx, min_rx;
		/* Upper 16 bits: TX allocation; lower 16 bits: RX. */
		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;
		/* Room for two full frames, rounded up to whole KB. */
		min_tx = (adapter->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
                min_rx = adapter->max_frame_size;
                min_rx = roundup2(min_rx, 1024);
                min_rx >>= 10;
		if (tx_space < min_tx &&
		    ((min_tx - tx_space) < pba)) {
			pba = pba - (min_tx - tx_space);
			/*
                         * if short on rx space, rx wins
                         * and must trump tx adjustment
			 */
                        if (pba < min_rx)
                                pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
	}

	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit.
	 */
	/* Smaller of 90% of the buffer and the buffer less two frames. */
	hwm = min(((pba << 10) * 9 / 10),
	    ((pba << 10) - 2 * adapter->max_frame_size));

	if (hw->mac.type < e1000_82576) {
		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
		fc->low_water = fc->high_water - 8;
	} else {
		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
		fc->low_water = fc->high_water - 16;
	}

	fc->pause_time = IGB_FC_PAUSE_TIME;
	fc->send_xon = TRUE;
	if (adapter->fc)
		fc->requested_mode = adapter->fc;
	else
		fc->requested_mode = e1000_fc_default;

	/* Issue a global reset */
	e1000_reset_hw(hw);
	E1000_WRITE_REG(hw, E1000_WUC, 0);

	/* Reset for AutoMediaDetect */
	if (adapter->flags & IGB_MEDIA_RESET) {
		e1000_setup_init_funcs(hw, TRUE);
		e1000_get_bus_info(hw);
		adapter->flags &= ~IGB_MEDIA_RESET;
	}

	/* A failed hardware init is only logged; the reset continues. */
	if (e1000_init_hw(hw) < 0)
		device_printf(dev, "Hardware Initialization Failed\n");

	/* Setup DMA Coalescing */
	igb_init_dmac(adapter, pba);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
	return;
}

/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
* **********************************************************************/ static int igb_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("igb_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = igb_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = igb_ioctl; #ifndef IGB_LEGACY_TX ifp->if_transmit = igb_mq_start; ifp->if_qflush = igb_qflush; #else ifp->if_start = igb_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); #endif ether_ifattach(ifp, adapter->hw.mac.addr); ifp->if_capabilities = ifp->if_capenable = 0; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capenable = ifp->if_capabilities; /* Don't enable LRO by default */ ifp->if_capabilities |= IFCAP_LRO; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Tell the upper layer(s) we * support full VLAN capability. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the igb driver you can ** enable this and get full hardware tag filtering. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, igb_media_change, igb_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. 
*/ static void igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int igb_dma_malloc(struct adapter *adapter, bus_size_t size, struct igb_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ IGB_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (error); } static void igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_map != NULL) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_map = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* 
*
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 *  Fills in adapter->queues, adapter->tx_rings and adapter->rx_rings
 *  (adapter->num_queues entries each).  Returns 0 on success and
 *  ENOMEM when any allocation fails.
 *
 **********************************************************************/
static int
igb_allocate_queues(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct igb_queue	*que = NULL;
	struct tx_ring		*txr = NULL;
	struct rx_ring		*rxr = NULL;
	int rsize, tsize, error = E1000_SUCCESS;
	int txconf = 0, rxconf = 0;

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Next allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Now allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = i;
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (igb_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/*
		 * Now allocate transmit buffers for the ring.
		 *
		 * NOTE(review): on failure igb_allocate_transmit_buffers()
		 * already calls igb_free_transmit_structures(), which frees
		 * adapter->tx_rings; the err_tx_desc unwinding below then
		 * walks that freed array.  Fixing this needs a coordinated
		 * change in both functions.
		 */
		if (igb_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#ifndef IGB_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
#endif
	}

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		rxr->adapter = adapter;
		rxr->me = i;

		/*
		 * Initialize the RX lock.  The name must be built from this
		 * ring's own index: txr still points at the last TX ring
		 * here, so using txr->me gave every RX lock the same name.
		 */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (igb_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring*/
		if (igb_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		igb_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	/*
	 * Undo every fully configured TX ring.  The buf ring is released
	 * per ring here: the rx_fail label is also reached directly while
	 * txr is still NULL, so it must not dereference txr.
	 */
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
#ifndef IGB_LEGACY_TX
		if (txr->br != NULL) {
			buf_ring_free(txr->br, M_DEVBUF);
			txr->br = NULL;
		}
#endif
		igb_dma_free(adapter, &txr->txdma);
	}
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}

/********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int igb_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct igb_tx_buf *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ IGB_TSO_SIZE, /* maxsize */ IGB_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ igb_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. 
* **********************************************************************/ static void igb_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buf *txbuf; int i; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ IGB_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); /* no need to set the address */ netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si)); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->eop = NULL; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IGB_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static void igb_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) igb_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. 
*
 *  For every queue: program the descriptor ring length and base
 *  address, zero the head and tail pointers, mark the ring idle and
 *  write TXDCTL with the threshold values and the queue-enable bit.
 *  Unless adapter->vf_ifp is set, the collision distance is then
 *  configured and TCTL is written with the enable bit.
 *
 **********************************************************************/
static void
igb_initialize_transmit_units(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct e1000_hw *hw = &adapter->hw;
	u32		tctl, txdctl;

	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
	tctl = txdctl = 0;

	/* Setup the Tx Descriptor Rings */
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		u64 bus_addr = txr->txdma.dma_paddr;

		/*
		 * Ring length in bytes.
		 * NOTE(review): computed with struct e1000_tx_desc although
		 * the ring is cleared elsewhere using union
		 * e1000_adv_tx_desc -- presumably the same size; confirm.
		 */
		E1000_WRITE_REG(hw, E1000_TDLEN(i),
		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
		/* 64-bit ring base address, split into high and low words */
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i),
		    (uint32_t)bus_addr);

		/* Setup the HW Tx Head and Tail descriptor pointers */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		HW_DEBUGOUT2("Base = %x, Length = %x\n",
		    E1000_READ_REG(hw, E1000_TDBAL(i)),
		    E1000_READ_REG(hw, E1000_TDLEN(i)));

		txr->queue_status = IGB_QUEUE_IDLE;

		/*
		 * txdctl is not reset between iterations; the same bits are
		 * OR-ed in every time, so each queue gets the same value.
		 */
		txdctl |= IGB_TX_PTHRESH;
		txdctl |= IGB_TX_HTHRESH << 8;
		txdctl |= IGB_TX_WTHRESH << 16;
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
	}

	/*
	 * Skip the global transmit control setup below when vf_ifp is set
	 * (presumably a virtual-function interface -- confirm).
	 */
	if (adapter->vf_ifp)
		return;

	e1000_config_collision_dist(hw);

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
}

/*********************************************************************
 *
 *  Free all transmit rings.
* **********************************************************************/ static void igb_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); igb_free_transmit_buffers(txr); igb_dma_free(adapter, &txr->txdma); IGB_TX_UNLOCK(txr); IGB_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void igb_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buf *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; tx_buffer = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; if (tx_buffer->map != NULL) { bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } else if (tx_buffer->map != NULL) { bus_dmamap_unload(txr->txtag, tx_buffer->map); bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } #ifndef IGB_LEGACY_TX if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct adapter 
*adapter = txr->adapter; struct e1000_adv_tx_context_desc *TXD; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0, paylen; u16 vtag = 0, eh_type; int ctxd, ehdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif struct tcphdr *th; /* * Determine where frame payload starts. * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; eh_type = eh->evl_proto; } else { ehdrlen = ETHER_HDR_LEN; eh_type = eh->evl_encap_proto; } switch (ntohs(eh_type)) { #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); /* XXX-BZ For now we do not pretend to support ext. hdrs. */ if (ip6->ip6_nxt != IPPROTO_TCP) return (ENXIO); ip_hlen = sizeof(struct ip6_hdr); ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; #endif #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return (ENXIO); ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. 
*/ *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; break; #endif default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(eh_type)); break; } ctxd = txr->next_avail_desc; TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; tcp_hlen = th->th_off << 2; /* This is used in the transmit desc in encap */ paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx |= txr->me << 4; TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); if (++ctxd == txr->num_desc) ctxd = 0; txr->tx_avail--; txr->next_avail_desc = ctxd; *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; ++txr->tso_tx; return (0); } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; struct ether_vlan_header *eh; struct ip *ip; struct ip6_hdr *ip6; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; int ehdrlen, ip_hlen = 
0; u16 etype; u8 ipproto = 0; int offload = TRUE; int ctxd = txr->next_avail_desc; u16 vtag = 0; /* First check if TSO is to be used */ if (mp->m_pkthdr.csum_flags & CSUM_TSO) return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } else if (offload == FALSE) /* ... no offload to do */ return (0); /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } /* Set the ether header length */ vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; ipproto = ip->ip_p; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); /* XXX-BZ this will go badly in case of ext hdrs. 
*/ ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; default: offload = FALSE; break; } vlan_macip_lens |= ip_hlen; type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; switch (ipproto) { case IPPROTO_TCP: if (mp->m_pkthdr.csum_flags & CSUM_TCP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: if (mp->m_pkthdr.csum_flags & CSUM_UDP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: if (mp->m_pkthdr.csum_flags & CSUM_SCTP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; break; #endif default: offload = FALSE; break; } if (offload) /* For the TX descriptor setup */ *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx = txr->me << 4; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); /* We've consumed the first desc, adjust counters */ if (++ctxd == txr->num_desc) ctxd = 0; txr->next_avail_desc = ctxd; --txr->tx_avail; return (0); } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * * TRUE return means there's work in the ring to clean, FALSE its empty. 
**********************************************************************/ static bool igb_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; u32 work, processed = 0; u16 limit = txr->process_limit; struct igb_tx_buf *buf; union e1000_adv_tx_desc *txd; mtx_assert(&txr->tx_mtx, MA_OWNED); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txr->me)) return (FALSE); #endif /* DEV_NETMAP */ if (txr->tx_avail == txr->num_desc) { txr->queue_status = IGB_QUEUE_IDLE; return FALSE; } /* Get work starting point */ work = txr->next_to_clean; buf = &txr->tx_buffers[work]; txd = &txr->tx_base[work]; work -= txr->num_desc; /* The distance to ring end */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); do { union e1000_adv_tx_desc *eop = buf->eop; if (eop == NULL) /* No work */ break; if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) break; /* I/O not complete */ if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; } buf->eop = NULL; ++txr->tx_avail; /* We clean the range if multi segment */ while (txd != eop) { ++txd; ++buf; ++work; /* wrap the ring? 
*/ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; } ++txr->tx_avail; buf->eop = NULL; } ++txr->packets; ++processed; ++ifp->if_opackets; txr->watchdog_time = ticks; /* Try the next packet */ ++txd; ++buf; ++work; /* reset with a wrap */ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } prefetch(txd); } while (__predict_true(--limit)); bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); work += txr->num_desc; txr->next_to_clean = work; /* ** Watchdog calculation, we know there's ** work outstanding or the first return ** would have been taken, so none processed ** for too long indicates a hang. */ if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG)) txr->queue_status |= IGB_QUEUE_HUNG; if (txr->tx_avail >= IGB_QUEUE_THRESHOLD) txr->queue_status &= ~IGB_QUEUE_DEPLETED; if (txr->tx_avail == txr->num_desc) { txr->queue_status = IGB_QUEUE_IDLE; return (FALSE); } return (TRUE); } /********************************************************************* * * Refresh mbuf buffers for RX descriptor rings * - now keeps its own state so discards due to resource * exhaustion are unnecessary, if an mbuf cannot be obtained * it just returns, keeping its placeholder, thus it can simply * be recalled to try again. 
* **********************************************************************/ static void igb_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; bus_dma_segment_t hseg[1]; bus_dma_segment_t pseg[1]; struct igb_rx_buf *rxbuf; struct mbuf *mh, *mp; int i, j, nsegs, error; bool refreshed = FALSE; i = j = rxr->next_to_refresh; /* ** Get one descriptor beyond ** our work mark to control ** the loop. */ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; /* No hdr mbuf used with header split off */ if (rxr->hdr_split == FALSE) goto no_split; if (rxbuf->m_head == NULL) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) goto update; } else mh = rxbuf->m_head; mh->m_pkthdr.len = mh->m_len = MHLEN; mh->m_len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(mh); rxbuf->m_head = NULL; goto update; } rxbuf->m_head = mh; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr); no_split: if (rxbuf->m_pack == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (mp == NULL) goto update; } else mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: payload dmamap load" " failure - %d\n", error); m_free(mp); rxbuf->m_pack = NULL; goto update; } rxbuf->m_pack = mp; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr); refreshed = TRUE; /* I feel wefreshed :) */ i = j; /* our next is precalculated */ rxr->next_to_refresh = i; if (++j == adapter->num_rx_desc) j = 0; } update: if (refreshed) /* 
update tail */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int igb_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct igb_rx_buf *rxbuf; int i, bsize, error; bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; if (!(rxr->rx_buffers = (struct igb_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSIZE, /* maxsize */ 1, /* nsegments */ MSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->htag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX payload DMA tag\n"); goto fail; } for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap); if (error) { device_printf(dev, "Unable to create RX head DMA maps\n"); goto fail; } error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to 
create RX packet DMA maps\n"); goto fail; } } return (0); fail: /* Frees all, but can handle partial completion */ igb_free_receive_structures(adapter); return (error); } static void igb_free_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rxbuf; for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; } } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int igb_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct ifnet *ifp; device_t dev; struct igb_rx_buf *rxbuf; bus_dma_segment_t pseg[1], hseg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(rxr->adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ adapter = rxr->adapter; dev = adapter->dev; ifp = adapter->ifp; /* Clear the ring contents */ IGB_RX_LOCK(rxr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* DEV_NETMAP */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* ** Free current RX buffer structures and their mbufs */ igb_free_receive_ring(rxr); /* Configure for header split? 
*/ if (igb_header_split) rxr->hdr_split = TRUE; /* Now replenish the ring mbufs */ for (int j = 0; j < adapter->num_rx_desc; ++j) { struct mbuf *mh, *mp; rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { /* slot sj is mapped to the i-th NIC-ring entry */ int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(slot + sj, &paddr); netmap_load_map(rxr->ptag, rxbuf->pmap, addr); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ if (rxr->hdr_split == FALSE) goto skip_head; /* First the header */ rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } m_adj(rxbuf->m_head, ETHER_ALIGN); mh = rxbuf->m_head; mh->m_len = mh->m_pkthdr.len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, rxbuf->m_head, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) /* Nothing elegant to do here */ goto fail; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); skip_head: /* Now the payload cluster */ rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_pack == NULL) { error = ENOBUFS; goto fail; } mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = adapter->num_rx_desc - 1; rxr->lro_enabled = FALSE; rxr->rx_split_packets = 0; rxr->rx_bytes = 0; rxr->fmp = NULL; rxr->lmp = NULL; rxr->discard = FALSE; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* ** Now set up the LRO interface, we ** also only do head split when LRO ** is enabled, since so often they ** are undesireable in similar setups. */ if (ifp->if_capenable & IFCAP_LRO) { error = tcp_lro_init(lro); if (error) { device_printf(dev, "LRO Initialization failed!\n"); goto fail; } INIT_DEBUGOUT("RX LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; } IGB_RX_UNLOCK(rxr); return (0); fail: igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int igb_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int i; for (i = 0; i < adapter->num_queues; i++, rxr++) if (igb_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'i' is the endpoint. */ for (int j = 0; j < i; ++j) { rxr = &adapter->rx_rings[j]; IGB_RX_LOCK(rxr); igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); } return (ENOBUFS); } /********************************************************************* * * Enable receive unit. 
/*
 * Program the receive side of the hardware.
 *
 * In order: disables receives, builds the SRRCTL/RCTL values for header
 * split and buffer size, programs base/length/SRRCTL/RXDCTL for each
 * queue, configures RSS (redirection table, hash key, MRQC) when more
 * than one queue is in use, writes RXCSUM and RCTL (which re-enables
 * receives), and finally writes the head and tail pointers of each ring.
 */
static void
igb_initialize_receive_units(struct adapter *adapter)
{
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp = adapter->ifp;
	struct e1000_hw *hw = &adapter->hw;
	u32		rctl, rxcsum, psize, srrctl = 0;

	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/*
	** Set up for header split
	*/
	if (igb_header_split) {
		/* Use a standard mbuf for the header */
		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
	} else
		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

	/*
	** Set up for jumbo frames
	*/
	if (ifp->if_mtu > ETHERMTU) {
		rctl |= E1000_RCTL_LPE;
		/*
		 * Pick the packet buffer size from the mbuf cluster size.
		 * NOTE(review): a cluster smaller than MJUMPAGESIZE leaves
		 * the size fields untouched here - confirm that is intended.
		 */
		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
		}
		/* Set maximum packet len */
		psize = adapter->max_frame_size;
		/* are we on a vlan? */
		if (adapter->ifp->if_vlantrunk != NULL)
			psize += VLAN_TAG_SIZE;
		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
	} else {
		rctl &= ~E1000_RCTL_LPE;
		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
		rctl |= E1000_RCTL_SZ_2048;
	}

	/* Setup the Base and Length of the Rx Descriptor Rings */
	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		u64 bus_addr = rxr->rxdma.dma_paddr;
		u32 rxdctl;

		/*
		 * NOTE(review): the ring is sized elsewhere in this file with
		 * sizeof(union e1000_adv_rx_desc); this uses the legacy
		 * descriptor struct - presumably the same size, confirm.
		 */
		E1000_WRITE_REG(hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
		/* 64-bit ring base address, split into high and low halves */
		E1000_WRITE_REG(hw, E1000_RDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i),
		    (uint32_t)bus_addr);
		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
		/* Enable this Queue */
		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
		/* Clear the low 20 bits, then fill in the three thresholds */
		rxdctl &= 0xFFF00000;
		rxdctl |= IGB_RX_PTHRESH;
		rxdctl |= IGB_RX_HTHRESH << 8;
		rxdctl |= IGB_RX_WTHRESH << 16;
		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
	}

	/*
	** Setup for RX MultiQueue
	*/
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (adapter->num_queues >1) {
		u32 random[10], mrqc, shift = 0;
		union igb_reta {
			u32 dword;
			u8  bytes[4];
		} reta;

		/* Random material for the RSS hash key written below */
		arc4rand(&random, sizeof(random), 0);
		if (adapter->hw.mac.type == e1000_82575)
			shift = 6;
		/*
		 * Fill the 128-entry redirection table round-robin over the
		 * queues.  Entries are packed four to a dword; the register
		 * is written each time the fourth byte has been filled.
		 */
		for (int i = 0; i < 128; i++) {
			reta.bytes[i & 3] =
			    (i % adapter->num_queues) << shift;
			if ((i & 3) == 3)
				E1000_WRITE_REG(hw,
				    E1000_RETA(i >> 2), reta.dword);
		}
		/* Now fill in hash table */
		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
		for (int i = 0; i < 10; i++)
			E1000_WRITE_REG_ARRAY(hw,
			    E1000_RSSRK(0), i, random[i]);

		/* Select which packet fields take part in the RSS hash */
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);

		/*
		** NOTE: Receive Full-Packet Checksum Offload
		** is mutually exclusive with Multiqueue. However
		** this is not the same as TCP/IP checksums which
		** still work.
		*/
		rxcsum |= E1000_RXCSUM_PCSD;
#if __FreeBSD_version >= 800000
		/* For SCTP Offload */
		if ((hw->mac.type == e1000_82576)
		    && (ifp->if_capenable & IFCAP_RXCSUM))
			rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
	} else {
		/* Non RSS setup */
		if (ifp->if_capenable & IFCAP_RXCSUM) {
			rxcsum |= E1000_RXCSUM_IPPCSE;
#if __FreeBSD_version >= 800000
			if (adapter->hw.mac.type == e1000_82576)
				rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
		} else
			rxcsum &= ~E1000_RXCSUM_TUOFL;
	}
	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
	/* Strip CRC bytes. */
	rctl |= E1000_RCTL_SECRC;
	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
	/* Don't store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Receives */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers
	 *   - needs to be after enable
	 */
	for (int i = 0; i < adapter->num_queues; i++) {
		rxr = &adapter->rx_rings[i];
		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
#ifdef DEV_NETMAP
		/*
		 * an init() while a netmap client is active must
		 * preserve the rx buffers passed to userspace.
		 * In this driver it means we adjust RDT to
		 * something different from next_to_refresh
		 * (which is not used in netmap mode).
		 */
		if (ifp->if_capenable & IFCAP_NETMAP) {
			struct netmap_adapter *na = NA(adapter->ifp);
			struct netmap_kring *kring = &na->rx_rings[i];
			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);

			/* Wrap the adjusted tail back into the ring range */
			if (t >= adapter->num_rx_desc)
				t -= adapter->num_rx_desc;
			else if (t < 0)
				t += adapter->num_rx_desc;
			E1000_WRITE_REG(hw, E1000_RDT(i), t);
		} else
#endif /* DEV_NETMAP */
		/* With DEV_NETMAP this is the else-branch of the test above */
		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
	}
	return;
}
* **********************************************************************/ static void igb_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; igb_free_receive_buffers(rxr); tcp_lro_free(lro); igb_dma_free(adapter, &rxr->rxdma); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures. * **********************************************************************/ static void igb_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rxbuf; int i; INIT_DEBUGOUT("free_receive_structures: begin"); /* Cleanup any existing buffers */ if (rxr->rx_buffers != NULL) { for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; if (rxbuf->hmap != NULL) { bus_dmamap_destroy(rxr->htag, rxbuf->hmap); rxbuf->hmap = NULL; } if (rxbuf->pmap != NULL) { bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); rxbuf->pmap = NULL; } } if (rxr->rx_buffers != NULL) { free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; } } if (rxr->htag != NULL) { bus_dma_tag_destroy(rxr->htag); rxr->htag = NULL; } if (rxr->ptag != NULL) { bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; } } static __inline void igb_rx_discard(struct rx_ring *rxr, int i) { struct igb_rx_buf *rbuf; rbuf = &rxr->rx_buffers[i]; /* Partially received? 
Free the chain */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* ** With advanced descriptors the writeback ** clobbers the buffer addrs, so its easier ** to just free the existing mbufs and take ** the normal refresh path to get new buffers ** and mapping. */ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; bus_dmamap_unload(rxr->htag, rbuf->hmap); } if (rbuf->m_pack) { m_free(rbuf->m_pack); rbuf->m_pack = NULL; bus_dmamap_unload(rxr->ptag, rbuf->pmap); } return; } static __inline void igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) { /* * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet * should be computed by hardware. Also it should not have VLAN tag in * ethernet header. */ if (rxr->lro_enabled && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) == (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* * Send to the stack if: ** - LRO not enabled, or ** - no LRO resources, or ** - lro enqueue fails */ if (rxr->lro.lro_cnt != 0) if (tcp_lro_rx(&rxr->lro, m, 0) == 0) return; } IGB_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, m); IGB_RX_LOCK(rxr); } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. 
/*
 * Receive clean-up for one queue.
 *
 * Walks the descriptor ring from rxr->next_to_check, consuming
 * descriptors whose DD status bit is set.  Each buffer is detached from
 * its slot and chained onto the packet under assembly (rxr->fmp/lmp);
 * on end-of-packet the chain is passed to igb_rx_input().  Used slots
 * are refilled through igb_refresh_mbufs().
 *
 * que   - queue whose RX ring is processed
 * count - decremented once per consumed descriptor; the loop stops when
 *         it reaches zero, so a negative value runs until no completed
 *         descriptor is left
 * done  - if not NULL, incremented by the number of packets handed to
 *         igb_rx_input()
 *
 * Returns TRUE when the last descriptor status examined still had DD
 * set (more work pending), FALSE otherwise.
 */
static bool
igb_rxeof(struct igb_queue *que, int count, int *done)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, processed = 0, rxdone = 0;
	u32			ptype, staterr = 0;
	union e1000_adv_rx_desc	*cur;

	IGB_RX_LOCK(rxr);
	/* Sync the ring. */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

#ifdef DEV_NETMAP
	/* A non-zero return ends processing here with nothing more to do */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IGB_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	/* Main clean loop */
	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf		*sendmp, *mh, *mp;
		struct igb_rx_buf	*rxbuf;
		u16			hlen, plen, hdr, vtag;
		bool			eop = FALSE;

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		/* Stop at the first descriptor not yet marked done */
		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		count--;
		sendmp = mh = mp = NULL;
		/* Clear the status so this slot is not seen as done again */
		cur->wb.upper.status_error = 0;
		rxbuf = &rxr->rx_buffers[i];
		plen = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
		/*
		 * On i350/i354 with the LB status bit set the VLAN field is
		 * read as big-endian; otherwise as little-endian.
		 */
		if (((adapter->hw.mac.type == e1000_i350) ||
		    (adapter->hw.mac.type == e1000_i354)) &&
		    (staterr & E1000_RXDEXT_STATERR_LB))
			vtag = be16toh(cur->wb.upper.vlan);
		else
			vtag = le16toh(cur->wb.upper.vlan);
		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);

		/* Make sure all segments of a bad packet are discarded */
		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
		    (rxr->discard)) {
			adapter->dropped_pkts++;
			++rxr->rx_discarded;
			if (!eop) /* Catch subsequent segs */
				rxr->discard = TRUE;
			else
				rxr->discard = FALSE;
			igb_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** The way the hardware is configured to
		** split, it will ONLY use the header buffer
		** when header split is enabled, otherwise we
		** get normal behavior, ie, both header and
		** payload are DMA'd into the payload buffer.
		**
		** The fmp test is to catch the case where a
		** packet spans multiple descriptors, in that
		** case only the first header is valid.
		*/
		if (rxr->hdr_split && rxr->fmp == NULL) {
			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
			    E1000_RXDADV_HDRBUFLEN_SHIFT;
			/* Never trust a length beyond the header buffer */
			if (hlen > IGB_HDR_BUF)
				hlen = IGB_HDR_BUF;
			mh = rxr->rx_buffers[i].m_head;
			mh->m_len = hlen;
			/* clear buf pointer for refresh */
			rxbuf->m_head = NULL;
			/*
			** Get the payload length, this
			** could be zero if its a small
			** packet.
			*/
			if (plen > 0) {
				mp = rxr->rx_buffers[i].m_pack;
				mp->m_len = plen;
				mh->m_next = mp;
				/* clear buf pointer */
				rxbuf->m_pack = NULL;
				rxr->rx_split_packets++;
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mh = rxr->rx_buffers[i].m_pack;
			mh->m_len = plen;
			/* clear buf info for refresh */
			rxbuf->m_pack = NULL;
		}
		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);

		++processed; /* So we know when to refresh */

		/* Initial frame - setup */
		if (rxr->fmp == NULL) {
			mh->m_pkthdr.len = mh->m_len;
			/* Save the head of the chain */
			rxr->fmp = mh;
			rxr->lmp = mh;
			if (mp != NULL) {
				/* Add payload if split */
				mh->m_pkthdr.len += mp->m_len;
				rxr->lmp = mh->m_next;
			}
		} else {
			/* Chain mbuf's together */
			rxr->lmp->m_next = mh;
			rxr->lmp = rxr->lmp->m_next;
			rxr->fmp->m_pkthdr.len += mh->m_len;
		}

		/* Last segment: finish the packet header and counters */
		if (eop) {
			rxr->fmp->m_pkthdr.rcvif = ifp;
			ifp->if_ipackets++;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->packets++;
			rxr->bytes += rxr->fmp->m_pkthdr.len;
			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;

			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				igb_rx_checksum(staterr, rxr->fmp, ptype);

			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
			    (staterr & E1000_RXD_STAT_VP) != 0) {
				rxr->fmp->m_pkthdr.ether_vtag = vtag;
				rxr->fmp->m_flags |= M_VLANTAG;
			}
#ifndef IGB_LEGACY_TX
			rxr->fmp->m_pkthdr.flowid = que->msix;
			rxr->fmp->m_flags |= M_FLOWID;
#endif
			sendmp = rxr->fmp;
			/* Make sure to set M_PKTHDR. */
			sendmp->m_flags |= M_PKTHDR;
			rxr->fmp = NULL;
			rxr->lmp = NULL;
		}

next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		/*
		** Send to the stack or LRO
		*/
		if (sendmp != NULL) {
			/*
			 * igb_rx_input() may drop the RX lock, so publish
			 * our position first and re-read it afterwards.
			 */
			rxr->next_to_check = i;
			igb_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
			rxdone++;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			igb_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Catch any remainders */
	if (igb_rx_unrefreshed(rxr))
		igb_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	if (done != NULL)
		*done += rxdone;

	IGB_RX_UNLOCK(rxr);
	/* staterr holds the status of the last descriptor examined */
	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}
*/ if (!(errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else mp->m_pkthdr.csum_flags = 0; } if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) /* reassign */ type = CSUM_SCTP_VALID; #endif /* Did it pass? */ if (!(errors & E1000_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= type; if (sctp == 0) mp->m_pkthdr.csum_data = htons(0xffff); } } return; } /* * This routine is run via an vlan * config EVENT */ static void igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IGB_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Change hw filter setting */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) igb_setup_vlan_hw_support(adapter); IGB_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IGB_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Change hw filter setting */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) igb_setup_vlan_hw_support(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 reg; if (adapter->vf_ifp) { e1000_rlpml_set_vf(hw, adapter->max_frame_size + VLAN_TAG_SIZE); return; } reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, 
E1000_CTRL, reg); /* Enable the Filter Table */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } /* Update the frame size */ E1000_WRITE_REG(&adapter->hw, E1000_RLPML, adapter->max_frame_size + VLAN_TAG_SIZE); /* Don't bother with table if no vlans */ if ((adapter->num_vlans == 0) || ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < IGB_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) { if (adapter->vf_ifp) e1000_vfta_set_vf(hw, adapter->shadow_vfta[i], TRUE); else e1000_write_vfta(hw, i, adapter->shadow_vfta[i]); } } static void igb_enable_intr(struct adapter *adapter) { /* With RSS set up what to auto clear */ if (adapter->msix_mem) { u32 mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, IMS_ENABLE_MASK); } E1000_WRITE_FLUSH(&adapter->hw); return; } static void igb_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) { E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); } E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(&adapter->hw); return; } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... 
aka * to disable special hardware management features */ static void igb_init_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; manc2h |= 1 << 5; /* Mng Port 623 */ manc2h |= 1 << 6; /* Mng Port 664 */ E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void igb_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is loaded. * */ static void igb_get_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (adapter->vf_ifp) return; /* Let firmware know the driver has taken over */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that the * driver is no longer loaded. 
/*
 * Enable PCI Wake On Lan capability.
 *
 * Locates the power-management capability and sets the PME status and
 * PME enable bits in its power status register.  Returns silently when
 * the device has no such capability.
 *
 * The capability list is searched with pci_find_cap() rather than
 * assuming power management is the first entry behind PCIR_CAP_PTR.
 */
static void
igb_enable_wakeup(device_t dev)
{
	int	pmc;
	u16	status;

	/* pci_find_cap() returns 0 and fills in the offset on success */
	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
		return;

	/* OK, we have the power capabilities, so now get the status register */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
}
*/ if (adapter->vf_ifp) { igb_update_vf_stats_counters(adapter); return; } stats = (struct e1000_hw_stats *)adapter->stats; if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { stats->symerrs += E1000_READ_REG(hw,E1000_SYMERRS); stats->sec += E1000_READ_REG(hw, E1000_SEC); } stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); stats->mpc += E1000_READ_REG(hw, E1000_MPC); stats->scc += E1000_READ_REG(hw, E1000_SCC); stats->ecol += E1000_READ_REG(hw, E1000_ECOL); stats->mcc += E1000_READ_REG(hw, E1000_MCC); stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); stats->colc += E1000_READ_REG(hw, E1000_COLC); stats->dc += E1000_READ_REG(hw, E1000_DC); stats->rlec += E1000_READ_REG(hw, E1000_RLEC); stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); stats->xoffrxc += adapter->pause_frames; stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); stats->gprc += E1000_READ_REG(hw, E1000_GPRC); stats->bprc += E1000_READ_REG(hw, E1000_BPRC); stats->mprc += E1000_READ_REG(hw, E1000_MPRC); stats->gptc += E1000_READ_REG(hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. 
*/ /* Both registers clear on the read of the high dword */ stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); stats->ruc += E1000_READ_REG(hw, E1000_RUC); stats->rfc += E1000_READ_REG(hw, E1000_RFC); stats->roc += E1000_READ_REG(hw, E1000_ROC); stats->rjc += E1000_READ_REG(hw, E1000_RJC); stats->tor += E1000_READ_REG(hw, E1000_TORH); stats->tot += E1000_READ_REG(hw, E1000_TOTH); stats->tpr += E1000_READ_REG(hw, E1000_TPR); stats->tpt += E1000_READ_REG(hw, E1000_TPT); stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); stats->mptc += E1000_READ_REG(hw, E1000_MPTC); stats->bptc += E1000_READ_REG(hw, E1000_BPTC); /* Interrupt Counts */ stats->iac += E1000_READ_REG(hw, E1000_IAC); stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); /* Host to Card Statistics */ stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + 
((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); ifp = adapter->ifp; ifp->if_collisions = stats->colc; /* Rx Errors */ ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc + stats->crcerrs + stats->algnerrc + stats->ruc + stats->roc + stats->mpc + stats->cexterr; /* Tx Errors */ ifp->if_oerrors = stats->ecol + stats->latecol + adapter->watchdog_events; /* Driver specific counters */ adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); adapter->int_mask = E1000_READ_REG(hw, E1000_IMS); adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); adapter->packet_buf_alloc_tx = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); adapter->packet_buf_alloc_rx = (E1000_READ_REG(hw, E1000_PBA) & 0xffff); } /********************************************************************** * * Initialize the VF board statistics counters. 
* **********************************************************************/ static void igb_vf_init_stats(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; stats = (struct e1000_vf_stats *)adapter->stats; if (stats == NULL) return; stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); } /********************************************************************** * * Update the VF board statistics counters. * **********************************************************************/ static void igb_update_vf_stats_counters(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; if (adapter->link_speed == 0) return; stats = (struct e1000_vf_stats *)adapter->stats; UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); } /* Export a single 32-bit register via a read-only sysctl. 
*/ static int igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* ** Tuneable interrupt rate handler */ static int igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1); int error; u32 reg, usec, rate; reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); usec = ((reg & 0x7FFC) >> 2); if (usec > 0) rate = 1000000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; return 0; } /* * Add sysctl variables, one per statistic, to the system. */ static void igb_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ - SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", - CTLFLAG_RD, &adapter->link_irq, 0, + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", + CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog 
timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", CTLFLAG_RD, &adapter->device_control, "Device Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", CTLFLAG_RD, &adapter->rx_control, "Receiver Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", CTLFLAG_RD, &adapter->int_mask, "Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", CTLFLAG_RD, &adapter->eint_mask, "Extended Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_tx, "Transmit Buffer Packet Allocation"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_rx, "Receive Buffer Packet Allocation"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", - CTLFLAG_RD, &adapter->queues[i], + CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i], sizeof(&adapter->queues[i]), igb_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", - CTLFLAG_RD, adapter, E1000_TDH(txr->me), + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", - CTLFLAG_RD, adapter, E1000_TDT(txr->me), + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, 
"no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", - CTLFLAG_RD, adapter, E1000_RDH(rxr->me), + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", - CTLFLAG_RD, adapter, E1000_RDT(rxr->me), + CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); /* ** VF adapter has a very limited set of stats ** since its not managing the metal, so to speak. 
*/ if (adapter->vf_ifp) { SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); return; } SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &stats->symerrs, "Symbol Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &stats->sec, "Sequence Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &stats->dc, "Defer Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &stats->mpc, "Missed Packets"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &stats->rnbc, "Receive No Buffers"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &stats->ruc, "Receive Undersize"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &stats->roc, "Oversized Packets 
Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &stats->rjc, "Recevied Jabber"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &stats->rxerrc, "Receive Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &stats->algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &stats->cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->xonrxc, "XON Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->xontxc, "XON Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->xoffrxc, "XOFF Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); 
SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &stats->tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &stats->tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = 
SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &stats->iac, "Interrupt Assertion Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &stats->icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &stats->icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &stats->ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &stats->ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &stats->ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &stats->ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &stats->icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &stats->icrxoc, "Interrupt Cause Receiver Overrun Count"); /* Host to Card Stats */ host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", CTLFLAG_RD, NULL, "Host to Card Statistics"); host_list = SYSCTL_CHILDREN(host_node); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", CTLFLAG_RD, &stats->cbtmpc, "Circuit Breaker Tx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", CTLFLAG_RD, &stats->htdpmc, "Host Transmit Discarded Packets"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", CTLFLAG_RD, &stats->rpthc, "Rx Packets To Host"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", CTLFLAG_RD, &stats->cbrmpc, "Circuit Breaker Rx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, 
"breaker_rx_pkt_drop", CTLFLAG_RD, &stats->cbrdpc, "Circuit Breaker Rx Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", CTLFLAG_RD, &stats->hgptc, "Host Good Packets Tx Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", CTLFLAG_RD, &stats->htcbdpc, "Host Tx Circuit Breaker Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", CTLFLAG_RD, &stats->hgorc, "Host Good Octets Received Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", CTLFLAG_RD, &stats->hgotc, "Host Good Octets Transmit Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", CTLFLAG_RD, &stats->lenerrs, "Length Errors"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", CTLFLAG_RD, &stats->scvpc, "SerDes/SGMII Code Violation Pkt Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", CTLFLAG_RD, &stats->hrmpc, "Header Redirection Missed Packet Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. 
*/ if (result == 1) { adapter = (struct adapter *)arg1; igb_print_nvm_info(adapter); } return (error); } static void igb_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static void igb_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); + OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* ** Manage DMA Coalesce: ** Control values: ** 0/1 - off/on ** Legal timer values are: ** 250,500,1000-10000 in thousands */ static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (adapter->dmac) { case 
0: /*Disabling */ break; case 1: /* Just enable and use default */ adapter->dmac = 1000; break; case 250: case 500: case 1000: case 2000: case 3000: case 4000: case 5000: case 6000: case 7000: case 8000: case 9000: case 10000: /* Legal values - allow */ break; default: /* Do nothing, illegal value */ adapter->dmac = 0; return (EINVAL); } /* Reinit the interface */ igb_init(adapter); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled */ static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec._82575.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); IGB_CORE_LOCK(adapter); adapter->hw.dev_spec._82575.eee_disable = (value != 0); igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); return (0); } Index: stable/9/sys/dev/e1000/if_lem.c =================================================================== --- stable/9/sys/dev/e1000/if_lem.c (revision 273911) +++ stable/9/sys/dev/e1000/if_lem.c (revision 273912) @@ -1,4662 +1,4662 @@ /****************************************************************************** Copyright (c) 2001-2012, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "if_lem.h" /********************************************************************* * Legacy Em Driver version: *********************************************************************/ char lem_driver_version[] = "1.0.6"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static em_vendor_info_t lem_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection */ { 
0x8086, E1000_DEV_ID_82540EM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82540EM_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82540EP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82540EP_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82540EP_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541ER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541ER_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541GI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541GI_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541GI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82542, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82543GC_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82543GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82544EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82544EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82544GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82544GC_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545EM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545EM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545GM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545GM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545GM_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_PCIE, PCI_ANY_ID, 
PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82547EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82547EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82547GI, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. *********************************************************************/ static char *lem_strings[] = { "Intel(R) PRO/1000 Legacy Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int lem_probe(device_t); static int lem_attach(device_t); static int lem_detach(device_t); static int lem_shutdown(device_t); static int lem_suspend(device_t); static int lem_resume(device_t); static void lem_start(struct ifnet *); static void lem_start_locked(struct ifnet *ifp); static int lem_ioctl(struct ifnet *, u_long, caddr_t); static void lem_init(void *); static void lem_init_locked(struct adapter *); static void lem_stop(void *); static void lem_media_status(struct ifnet *, struct ifmediareq *); static int lem_media_change(struct ifnet *); static void lem_identify_hardware(struct adapter *); static int lem_allocate_pci_resources(struct adapter *); static int lem_allocate_irq(struct adapter *adapter); static void lem_free_pci_resources(struct adapter *); static void lem_local_timer(void *); static int lem_hardware_init(struct adapter *); static int lem_setup_interface(device_t, struct adapter *); static void lem_setup_transmit_structures(struct adapter *); static void lem_initialize_transmit_unit(struct adapter *); static int lem_setup_receive_structures(struct adapter *); static void lem_initialize_receive_unit(struct adapter *); 
static void lem_enable_intr(struct adapter *); static void lem_disable_intr(struct adapter *); static void lem_free_transmit_structures(struct adapter *); static void lem_free_receive_structures(struct adapter *); static void lem_update_stats_counters(struct adapter *); static void lem_add_hw_stats(struct adapter *adapter); static void lem_txeof(struct adapter *); static void lem_tx_purge(struct adapter *); static int lem_allocate_receive_structures(struct adapter *); static int lem_allocate_transmit_structures(struct adapter *); static bool lem_rxeof(struct adapter *, int, int *); #ifndef __NO_STRICT_ALIGNMENT static int lem_fixup_rx(struct adapter *); #endif static void lem_receive_checksum(struct adapter *, struct e1000_rx_desc *, struct mbuf *); static void lem_transmit_checksum_setup(struct adapter *, struct mbuf *, u32 *, u32 *); static void lem_set_promisc(struct adapter *); static void lem_disable_promisc(struct adapter *); static void lem_set_multi(struct adapter *); static void lem_update_link_status(struct adapter *); static int lem_get_buf(struct adapter *, int); static void lem_register_vlan(void *, struct ifnet *, u16); static void lem_unregister_vlan(void *, struct ifnet *, u16); static void lem_setup_vlan_hw_support(struct adapter *); static int lem_xmit(struct adapter *, struct mbuf **); static void lem_smartspeed(struct adapter *); static int lem_82547_fifo_workaround(struct adapter *, int); static void lem_82547_update_fifo_head(struct adapter *, int); static int lem_82547_tx_fifo_reset(struct adapter *); static void lem_82547_move_tail(void *); static int lem_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void lem_dma_free(struct adapter *, struct em_dma_alloc *); static int lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void lem_print_nvm_info(struct adapter *); static int lem_is_valid_ether_addr(u8 *); static u32 lem_fill_descriptors (bus_addr_t address, u32 length, PDESC_ARRAY desc_array); static int 
lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void lem_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); static void lem_set_flow_cntrl(struct adapter *, const char *, const char *, int *, int); /* Management and WOL Support */ static void lem_init_manageability(struct adapter *); static void lem_release_manageability(struct adapter *); static void lem_get_hw_control(struct adapter *); static void lem_release_hw_control(struct adapter *); static void lem_get_wakeup(device_t); static void lem_enable_wakeup(device_t); static int lem_enable_phy_wakeup(struct adapter *); static void lem_led_func(void *, int); static void lem_intr(void *); static int lem_irq_fast(void *); static void lem_handle_rxtx(void *context, int pending); static void lem_handle_link(void *context, int pending); static void lem_add_rx_process_limit(struct adapter *, const char *, const char *, int *, int); #ifdef DEVICE_POLLING static poll_handler_t lem_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t lem_methods[] = { /* Device interface */ DEVMETHOD(device_probe, lem_probe), DEVMETHOD(device_attach, lem_attach), DEVMETHOD(device_detach, lem_detach), DEVMETHOD(device_shutdown, lem_shutdown), DEVMETHOD(device_suspend, lem_suspend), DEVMETHOD(device_resume, lem_resume), DEVMETHOD_END }; static driver_t lem_driver = { "em", lem_methods, sizeof(struct adapter), }; extern devclass_t em_devclass; DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0); MODULE_DEPEND(lem, pci, 1, 1, 1); MODULE_DEPEND(lem, ether, 1, 1, 1); /********************************************************************* * Tunable default values. 
*********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); static int lem_rxd = EM_DEFAULT_RXD; static int lem_txd = EM_DEFAULT_TXD; static int lem_smart_pwr_down = FALSE; /* Controls whether promiscuous also shows bad packets */ static int lem_debug_sbp = FALSE; TUNABLE_INT("hw.em.tx_int_delay", &lem_tx_int_delay_dflt); TUNABLE_INT("hw.em.rx_int_delay", &lem_rx_int_delay_dflt); TUNABLE_INT("hw.em.tx_abs_int_delay", &lem_tx_abs_int_delay_dflt); TUNABLE_INT("hw.em.rx_abs_int_delay", &lem_rx_abs_int_delay_dflt); TUNABLE_INT("hw.em.rxd", &lem_rxd); TUNABLE_INT("hw.em.txd", &lem_txd); TUNABLE_INT("hw.em.smart_pwr_down", &lem_smart_pwr_down); TUNABLE_INT("hw.em.sbp", &lem_debug_sbp); /* Interrupt style - default to fast */ static int lem_use_legacy_irq = 0; TUNABLE_INT("hw.em.use_legacy_irq", &lem_use_legacy_irq); /* How many packets rxeof tries to clean at a time */ static int lem_rx_process_limit = 100; TUNABLE_INT("hw.em.rx_process_limit", &lem_rx_process_limit); /* Flow control setting - default to FULL */ static int lem_fc_setting = e1000_fc_full; TUNABLE_INT("hw.em.fc_setting", &lem_fc_setting); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * em_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. 
*
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *
 *  The device description is formatted with a bounded snprintf() so
 *  that a longer branding or version string is truncated rather than
 *  written past the end of the local buffer.
 *********************************************************************/
static int
lem_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	/* Reject anything that does not carry the expected vendor ID. */
	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	/*
	 * Walk the ID table up to its all-zero terminator.  A table
	 * entry whose sub-vendor/sub-device is PCI_ANY_ID matches any
	 * value read from the device for that field.
	 */
	ent = lem_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			/* Bounded write: never exceeds adapter_name. */
			snprintf(adapter_name, sizeof(adapter_name),
			    "%s %s", lem_strings[ent->index],
			    lem_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	/* No table entry matched this device. */
	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
*
 *  return 0 on success, positive on failure
 *********************************************************************/

/*
 * lem_attach - bring up one adapter instance.
 *
 * In order: initialises the softc locks and callouts, registers sysctls,
 * identifies the hardware, allocates PCI resources, validates the
 * descriptor-count tunables, allocates the TX/RX descriptor rings and the
 * multicast array, resets the hardware, validates the EEPROM and MAC
 * address, initialises the hardware, allocates TX/RX buffer structures,
 * sets up the interrupt and the network interface, and finally registers
 * VLAN event handlers and the LED device.
 *
 * Every failure path sets `error` to a positive errno and jumps to one of
 * the err_* labels at the bottom, which unwind in reverse allocation
 * order and destroy the locks before returning `error`.
 */
static int
lem_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("lem_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
	EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev));
	EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    lem_sysctl_nvm_info, "I", "NVM Information");

	/* Each callout is tied to the mutex passed alongside it. */
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->tx_mtx, 0);

	/* Determine hardware and mac info */
	lem_identify_hardware(adapter);

	/* Setup PCI resources */
	if (lem_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	lem_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), lem_rx_int_delay_dflt);
	lem_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), lem_tx_int_delay_dflt);
	if (adapter->hw.mac.type >= e1000_82540) {
		lem_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_RADV),
		    lem_rx_abs_int_delay_dflt);
		lem_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_TADV),
		    lem_tx_abs_int_delay_dflt);
		lem_add_int_delay_sysctl(adapter, "itr",
		    "interrupt delay limit in usecs/4",
		    &adapter->tx_itr,
		    E1000_REGISTER(&adapter->hw, E1000_ITR),
		    DEFAULT_ITR);
	}

	/* Sysctls for limiting the amount of work done in the taskqueue */
	lem_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    lem_rx_process_limit);

	/* Sysctl for setting the interface flow control */
	lem_set_flow_cntrl(adapter, "flow_control",
	    "flow control setting",
	    &adapter->fc_setting, lem_fc_setting);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.  An invalid tunable is not fatal: the
	 * default count is used instead and a message is printed.
	 */
	if (((lem_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && lem_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac.type < e1000_82544 && lem_txd > EM_MAX_TXD_82543) ||
	    (lem_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, lem_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = lem_txd;
	if (((lem_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && lem_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac.type < e1000_82544 && lem_rxd > EM_MAX_RXD_82543) ||
	    (lem_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, lem_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = lem_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	e1000_init_script_state_82541(&adapter->hw, TRUE);
	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (lem_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base =
	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (lem_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base =
	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_hw_init;
	}

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_hw_init;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!lem_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Initialize the hardware */
	if (lem_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Allocate transmit descriptors and buffers */
	if (lem_allocate_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		error = ENOMEM;
		goto err_tx_struct;
	}

	/* Allocate receive descriptors and buffers */
	if (lem_allocate_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		error = ENOMEM;
		goto err_rx_struct;
	}

	/*
	**  Do interrupt configuration
	*/
	error = lem_allocate_irq(adapter);
	if (error)
		goto err_rx_struct;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	lem_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (lem_setup_interface(dev, adapter) != 0) {
		/*
		 * 'error' is still 0 at this point because
		 * lem_allocate_irq() succeeded, so it has to be set
		 * explicitly.  Otherwise attach would free everything
		 * below and still report success.
		 * NOTE(review): the callee's own error value is not used
		 * because its return convention is not visible here;
		 * ENXIO matches the other generic setup failures above.
		 */
		error = ENXIO;
		goto err_rx_struct;
	}

	/* Initialize statistics */
	lem_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	lem_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Do we need workaround for 82544 PCI-X adapter? */
	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
	    adapter->hw.mac.type == e1000_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    lem_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    lem_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	lem_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		lem_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(lem_led_func, adapter,
	    device_get_nameunit(dev));

#ifdef DEV_NETMAP
	lem_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("lem_attach: end");

	return (0);

	/*
	 * Error unwinding.  Labels fall through into one another, so a
	 * jump to a given label also runs everything below it.
	 */
err_rx_struct:
	lem_free_transmit_structures(adapter);
err_tx_struct:
err_hw_init:
	lem_release_hw_control(adapter);
	lem_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	lem_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	lem_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_TX_LOCK_DESTROY(adapter);
	EM_RX_LOCK_DESTROY(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
*
 *  return 0 on success, positive on failure
 *********************************************************************/

/*
 * lem_detach - tear down one adapter instance.
 *
 * Refuses with EBUSY while a VLAN trunk is still attached to the ifnet.
 * Otherwise it stops the adapter under the core and TX locks,
 * deregisters the VLAN event handlers, detaches the ifnet, drains both
 * callouts, and releases PCI resources, buffer structures, descriptor
 * rings, the multicast array and finally the locks.  Returns 0.
 */
static int
lem_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	/*
	 * Stop the adapter with both locks held.  in_detach is set first;
	 * lem_ioctl() checks it and returns early once it is non-zero.
	 */
	EM_CORE_LOCK(adapter);
	EM_TX_LOCK(adapter);
	adapter->in_detach = 1;
	lem_stop(adapter);
	e1000_phy_hw_reset(&adapter->hw);

	lem_release_manageability(adapter);

	EM_TX_UNLOCK(adapter);
	EM_CORE_UNLOCK(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	/* Wait for any running timer handlers before freeing resources. */
	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */
	lem_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	lem_free_transmit_structures(adapter);
	lem_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		lem_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		lem_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	lem_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);
	/* Locks go last: nothing above may run after they are destroyed. */
	EM_TX_LOCK_DESTROY(adapter);
	EM_RX_LOCK_DESTROY(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

/* Shutdown is handled exactly like suspend: delegate to lem_suspend(). */
static int
lem_shutdown(device_t dev)
{
	return lem_suspend(dev);
}

/*
 *
Suspend/resume device methods.
 */

/*
 * lem_suspend - under the core lock, release manageability and hardware
 * control and enable wakeup, then return the result of
 * bus_generic_suspend().
 */
static int
lem_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	lem_release_manageability(adapter);
	lem_release_hw_control(adapter);
	lem_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

/*
 * lem_resume - re-run the locked init path and manageability setup under
 * the core lock, kick the transmit path, then return the result of
 * bus_generic_resume().
 */
static int
lem_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	lem_init_locked(adapter);
	lem_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	/* lem_start() takes the TX lock itself, so call it unlocked. */
	lem_start(ifp);

	return bus_generic_resume(dev);
}

/*
 * lem_start_locked - move packets from the interface send queue onto the
 * TX ring.
 *
 * The caller must hold the TX lock (asserted below).  Does nothing unless
 * the interface is RUNNING and not OACTIVE and the link is active.  Sets
 * IFF_DRV_OACTIVE when a packet has to be requeued or when the number of
 * free descriptors is at or below EM_TX_OP_THRESHOLD.
 */
static void
lem_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		lem_txeof(adapter);
		/* Now do we at least have a minimal? */
		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return;
		}
	}

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (lem_xmit(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			/* Still have the mbuf: put it back and stop. */
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		adapter->watchdog_check = TRUE;
		adapter->watchdog_time = ticks;
	}
	if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	return;
}

/*
 * lem_start - unlocked wrapper: take the TX lock and run
 * lem_start_locked() if the interface is RUNNING.
 */
static void
lem_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_TX_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		lem_start_locked(ifp);
	EM_TX_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  lem_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

/*
 * `data` is interpreted as a struct ifreq for most commands and as a
 * struct ifaddr for SIOCSIFADDR.  Returns 0 immediately, without doing
 * anything, once adapter->in_detach is set.  Commands not handled here
 * are passed to ether_ioctl().
 */
static int
lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				lem_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		/* Only the 82542 is limited to standard-size frames. */
		switch (adapter->hw.mac.type) {
		case e1000_82542:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			/* Leaves the outer switch (command). */
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		/* Re-initialise so the new frame size takes effect. */
		lem_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/*
				 * Already running: only reprogram the
				 * receive filter if PROMISC/ALLMULTI
				 * changed since the last call.
				 */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					lem_disable_promisc(adapter);
					lem_set_promisc(adapter);
				}
			} else
				lem_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				EM_TX_LOCK(adapter);
				lem_stop(adapter);
				EM_TX_UNLOCK(adapter);
			}
		/* Remember the flags for the next change comparison. */
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			lem_disable_intr(adapter);
			lem_set_multi(adapter);
			if (adapter->hw.mac.type == e1000_82542 &&
			    adapter->hw.revision_id == E1000_REVISION_2) {
				lem_initialize_receive_unit(adapter);
			}
			/*
			 * With DEVICE_POLLING compiled in, interrupts are
			 * re-enabled only when polling is not active.
			 */
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				lem_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* FALLTHROUGH: set and get share the ifmedia_ioctl() call. */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* Bits set in mask are the capabilities being toggled. */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(lem_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				lem_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				lem_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		/* Checksum/VLAN-tagging changes need a re-init if running. */
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			lem_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
*
 *  return 0 on success, positive on failure
 **********************************************************************/

/*
 * NOTE: despite the header above, lem_init_locked() is declared void.
 * Failures are reported with device_printf() followed by an early
 * return, in which case IFF_DRV_RUNNING is not set.
 *
 * The caller must hold the core lock (asserted below).  The adapter is
 * stopped first, then the packet buffer split, MAC address, hardware,
 * TX/RX units, VLAN and promiscuous settings are programmed, and the
 * local timer is started.
 */
static void
lem_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("lem_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	EM_TX_LOCK(adapter);
	lem_stop(adapter);
	EM_TX_UNLOCK(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		/* Software TX FIFO state consumed by the 82547 workaround. */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K.   */
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("lem_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (lem_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	lem_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac.type >= e1000_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	}

	/* Configure for OS presence */
	lem_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	lem_setup_transmit_structures(adapter);
	lem_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	lem_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (lem_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		/* Undo the partial bring-up before bailing out. */
		EM_TX_LOCK(adapter);
		lem_stop(adapter);
		EM_TX_UNLOCK(adapter);
		return;
	}
	lem_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			lem_setup_vlan_hw_support(adapter);
		else {
			/* Tagging only: set the VME bit in CTRL. */
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	lem_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* (Re)arm the periodic timer to fire after hz ticks. */
	callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		lem_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		lem_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		lem_get_hw_control(adapter);
}

/*
 * lem_init - unlocked wrapper: run lem_init_locked() with the core lock
 * held.  `arg` is the struct adapter pointer.
 */
static void
lem_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	lem_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/

/*
 * Returns rx_done, which starts at 0 and is passed by pointer to
 * lem_rxeof() (presumably the number of packets processed - confirm
 * against lem_rxeof()).  Returns 0 without doing any work when the
 * interface is not RUNNING.  On POLL_AND_CHECK_STATUS the interrupt
 * cause register is read and link changes are handled under the core
 * lock.
 */
static int
lem_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	u32		reg_icr, rx_done = 0;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			lem_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    lem_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	/* RX runs without the core lock; TX work takes the TX lock. */
	lem_rxeof(adapter, count, &rx_done);

	EM_TX_LOCK(adapter);
	lem_txeof(adapter);
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		lem_start_locked(ifp);
	EM_TX_UNLOCK(adapter);
	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/

/*
 * Ignored entirely when polling is enabled or the interface is not
 * RUNNING.  Otherwise reads the interrupt cause register under the core
 * lock.  A value of 0 or 0xffffffff is treated as "nothing to do", a
 * link event is handled and returns early, and anything else triggers
 * RX and TX processing.
 */
static void
lem_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;
	u32		reg_icr;


	if ((ifp->if_capenable & IFCAP_POLLING) ||
	    ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
		return;

	EM_CORE_LOCK(adapter);
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if ((reg_icr == 0xffffffff) || (reg_icr == 0)) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		callout_stop(&adapter->timer);
		adapter->hw.mac.get_link_status = 1;
		lem_update_link_status(adapter);
		/* Deal with TX cruft when link lost */
		lem_tx_purge(adapter);
		callout_reset(&adapter->timer, hz,
		    lem_local_timer, adapter);
		EM_CORE_UNLOCK(adapter);
		return;
	}

	EM_CORE_UNLOCK(adapter);
	/* -1 is passed as the count and no rx_done pointer is supplied. */
	lem_rxeof(adapter, -1, NULL);

	EM_TX_LOCK(adapter);
	lem_txeof(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		lem_start_locked(ifp);
	EM_TX_UNLOCK(adapter);
	return;
}


/*
 * lem_handle_link - link-change handler; `context` is the adapter.
 * lem_irq_fast() enqueues adapter->link_task on link events; this is
 * presumably that task's handler (the task setup is not visible here).
 * Does nothing unless the interface is RUNNING.  `pending` is unused.
 */
static void
lem_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	lem_update_link_status(adapter);
	/* Deal with TX cruft when link lost */
	lem_tx_purge(adapter);
	callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
	EM_CORE_UNLOCK(adapter);
}


/* Combined RX/TX handler, used by Legacy and MSI */
/*
 * `context` is the adapter; `pending` is unused.  If lem_rxeof() reports
 * more work, the task re-enqueues itself and returns with interrupts
 * still masked.  Otherwise interrupts are re-enabled while the
 * interface is RUNNING.
 */
static void
lem_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;


	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(adapter);
		lem_txeof(adapter);
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			lem_start_locked(ifp);
		EM_TX_UNLOCK(adapter);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
			return;
		}
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		lem_enable_intr(adapter);
}

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/

/*
 * Interrupt filter: reads the interrupt cause register and returns
 * FILTER_STRAY when it is all-ones or zero.  Otherwise it masks
 * interrupts, enqueues the RX/TX task (and the link task on a link
 * event) and returns FILTER_HANDLED.  No locks are taken here.
 */
static int
lem_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	lem_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
*
 **********************************************************************/

/*
 * Fill in *ifmr with the current media state.
 *
 * The link state is refreshed under the core lock.  The status is always
 * marked valid and the active word always carries IFM_ETHER; when the
 * link is up, IFM_ACTIVE plus the speed/duplex (copper) or the 1000base
 * fiber subtype with full duplex (fiber/serdes) are OR-ed in as well.
 */
static void
lem_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("lem_media_status: begin");

	EM_CORE_LOCK(adapter);
	lem_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (adapter->link_active) {
		ifmr->ifm_status |= IFM_ACTIVE;

		if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
		    adapter->hw.phy.media_type ==
		    e1000_media_type_internal_serdes) {
			/* The 82545 reports LX; every other MAC reports SX. */
			u_char subtype;

			if (adapter->hw.mac.type == e1000_82545)
				subtype = IFM_1000_LX;
			else
				subtype = IFM_1000_SX;
			ifmr->ifm_active |= subtype | IFM_FDX;
		} else {
			/* Copper: unknown speeds add no subtype bits. */
			if (adapter->link_speed == 10)
				ifmr->ifm_active |= IFM_10_T;
			else if (adapter->link_speed == 100)
				ifmr->ifm_active |= IFM_100_TX;
			else if (adapter->link_speed == 1000)
				ifmr->ifm_active |= IFM_1000_T;

			ifmr->ifm_active |=
			    (adapter->link_duplex == FULL_DUPLEX) ?
			    IFM_FDX : IFM_HDX;
		}
	}

	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  media/mediopt option with ifconfig.
*
 **********************************************************************/

/*
 * Apply the media word currently selected in adapter->media.
 *
 * Returns EINVAL if the selected media is not Ethernet.  Otherwise the
 * autonegotiation / forced speed-duplex settings are derived from the
 * media subtype, the adapter is re-initialised under the core lock and 0
 * is returned.  An unrecognised subtype is only logged; the re-init
 * still runs and 0 is still returned.
 */
static int
lem_media_change(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifmedia	*media = &adapter->media;
	int		want_fdx;

	INIT_DEBUGOUT("lem_media_change: begin");

	if (IFM_TYPE(media->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);

	/* Only consulted by the forced 10/100 cases below. */
	want_fdx = ((media->ifm_media & IFM_GMASK) == IFM_FDX);

	switch (IFM_SUBTYPE(media->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;

	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit is still negotiated, advertising 1000/full only. */
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;

	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		adapter->hw.mac.forced_speed_duplex = want_fdx ?
		    ADVERTISE_100_FULL : ADVERTISE_100_HALF;
		break;

	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		adapter->hw.mac.forced_speed_duplex = want_fdx ?
		    ADVERTISE_10_FULL : ADVERTISE_10_HALF;
		break;

	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	lem_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
*
 *  return 0 on success, positive on failure
 **********************************************************************/

/*
 * lem_xmit - map one mbuf chain for DMA and queue it on the TX ring.
 *
 * On success the chain is owned by the driver: it is recorded in the
 * tx_buffer of its last descriptor and the tail register is advanced.
 *
 * On failure a positive errno is returned and *m_headp tells the caller
 * who owns the chain:
 *   - *m_headp == NULL: the chain was freed here (m_pullup/m_defrag
 *     failure, or a DMA load error other than ENOMEM) and must not be
 *     requeued;
 *   - *m_headp != NULL: the condition is transient (ENOMEM from the DMA
 *     load, or not enough free descriptors) and the caller may requeue.
 * *m_headp may also be replaced by a different chain (m_pullup/m_defrag).
 */
static int
lem_xmit(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			error, nsegs, i, j, first, last = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	/*
	** When doing checksum offload, it is critical to
	** make sure the first mbuf has more than header,
	** because that routine expects data to be present.
	*/
	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		/* Transient: leave the chain with the caller for a retry. */
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		/*
		 * Fatal for this chain.  It has to be freed here: if it
		 * were handed back, the caller would requeue it at the
		 * head of the send queue and the same failure would
		 * repeat on every later attempt, stalling the queue.
		 */
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Two descriptors are always kept in reserve. */
	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
		lem_transmit_checksum_setup(adapter, m_head,
		    &txd_upper, &txd_lower);

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* If adapter is 82544 and on PCIX bus */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			u32		array_elements, counter;
			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = lem_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				/*
				 * Splitting can need more descriptors than
				 * nsegs, so availability is re-checked for
				 * every descriptor actually consumed.
				 */
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				ctxd = &adapter->tx_desc_base[i];
				ctxd->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				ctxd->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower | (u16)
				    desc_array.descriptor[counter].length));
				ctxd->upper.data =
				    htole32((txd_upper));
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
				tx_buffer->m_head = NULL;
				tx_buffer->next_eop = -1;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			ctxd = &adapter->tx_desc_base[i];
			seg_addr = segs[j].ds_addr;
			seg_len  = segs[j].ds_len;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
			    adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data =
			    htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
			tx_buffer->m_head = NULL;
			tx_buffer->next_eop = -1;
		}
	}

	adapter->next_avail_tx_desc = i;

	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else
		adapter->num_tx_desc_avail -= nsegs;

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		ctxd->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);
		/* Tell hardware to add tag */
		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	/*
	 * The mbuf is attached to the LAST buffer of the packet.  The map
	 * that was loaded belongs to the FIRST buffer, so the two buffers
	 * swap maps to keep the loaded map next to the mbuf.
	 */
	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;
	adapter->watchdog_time = ticks;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac.type == e1000_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		lem_82547_move_tail(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i);
		if (adapter->hw.mac.type == e1000_82547)
			lem_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}

/*********************************************************************
 *
 * 82547 workaround to avoid controller hang in half-duplex environment.
 * The workaround is to avoid queuing a large packet that would span
 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
 * in this case. We do that only when FIFO is quiescent.
* **********************************************************************/ static void lem_82547_move_tail(void *arg) { struct adapter *adapter = arg; struct e1000_tx_desc *tx_desc; u16 hw_tdt, sw_tdt, length = 0; bool eop = 0; EM_TX_LOCK_ASSERT(adapter); hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0)); sw_tdt = adapter->next_avail_tx_desc; while (hw_tdt != sw_tdt) { tx_desc = &adapter->tx_desc_base[hw_tdt]; length += tx_desc->lower.flags.length; eop = tx_desc->lower.data & E1000_TXD_CMD_EOP; if (++hw_tdt == adapter->num_tx_desc) hw_tdt = 0; if (eop) { if (lem_82547_fifo_workaround(adapter, length)) { adapter->tx_fifo_wrk_cnt++; callout_reset(&adapter->tx_fifo_timer, 1, lem_82547_move_tail, adapter); break; } E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt); lem_82547_update_fifo_head(adapter, length); length = 0; } } } static int lem_82547_fifo_workaround(struct adapter *adapter, int len) { int fifo_space, fifo_pkt_len; fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR); if (adapter->link_duplex == HALF_DUPLEX) { fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) { if (lem_82547_tx_fifo_reset(adapter)) return (0); else return (1); } } return (0); } static void lem_82547_update_fifo_head(struct adapter *adapter, int len) { int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR); /* tx_fifo_head is always 16 byte aligned */ adapter->tx_fifo_head += fifo_pkt_len; if (adapter->tx_fifo_head >= adapter->tx_fifo_size) { adapter->tx_fifo_head -= adapter->tx_fifo_size; } } static int lem_82547_tx_fifo_reset(struct adapter *adapter) { u32 tctl; if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) == E1000_READ_REG(&adapter->hw, E1000_TDH(0))) && (E1000_READ_REG(&adapter->hw, E1000_TDFT) == E1000_READ_REG(&adapter->hw, E1000_TDFH)) && (E1000_READ_REG(&adapter->hw, E1000_TDFTS) == E1000_READ_REG(&adapter->hw, E1000_TDFHS)) && (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) { /* Disable TX unit */ 
tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl & ~E1000_TCTL_EN); /* Reset FIFO pointers */ E1000_WRITE_REG(&adapter->hw, E1000_TDFT, adapter->tx_head_addr); E1000_WRITE_REG(&adapter->hw, E1000_TDFH, adapter->tx_head_addr); E1000_WRITE_REG(&adapter->hw, E1000_TDFTS, adapter->tx_head_addr); E1000_WRITE_REG(&adapter->hw, E1000_TDFHS, adapter->tx_head_addr); /* Re-enable TX unit */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); E1000_WRITE_FLUSH(&adapter->hw); adapter->tx_fifo_head = 0; adapter->tx_fifo_reset_cnt++; return (TRUE); } else { return (FALSE); } } static void lem_set_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (lem_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } } static void lem_disable_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } 
/********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void lem_set_multi(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ifmultiaddr *ifma; u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("lem_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks 
for link status and updates statistics. * **********************************************************************/ static void lem_local_timer(void *arg) { struct adapter *adapter = arg; EM_CORE_LOCK_ASSERT(adapter); lem_update_link_status(adapter); lem_update_stats_counters(adapter); lem_smartspeed(adapter); /* * We check the watchdog: the time since * the last TX descriptor was cleaned. * This implies a functional TX engine. */ if ((adapter->watchdog_check == TRUE) && (ticks - adapter->watchdog_time > EM_WATCHDOG)) goto hung; callout_reset(&adapter->timer, hz, lem_local_timer, adapter); return; hung: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; lem_init_locked(adapter); } static void lem_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; u32 link_check = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? 
"Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; ifp->if_baudrate = adapter->link_speed * 1000000; if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { ifp->if_baudrate = adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* Link down, disable watchdog */ adapter->watchdog_check = FALSE; if_link_state_change(ifp, LINK_STATE_DOWN); } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void lem_stop(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; EM_CORE_LOCK_ASSERT(adapter); EM_TX_LOCK_ASSERT(adapter); INIT_DEBUGOUT("lem_stop: begin"); lem_disable_intr(adapter); callout_stop(&adapter->timer); callout_stop(&adapter->tx_fifo_timer); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); e1000_reset_hw(&adapter->hw); if (adapter->hw.mac.type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. 
* **********************************************************************/ static void lem_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int lem_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int val, rid, error = E1000_SUCCESS; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ if (adapter->hw.mac.type > e1000_82543) { /* Figure our where our IO BAR is ? 
*/ for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { adapter->io_rid = rid; break; } rid += 4; /* check for 64bit BAR */ if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) rid += 4; } if (rid >= PCIR_CIS) { device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); return (ENXIO); } adapter->hw.io_base = 0; adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->ioport); adapter->osdep.io_bus_space_handle = rman_get_bushandle(adapter->ioport); } adapter->hw.back = &adapter->osdep; return (error); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ int lem_allocate_irq(struct adapter *adapter) { device_t dev = adapter->dev; int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* We allocate a single interrupt resource */ adapter->res[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res[0] == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* Do Legacy setup? */ if (lem_use_legacy_irq) { if ((error = bus_setup_intr(dev, adapter->res[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, lem_intr, adapter, &adapter->tag[0])) != 0) { device_printf(dev, "Failed to register interrupt handler"); return (error); } return (0); } /* * Use a Fast interrupt and the associated * deferred processing contexts. 
*/ TASK_INIT(&adapter->rxtx_task, 0, lem_handle_rxtx, adapter); TASK_INIT(&adapter->link_task, 0, lem_handle_link, adapter); adapter->tq = taskqueue_create_fast("lem_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res[0], INTR_TYPE_NET, lem_irq_fast, NULL, adapter, &adapter->tag[0])) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } return (0); } static void lem_free_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; if (adapter->tag[0] != NULL) { bus_teardown_intr(dev, adapter->res[0], adapter->tag[0]); adapter->tag[0] = NULL; } if (adapter->res[0] != NULL) { bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res[0]); } if (adapter->memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); if (adapter->ioport != NULL) bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid, adapter->ioport); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. * **********************************************************************/ static int lem_hardware_init(struct adapter *adapter) { device_t dev = adapter->dev; u16 rx_buffer_size; INIT_DEBUGOUT("lem_hardware_init: begin"); /* Issue a global reset */ e1000_reset_hw(&adapter->hw); /* When hardware is reset, fifo_head is also reset */ adapter->tx_fifo_head = 0; /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. 
Here we use an arbitary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) << 10 ); adapter->hw.fc.high_water = rx_buffer_size - roundup2(adapter->max_frame_size, 1024); adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500; adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME; adapter->hw.fc.send_xon = TRUE; /* Set Flow control, use the tunable location if sane */ if ((lem_fc_setting >= 0) && (lem_fc_setting < 4)) adapter->hw.fc.requested_mode = lem_fc_setting; else adapter->hw.fc.requested_mode = e1000_fc_none; if (e1000_init_hw(&adapter->hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return (EIO); } e1000_check_for_link(&adapter->hw); return (0); } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ static int lem_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("lem_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = lem_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = lem_ioctl; ifp->if_start = lem_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); ether_ifattach(ifp, adapter->hw.mac.addr); ifp->if_capabilities = ifp->if_capenable = 0; if (adapter->hw.mac.type >= e1000_82543) { ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; } /* * Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; /* ** Dont turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* Enable only WOL MAGIC by default */ if (adapter->wol) { ifp->if_capabilities |= IFCAP_WOL; ifp->if_capenable |= IFCAP_WOL_MAGIC; } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, lem_media_change, lem_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /********************************************************************* * * Workaround for SmartSpeed on 82541 and 82547 controllers * **********************************************************************/ static void lem_smartspeed(struct adapter *adapter) { u16 phy_tmp; if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || adapter->hw.mac.autoneg == 0 || (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; if (adapter->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ 
e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); if(phy_tmp & CR_1000T_MS_ENABLE) { phy_tmp &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); adapter->smartspeed++; if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } } return; } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); phy_tmp |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } /* Restart process after EM_SMARTSPEED_MAX iterations */ if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) adapter->smartspeed = 0; } /* * Manage DMA'able memory. 
*/ static void lem_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int lem_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, lem_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (error); } static void lem_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_map != NULL) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_map = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * 
* Allocate memory for tx_buffer structures. The tx_buffer stores all
 * the information needed to transmit a packet on the wire.
 *
 **********************************************************************/
/*
 * Creates the TX DMA tag, the zeroed tx_buffer array (one entry per
 * descriptor) and a DMA map for every entry.
 *
 * Returns 0 on success.  On any failure everything set up so far is
 * released through lem_free_transmit_structures() and the error is
 * returned.
 */
static int
lem_allocate_transmit_structures(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error;

	/*
	 * Create DMA tags for tx descriptors
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
				1, 0,			/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				MCLBYTES * EM_MAX_SCATTER,	/* maxsize */
				EM_MAX_SCATTER,		/* nsegments */
				MCLBYTES,		/* maxsegsize */
				0,			/* flags */
				NULL,			/* lockfunc */
				NULL,			/* lockarg */
				&adapter->txtag);
	if (error != 0) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	adapter->tx_buffer_area = malloc(
	    sizeof(*adapter->tx_buffer_area) * adapter->num_tx_desc,
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->tx_buffer_area == NULL) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	for (int idx = 0; idx < adapter->num_tx_desc; idx++) {
		struct em_buffer *txb = &adapter->tx_buffer_area[idx];

		error = bus_dmamap_create(adapter->txtag, 0, &txb->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
		/* No end-of-packet descriptor recorded yet. */
		txb->next_eop = -1;
	}

	return (0);

fail:
	lem_free_transmit_structures(adapter);
	return (error);
}

/*********************************************************************
 *
 *  (Re)Initialize transmit structures.
* **********************************************************************/ static void lem_setup_transmit_structures(struct adapter *adapter) { struct em_buffer *tx_buffer; #ifdef DEV_NETMAP /* we are already locked */ struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0); #endif /* DEV_NETMAP */ /* Clear the old ring contents */ bzero(adapter->tx_desc_base, (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); /* Free any existing TX buffers */ for (int i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { tx_buffer = &adapter->tx_buffer_area[i]; bus_dmamap_sync(adapter->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; #ifdef DEV_NETMAP if (slot) { /* the i-th NIC entry goes to slot si */ int si = netmap_idx_n2k(&na->tx_rings[0], i); uint64_t paddr; void *addr; addr = PNMB(slot + si, &paddr); adapter->tx_desc_base[i].buffer_addr = htole64(paddr); /* reload the map for netmap mode */ netmap_load_map(adapter->txtag, tx_buffer->map, addr); } #endif /* DEV_NETMAP */ tx_buffer->next_eop = -1; } /* Reset state */ adapter->last_hw_offload = 0; adapter->next_avail_tx_desc = 0; adapter->next_tx_to_clean = 0; adapter->num_tx_desc_avail = adapter->num_tx_desc; bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return; } /********************************************************************* * * Enable transmit unit. 
* **********************************************************************/ static void lem_initialize_transmit_unit(struct adapter *adapter) { u32 tctl, tipg = 0; u64 bus_addr; INIT_DEBUGOUT("lem_initialize_transmit_unit: begin"); /* Setup the Base and Length of the Tx Descriptor Ring */ bus_addr = adapter->txdma.dma_paddr; E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0), (u32)(bus_addr >> 32)); E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0), (u32)bus_addr); /* Setup the HW Tx Head and Tail descriptor pointers */ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0); E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(0))); /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); /* This write will effectively turn on the transmit unit. 
*/ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); /* Setup Transmit Descriptor Base Settings */ adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; } /********************************************************************* * * Free all transmit related data structures. * **********************************************************************/ static void lem_free_transmit_structures(struct adapter *adapter) { struct em_buffer *tx_buffer; INIT_DEBUGOUT("free_transmit_structures: begin"); if (adapter->tx_buffer_area != NULL) { for (int i = 0; i < adapter->num_tx_desc; i++) { tx_buffer = &adapter->tx_buffer_area[i]; if (tx_buffer->m_head != NULL) { bus_dmamap_sync(adapter->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } else if (tx_buffer->map != NULL) bus_dmamap_unload(adapter->txtag, tx_buffer->map); if (tx_buffer->map != NULL) { bus_dmamap_destroy(adapter->txtag, tx_buffer->map); tx_buffer->map = NULL; } } } if (adapter->tx_buffer_area != NULL) { free(adapter->tx_buffer_area, M_DEVBUF); adapter->tx_buffer_area = NULL; } if (adapter->txtag != NULL) { bus_dma_tag_destroy(adapter->txtag); adapter->txtag = NULL; } #if __FreeBSD_version >= 800000 if (adapter->br != NULL) buf_ring_free(adapter->br, M_DEVBUF); #endif } /********************************************************************* * * The offload context needs to be set when we transfer the first * packet of a particular protocol (TCP/UDP). This routine has been * enhanced to deal with inserted VLAN headers, and IPV6 (not complete) * * Added back the old method of keeping the current context type * and not setting if unnecessary, as this is reported to be a * big performance win. 
-jfv
 **********************************************************************/
/*
 * Prepare checksum offload for the packet in mp.
 *
 * Outputs: *txd_upper and *txd_lower are first zeroed and then receive
 * the option and command bits the caller is expected to merge into the
 * data descriptors of this packet.
 *
 * Side effects: when a new context is needed, one descriptor slot at
 * next_avail_tx_desc is filled in as a context descriptor, its
 * tx_buffer is marked empty, num_tx_desc_avail is decremented and
 * next_avail_tx_desc is advanced (with wrap).  last_hw_offload caches
 * the last TCP/UDP context type so that an identical context is not
 * written again.
 *
 * Frames whose ethertype is neither IPv4 nor IPv6 are left without any
 * offload bits.
 */
static void
lem_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
    u32 *txd_upper, u32 *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	struct em_buffer *tx_buffer;
	struct ether_vlan_header *eh;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6;
	int curr_txd, ehdrlen;
	u32 cmd, hdr_len, ip_hlen;
	u16 etype;
	u8 ipproto;

	cmd = hdr_len = ipproto = 0;
	*txd_upper = *txd_lower = 0;
	curr_txd = adapter->next_avail_tx_desc;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/*
	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
	 * TODO: Support SCTP too when it hits the tree.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		/* ip_hl counts 32-bit words; convert to bytes. */
		ip_hlen = ip->ip_hl << 2;

		/* Setup of IP header checksum. */
		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
			/*
			 * Start offset for header checksum calculation.
			 * End offset for header checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
			TXD->lower_setup.ip_fields.ipcse =
			    htole16(ehdrlen + ip_hlen);
			TXD->lower_setup.ip_fields.ipcso =
			    ehdrlen + offsetof(struct ip, ip_sum);
			cmd |= E1000_TXD_CMD_IP;
			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		}

		hdr_len = ehdrlen + ip_hlen;
		ipproto = ip->ip_p;

		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */

		/* IPv6 has no header checksum. */
		hdr_len = ehdrlen + ip_hlen;
		ipproto = ip6->ip6_nxt;

		break;
	default:
		/* Unsupported ethertype: no offload bits, no context. */
		return;
	}

	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
			/*
			 * no need for context if already set
			 *
			 * NOTE(review): this return also skips committing
			 * any IP fields written into the slot above, since
			 * the slot is not consumed -- confirm intended.
			 */
			if (adapter->last_hw_offload == CSUM_TCP)
				return;
			adapter->last_hw_offload = CSUM_TCP;
			/*
			 * Start offset for payload checksum calculation.
			 * End offset for payload checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->upper_setup.tcp_fields.tucss = hdr_len;
			TXD->upper_setup.tcp_fields.tucse = htole16(0);
			TXD->upper_setup.tcp_fields.tucso =
			    hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		}
		break;
	case IPPROTO_UDP:
	{
		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
			/* no need for context if already set */
			if (adapter->last_hw_offload == CSUM_UDP)
				return;
			adapter->last_hw_offload = CSUM_UDP;
			/*
			 * Start offset for header checksum calculation.
			 * End offset for header checksum calculation.
			 * Offset of place to put the checksum.
			 * Unlike the TCP case, E1000_TXD_CMD_TCP is not
			 * added to cmd here.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->upper_setup.tcp_fields.tucss = hdr_len;
			TXD->upper_setup.tcp_fields.tucse = htole16(0);
			TXD->upper_setup.tcp_fields.tucso =
			    hdr_len + offsetof(struct udphdr, uh_sum);
		}
		/* Fall Thru */
	}
	default:
		break;
	}

	/* Nothing was written into a context descriptor: done. */
	if (TXD == NULL)
		return;

	/* Finish the context descriptor and consume its ring slot. */
	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
	tx_buffer = &adapter->tx_buffer_area[curr_txd];
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++curr_txd == adapter->num_tx_desc)
		curr_txd = 0;

	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = curr_txd;
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue.
If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void lem_txeof(struct adapter *adapter) { int first, last, done, num_avail; struct em_buffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; struct ifnet *ifp = adapter->ifp; EM_TX_LOCK_ASSERT(adapter); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, 0)) return; #endif /* DEV_NETMAP */ if (adapter->num_tx_desc_avail == adapter->num_tx_desc) return; num_avail = adapter->num_tx_desc_avail; first = adapter->next_tx_to_clean; tx_desc = &adapter->tx_desc_base[first]; tx_buffer = &adapter->tx_buffer_area[first]; last = tx_buffer->next_eop; eop_desc = &adapter->tx_desc_base[last]; /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == adapter->num_tx_desc) last = 0; done = last; bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_POSTREAD); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (first != done) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; ++num_avail; if (tx_buffer->m_head) { ifp->if_opackets++; bus_dmamap_sync(adapter->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; adapter->watchdog_time = ticks; if (++first == adapter->num_tx_desc) first = 0; tx_buffer = &adapter->tx_buffer_area[first]; tx_desc = &adapter->tx_desc_base[first]; } /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { eop_desc = &adapter->tx_desc_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } 
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); adapter->next_tx_to_clean = first; adapter->num_tx_desc_avail = num_avail; /* * If we have enough room, clear IFF_DRV_OACTIVE to * tell the stack that it is OK to send packets. * If there are no pending descriptors, clear the watchdog. */ if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (adapter->num_tx_desc_avail == adapter->num_tx_desc) { adapter->watchdog_check = FALSE; return; } } } /********************************************************************* * * When Link is lost sometimes there is work still in the TX ring * which may result in a watchdog, rather than allow that we do an * attempted cleanup and then reinit here. Note that this has been * seens mostly with fiber adapters. * **********************************************************************/ static void lem_tx_purge(struct adapter *adapter) { if ((!adapter->link_active) && (adapter->watchdog_check)) { EM_TX_LOCK(adapter); lem_txeof(adapter); EM_TX_UNLOCK(adapter); if (adapter->watchdog_check) /* Still outstanding? */ lem_init_locked(adapter); } } /********************************************************************* * * Get a buffer from system mbuf buffer pool. * **********************************************************************/ static int lem_get_buf(struct adapter *adapter, int i) { struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct em_buffer *rx_buffer; int error, nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { adapter->mbuf_cluster_failed++; return (ENOBUFS); } m->m_len = m->m_pkthdr.len = MCLBYTES; if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) m_adj(m, ETHER_ALIGN); /* * Using memory from the mbuf cluster pool, invoke the * bus_dma machinery to arrange the memory mapping. 
*/ error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_free(m); return (error); } /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs == 1, ("Too many segments returned!")); rx_buffer = &adapter->rx_buffer_area[i]; if (rx_buffer->m_head != NULL) bus_dmamap_unload(adapter->rxtag, rx_buffer->map); map = rx_buffer->map; rx_buffer->map = adapter->rx_sparemap; adapter->rx_sparemap = map; bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD); rx_buffer->m_head = m; adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int lem_allocate_receive_structures(struct adapter *adapter) { device_t dev = adapter->dev; struct em_buffer *rx_buffer; int i, error; adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (adapter->rx_buffer_area == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &adapter->rxtag); if (error) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, error); goto fail; } /* Create the spare map (used by getbuf) */ error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT, &adapter->rx_sparemap); if (error) { device_printf(dev, "%s: bus_dmamap_create 
failed: %d\n", __func__, error); goto fail; } rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT, &rx_buffer->map); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } } return (0); fail: lem_free_receive_structures(adapter); return (error); } /********************************************************************* * * (Re)initialize receive structures. * **********************************************************************/ static int lem_setup_receive_structures(struct adapter *adapter) { struct em_buffer *rx_buffer; int i, error; #ifdef DEV_NETMAP /* we are already under lock */ struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0); #endif /* Reset descriptor ring */ bzero(adapter->rx_desc_base, (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc); /* Free current RX buffers. */ rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { if (rx_buffer->m_head != NULL) { bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(adapter->rxtag, rx_buffer->map); m_freem(rx_buffer->m_head); rx_buffer->m_head = NULL; } } /* Allocate new ones. 
*/ for (i = 0; i < adapter->num_rx_desc; i++) { #ifdef DEV_NETMAP if (slot) { /* the i-th NIC entry goes to slot si */ int si = netmap_idx_n2k(&na->rx_rings[0], i); uint64_t paddr; void *addr; addr = PNMB(slot + si, &paddr); netmap_load_map(adapter->rxtag, rx_buffer->map, addr); /* Update descriptor */ adapter->rx_desc_base[i].buffer_addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ error = lem_get_buf(adapter, i); if (error) return (error); } /* Setup our descriptor pointers */ adapter->next_rx_desc_to_check = 0; bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void lem_initialize_receive_unit(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; u64 bus_addr; u32 rctl, rxcsum; INIT_DEBUGOUT("lem_initialize_receive_unit: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); if (adapter->hw.mac.type >= e1000_82540) { E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. 
Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR); } /* Setup the Base and Length of the Rx Descriptor Ring */ bus_addr = adapter->rxdma.dma_paddr; E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0), (u32)(bus_addr >> 32)); E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0), (u32)bus_addr); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (e1000_tbi_sbp_enabled_82543(&adapter->hw)) rctl |= E1000_RCTL_SBP; else rctl &= ~E1000_RCTL_SBP; switch (adapter->rx_buffer_len) { default: case 2048: rctl |= E1000_RCTL_SZ_2048; break; case 4096: rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE; break; case 8192: rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE; break; case 16384: rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE; break; } if (ifp->if_mtu > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Enable 82543 Receive Checksum Offload for TCP and UDP */ if ((adapter->hw.mac.type >= e1000_82543) && (ifp->if_capenable & IFCAP_RXCSUM)) { rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM); rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum); } /* Enable Receives */ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); /* * Setup the HW Rx Head and * Tail Descriptor Pointers */ E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); rctl = adapter->num_rx_desc - 1; /* default RDT value */ #ifdef DEV_NETMAP /* preserve buffers already made available to clients */ if (ifp->if_capenable & IFCAP_NETMAP) rctl -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[0]); #endif /* DEV_NETMAP */ 
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl); return; } /********************************************************************* * * Free receive related data structures. * **********************************************************************/ static void lem_free_receive_structures(struct adapter *adapter) { struct em_buffer *rx_buffer; int i; INIT_DEBUGOUT("free_receive_structures: begin"); if (adapter->rx_sparemap) { bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap); adapter->rx_sparemap = NULL; } /* Cleanup any existing buffers */ if (adapter->rx_buffer_area != NULL) { rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { if (rx_buffer->m_head != NULL) { bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(adapter->rxtag, rx_buffer->map); m_freem(rx_buffer->m_head); rx_buffer->m_head = NULL; } else if (rx_buffer->map != NULL) bus_dmamap_unload(adapter->rxtag, rx_buffer->map); if (rx_buffer->map != NULL) { bus_dmamap_destroy(adapter->rxtag, rx_buffer->map); rx_buffer->map = NULL; } } } if (adapter->rx_buffer_area != NULL) { free(adapter->rx_buffer_area, M_DEVBUF); adapter->rx_buffer_area = NULL; } if (adapter->rxtag != NULL) { bus_dma_tag_destroy(adapter->rxtag); adapter->rxtag = NULL; } } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. 
*
 *  For polling we also now return the number of cleaned packets
 *
 *  adapter: softc; takes and releases the RX lock internally.
 *  count:   maximum number of complete (EOP) frames to process.
 *  done:    optional out, number of frames handed to if_input.
 *
 *  Returns TRUE when the last descriptor examined had its DD bit set,
 *  FALSE otherwise.
 *********************************************************************/
static bool
lem_rxeof(struct adapter *adapter, int count, int *done)
{
	struct ifnet *ifp = adapter->ifp;
	struct mbuf *mp;
	u8 status = 0, accept_frame = 0, eop = 0;
	u16 len, desc_len, prev_len_adj;
	int i, rx_sent = 0;
	struct e1000_rx_desc *current_desc;

	EM_RX_LOCK(adapter);
	i = adapter->next_rx_desc_to_check;
	current_desc = &adapter->rx_desc_base[i];
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

#ifdef DEV_NETMAP
	/* NOTE(review): *done is not written on this early return. */
	if (netmap_rx_irq(ifp, 0, &rx_sent)) {
		EM_RX_UNLOCK(adapter);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	/* Nothing completed by the hardware yet. */
	if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
		if (done != NULL)
			*done = rx_sent;
		EM_RX_UNLOCK(adapter);
		return (FALSE);
	}

	while (count != 0 && ifp->if_drv_flags & IFF_DRV_RUNNING) {
		struct mbuf *m = NULL;

		status = current_desc->status;
		if ((status & E1000_RXD_STAT_DD) == 0)
			break;

		mp = adapter->rx_buffer_area[i].m_head;
		/*
		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
		 * needs to access the last received byte in the mbuf.
		 */
		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
		    BUS_DMASYNC_POSTREAD);

		accept_frame = 1;
		prev_len_adj = 0;
		desc_len = le16toh(current_desc->length);
		if (status & E1000_RXD_STAT_EOP) {
			count--;
			eop = 1;
			/*
			 * Strip the CRC; when this descriptor holds fewer
			 * than ETHER_CRC_LEN bytes the remainder is trimmed
			 * from the previous mbuf (prev_len_adj).
			 */
			if (desc_len < ETHER_CRC_LEN) {
				len = 0;
				prev_len_adj = ETHER_CRC_LEN - desc_len;
			} else
				len = desc_len - ETHER_CRC_LEN;
		} else {
			eop = 0;
			len = desc_len;
		}

		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			u8	last_byte;
			u32	pkt_len = desc_len;

			if (adapter->fmp != NULL)
				pkt_len += adapter->fmp->m_pkthdr.len;

			/* NOTE(review): assumes desc_len > 0 here. */
			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
			if (TBI_ACCEPT(&adapter->hw, status,
			    current_desc->errors, pkt_len, last_byte,
			    adapter->min_frame_size, adapter->max_frame_size)) {
				e1000_tbi_adjust_stats_82543(&adapter->hw,
				    &adapter->stats, pkt_len,
				    adapter->hw.mac.addr,
				    adapter->max_frame_size);
				if (len > 0)
					len--;
			} else
				accept_frame = 0;
		}

		if (accept_frame) {
			/* Replace the slot's buffer before taking mp away. */
			if (lem_get_buf(adapter, i) != 0) {
				ifp->if_iqdrops++;
				goto discard;
			}

			/* Assign correct length to the current fragment */
			mp->m_len = len;

			if (adapter->fmp == NULL) {
				mp->m_pkthdr.len = len;
				adapter->fmp = mp; /* Store the first mbuf */
				adapter->lmp = mp;
			} else {
				/* Chain mbuf's together */
				mp->m_flags &= ~M_PKTHDR;
				/*
				 * Adjust length of previous mbuf in chain if
				 * we received less than 4 bytes in the last
				 * descriptor.
				 */
				if (prev_len_adj > 0) {
					adapter->lmp->m_len -= prev_len_adj;
					adapter->fmp->m_pkthdr.len -=
					    prev_len_adj;
				}
				adapter->lmp->m_next = mp;
				adapter->lmp = adapter->lmp->m_next;
				adapter->fmp->m_pkthdr.len += len;
			}

			if (eop) {
				adapter->fmp->m_pkthdr.rcvif = ifp;
				ifp->if_ipackets++;
				lem_receive_checksum(adapter, current_desc,
				    adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
				/* On failure lem_fixup_rx() clears fmp. */
				if (adapter->max_frame_size >
				    (MCLBYTES - ETHER_ALIGN) &&
				    lem_fixup_rx(adapter) != 0)
					goto skip;
#endif
				if (status & E1000_RXD_STAT_VP) {
					adapter->fmp->m_pkthdr.ether_vtag =
					    le16toh(current_desc->special);
					adapter->fmp->m_flags |= M_VLANTAG;
				}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
				m = adapter->fmp;
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
		} else {
			adapter->dropped_pkts++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain */
			mp = adapter->rx_buffer_area[i].m_head;
			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->max_frame_size <=
			    (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			/* Drop any partially assembled frame as well. */
			if (adapter->fmp != NULL) {
				m_freem(adapter->fmp);
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
			m = NULL;
		}

		/* Zero out the receive descriptors status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		/* Call into the stack */
		if (m != NULL) {
			/*
			 * The RX lock is dropped around if_input, so the
			 * ring index is saved before and reloaded after.
			 */
			adapter->next_rx_desc_to_check = i;
			EM_RX_UNLOCK(adapter);
			(*ifp->if_input)(ifp, m);
			EM_RX_LOCK(adapter);
			rx_sent++;
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;

	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
	E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
	if (done != NULL)
		*done = rx_sent;
	EM_RX_UNLOCK(adapter);
	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}

#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign entire payload on
 * architectures with strict alignment. This is serious design mistake of 8254x
 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
 * payload. On architectures without strict alignment restrictions 8254x still
 * performs unaligned memory access which would reduce the performance too.
 * To avoid copying over an entire frame to align, we allocate a new mbuf and
 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
 * existing mbuf chain.
 *
 * Be aware, best performance of the 8254x is achieved only when jumbo frame is
 * not used at all on architectures with strict alignment.
 *
 * Returns 0 on success.  When no header mbuf can be allocated the frame in
 * adapter->fmp is freed, fmp is cleared and ENOMEM is returned.
 */
static int
lem_fixup_rx(struct adapter *adapter)
{
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = adapter->fmp;
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		/* Room left in the cluster: slide the data forward in place. */
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		MGETHDR(n, M_NOWAIT, MT_DATA);
		if (n != NULL) {
			/* Move the Ethernet header into its own leading mbuf. */
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			adapter->fmp = n;
		} else {
			adapter->dropped_pkts++;
			m_freem(adapter->fmp);
			adapter->fmp = NULL;
			error = ENOMEM;
		}
	}

	return (error);
}
#endif

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
* *********************************************************************/ static void lem_receive_checksum(struct adapter *adapter, struct e1000_rx_desc *rx_desc, struct mbuf *mp) { /* 82543 or newer only */ if ((adapter->hw.mac.type < e1000_82543) || /* Ignore Checksum bit is set */ (rx_desc->status & E1000_RXD_STAT_IXSM)) { mp->m_pkthdr.csum_flags = 0; return; } if (rx_desc->status & E1000_RXD_STAT_IPCS) { /* Did it pass? */ if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else { mp->m_pkthdr.csum_flags = 0; } } if (rx_desc->status & E1000_RXD_STAT_TCPCS) { /* Did it pass? */ if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } } /* * This routine is run via an vlan * config EVENT */ static void lem_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) lem_init_locked(adapter); EM_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void lem_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) lem_init_locked(adapter); EM_CORE_UNLOCK(adapter); } static void 
lem_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void lem_enable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void lem_disable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void lem_init_manageability(struct adapter *adapter) { /* A shared code workaround */ if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void lem_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * lem_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. 
* For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void lem_get_hw_control(struct adapter *adapter) { u32 ctrl_ext; ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); return; } /* * lem_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void lem_release_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (!adapter->has_manage) return; ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int lem_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. 
*/
static void
lem_get_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	u16		eeprom_data = 0, device_id, apme_mask;

	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
	apme_mask = EM_EEPROM_APME;

	/* Pick the NVM word (and mask) that holds the APME bit. */
	switch (adapter->hw.mac.type) {
	case e1000_82542:
	case e1000_82543:
		/* No NVM read: eeprom_data stays 0, so wol is not set. */
		break;
	case e1000_82544:
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL2_REG, 1, &eeprom_data);
		apme_mask = EM_82544_APME;
		break;
	case e1000_82546:
	case e1000_82546_rev_3:
		/* Dual-port parts keep one control word per function. */
		if (adapter->hw.bus.func == 1) {
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
			break;
		} else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	default:
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	}
	if (eeprom_data & apme_mask)
		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
         * We have the eeprom settings, now apply the special cases
         * where the eeprom may be wrong or the board won't support
         * wake on lan on a particular port
	 */
	device_id = pci_get_device(dev);
        switch (device_id) {
	case E1000_DEV_ID_82546GB_PCIE:
		adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546EB_FIBER:
	case E1000_DEV_ID_82546GB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
                /* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
                break;
	}
	return;
}

/*
 * Enable PCI Wake On Lan capability
 *
 * Returns without touching the hardware when the device has no PCI
 * power-management capability.
 */
static void
lem_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16     	status;

	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
		return;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);

	/* Keep the laser running on Fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	/*
	** Determine type of Wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		/* Multicast wake enabled: also set RCTL_MPE. */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	if (adapter->hw.mac.type == e1000_pchlan) {
		/* A PHY wakeup failure skips the PME request below. */
		if (lem_enable_phy_wakeup(adapter))
			return;
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}


        /* Request PME */
        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (ifp->if_capenable & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);

	return;
}

/*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the phy
**
** Returns 0 on success, otherwise the error from the PHY acquire,
** read or write that failed.
*/
static int
lem_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
		/* Each 32-bit MAC register is split into two 16-bit writes. */
		mreg = E1000_READ_REG(hw, E1000_RAL(i));
		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_RAR_M(i),
		    (u16)((mreg >> 16) & 0xFFFF));
		mreg = E1000_READ_REG(hw, E1000_RAH(i));
		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* copy MAC MTA to PHY MTA */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register */
	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
				<< BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	/* The PHY is held from here on; every later exit goes through out. */
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	hw->phy.ops.release(hw);

	return ret;
}

/*
 * LED callback: a non-zero onoff sets up and lights the LED, zero turns
 * it off and restores the LED configuration.  arg is the adapter softc.
 */
static void
lem_led_func(void *arg, int onoff)
{
	struct adapter	*adapter = arg;

	EM_CORE_LOCK(adapter);
	if (onoff) {
		e1000_setup_led(&adapter->hw);
		e1000_led_on(&adapter->hw);
	} else {
		e1000_led_off(&adapter->hw);
		e1000_cleanup_led(&adapter->hw);
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
* 82544 Coexistence issue workaround.
*    There are 2 issues.
*       1. Transmit Hang issue.
*    To detect this issue, following equation can be used...
*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
*	  If SUM[3:0] is in between 1 to 4, we will have this issue.
*
*       2. DAC issue.
*    To detect this issue, following equation can be used...
*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
*	  If SUM[3:0] is in between 9 to c, we will have this issue.
*
*
*    WORKAROUND:
*	  Make sure we do not have ending address
*	  as 1,2,3,4(Hang) or 9,a,b,c (DAC)
*
*    Fills desc_array with one or two (address, length) pieces covering
*    the buffer and returns the number of pieces.
*
*************************************************************************/
static u32
lem_fill_descriptors (bus_addr_t address, u32 length,
		PDESC_ARRAY desc_array)
{
	u32 safe_terminator;

	/* Since issue is sensitive to length and address.*/
	/* Let us first check the address...*/
	if (length <= 4) {
		/* Buffers of 4 bytes or less are never split. */
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator = (u32)((((u32)address & 0x7) +
	    (length & 0xF)) & 0xF);
	/* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
	if (safe_terminator == 0   ||
	(safe_terminator > 4   &&
	safe_terminator < 9)   ||
	(safe_terminator > 0xC &&
	safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	/* Split off the last 4 bytes into a second piece. */
	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

/**********************************************************************
 *
 *  Update the board statistics counters.
*
 *  Adds each hardware statistics register to its running total in
 *  adapter->stats, then refreshes the interface collision and error
 *  counters from those totals.
 *
 **********************************************************************/
static void
lem_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	/* Read these two only on copper media or while link is up. */
	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	/* NOTE(review): only the high dwords (TORH/TOTH) are read here. */
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* These counters are read on 82543 and newer MACs only. */
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc +
	    adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol +
	    adapter->stats.latecol + adapter->watchdog_events;
}

/*
 * Export a single 32-bit register via a read-only sysctl.
 * oid_arg1 carries the adapter and oid_arg2 the register offset; the
 * register is read on every request.
 */
static int
lem_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
lem_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = &adapter->stats;

	struct sysctl_oid *stat_node;
	struct sysctl_oid_list *stat_list;

	/* Driver Statistics */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
			 "Std mbuf failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
			 "Std mbuf cluster failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
			CTLFLAG_RD, &adapter->dropped_pkts,
			"Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver tx dma failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail1",
			CTLFLAG_RD, &adapter->no_tx_desc_avail1,
			"Not enough tx descriptors failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail2",
			CTLFLAG_RD, &adapter->no_tx_desc_avail2,
			"Not enough tx descriptors failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
			CTLFLAG_RD, &adapter->rx_overruns,
			"RX
overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, lem_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, lem_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_workaround", CTLFLAG_RD, &adapter->tx_fifo_wrk_cnt, "TX FIFO workaround events"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_reset", CTLFLAG_RD, &adapter->tx_fifo_reset_cnt, "TX FIFO resets"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(0), lem_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(0), lem_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(0), lem_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(0), lem_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, 
stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, 
&adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); 
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. 
* **********************************************************************/ static int lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) { adapter = (struct adapter *)arg1; lem_print_nvm_info(adapter); } return (error); } static void lem_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error; int usecs; int ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; EM_CORE_LOCK(adapter); regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. 
*/ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); EM_CORE_UNLOCK(adapter); return (0); } static void lem_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, lem_sysctl_int_delay, "I", description); } static void lem_set_flow_cntrl(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); + OID_AUTO, name, CTLFLAG_RW, limit, value, description); } static void lem_add_rx_process_limit(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); + OID_AUTO, name, CTLFLAG_RW, limit, value, description); } Index: stable/9/sys/dev/e1000 =================================================================== --- stable/9/sys/dev/e1000 (revision 273911) +++ stable/9/sys/dev/e1000 (revision 273912) Property changes on: stable/9/sys/dev/e1000 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/dev/e1000:r263710,273377-273378,273423,273455 Index: stable/9/sys/dev/hatm/if_hatm.c =================================================================== --- stable/9/sys/dev/hatm/if_hatm.c (revision 273911) +++ stable/9/sys/dev/hatm/if_hatm.c (revision 273912) @@ -1,2412 +1,2420 @@ /*- * 
Copyright (c) 2001-2003 * Fraunhofer Institute for Open Communication Systems (FhG Fokus). * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Author: Hartmut Brandt * * ForeHE driver. * * This file contains the module and driver infrastructure stuff as well * as a couple of utility functions and the entire initialisation. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_natm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ENABLE_BPF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include static const struct { uint16_t vid; uint16_t did; const char *name; } hatm_devs[] = { { 0x1127, 0x400, "FORE HE" }, { 0, 0, NULL } }; SYSCTL_DECL(_hw_atm); MODULE_DEPEND(hatm, utopia, 1, 1, 1); MODULE_DEPEND(hatm, pci, 1, 1, 1); MODULE_DEPEND(hatm, atm, 1, 1, 1); #define EEPROM_DELAY 400 /* microseconds */ /* Read from EEPROM 0000 0011b */ static const uint32_t readtab[] = { HE_REGM_HOST_PROM_SEL | HE_REGM_HOST_PROM_CLOCK, 0, HE_REGM_HOST_PROM_CLOCK, 0, /* 0 */ HE_REGM_HOST_PROM_CLOCK, 0, /* 0 */ HE_REGM_HOST_PROM_CLOCK, 0, /* 0 */ HE_REGM_HOST_PROM_CLOCK, 0, /* 0 */ HE_REGM_HOST_PROM_CLOCK, 0, /* 0 */ HE_REGM_HOST_PROM_CLOCK, HE_REGM_HOST_PROM_DATA_IN, /* 0 */ HE_REGM_HOST_PROM_CLOCK | HE_REGM_HOST_PROM_DATA_IN, HE_REGM_HOST_PROM_DATA_IN, /* 1 */ HE_REGM_HOST_PROM_CLOCK | HE_REGM_HOST_PROM_DATA_IN, HE_REGM_HOST_PROM_DATA_IN, /* 1 */ }; static const uint32_t clocktab[] = { 0, HE_REGM_HOST_PROM_CLOCK, 0, HE_REGM_HOST_PROM_CLOCK, 0, HE_REGM_HOST_PROM_CLOCK, 0, HE_REGM_HOST_PROM_CLOCK, 0, HE_REGM_HOST_PROM_CLOCK, 0, HE_REGM_HOST_PROM_CLOCK, 0, HE_REGM_HOST_PROM_CLOCK, 0, HE_REGM_HOST_PROM_CLOCK, 0 }; /* * Convert cell rate to ATM Forum format */ u_int hatm_cps2atmf(uint32_t pcr) { u_int e; if (pcr == 0) return (0); pcr <<= 9; e = 0; while (pcr > (1024 - 1)) { e++; pcr >>= 1; } return ((1 << 14) | (e << 9) | (pcr & 0x1ff)); } u_int hatm_atmf2cps(uint32_t fcr) { fcr &= 0x7fff; return ((1 << ((fcr >> 9) & 0x1f)) * (512 + (fcr & 0x1ff)) / 512 * (fcr >> 14)); } /************************************************************ * * Initialisation */ 
/*
 * Probe for a HE controller
 *
 * Compares the PCI vendor and device ID of dev against every entry of
 * hatm_devs (the list ends at the entry whose name is NULL).  On a match
 * the device description is set to the entry's name and
 * BUS_PROBE_DEFAULT is returned; otherwise ENXIO.
 */
static int
hatm_probe(device_t dev)
{
	int i;

	for (i = 0; hatm_devs[i].name; i++)
		if (pci_get_vendor(dev) == hatm_devs[i].vid &&
		    pci_get_device(dev) == hatm_devs[i].did) {
			device_set_desc(dev, hatm_devs[i].name);
			return (BUS_PROBE_DEFAULT);
		}
	return (ENXIO);
}

/*
 * Allocate and map DMA-able memory. We support only contiguous mappings.
 *
 * dmaload_helper is the callback handed to bus_dmamap_load() below.
 * arg points to a bus_addr_t that receives the address of the single
 * segment.  When error is non-zero nothing is stored.
 */
static void
dmaload_helper(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	if (error)
		return;
	/* Exactly one segment, and its address must fit in 32 bits. */
	KASSERT(nsegs == 1, ("too many segments for DMA: %d", nsegs));
	KASSERT(segs[0].ds_addr <= 0xffffffffUL,
	    ("phys addr too large %lx", (u_long)segs[0].ds_addr));

	*(bus_addr_t *)arg = segs[0].ds_addr;
}

/*
 * Create a DMA tag for mem, allocate the memory and load the map.
 *
 * mem->size and mem->align are inputs (size may be raised, see below);
 * on success mem->tag, mem->map, mem->base and mem->paddr are filled in
 * and 0 is returned.  On failure a message naming 'what' is printed,
 * everything acquired so far is released, mem->base is NULL and the
 * error code is returned.
 */
static int
hatm_alloc_dmamem(struct hatm_softc *sc, const char *what, struct dmamem *mem)
{
	int error;

	/* A NULL base marks the region as not allocated. */
	mem->base = NULL;

	/*
	 * Alignment does not work in the bus_dmamem_alloc function below
	 * on FreeBSD. malloc seems to align objects at least to the object
	 * size so increase the size to the alignment if the size is less
	 * than the alignment.
	 * XXX on sparc64 this is (probably) not needed.
	 */
	if (mem->size < mem->align)
		mem->size = mem->align;

	error = bus_dma_tag_create(sc->parent_tag, mem->align, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
	    NULL, NULL, mem->size, 1,
	    BUS_SPACE_MAXSIZE_32BIT, BUS_DMA_ALLOCNOW,
	    NULL, NULL, &mem->tag);
	if (error) {
		if_printf(sc->ifp, "DMA tag create (%s)\n", what);
		return (error);
	}

	error = bus_dmamem_alloc(mem->tag, &mem->base, 0, &mem->map);
	if (error) {
		if_printf(sc->ifp, "DMA mem alloc (%s): %d\n", what, error);
		bus_dma_tag_destroy(mem->tag);
		mem->base = NULL;
		return (error);
	}

	/* dmaload_helper stores the segment address in mem->paddr. */
	error = bus_dmamap_load(mem->tag, mem->map, mem->base, mem->size,
	    dmaload_helper, &mem->paddr, BUS_DMA_NOWAIT);
	if (error) {
		if_printf(sc->ifp, "DMA map load (%s): %d\n", what, error);
		bus_dmamem_free(mem->tag, mem->base, mem->map);
		bus_dma_tag_destroy(mem->tag);
		mem->base = NULL;
		return (error);
	}

	DBG(sc, DMA, ("%s S/A/V/P 0x%x 0x%x %p 0x%lx", what, mem->size,
	    mem->align, mem->base, (u_long)mem->paddr));

	return (0);
}

/*
 * Destroy all the resources of an DMA-able memory region.
 */
static void
hatm_destroy_dmamem(struct dmamem *mem)
{
	/* A NULL base means there is nothing to release. */
	if (mem->base != NULL) {
		bus_dmamap_unload(mem->tag, mem->map);
		bus_dmamem_free(mem->tag, mem->base, mem->map);
		(void)bus_dma_tag_destroy(mem->tag);
		mem->base = NULL;
	}
}

/*
 * Initialize/destroy DMA maps for the large pool 0
 *
 * hatm_destroy_rmaps destroys the sc->lbufs_size maps in sc->rmaps and
 * frees both the sc->rmaps and sc->lbufs arrays, skipping whichever is
 * NULL.  The pointers themselves are not reset.
 */
static void
hatm_destroy_rmaps(struct hatm_softc *sc)
{
	u_int b;

	DBG(sc, ATTACH, ("destroying rmaps and lbuf pointers..."));
	if (sc->rmaps != NULL) {
		for (b = 0; b < sc->lbufs_size; b++)
			bus_dmamap_destroy(sc->mbuf_tag, sc->rmaps[b]);
		free(sc->rmaps, M_DEVBUF);
	}
	if (sc->lbufs != NULL)
		free(sc->lbufs, M_DEVBUF);
}

/*
 * Allocate the sc->lbufs array (zeroed) and the sc->rmaps array, each
 * with sc->lbufs_size entries, and create one DMA map per rmaps entry.
 * Panics if a map cannot be created.
 */
static void
hatm_init_rmaps(struct hatm_softc *sc)
{
	u_int b;
	int err;

	DBG(sc, ATTACH, ("allocating rmaps and lbuf pointers..."));
	sc->lbufs = malloc(sizeof(sc->lbufs[0]) * sc->lbufs_size,
	    M_DEVBUF, M_ZERO | M_WAITOK);

	/* allocate and create the DMA maps for the large pool */
	sc->rmaps = malloc(sizeof(sc->rmaps[0]) * sc->lbufs_size,
	    M_DEVBUF, M_WAITOK);
	for (b = 0; b < sc->lbufs_size; b++) {
		err = bus_dmamap_create(sc->mbuf_tag, 0, &sc->rmaps[b]);
		if (err != 0)
			panic("bus_dmamap_create: %d\n", err);
	}
}

/*
 * Initialize and destroy small mbuf page pointers and pages
 *
 * hatm_destroy_smbufs walks the first sc->mbuf_npages pages, reports
 * every chunk whose header still has MBUF_CARD or MBUF_USED set, then
 * unloads and destroys the page's DMA map and frees the page.  Finally
 * the page pointer array itself is freed.
 */
static void
hatm_destroy_smbufs(struct hatm_softc *sc)
{
	u_int i, b;
	struct mbuf_page *pg;
	struct mbuf_chunk_hdr *h;

	if (sc->mbuf_pages != NULL) {
		for (i = 0; i < sc->mbuf_npages; i++) {
			pg = sc->mbuf_pages[i];
			for (b = 0; b < pg->hdr.nchunks; b++) {
				/* Header of chunk b sits hdroff bytes into it. */
				h = (struct mbuf_chunk_hdr *) ((char *)pg +
				    b * pg->hdr.chunksize + pg->hdr.hdroff);
				if (h->flags & MBUF_CARD)
					if_printf(sc->ifp,
					    "%s -- mbuf page=%u card buf %u\n",
					    __func__, i, b);
				if (h->flags & MBUF_USED)
					if_printf(sc->ifp,
					    "%s -- mbuf page=%u used buf %u\n",
					    __func__, i, b);
			}
			bus_dmamap_unload(sc->mbuf_tag, pg->hdr.map);
			bus_dmamap_destroy(sc->mbuf_tag, pg->hdr.map);
			free(pg, M_DEVBUF);
		}
		free(sc->mbuf_pages, M_DEVBUF);
	}
}

/*
 * Allocate the array of sc->mbuf_max_pages page pointers; no page is
 * in use yet.
 */
static void
hatm_init_smbufs(struct hatm_softc *sc)
{
	sc->mbuf_pages = malloc(sizeof(sc->mbuf_pages[0]) *
	    sc->mbuf_max_pages, M_DEVBUF, M_WAITOK);
	sc->mbuf_npages = 0;
}

/*
 * Initialize/destroy TPDs. This is called from attach/detach.
 *
 * hatm_destroy_tpds does nothing when the TPD memory was never
 * allocated (sc->tpds.base == NULL).  Otherwise it warns about TPDs not
 * on the free list, destroys the DMA map of every TPD that is on the
 * free list, releases the TPD memory and frees the usage bitmap.
 */
static void
hatm_destroy_tpds(struct hatm_softc *sc)
{
	struct tpd *t;

	if (sc->tpds.base == NULL)
		return;

	DBG(sc, ATTACH, ("releasing TPDs ..."));
	if (sc->tpd_nfree != sc->tpd_total)
		if_printf(sc->ifp, "%u tpds still in use from %u\n",
		    sc->tpd_total - sc->tpd_nfree, sc->tpd_total);
	while ((t = SLIST_FIRST(&sc->tpd_free)) != NULL) {
		SLIST_REMOVE_HEAD(&sc->tpd_free, link);
		bus_dmamap_destroy(sc->tx_tag, t->map);
	}
	hatm_destroy_dmamem(&sc->tpds);
	free(sc->tpd_used, M_DEVBUF);
	DBG(sc, ATTACH, ("... done"));
}

/*
 * Allocate the TPD memory, create a DMA map for each of the
 * sc->tpd_total TPDs, put them all on the free list and allocate the
 * zeroed usage bitmap (one bit per TPD).
 *
 * Returns 0 on success.  If the memory or any map cannot be obtained
 * the maps created so far and the TPD memory are released again and the
 * error code is returned.
 */
static int
hatm_init_tpds(struct hatm_softc *sc)
{
	int error;
	u_int i;
	struct tpd *t;

	DBG(sc, ATTACH, ("allocating %u TPDs and maps ...", sc->tpd_total));
	error = hatm_alloc_dmamem(sc, "TPD memory", &sc->tpds);
	if (error != 0) {
		DBG(sc, ATTACH, ("... dmamem error=%d", error));
		return (error);
	}

	/* put all the TPDs on the free list and allocate DMA maps */
	for (i = 0; i < sc->tpd_total; i++) {
		t = TPD_ADDR(sc, i);
		t->no = i;
		t->mbuf = NULL;
		error = bus_dmamap_create(sc->tx_tag, 0, &t->map);
		if (error != 0) {
			DBG(sc, ATTACH, ("... dmamap error=%d", error));
			/*
			 * Only TPDs already on the free list own a map;
			 * the failing one was never inserted.
			 */
			while ((t = SLIST_FIRST(&sc->tpd_free)) != NULL) {
				SLIST_REMOVE_HEAD(&sc->tpd_free, link);
				bus_dmamap_destroy(sc->tx_tag, t->map);
			}
			hatm_destroy_dmamem(&sc->tpds);
			return (error);
		}

		SLIST_INSERT_HEAD(&sc->tpd_free, t, link);
	}

	/* allocate and zero bitmap */
	sc->tpd_used = malloc(sizeof(uint8_t) * (sc->tpd_total + 7) / 8,
	    M_DEVBUF, M_ZERO | M_WAITOK);
	sc->tpd_nfree = sc->tpd_total;

	DBG(sc, ATTACH, ("... done"));

	return (0);
}

/*
 * Free all the TPDs that were given to the card.
 * An mbuf chain may be attached to a TPD - free it also and
 * unload its associated DMA map.
*/ static void hatm_stop_tpds(struct hatm_softc *sc) { u_int i; struct tpd *t; DBG(sc, ATTACH, ("free TPDs ...")); for (i = 0; i < sc->tpd_total; i++) { if (TPD_TST_USED(sc, i)) { t = TPD_ADDR(sc, i); if (t->mbuf) { m_freem(t->mbuf); t->mbuf = NULL; bus_dmamap_unload(sc->tx_tag, t->map); } TPD_CLR_USED(sc, i); SLIST_INSERT_HEAD(&sc->tpd_free, t, link); sc->tpd_nfree++; } } } /* * This frees ALL resources of this interface and leaves the structure * in an indeterminate state. This is called just before detaching or * on a failed attach. No lock should be held. */ static void hatm_destroy(struct hatm_softc *sc) { u_int cid; bus_teardown_intr(sc->dev, sc->irqres, sc->ih); hatm_destroy_rmaps(sc); hatm_destroy_smbufs(sc); hatm_destroy_tpds(sc); if (sc->vcc_zone != NULL) { for (cid = 0; cid < HE_MAX_VCCS; cid++) if (sc->vccs[cid] != NULL) uma_zfree(sc->vcc_zone, sc->vccs[cid]); uma_zdestroy(sc->vcc_zone); } /* * Release all memory allocated to the various queues and * Status pages. These have there own flag which shows whether * they are really allocated. 
*/ hatm_destroy_dmamem(&sc->irq_0.mem); hatm_destroy_dmamem(&sc->rbp_s0.mem); hatm_destroy_dmamem(&sc->rbp_l0.mem); hatm_destroy_dmamem(&sc->rbp_s1.mem); hatm_destroy_dmamem(&sc->rbrq_0.mem); hatm_destroy_dmamem(&sc->rbrq_1.mem); hatm_destroy_dmamem(&sc->tbrq.mem); hatm_destroy_dmamem(&sc->tpdrq.mem); hatm_destroy_dmamem(&sc->hsp_mem); if (sc->irqres != NULL) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irqid, sc->irqres); if (sc->tx_tag != NULL) if (bus_dma_tag_destroy(sc->tx_tag)) if_printf(sc->ifp, "mbuf DMA tag busy\n"); if (sc->mbuf_tag != NULL) if (bus_dma_tag_destroy(sc->mbuf_tag)) if_printf(sc->ifp, "mbuf DMA tag busy\n"); if (sc->parent_tag != NULL) if (bus_dma_tag_destroy(sc->parent_tag)) if_printf(sc->ifp, "parent DMA tag busy\n"); if (sc->memres != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->memid, sc->memres); sysctl_ctx_free(&sc->sysctl_ctx); cv_destroy(&sc->cv_rcclose); cv_destroy(&sc->vcc_cv); mtx_destroy(&sc->mtx); if (sc->ifp != NULL) if_free(sc->ifp); } /* * 4.4 Card reset */ static int hatm_reset(struct hatm_softc *sc) { u_int v, count; WRITE4(sc, HE_REGO_RESET_CNTL, 0x00); BARRIER_W(sc); WRITE4(sc, HE_REGO_RESET_CNTL, 0xff); BARRIER_RW(sc); count = 0; while (((v = READ4(sc, HE_REGO_RESET_CNTL)) & HE_REGM_RESET_STATE) == 0) { BARRIER_R(sc); if (++count == 100) { if_printf(sc->ifp, "reset failed\n"); return (ENXIO); } DELAY(1000); } return (0); } /* * 4.5 Set Bus Width */ static void hatm_init_bus_width(struct hatm_softc *sc) { uint32_t v, v1; v = READ4(sc, HE_REGO_HOST_CNTL); BARRIER_R(sc); if (v & HE_REGM_HOST_BUS64) { sc->pci64 = 1; v1 = pci_read_config(sc->dev, HE_PCIR_GEN_CNTL_0, 4); v1 |= HE_PCIM_CTL0_64BIT; pci_write_config(sc->dev, HE_PCIR_GEN_CNTL_0, v1, 4); v |= HE_REGM_HOST_DESC_RD64 | HE_REGM_HOST_DATA_RD64 | HE_REGM_HOST_DATA_WR64; WRITE4(sc, HE_REGO_HOST_CNTL, v); BARRIER_W(sc); } else { sc->pci64 = 0; v = pci_read_config(sc->dev, HE_PCIR_GEN_CNTL_0, 4); v &= ~HE_PCIM_CTL0_64BIT; pci_write_config(sc->dev, 
HE_PCIR_GEN_CNTL_0, v, 4); } } /* * 4.6 Set Host Endianess */ static void hatm_init_endianess(struct hatm_softc *sc) { uint32_t v; v = READ4(sc, HE_REGO_LB_SWAP); BARRIER_R(sc); #if BYTE_ORDER == BIG_ENDIAN v |= HE_REGM_LBSWAP_INTR_SWAP | HE_REGM_LBSWAP_DESC_WR_SWAP | HE_REGM_LBSWAP_BIG_ENDIAN; v &= ~(HE_REGM_LBSWAP_DATA_WR_SWAP | HE_REGM_LBSWAP_DESC_RD_SWAP | HE_REGM_LBSWAP_DATA_RD_SWAP); #else v &= ~(HE_REGM_LBSWAP_DATA_WR_SWAP | HE_REGM_LBSWAP_DESC_RD_SWAP | HE_REGM_LBSWAP_DATA_RD_SWAP | HE_REGM_LBSWAP_INTR_SWAP | HE_REGM_LBSWAP_DESC_WR_SWAP | HE_REGM_LBSWAP_BIG_ENDIAN); #endif if (sc->he622) v |= HE_REGM_LBSWAP_XFER_SIZE; WRITE4(sc, HE_REGO_LB_SWAP, v); BARRIER_W(sc); } /* * 4.7 Read EEPROM */ static uint8_t hatm_read_prom_byte(struct hatm_softc *sc, u_int addr) { uint32_t val, tmp_read, byte_read; u_int i, j; int n; val = READ4(sc, HE_REGO_HOST_CNTL); val &= HE_REGM_HOST_PROM_BITS; BARRIER_R(sc); val |= HE_REGM_HOST_PROM_WREN; WRITE4(sc, HE_REGO_HOST_CNTL, val); BARRIER_W(sc); /* send READ */ for (i = 0; i < sizeof(readtab) / sizeof(readtab[0]); i++) { WRITE4(sc, HE_REGO_HOST_CNTL, val | readtab[i]); BARRIER_W(sc); DELAY(EEPROM_DELAY); } /* send ADDRESS */ for (n = 7, j = 0; n >= 0; n--) { WRITE4(sc, HE_REGO_HOST_CNTL, val | clocktab[j++] | (((addr >> n) & 1 ) << HE_REGS_HOST_PROM_DATA_IN)); BARRIER_W(sc); DELAY(EEPROM_DELAY); WRITE4(sc, HE_REGO_HOST_CNTL, val | clocktab[j++] | (((addr >> n) & 1 ) << HE_REGS_HOST_PROM_DATA_IN)); BARRIER_W(sc); DELAY(EEPROM_DELAY); } val &= ~HE_REGM_HOST_PROM_WREN; WRITE4(sc, HE_REGO_HOST_CNTL, val); BARRIER_W(sc); /* read DATA */ byte_read = 0; for (n = 7, j = 0; n >= 0; n--) { WRITE4(sc, HE_REGO_HOST_CNTL, val | clocktab[j++]); BARRIER_W(sc); DELAY(EEPROM_DELAY); tmp_read = READ4(sc, HE_REGO_HOST_CNTL); byte_read |= (uint8_t)(((tmp_read & HE_REGM_HOST_PROM_DATA_OUT) >> HE_REGS_HOST_PROM_DATA_OUT) << n); WRITE4(sc, HE_REGO_HOST_CNTL, val | clocktab[j++]); BARRIER_W(sc); DELAY(EEPROM_DELAY); } WRITE4(sc, HE_REGO_HOST_CNTL, val 
| clocktab[j++]); BARRIER_W(sc); DELAY(EEPROM_DELAY); return (byte_read); } static void hatm_init_read_eeprom(struct hatm_softc *sc) { u_int n, count; u_char byte; uint32_t v; for (n = count = 0; count < HE_EEPROM_PROD_ID_LEN; count++) { byte = hatm_read_prom_byte(sc, HE_EEPROM_PROD_ID + count); if (n > 0 || byte != ' ') sc->prod_id[n++] = byte; } while (n > 0 && sc->prod_id[n-1] == ' ') n--; sc->prod_id[n] = '\0'; for (n = count = 0; count < HE_EEPROM_REV_LEN; count++) { byte = hatm_read_prom_byte(sc, HE_EEPROM_REV + count); if (n > 0 || byte != ' ') sc->rev[n++] = byte; } while (n > 0 && sc->rev[n-1] == ' ') n--; sc->rev[n] = '\0'; IFP2IFATM(sc->ifp)->mib.hw_version = sc->rev[0]; IFP2IFATM(sc->ifp)->mib.serial = hatm_read_prom_byte(sc, HE_EEPROM_M_SN + 0) << 0; IFP2IFATM(sc->ifp)->mib.serial |= hatm_read_prom_byte(sc, HE_EEPROM_M_SN + 1) << 8; IFP2IFATM(sc->ifp)->mib.serial |= hatm_read_prom_byte(sc, HE_EEPROM_M_SN + 2) << 16; IFP2IFATM(sc->ifp)->mib.serial |= hatm_read_prom_byte(sc, HE_EEPROM_M_SN + 3) << 24; v = hatm_read_prom_byte(sc, HE_EEPROM_MEDIA + 0) << 0; v |= hatm_read_prom_byte(sc, HE_EEPROM_MEDIA + 1) << 8; v |= hatm_read_prom_byte(sc, HE_EEPROM_MEDIA + 2) << 16; v |= hatm_read_prom_byte(sc, HE_EEPROM_MEDIA + 3) << 24; switch (v) { case HE_MEDIA_UTP155: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_UTP_155; IFP2IFATM(sc->ifp)->mib.pcr = ATM_RATE_155M; break; case HE_MEDIA_MMF155: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_MM_155; IFP2IFATM(sc->ifp)->mib.pcr = ATM_RATE_155M; break; case HE_MEDIA_MMF622: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_MM_622; IFP2IFATM(sc->ifp)->mib.device = ATM_DEVICE_HE622; IFP2IFATM(sc->ifp)->mib.pcr = ATM_RATE_622M; sc->he622 = 1; break; case HE_MEDIA_SMF155: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_SM_155; IFP2IFATM(sc->ifp)->mib.pcr = ATM_RATE_155M; break; case HE_MEDIA_SMF622: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_SM_622; IFP2IFATM(sc->ifp)->mib.device = ATM_DEVICE_HE622; IFP2IFATM(sc->ifp)->mib.pcr = ATM_RATE_622M; sc->he622 = 
1; break; } IFP2IFATM(sc->ifp)->mib.esi[0] = hatm_read_prom_byte(sc, HE_EEPROM_MAC + 0); IFP2IFATM(sc->ifp)->mib.esi[1] = hatm_read_prom_byte(sc, HE_EEPROM_MAC + 1); IFP2IFATM(sc->ifp)->mib.esi[2] = hatm_read_prom_byte(sc, HE_EEPROM_MAC + 2); IFP2IFATM(sc->ifp)->mib.esi[3] = hatm_read_prom_byte(sc, HE_EEPROM_MAC + 3); IFP2IFATM(sc->ifp)->mib.esi[4] = hatm_read_prom_byte(sc, HE_EEPROM_MAC + 4); IFP2IFATM(sc->ifp)->mib.esi[5] = hatm_read_prom_byte(sc, HE_EEPROM_MAC + 5); } /* * Clear unused interrupt queue */ static void hatm_clear_irq(struct hatm_softc *sc, u_int group) { WRITE4(sc, HE_REGO_IRQ_BASE(group), 0); WRITE4(sc, HE_REGO_IRQ_HEAD(group), 0); WRITE4(sc, HE_REGO_IRQ_CNTL(group), 0); WRITE4(sc, HE_REGO_IRQ_DATA(group), 0); } /* * 4.10 Initialize interrupt queues */ static void hatm_init_irq(struct hatm_softc *sc, struct heirq *q, u_int group) { u_int i; if (q->size == 0) { hatm_clear_irq(sc, group); return; } q->group = group; q->sc = sc; q->irq = q->mem.base; q->head = 0; q->tailp = q->irq + (q->size - 1); *q->tailp = 0; for (i = 0; i < q->size; i++) q->irq[i] = HE_REGM_ITYPE_INVALID; WRITE4(sc, HE_REGO_IRQ_BASE(group), q->mem.paddr); WRITE4(sc, HE_REGO_IRQ_HEAD(group), ((q->size - 1) << HE_REGS_IRQ_HEAD_SIZE) | (q->thresh << HE_REGS_IRQ_HEAD_THRESH)); WRITE4(sc, HE_REGO_IRQ_CNTL(group), q->line); WRITE4(sc, HE_REGO_IRQ_DATA(group), 0); } /* * 5.1.3 Initialize connection memory */ static void hatm_init_cm(struct hatm_softc *sc) { u_int rsra, mlbm, rabr, numbuffs; u_int tsra, tabr, mtpd; u_int n; for (n = 0; n < HE_CONFIG_TXMEM; n++) WRITE_TCM4(sc, n, 0); for (n = 0; n < HE_CONFIG_RXMEM; n++) WRITE_RCM4(sc, n, 0); numbuffs = sc->r0_numbuffs + sc->r1_numbuffs + sc->tx_numbuffs; rsra = 0; mlbm = ((rsra + IFP2IFATM(sc->ifp)->mib.max_vccs * 8) + 0x7ff) & ~0x7ff; rabr = ((mlbm + numbuffs * 2) + 0x7ff) & ~0x7ff; sc->rsrb = ((rabr + 2048) + (2 * IFP2IFATM(sc->ifp)->mib.max_vccs - 1)) & ~(2 * IFP2IFATM(sc->ifp)->mib.max_vccs - 1); tsra = 0; sc->tsrb = tsra + 
IFP2IFATM(sc->ifp)->mib.max_vccs * 8; sc->tsrc = sc->tsrb + IFP2IFATM(sc->ifp)->mib.max_vccs * 4; sc->tsrd = sc->tsrc + IFP2IFATM(sc->ifp)->mib.max_vccs * 2; tabr = sc->tsrd + IFP2IFATM(sc->ifp)->mib.max_vccs * 1; mtpd = ((tabr + 1024) + (16 * IFP2IFATM(sc->ifp)->mib.max_vccs - 1)) & ~(16 * IFP2IFATM(sc->ifp)->mib.max_vccs - 1); DBG(sc, ATTACH, ("rsra=%x mlbm=%x rabr=%x rsrb=%x", rsra, mlbm, rabr, sc->rsrb)); DBG(sc, ATTACH, ("tsra=%x tsrb=%x tsrc=%x tsrd=%x tabr=%x mtpd=%x", tsra, sc->tsrb, sc->tsrc, sc->tsrd, tabr, mtpd)); WRITE4(sc, HE_REGO_TSRB_BA, sc->tsrb); WRITE4(sc, HE_REGO_TSRC_BA, sc->tsrc); WRITE4(sc, HE_REGO_TSRD_BA, sc->tsrd); WRITE4(sc, HE_REGO_TMABR_BA, tabr); WRITE4(sc, HE_REGO_TPD_BA, mtpd); WRITE4(sc, HE_REGO_RCMRSRB_BA, sc->rsrb); WRITE4(sc, HE_REGO_RCMLBM_BA, mlbm); WRITE4(sc, HE_REGO_RCMABR_BA, rabr); BARRIER_W(sc); } /* * 5.1.4 Initialize Local buffer Pools */ static void hatm_init_rx_buffer_pool(struct hatm_softc *sc, u_int num, /* bank */ u_int start, /* start row */ u_int numbuffs /* number of entries */ ) { u_int row_size; /* bytes per row */ uint32_t row_addr; /* start address of this row */ u_int lbuf_size; /* bytes per lbuf */ u_int lbufs_per_row; /* number of lbufs per memory row */ uint32_t lbufd_index; /* index of lbuf descriptor */ uint32_t lbufd_addr; /* address of lbuf descriptor */ u_int lbuf_row_cnt; /* current lbuf in current row */ uint32_t lbuf_addr; /* address of current buffer */ u_int i; row_size = sc->bytes_per_row; row_addr = start * row_size; lbuf_size = sc->cells_per_lbuf * 48; lbufs_per_row = sc->cells_per_row / sc->cells_per_lbuf; /* descriptor index */ lbufd_index = num; /* 2 words per entry */ lbufd_addr = READ4(sc, HE_REGO_RCMLBM_BA) + lbufd_index * 2; /* write head of queue */ WRITE4(sc, HE_REGO_RLBF_H(num), lbufd_index); lbuf_row_cnt = 0; for (i = 0; i < numbuffs; i++) { lbuf_addr = (row_addr + lbuf_row_cnt * lbuf_size) / 32; WRITE_RCM4(sc, lbufd_addr, lbuf_addr); lbufd_index += 2; WRITE_RCM4(sc, lbufd_addr + 1, 
lbufd_index); if (++lbuf_row_cnt == lbufs_per_row) { lbuf_row_cnt = 0; row_addr += row_size; } lbufd_addr += 2 * 2; } WRITE4(sc, HE_REGO_RLBF_T(num), lbufd_index - 2); WRITE4(sc, HE_REGO_RLBF_C(num), numbuffs); BARRIER_W(sc); } static void hatm_init_tx_buffer_pool(struct hatm_softc *sc, u_int start, /* start row */ u_int numbuffs /* number of entries */ ) { u_int row_size; /* bytes per row */ uint32_t row_addr; /* start address of this row */ u_int lbuf_size; /* bytes per lbuf */ u_int lbufs_per_row; /* number of lbufs per memory row */ uint32_t lbufd_index; /* index of lbuf descriptor */ uint32_t lbufd_addr; /* address of lbuf descriptor */ u_int lbuf_row_cnt; /* current lbuf in current row */ uint32_t lbuf_addr; /* address of current buffer */ u_int i; row_size = sc->bytes_per_row; row_addr = start * row_size; lbuf_size = sc->cells_per_lbuf * 48; lbufs_per_row = sc->cells_per_row / sc->cells_per_lbuf; /* descriptor index */ lbufd_index = sc->r0_numbuffs + sc->r1_numbuffs; /* 2 words per entry */ lbufd_addr = READ4(sc, HE_REGO_RCMLBM_BA) + lbufd_index * 2; /* write head of queue */ WRITE4(sc, HE_REGO_TLBF_H, lbufd_index); lbuf_row_cnt = 0; for (i = 0; i < numbuffs; i++) { lbuf_addr = (row_addr + lbuf_row_cnt * lbuf_size) / 32; WRITE_RCM4(sc, lbufd_addr, lbuf_addr); lbufd_index++; WRITE_RCM4(sc, lbufd_addr + 1, lbufd_index); if (++lbuf_row_cnt == lbufs_per_row) { lbuf_row_cnt = 0; row_addr += row_size; } lbufd_addr += 2; } WRITE4(sc, HE_REGO_TLBF_T, lbufd_index - 1); BARRIER_W(sc); } /* * 5.1.5 Initialize Intermediate Receive Queues */ static void hatm_init_imed_queues(struct hatm_softc *sc) { u_int n; if (sc->he622) { for (n = 0; n < 8; n++) { WRITE4(sc, HE_REGO_INMQ_S(n), 0x10*n+0x000f); WRITE4(sc, HE_REGO_INMQ_L(n), 0x10*n+0x200f); } } else { for (n = 0; n < 8; n++) { WRITE4(sc, HE_REGO_INMQ_S(n), n); WRITE4(sc, HE_REGO_INMQ_L(n), n+0x8); } } } /* * 5.1.7 Init CS block */ static void hatm_init_cs_block(struct hatm_softc *sc) { u_int n, i; u_int clkfreg, 
cellrate, decr, tmp; static const uint32_t erthr[2][5][3] = HE_REGT_CS_ERTHR; static const uint32_t erctl[2][3] = HE_REGT_CS_ERCTL; static const uint32_t erstat[2][2] = HE_REGT_CS_ERSTAT; static const uint32_t rtfwr[2] = HE_REGT_CS_RTFWR; static const uint32_t rtatr[2] = HE_REGT_CS_RTATR; static const uint32_t bwalloc[2][6] = HE_REGT_CS_BWALLOC; static const uint32_t orcf[2][2] = HE_REGT_CS_ORCF; /* Clear Rate Controller Start Times and Occupied Flags */ for (n = 0; n < 32; n++) WRITE_MBOX4(sc, HE_REGO_CS_STTIM(n), 0); clkfreg = sc->he622 ? HE_622_CLOCK : HE_155_CLOCK; cellrate = sc->he622 ? ATM_RATE_622M : ATM_RATE_155M; decr = cellrate / 32; for (n = 0; n < 16; n++) { tmp = clkfreg / cellrate; WRITE_MBOX4(sc, HE_REGO_CS_TGRLD(n), tmp - 1); cellrate -= decr; } i = (sc->cells_per_lbuf == 2) ? 0 :(sc->cells_per_lbuf == 4) ? 1 : 2; /* table 5.2 */ WRITE_MBOX4(sc, HE_REGO_CS_ERTHR0, erthr[sc->he622][0][i]); WRITE_MBOX4(sc, HE_REGO_CS_ERTHR1, erthr[sc->he622][1][i]); WRITE_MBOX4(sc, HE_REGO_CS_ERTHR2, erthr[sc->he622][2][i]); WRITE_MBOX4(sc, HE_REGO_CS_ERTHR3, erthr[sc->he622][3][i]); WRITE_MBOX4(sc, HE_REGO_CS_ERTHR4, erthr[sc->he622][4][i]); WRITE_MBOX4(sc, HE_REGO_CS_ERCTL0, erctl[sc->he622][0]); WRITE_MBOX4(sc, HE_REGO_CS_ERCTL1, erctl[sc->he622][1]); WRITE_MBOX4(sc, HE_REGO_CS_ERCTL2, erctl[sc->he622][2]); WRITE_MBOX4(sc, HE_REGO_CS_ERSTAT0, erstat[sc->he622][0]); WRITE_MBOX4(sc, HE_REGO_CS_ERSTAT1, erstat[sc->he622][1]); WRITE_MBOX4(sc, HE_REGO_CS_RTFWR, rtfwr[sc->he622]); WRITE_MBOX4(sc, HE_REGO_CS_RTATR, rtatr[sc->he622]); WRITE_MBOX4(sc, HE_REGO_CS_TFBSET, bwalloc[sc->he622][0]); WRITE_MBOX4(sc, HE_REGO_CS_WCRMAX, bwalloc[sc->he622][1]); WRITE_MBOX4(sc, HE_REGO_CS_WCRMIN, bwalloc[sc->he622][2]); WRITE_MBOX4(sc, HE_REGO_CS_WCRINC, bwalloc[sc->he622][3]); WRITE_MBOX4(sc, HE_REGO_CS_WCRDEC, bwalloc[sc->he622][4]); WRITE_MBOX4(sc, HE_REGO_CS_WCRCEIL, bwalloc[sc->he622][5]); WRITE_MBOX4(sc, HE_REGO_CS_OTPPER, orcf[sc->he622][0]); WRITE_MBOX4(sc, HE_REGO_CS_OTWPER, 
orcf[sc->he622][1]); WRITE_MBOX4(sc, HE_REGO_CS_OTTLIM, 8); for (n = 0; n < 8; n++) WRITE_MBOX4(sc, HE_REGO_CS_HGRRT(n), 0); } /* * 5.1.8 CS Block Connection Memory Initialisation */ static void hatm_init_cs_block_cm(struct hatm_softc *sc) { u_int n, i; u_int expt, mant, etrm, wcr, ttnrm, tnrm; uint32_t rate; uint32_t clkfreq, cellrate, decr; uint32_t *rg, rtg, val = 0; uint64_t drate; u_int buf, buf_limit; uint32_t base = READ4(sc, HE_REGO_RCMABR_BA); for (n = 0; n < HE_REGL_CM_GQTBL; n++) WRITE_RCM4(sc, base + HE_REGO_CM_GQTBL + n, 0); for (n = 0; n < HE_REGL_CM_RGTBL; n++) WRITE_RCM4(sc, base + HE_REGO_CM_RGTBL + n, 0); tnrm = 0; for (n = 0; n < HE_REGL_CM_TNRMTBL * 4; n++) { expt = (n >> 5) & 0x1f; mant = ((n & 0x18) << 4) | 0x7f; wcr = (1 << expt) * (mant + 512) / 512; etrm = n & 0x7; ttnrm = wcr / 10 / (1 << etrm); if (ttnrm > 255) ttnrm = 255; else if(ttnrm < 2) ttnrm = 2; tnrm = (tnrm << 8) | (ttnrm & 0xff); if (n % 4 == 0) WRITE_RCM4(sc, base + HE_REGO_CM_TNRMTBL + (n/4), tnrm); } clkfreq = sc->he622 ? HE_622_CLOCK : HE_155_CLOCK; buf_limit = 4; cellrate = sc->he622 ? ATM_RATE_622M : ATM_RATE_155M; decr = cellrate / 32; /* compute GRID top row in 1000 * cps */ for (n = 0; n < 16; n++) { u_int interval = clkfreq / cellrate; sc->rate_grid[0][n] = (u_int64_t)clkfreq * 1000 / interval; cellrate -= decr; } /* compute the other rows according to 2.4 */ for (i = 1; i < 16; i++) for (n = 0; n < 16; n++) sc->rate_grid[i][n] = sc->rate_grid[i-1][n] / ((i < 14) ? 2 : 4); /* first entry is line rate */ n = hatm_cps2atmf(sc->he622 ? ATM_RATE_622M : ATM_RATE_155M); expt = (n >> 9) & 0x1f; mant = n & 0x1f0; sc->rate_grid[0][0] = (u_int64_t)(1<he622 ? 
ATM_RATE_622M : ATM_RATE_155M; rg = &sc->rate_grid[15][15]; for (rate = 0; rate < 2 * HE_REGL_CM_RTGTBL; rate++) { /* unpack the ATMF rate */ expt = rate >> 5; mant = (rate & 0x1f) << 4; /* get the cell rate - minimum is 10 per second */ drate = (uint64_t)(1 << expt) * 1000 * (mant + 512) / 512; if (drate < 10 * 1000) drate = 10 * 1000; /* now look up the grid index */ while (drate >= *rg && rg-- > &sc->rate_grid[0][0]) ; rg++; rtg = rg - &sc->rate_grid[0][0]; /* now compute the buffer limit */ buf = drate * sc->tx_numbuffs / (cellrate * 2) / 1000; if (buf == 0) buf = 1; else if (buf > buf_limit) buf = buf_limit; /* make value */ val = (val << 16) | (rtg << 8) | buf; /* write */ if (rate % 2 == 1) WRITE_RCM4(sc, base + HE_REGO_CM_RTGTBL + rate/2, val); } } /* * Clear an unused receive group buffer pool */ static void hatm_clear_rpool(struct hatm_softc *sc, u_int group, u_int large) { WRITE4(sc, HE_REGO_RBP_S(large, group), 0); WRITE4(sc, HE_REGO_RBP_T(large, group), 0); WRITE4(sc, HE_REGO_RBP_QI(large, group), 1); WRITE4(sc, HE_REGO_RBP_BL(large, group), 0); } /* * Initialize a receive group buffer pool */ static void hatm_init_rpool(struct hatm_softc *sc, struct herbp *q, u_int group, u_int large) { if (q->size == 0) { hatm_clear_rpool(sc, group, large); return; } bzero(q->mem.base, q->mem.size); q->rbp = q->mem.base; q->head = q->tail = 0; DBG(sc, ATTACH, ("RBP%u%c=0x%lx", group, "SL"[large], (u_long)q->mem.paddr)); WRITE4(sc, HE_REGO_RBP_S(large, group), q->mem.paddr); WRITE4(sc, HE_REGO_RBP_T(large, group), 0); WRITE4(sc, HE_REGO_RBP_QI(large, group), ((q->size - 1) << HE_REGS_RBP_SIZE) | HE_REGM_RBP_INTR_ENB | (q->thresh << HE_REGS_RBP_THRESH)); WRITE4(sc, HE_REGO_RBP_BL(large, group), (q->bsize >> 2) & ~1); } /* * Clear an unused receive buffer return queue */ static void hatm_clear_rbrq(struct hatm_softc *sc, u_int group) { WRITE4(sc, HE_REGO_RBRQ_ST(group), 0); WRITE4(sc, HE_REGO_RBRQ_H(group), 0); WRITE4(sc, HE_REGO_RBRQ_Q(group), (1 << 
HE_REGS_RBRQ_THRESH)); WRITE4(sc, HE_REGO_RBRQ_I(group), 0); } /* * Initialize receive buffer return queue */ static void hatm_init_rbrq(struct hatm_softc *sc, struct herbrq *rq, u_int group) { if (rq->size == 0) { hatm_clear_rbrq(sc, group); return; } rq->rbrq = rq->mem.base; rq->head = 0; DBG(sc, ATTACH, ("RBRQ%u=0x%lx", group, (u_long)rq->mem.paddr)); WRITE4(sc, HE_REGO_RBRQ_ST(group), rq->mem.paddr); WRITE4(sc, HE_REGO_RBRQ_H(group), 0); WRITE4(sc, HE_REGO_RBRQ_Q(group), (rq->thresh << HE_REGS_RBRQ_THRESH) | ((rq->size - 1) << HE_REGS_RBRQ_SIZE)); WRITE4(sc, HE_REGO_RBRQ_I(group), (rq->tout << HE_REGS_RBRQ_TIME) | (rq->pcnt << HE_REGS_RBRQ_COUNT)); } /* * Clear an unused transmit buffer return queue N */ static void hatm_clear_tbrq(struct hatm_softc *sc, u_int group) { WRITE4(sc, HE_REGO_TBRQ_B_T(group), 0); WRITE4(sc, HE_REGO_TBRQ_H(group), 0); WRITE4(sc, HE_REGO_TBRQ_S(group), 0); WRITE4(sc, HE_REGO_TBRQ_THRESH(group), 1); } /* * Initialize transmit buffer return queue N */ static void hatm_init_tbrq(struct hatm_softc *sc, struct hetbrq *tq, u_int group) { if (tq->size == 0) { hatm_clear_tbrq(sc, group); return; } tq->tbrq = tq->mem.base; tq->head = 0; DBG(sc, ATTACH, ("TBRQ%u=0x%lx", group, (u_long)tq->mem.paddr)); WRITE4(sc, HE_REGO_TBRQ_B_T(group), tq->mem.paddr); WRITE4(sc, HE_REGO_TBRQ_H(group), 0); WRITE4(sc, HE_REGO_TBRQ_S(group), tq->size - 1); WRITE4(sc, HE_REGO_TBRQ_THRESH(group), tq->thresh); } /* * Initialize TPDRQ */ static void hatm_init_tpdrq(struct hatm_softc *sc) { struct hetpdrq *tq; tq = &sc->tpdrq; tq->tpdrq = tq->mem.base; tq->tail = tq->head = 0; DBG(sc, ATTACH, ("TPDRQ=0x%lx", (u_long)tq->mem.paddr)); WRITE4(sc, HE_REGO_TPDRQ_H, tq->mem.paddr); WRITE4(sc, HE_REGO_TPDRQ_T, 0); WRITE4(sc, HE_REGO_TPDRQ_S, tq->size - 1); } /* * Function can be called by the infrastructure to start the card. 
*/ static void hatm_init(void *p) { struct hatm_softc *sc = p; mtx_lock(&sc->mtx); hatm_stop(sc); hatm_initialize(sc); mtx_unlock(&sc->mtx); } enum { CTL_ISTATS, }; /* * Sysctl handler */ static int hatm_sysctl(SYSCTL_HANDLER_ARGS) { struct hatm_softc *sc = arg1; uint32_t *ret; int error; size_t len; switch (arg2) { case CTL_ISTATS: len = sizeof(sc->istats); break; default: panic("bad control code"); } ret = malloc(len, M_TEMP, M_WAITOK); mtx_lock(&sc->mtx); switch (arg2) { case CTL_ISTATS: sc->istats.mcc += READ4(sc, HE_REGO_MCC); sc->istats.oec += READ4(sc, HE_REGO_OEC); sc->istats.dcc += READ4(sc, HE_REGO_DCC); sc->istats.cec += READ4(sc, HE_REGO_CEC); bcopy(&sc->istats, ret, sizeof(sc->istats)); break; } mtx_unlock(&sc->mtx); error = SYSCTL_OUT(req, ret, len); free(ret, M_TEMP); return (error); } static int kenv_getuint(struct hatm_softc *sc, const char *var, u_int *ptr, u_int def, int rw) { char full[IFNAMSIZ + 3 + 20]; char *val, *end; u_int u; *ptr = def; - if (SYSCTL_ADD_UINT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), - OID_AUTO, var, rw ? CTLFLAG_RW : CTLFLAG_RD, ptr, 0, "") == NULL) - return (ENOMEM); + if (rw != 0) { + if (SYSCTL_ADD_UINT(&sc->sysctl_ctx, + SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, var, + CTLFLAG_RW, ptr, 0, "") == NULL) + return (ENOMEM); + } else { + if (SYSCTL_ADD_UINT(&sc->sysctl_ctx, + SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, var, + CTLFLAG_RD, ptr, 0, "") == NULL) + return (ENOMEM); + } snprintf(full, sizeof(full), "hw.%s.%s", device_get_nameunit(sc->dev), var); if ((val = getenv(full)) == NULL) return (0); u = strtoul(val, &end, 0); if (end == val || *end != '\0') { freeenv(val); return (EINVAL); } freeenv(val); if (bootverbose) if_printf(sc->ifp, "%s=%u\n", full, u); *ptr = u; return (0); } /* * Set configurable parameters. Many of these are configurable via * kenv. 
*/ static int hatm_configure(struct hatm_softc *sc) { /* Receive buffer pool 0 small */ kenv_getuint(sc, "rbps0_size", &sc->rbp_s0.size, HE_CONFIG_RBPS0_SIZE, 0); kenv_getuint(sc, "rbps0_thresh", &sc->rbp_s0.thresh, HE_CONFIG_RBPS0_THRESH, 0); sc->rbp_s0.bsize = MBUF0_SIZE; /* Receive buffer pool 0 large */ kenv_getuint(sc, "rbpl0_size", &sc->rbp_l0.size, HE_CONFIG_RBPL0_SIZE, 0); kenv_getuint(sc, "rbpl0_thresh", &sc->rbp_l0.thresh, HE_CONFIG_RBPL0_THRESH, 0); sc->rbp_l0.bsize = MCLBYTES - MBUFL_OFFSET; /* Receive buffer return queue 0 */ kenv_getuint(sc, "rbrq0_size", &sc->rbrq_0.size, HE_CONFIG_RBRQ0_SIZE, 0); kenv_getuint(sc, "rbrq0_thresh", &sc->rbrq_0.thresh, HE_CONFIG_RBRQ0_THRESH, 0); kenv_getuint(sc, "rbrq0_tout", &sc->rbrq_0.tout, HE_CONFIG_RBRQ0_TOUT, 0); kenv_getuint(sc, "rbrq0_pcnt", &sc->rbrq_0.pcnt, HE_CONFIG_RBRQ0_PCNT, 0); /* Receive buffer pool 1 small */ kenv_getuint(sc, "rbps1_size", &sc->rbp_s1.size, HE_CONFIG_RBPS1_SIZE, 0); kenv_getuint(sc, "rbps1_thresh", &sc->rbp_s1.thresh, HE_CONFIG_RBPS1_THRESH, 0); sc->rbp_s1.bsize = MBUF1_SIZE; /* Receive buffer return queue 1 */ kenv_getuint(sc, "rbrq1_size", &sc->rbrq_1.size, HE_CONFIG_RBRQ1_SIZE, 0); kenv_getuint(sc, "rbrq1_thresh", &sc->rbrq_1.thresh, HE_CONFIG_RBRQ1_THRESH, 0); kenv_getuint(sc, "rbrq1_tout", &sc->rbrq_1.tout, HE_CONFIG_RBRQ1_TOUT, 0); kenv_getuint(sc, "rbrq1_pcnt", &sc->rbrq_1.pcnt, HE_CONFIG_RBRQ1_PCNT, 0); /* Interrupt queue 0 */ kenv_getuint(sc, "irq0_size", &sc->irq_0.size, HE_CONFIG_IRQ0_SIZE, 0); kenv_getuint(sc, "irq0_thresh", &sc->irq_0.thresh, HE_CONFIG_IRQ0_THRESH, 0); sc->irq_0.line = HE_CONFIG_IRQ0_LINE; /* Transmit buffer return queue 0 */ kenv_getuint(sc, "tbrq0_size", &sc->tbrq.size, HE_CONFIG_TBRQ_SIZE, 0); kenv_getuint(sc, "tbrq0_thresh", &sc->tbrq.thresh, HE_CONFIG_TBRQ_THRESH, 0); /* Transmit buffer ready queue */ kenv_getuint(sc, "tpdrq_size", &sc->tpdrq.size, HE_CONFIG_TPDRQ_SIZE, 0); /* Max TPDs per VCC */ kenv_getuint(sc, "tpdmax", &sc->max_tpd, 
HE_CONFIG_TPD_MAXCC, 0); /* external mbuf pages */ kenv_getuint(sc, "max_mbuf_pages", &sc->mbuf_max_pages, HE_CONFIG_MAX_MBUF_PAGES, 0); /* mpsafe */ kenv_getuint(sc, "mpsafe", &sc->mpsafe, 0, 0); if (sc->mpsafe != 0) sc->mpsafe = INTR_MPSAFE; return (0); } #ifdef HATM_DEBUG /* * Get TSRs from connection memory */ static int hatm_sysctl_tsr(SYSCTL_HANDLER_ARGS) { struct hatm_softc *sc = arg1; int error, i, j; uint32_t *val; val = malloc(sizeof(uint32_t) * HE_MAX_VCCS * 15, M_TEMP, M_WAITOK); mtx_lock(&sc->mtx); for (i = 0; i < HE_MAX_VCCS; i++) for (j = 0; j <= 14; j++) val[15 * i + j] = READ_TSR(sc, i, j); mtx_unlock(&sc->mtx); error = SYSCTL_OUT(req, val, sizeof(uint32_t) * HE_MAX_VCCS * 15); free(val, M_TEMP); if (error != 0 || req->newptr == NULL) return (error); return (EPERM); } /* * Get TPDs from connection memory */ static int hatm_sysctl_tpd(SYSCTL_HANDLER_ARGS) { struct hatm_softc *sc = arg1; int error, i, j; uint32_t *val; val = malloc(sizeof(uint32_t) * HE_MAX_VCCS * 16, M_TEMP, M_WAITOK); mtx_lock(&sc->mtx); for (i = 0; i < HE_MAX_VCCS; i++) for (j = 0; j < 16; j++) val[16 * i + j] = READ_TCM4(sc, 16 * i + j); mtx_unlock(&sc->mtx); error = SYSCTL_OUT(req, val, sizeof(uint32_t) * HE_MAX_VCCS * 16); free(val, M_TEMP); if (error != 0 || req->newptr == NULL) return (error); return (EPERM); } /* * Get mbox registers */ static int hatm_sysctl_mbox(SYSCTL_HANDLER_ARGS) { struct hatm_softc *sc = arg1; int error, i; uint32_t *val; val = malloc(sizeof(uint32_t) * HE_REGO_CS_END, M_TEMP, M_WAITOK); mtx_lock(&sc->mtx); for (i = 0; i < HE_REGO_CS_END; i++) val[i] = READ_MBOX4(sc, i); mtx_unlock(&sc->mtx); error = SYSCTL_OUT(req, val, sizeof(uint32_t) * HE_REGO_CS_END); free(val, M_TEMP); if (error != 0 || req->newptr == NULL) return (error); return (EPERM); } /* * Get connection memory */ static int hatm_sysctl_cm(SYSCTL_HANDLER_ARGS) { struct hatm_softc *sc = arg1; int error, i; uint32_t *val; val = malloc(sizeof(uint32_t) * (HE_CONFIG_RXMEM + 1), M_TEMP, 
M_WAITOK); mtx_lock(&sc->mtx); val[0] = READ4(sc, HE_REGO_RCMABR_BA); for (i = 0; i < HE_CONFIG_RXMEM; i++) val[i + 1] = READ_RCM4(sc, i); mtx_unlock(&sc->mtx); error = SYSCTL_OUT(req, val, sizeof(uint32_t) * (HE_CONFIG_RXMEM + 1)); free(val, M_TEMP); if (error != 0 || req->newptr == NULL) return (error); return (EPERM); } /* * Get local buffer memory */ static int hatm_sysctl_lbmem(SYSCTL_HANDLER_ARGS) { struct hatm_softc *sc = arg1; int error, i; uint32_t *val; u_int bytes = (1 << 21); val = malloc(bytes, M_TEMP, M_WAITOK); mtx_lock(&sc->mtx); for (i = 0; i < bytes / 4; i++) val[i] = READ_LB4(sc, i); mtx_unlock(&sc->mtx); error = SYSCTL_OUT(req, val, bytes); free(val, M_TEMP); if (error != 0 || req->newptr == NULL) return (error); return (EPERM); } /* * Get all card registers */ static int hatm_sysctl_heregs(SYSCTL_HANDLER_ARGS) { struct hatm_softc *sc = arg1; int error, i; uint32_t *val; val = malloc(HE_REGO_END, M_TEMP, M_WAITOK); mtx_lock(&sc->mtx); for (i = 0; i < HE_REGO_END; i += 4) val[i / 4] = READ4(sc, i); mtx_unlock(&sc->mtx); error = SYSCTL_OUT(req, val, HE_REGO_END); free(val, M_TEMP); if (error != 0 || req->newptr == NULL) return (error); return (EPERM); } #endif /* * Suni register access */ /* * read at most n SUNI registers starting at reg into val */ static int hatm_utopia_readregs(struct ifatm *ifatm, u_int reg, uint8_t *val, u_int *n) { u_int i; struct hatm_softc *sc = ifatm->ifp->if_softc; if (reg >= (HE_REGO_SUNI_END - HE_REGO_SUNI) / 4) return (EINVAL); if (reg + *n > (HE_REGO_SUNI_END - HE_REGO_SUNI) / 4) *n = reg - (HE_REGO_SUNI_END - HE_REGO_SUNI) / 4; mtx_assert(&sc->mtx, MA_OWNED); for (i = 0; i < *n; i++) val[i] = READ4(sc, HE_REGO_SUNI + 4 * (reg + i)); return (0); } /* * change the bits given by mask to them in val in register reg */ static int hatm_utopia_writereg(struct ifatm *ifatm, u_int reg, u_int mask, u_int val) { uint32_t regval; struct hatm_softc *sc = ifatm->ifp->if_softc; if (reg >= (HE_REGO_SUNI_END - HE_REGO_SUNI) / 4) 
return (EINVAL); mtx_assert(&sc->mtx, MA_OWNED); regval = READ4(sc, HE_REGO_SUNI + 4 * reg); regval = (regval & ~mask) | (val & mask); WRITE4(sc, HE_REGO_SUNI + 4 * reg, regval); return (0); } static struct utopia_methods hatm_utopia_methods = { hatm_utopia_readregs, hatm_utopia_writereg, }; /* * Detach - if it is running, stop. Destroy. */ static int hatm_detach(device_t dev) { struct hatm_softc *sc = device_get_softc(dev); mtx_lock(&sc->mtx); hatm_stop(sc); if (sc->utopia.state & UTP_ST_ATTACHED) { utopia_stop(&sc->utopia); utopia_detach(&sc->utopia); } mtx_unlock(&sc->mtx); atm_ifdetach(sc->ifp); hatm_destroy(sc); return (0); } /* * Attach to the device. Assume that no locking is needed here. * All resource we allocate here are freed by calling hatm_destroy. */ static int hatm_attach(device_t dev) { struct hatm_softc *sc; int error; uint32_t v; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp = if_alloc(IFT_ATM); if (ifp == NULL) { device_printf(dev, "could not if_alloc()\n"); return (ENOSPC); } sc->dev = dev; IFP2IFATM(sc->ifp)->mib.device = ATM_DEVICE_HE155; IFP2IFATM(sc->ifp)->mib.serial = 0; IFP2IFATM(sc->ifp)->mib.hw_version = 0; IFP2IFATM(sc->ifp)->mib.sw_version = 0; IFP2IFATM(sc->ifp)->mib.vpi_bits = HE_CONFIG_VPI_BITS; IFP2IFATM(sc->ifp)->mib.vci_bits = HE_CONFIG_VCI_BITS; IFP2IFATM(sc->ifp)->mib.max_vpcs = 0; IFP2IFATM(sc->ifp)->mib.max_vccs = HE_MAX_VCCS; IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_UNKNOWN; sc->he622 = 0; IFP2IFATM(sc->ifp)->phy = &sc->utopia; SLIST_INIT(&sc->tpd_free); mtx_init(&sc->mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); cv_init(&sc->vcc_cv, "HEVCCcv"); cv_init(&sc->cv_rcclose, "RCClose"); sysctl_ctx_init(&sc->sysctl_ctx); /* * 4.2 BIOS Configuration */ v = pci_read_config(dev, PCIR_COMMAND, 2); v |= PCIM_CMD_BUSMASTEREN | PCIM_CMD_MWRICEN; pci_write_config(dev, PCIR_COMMAND, v, 2); /* * 4.3 PCI Bus Controller-Specific Initialisation */ v = pci_read_config(dev, HE_PCIR_GEN_CNTL_0, 4); v |= HE_PCIM_CTL0_MRL 
| HE_PCIM_CTL0_MRM | HE_PCIM_CTL0_IGNORE_TIMEOUT; #if BYTE_ORDER == BIG_ENDIAN && 0 v |= HE_PCIM_CTL0_BIGENDIAN; #endif pci_write_config(dev, HE_PCIR_GEN_CNTL_0, v, 4); /* * Map memory */ sc->memid = PCIR_BAR(0); sc->memres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->memid, RF_ACTIVE); if (sc->memres == NULL) { device_printf(dev, "could not map memory\n"); error = ENXIO; goto failed; } sc->memh = rman_get_bushandle(sc->memres); sc->memt = rman_get_bustag(sc->memres); /* * ALlocate a DMA tag for subsequent allocations */ if (bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 1, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->parent_tag)) { device_printf(dev, "could not allocate DMA tag\n"); error = ENOMEM; goto failed; } if (bus_dma_tag_create(sc->parent_tag, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MBUF_ALLOC_SIZE, 1, MBUF_ALLOC_SIZE, 0, NULL, NULL, &sc->mbuf_tag)) { device_printf(dev, "could not allocate mbuf DMA tag\n"); error = ENOMEM; goto failed; } /* * Allocate a DMA tag for packets to send. Here we have a problem with * the specification of the maximum number of segments. Theoretically * this would be the size of the transmit ring - 1 multiplied by 3, * but this would not work. So make the maximum number of TPDs * occupied by one packet a configuration parameter. 
*/ if (bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, HE_MAX_PDU, 3 * HE_CONFIG_MAX_TPD_PER_PACKET, HE_MAX_PDU, 0, NULL, NULL, &sc->tx_tag)) { device_printf(dev, "could not allocate TX tag\n"); error = ENOMEM; goto failed; } /* * Setup the interrupt */ sc->irqid = 0; sc->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid, RF_SHAREABLE | RF_ACTIVE); if (sc->irqres == 0) { device_printf(dev, "could not allocate irq\n"); error = ENXIO; goto failed; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Make the sysctl tree */ error = ENOMEM; if ((sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_atm), OID_AUTO, device_get_nameunit(dev), CTLFLAG_RD, 0, "")) == NULL) goto failed; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "istats", CTLFLAG_RD | CTLTYPE_OPAQUE, sc, CTL_ISTATS, hatm_sysctl, "LU", "internal statistics") == NULL) goto failed; #ifdef HATM_DEBUG if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "tsr", CTLFLAG_RD | CTLTYPE_OPAQUE, sc, 0, hatm_sysctl_tsr, "S", "transmission status registers") == NULL) goto failed; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "tpd", CTLFLAG_RD | CTLTYPE_OPAQUE, sc, 0, hatm_sysctl_tpd, "S", "transmission packet descriptors") == NULL) goto failed; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "mbox", CTLFLAG_RD | CTLTYPE_OPAQUE, sc, 0, hatm_sysctl_mbox, "S", "mbox registers") == NULL) goto failed; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "cm", CTLFLAG_RD | CTLTYPE_OPAQUE, sc, 0, hatm_sysctl_cm, "S", "connection memory") == NULL) goto failed; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "heregs", CTLFLAG_RD | CTLTYPE_OPAQUE, sc, 0, hatm_sysctl_heregs, "S", "card registers") == NULL) goto 
failed; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "lbmem", CTLFLAG_RD | CTLTYPE_OPAQUE, sc, 0, hatm_sysctl_lbmem, "S", "local memory") == NULL) goto failed; kenv_getuint(sc, "debug", &sc->debug, HATM_DEBUG, 1); #endif /* * Configure */ if ((error = hatm_configure(sc)) != 0) goto failed; /* * Compute memory parameters */ if (sc->rbp_s0.size != 0) { sc->rbp_s0.mask = (sc->rbp_s0.size - 1) << 3; sc->rbp_s0.mem.size = sc->rbp_s0.size * 8; sc->rbp_s0.mem.align = sc->rbp_s0.mem.size; } if (sc->rbp_l0.size != 0) { sc->rbp_l0.mask = (sc->rbp_l0.size - 1) << 3; sc->rbp_l0.mem.size = sc->rbp_l0.size * 8; sc->rbp_l0.mem.align = sc->rbp_l0.mem.size; } if (sc->rbp_s1.size != 0) { sc->rbp_s1.mask = (sc->rbp_s1.size - 1) << 3; sc->rbp_s1.mem.size = sc->rbp_s1.size * 8; sc->rbp_s1.mem.align = sc->rbp_s1.mem.size; } if (sc->rbrq_0.size != 0) { sc->rbrq_0.mem.size = sc->rbrq_0.size * 8; sc->rbrq_0.mem.align = sc->rbrq_0.mem.size; } if (sc->rbrq_1.size != 0) { sc->rbrq_1.mem.size = sc->rbrq_1.size * 8; sc->rbrq_1.mem.align = sc->rbrq_1.mem.size; } sc->irq_0.mem.size = sc->irq_0.size * sizeof(uint32_t); sc->irq_0.mem.align = 4 * 1024; sc->tbrq.mem.size = sc->tbrq.size * 4; sc->tbrq.mem.align = 2 * sc->tbrq.mem.size; /* ZZZ */ sc->tpdrq.mem.size = sc->tpdrq.size * 8; sc->tpdrq.mem.align = sc->tpdrq.mem.size; sc->hsp_mem.size = sizeof(struct he_hsp); sc->hsp_mem.align = 1024; sc->lbufs_size = sc->rbp_l0.size + sc->rbrq_0.size; sc->tpd_total = sc->tbrq.size + sc->tpdrq.size; sc->tpds.align = 64; sc->tpds.size = sc->tpd_total * HE_TPD_SIZE; hatm_init_rmaps(sc); hatm_init_smbufs(sc); if ((error = hatm_init_tpds(sc)) != 0) goto failed; /* * Allocate memory */ if ((error = hatm_alloc_dmamem(sc, "IRQ", &sc->irq_0.mem)) != 0 || (error = hatm_alloc_dmamem(sc, "TBRQ0", &sc->tbrq.mem)) != 0 || (error = hatm_alloc_dmamem(sc, "TPDRQ", &sc->tpdrq.mem)) != 0 || (error = hatm_alloc_dmamem(sc, "HSP", &sc->hsp_mem)) != 0) goto failed; if (sc->rbp_s0.mem.size != 0 
&& (error = hatm_alloc_dmamem(sc, "RBPS0", &sc->rbp_s0.mem))) goto failed; if (sc->rbp_l0.mem.size != 0 && (error = hatm_alloc_dmamem(sc, "RBPL0", &sc->rbp_l0.mem))) goto failed; if (sc->rbp_s1.mem.size != 0 && (error = hatm_alloc_dmamem(sc, "RBPS1", &sc->rbp_s1.mem))) goto failed; if (sc->rbrq_0.mem.size != 0 && (error = hatm_alloc_dmamem(sc, "RBRQ0", &sc->rbrq_0.mem))) goto failed; if (sc->rbrq_1.mem.size != 0 && (error = hatm_alloc_dmamem(sc, "RBRQ1", &sc->rbrq_1.mem))) goto failed; if ((sc->vcc_zone = uma_zcreate("HE vccs", sizeof(struct hevcc), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0)) == NULL) { device_printf(dev, "cannot allocate zone for vccs\n"); goto failed; } /* * 4.4 Reset the card. */ if ((error = hatm_reset(sc)) != 0) goto failed; /* * Read the prom. */ hatm_init_bus_width(sc); hatm_init_read_eeprom(sc); hatm_init_endianess(sc); /* * Initialize interface */ ifp->if_flags = IFF_SIMPLEX; ifp->if_ioctl = hatm_ioctl; ifp->if_start = hatm_start; ifp->if_init = hatm_init; utopia_attach(&sc->utopia, IFP2IFATM(sc->ifp), &sc->media, &sc->mtx, &sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), &hatm_utopia_methods); utopia_init_media(&sc->utopia); /* these two SUNI routines need the lock */ mtx_lock(&sc->mtx); /* poll while we are not running */ sc->utopia.flags |= UTP_FL_POLL_CARRIER; utopia_start(&sc->utopia); utopia_reset(&sc->utopia); mtx_unlock(&sc->mtx); atm_ifattach(ifp); #ifdef ENABLE_BPF bpfattach(ifp, DLT_ATM_RFC1483, sizeof(struct atmllc)); #endif error = bus_setup_intr(dev, sc->irqres, sc->mpsafe | INTR_TYPE_NET, NULL, hatm_intr, &sc->irq_0, &sc->ih); if (error != 0) { device_printf(dev, "could not setup interrupt\n"); hatm_detach(dev); return (error); } return (0); failed: hatm_destroy(sc); return (error); } /* * Start the interface. Assume a state as from attach(). 
*/ void hatm_initialize(struct hatm_softc *sc) { uint32_t v; u_int cid; static const u_int layout[2][7] = HE_CONFIG_MEM_LAYOUT; if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) return; hatm_init_bus_width(sc); hatm_init_endianess(sc); if_printf(sc->ifp, "%s, Rev. %s, S/N %u, " "MAC=%02x:%02x:%02x:%02x:%02x:%02x (%ubit PCI)\n", sc->prod_id, sc->rev, IFP2IFATM(sc->ifp)->mib.serial, IFP2IFATM(sc->ifp)->mib.esi[0], IFP2IFATM(sc->ifp)->mib.esi[1], IFP2IFATM(sc->ifp)->mib.esi[2], IFP2IFATM(sc->ifp)->mib.esi[3], IFP2IFATM(sc->ifp)->mib.esi[4], IFP2IFATM(sc->ifp)->mib.esi[5], sc->pci64 ? 64 : 32); /* * 4.8 SDRAM Controller Initialisation * 4.9 Initialize RNUM value */ if (sc->he622) WRITE4(sc, HE_REGO_SDRAM_CNTL, HE_REGM_SDRAM_64BIT); else WRITE4(sc, HE_REGO_SDRAM_CNTL, 0); BARRIER_W(sc); v = READ4(sc, HE_REGO_LB_SWAP); BARRIER_R(sc); v |= 0xf << HE_REGS_LBSWAP_RNUM; WRITE4(sc, HE_REGO_LB_SWAP, v); BARRIER_W(sc); hatm_init_irq(sc, &sc->irq_0, 0); hatm_clear_irq(sc, 1); hatm_clear_irq(sc, 2); hatm_clear_irq(sc, 3); WRITE4(sc, HE_REGO_GRP_1_0_MAP, 0); WRITE4(sc, HE_REGO_GRP_3_2_MAP, 0); WRITE4(sc, HE_REGO_GRP_5_4_MAP, 0); WRITE4(sc, HE_REGO_GRP_7_6_MAP, 0); BARRIER_W(sc); /* * 4.11 Enable PCI Bus Controller State Machine */ v = READ4(sc, HE_REGO_HOST_CNTL); BARRIER_R(sc); v |= HE_REGM_HOST_OUTFF_ENB | HE_REGM_HOST_CMDFF_ENB | HE_REGM_HOST_QUICK_RD | HE_REGM_HOST_QUICK_WR; WRITE4(sc, HE_REGO_HOST_CNTL, v); BARRIER_W(sc); /* * 5.1.1 Generic configuration state */ sc->cells_per_row = layout[sc->he622][0]; sc->bytes_per_row = layout[sc->he622][1]; sc->r0_numrows = layout[sc->he622][2]; sc->tx_numrows = layout[sc->he622][3]; sc->r1_numrows = layout[sc->he622][4]; sc->r0_startrow = layout[sc->he622][5]; sc->tx_startrow = sc->r0_startrow + sc->r0_numrows; sc->r1_startrow = sc->tx_startrow + sc->tx_numrows; sc->cells_per_lbuf = layout[sc->he622][6]; sc->r0_numbuffs = sc->r0_numrows * (sc->cells_per_row / sc->cells_per_lbuf); sc->r1_numbuffs = sc->r1_numrows * (sc->cells_per_row / 
sc->cells_per_lbuf); sc->tx_numbuffs = sc->tx_numrows * (sc->cells_per_row / sc->cells_per_lbuf); if (sc->r0_numbuffs > 2560) sc->r0_numbuffs = 2560; if (sc->r1_numbuffs > 2560) sc->r1_numbuffs = 2560; if (sc->tx_numbuffs > 5120) sc->tx_numbuffs = 5120; DBG(sc, ATTACH, ("cells_per_row=%u bytes_per_row=%u r0_numrows=%u " "tx_numrows=%u r1_numrows=%u r0_startrow=%u tx_startrow=%u " "r1_startrow=%u cells_per_lbuf=%u\nr0_numbuffs=%u r1_numbuffs=%u " "tx_numbuffs=%u\n", sc->cells_per_row, sc->bytes_per_row, sc->r0_numrows, sc->tx_numrows, sc->r1_numrows, sc->r0_startrow, sc->tx_startrow, sc->r1_startrow, sc->cells_per_lbuf, sc->r0_numbuffs, sc->r1_numbuffs, sc->tx_numbuffs)); /* * 5.1.2 Configure Hardware dependend registers */ if (sc->he622) { WRITE4(sc, HE_REGO_LBARB, (0x2 << HE_REGS_LBARB_SLICE) | (0xf << HE_REGS_LBARB_RNUM) | (0x3 << HE_REGS_LBARB_THPRI) | (0x3 << HE_REGS_LBARB_RHPRI) | (0x2 << HE_REGS_LBARB_TLPRI) | (0x1 << HE_REGS_LBARB_RLPRI) | (0x28 << HE_REGS_LBARB_BUS_MULT) | (0x50 << HE_REGS_LBARB_NET_PREF)); BARRIER_W(sc); WRITE4(sc, HE_REGO_SDRAMCON, /* HW bug: don't use banking */ /* HE_REGM_SDRAMCON_BANK | */ HE_REGM_SDRAMCON_WIDE | (0x384 << HE_REGS_SDRAMCON_REF)); BARRIER_W(sc); WRITE4(sc, HE_REGO_RCMCONFIG, (0x1 << HE_REGS_RCMCONFIG_BANK_WAIT) | (0x1 << HE_REGS_RCMCONFIG_RW_WAIT) | (0x0 << HE_REGS_RCMCONFIG_TYPE)); WRITE4(sc, HE_REGO_TCMCONFIG, (0x2 << HE_REGS_TCMCONFIG_BANK_WAIT) | (0x1 << HE_REGS_TCMCONFIG_RW_WAIT) | (0x0 << HE_REGS_TCMCONFIG_TYPE)); } else { WRITE4(sc, HE_REGO_LBARB, (0x2 << HE_REGS_LBARB_SLICE) | (0xf << HE_REGS_LBARB_RNUM) | (0x3 << HE_REGS_LBARB_THPRI) | (0x3 << HE_REGS_LBARB_RHPRI) | (0x2 << HE_REGS_LBARB_TLPRI) | (0x1 << HE_REGS_LBARB_RLPRI) | (0x46 << HE_REGS_LBARB_BUS_MULT) | (0x8C << HE_REGS_LBARB_NET_PREF)); BARRIER_W(sc); WRITE4(sc, HE_REGO_SDRAMCON, /* HW bug: don't use banking */ /* HE_REGM_SDRAMCON_BANK | */ (0x150 << HE_REGS_SDRAMCON_REF)); BARRIER_W(sc); WRITE4(sc, HE_REGO_RCMCONFIG, (0x0 << 
HE_REGS_RCMCONFIG_BANK_WAIT) | (0x1 << HE_REGS_RCMCONFIG_RW_WAIT) | (0x0 << HE_REGS_RCMCONFIG_TYPE)); WRITE4(sc, HE_REGO_TCMCONFIG, (0x1 << HE_REGS_TCMCONFIG_BANK_WAIT) | (0x1 << HE_REGS_TCMCONFIG_RW_WAIT) | (0x0 << HE_REGS_TCMCONFIG_TYPE)); } WRITE4(sc, HE_REGO_LBCONFIG, (sc->cells_per_lbuf * 48)); WRITE4(sc, HE_REGO_RLBC_H, 0); WRITE4(sc, HE_REGO_RLBC_T, 0); WRITE4(sc, HE_REGO_RLBC_H2, 0); WRITE4(sc, HE_REGO_RXTHRSH, 512); WRITE4(sc, HE_REGO_LITHRSH, 256); WRITE4(sc, HE_REGO_RLBF0_C, sc->r0_numbuffs); WRITE4(sc, HE_REGO_RLBF1_C, sc->r1_numbuffs); if (sc->he622) { WRITE4(sc, HE_REGO_RCCONFIG, (8 << HE_REGS_RCCONFIG_UTDELAY) | (IFP2IFATM(sc->ifp)->mib.vpi_bits << HE_REGS_RCCONFIG_VP) | (IFP2IFATM(sc->ifp)->mib.vci_bits << HE_REGS_RCCONFIG_VC)); WRITE4(sc, HE_REGO_TXCONFIG, (32 << HE_REGS_TXCONFIG_THRESH) | (IFP2IFATM(sc->ifp)->mib.vci_bits << HE_REGS_TXCONFIG_VCI_MASK) | (sc->tx_numbuffs << HE_REGS_TXCONFIG_LBFREE)); } else { WRITE4(sc, HE_REGO_RCCONFIG, (0 << HE_REGS_RCCONFIG_UTDELAY) | HE_REGM_RCCONFIG_UT_MODE | (IFP2IFATM(sc->ifp)->mib.vpi_bits << HE_REGS_RCCONFIG_VP) | (IFP2IFATM(sc->ifp)->mib.vci_bits << HE_REGS_RCCONFIG_VC)); WRITE4(sc, HE_REGO_TXCONFIG, (32 << HE_REGS_TXCONFIG_THRESH) | HE_REGM_TXCONFIG_UTMODE | (IFP2IFATM(sc->ifp)->mib.vci_bits << HE_REGS_TXCONFIG_VCI_MASK) | (sc->tx_numbuffs << HE_REGS_TXCONFIG_LBFREE)); } WRITE4(sc, HE_REGO_TXAAL5_PROTO, 0); if (sc->rbp_s1.size != 0) { WRITE4(sc, HE_REGO_RHCONFIG, HE_REGM_RHCONFIG_PHYENB | ((sc->he622 ? 0x41 : 0x31) << HE_REGS_RHCONFIG_PTMR_PRE) | (1 << HE_REGS_RHCONFIG_OAM_GID)); } else { WRITE4(sc, HE_REGO_RHCONFIG, HE_REGM_RHCONFIG_PHYENB | ((sc->he622 ? 
0x41 : 0x31) << HE_REGS_RHCONFIG_PTMR_PRE) | (0 << HE_REGS_RHCONFIG_OAM_GID)); } BARRIER_W(sc); hatm_init_cm(sc); hatm_init_rx_buffer_pool(sc, 0, sc->r0_startrow, sc->r0_numbuffs); hatm_init_rx_buffer_pool(sc, 1, sc->r1_startrow, sc->r1_numbuffs); hatm_init_tx_buffer_pool(sc, sc->tx_startrow, sc->tx_numbuffs); hatm_init_imed_queues(sc); /* * 5.1.6 Application tunable Parameters */ WRITE4(sc, HE_REGO_MCC, 0); WRITE4(sc, HE_REGO_OEC, 0); WRITE4(sc, HE_REGO_DCC, 0); WRITE4(sc, HE_REGO_CEC, 0); hatm_init_cs_block(sc); hatm_init_cs_block_cm(sc); hatm_init_rpool(sc, &sc->rbp_s0, 0, 0); hatm_init_rpool(sc, &sc->rbp_l0, 0, 1); hatm_init_rpool(sc, &sc->rbp_s1, 1, 0); hatm_clear_rpool(sc, 1, 1); hatm_clear_rpool(sc, 2, 0); hatm_clear_rpool(sc, 2, 1); hatm_clear_rpool(sc, 3, 0); hatm_clear_rpool(sc, 3, 1); hatm_clear_rpool(sc, 4, 0); hatm_clear_rpool(sc, 4, 1); hatm_clear_rpool(sc, 5, 0); hatm_clear_rpool(sc, 5, 1); hatm_clear_rpool(sc, 6, 0); hatm_clear_rpool(sc, 6, 1); hatm_clear_rpool(sc, 7, 0); hatm_clear_rpool(sc, 7, 1); hatm_init_rbrq(sc, &sc->rbrq_0, 0); hatm_init_rbrq(sc, &sc->rbrq_1, 1); hatm_clear_rbrq(sc, 2); hatm_clear_rbrq(sc, 3); hatm_clear_rbrq(sc, 4); hatm_clear_rbrq(sc, 5); hatm_clear_rbrq(sc, 6); hatm_clear_rbrq(sc, 7); sc->lbufs_next = 0; bzero(sc->lbufs, sizeof(sc->lbufs[0]) * sc->lbufs_size); hatm_init_tbrq(sc, &sc->tbrq, 0); hatm_clear_tbrq(sc, 1); hatm_clear_tbrq(sc, 2); hatm_clear_tbrq(sc, 3); hatm_clear_tbrq(sc, 4); hatm_clear_tbrq(sc, 5); hatm_clear_tbrq(sc, 6); hatm_clear_tbrq(sc, 7); hatm_init_tpdrq(sc); WRITE4(sc, HE_REGO_UBUFF_BA, (sc->he622 ? 
0x104780 : 0x800)); /* * Initialize HSP */ bzero(sc->hsp_mem.base, sc->hsp_mem.size); sc->hsp = sc->hsp_mem.base; WRITE4(sc, HE_REGO_HSP_BA, sc->hsp_mem.paddr); /* * 5.1.12 Enable transmit and receive * Enable bus master and interrupts */ v = READ_MBOX4(sc, HE_REGO_CS_ERCTL0); v |= 0x18000000; WRITE_MBOX4(sc, HE_REGO_CS_ERCTL0, v); v = READ4(sc, HE_REGO_RCCONFIG); v |= HE_REGM_RCCONFIG_RXENB; WRITE4(sc, HE_REGO_RCCONFIG, v); v = pci_read_config(sc->dev, HE_PCIR_GEN_CNTL_0, 4); v |= HE_PCIM_CTL0_INIT_ENB | HE_PCIM_CTL0_INT_PROC_ENB; pci_write_config(sc->dev, HE_PCIR_GEN_CNTL_0, v, 4); sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_baudrate = 53 * 8 * IFP2IFATM(sc->ifp)->mib.pcr; sc->utopia.flags &= ~UTP_FL_POLL_CARRIER; /* reopen vccs */ for (cid = 0; cid < HE_MAX_VCCS; cid++) if (sc->vccs[cid] != NULL) hatm_load_vc(sc, cid, 1); ATMEV_SEND_IFSTATE_CHANGED(IFP2IFATM(sc->ifp), sc->utopia.carrier == UTP_CARR_OK); } /* * This functions stops the card and frees all resources allocated after * the attach. Must have the global lock. */ void hatm_stop(struct hatm_softc *sc) { uint32_t v; u_int i, p, cid; struct mbuf_chunk_hdr *ch; struct mbuf_page *pg; mtx_assert(&sc->mtx, MA_OWNED); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) return; sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ATMEV_SEND_IFSTATE_CHANGED(IFP2IFATM(sc->ifp), sc->utopia.carrier == UTP_CARR_OK); sc->utopia.flags |= UTP_FL_POLL_CARRIER; /* * Stop and reset the hardware so that everything remains * stable. 
*/ v = READ_MBOX4(sc, HE_REGO_CS_ERCTL0); v &= ~0x18000000; WRITE_MBOX4(sc, HE_REGO_CS_ERCTL0, v); v = READ4(sc, HE_REGO_RCCONFIG); v &= ~HE_REGM_RCCONFIG_RXENB; WRITE4(sc, HE_REGO_RCCONFIG, v); WRITE4(sc, HE_REGO_RHCONFIG, (0x2 << HE_REGS_RHCONFIG_PTMR_PRE)); BARRIER_W(sc); v = READ4(sc, HE_REGO_HOST_CNTL); BARRIER_R(sc); v &= ~(HE_REGM_HOST_OUTFF_ENB | HE_REGM_HOST_CMDFF_ENB); WRITE4(sc, HE_REGO_HOST_CNTL, v); BARRIER_W(sc); /* * Disable bust master and interrupts */ v = pci_read_config(sc->dev, HE_PCIR_GEN_CNTL_0, 4); v &= ~(HE_PCIM_CTL0_INIT_ENB | HE_PCIM_CTL0_INT_PROC_ENB); pci_write_config(sc->dev, HE_PCIR_GEN_CNTL_0, v, 4); (void)hatm_reset(sc); /* * Card resets the SUNI when resetted, so re-initialize it */ utopia_reset(&sc->utopia); /* * Give any waiters on closing a VCC a chance. They will stop * to wait if they see that IFF_DRV_RUNNING disappeared. */ cv_broadcast(&sc->vcc_cv); cv_broadcast(&sc->cv_rcclose); /* * Now free all resources. */ /* * Free the large mbufs that are given to the card. */ for (i = 0 ; i < sc->lbufs_size; i++) { if (sc->lbufs[i] != NULL) { bus_dmamap_unload(sc->mbuf_tag, sc->rmaps[i]); m_freem(sc->lbufs[i]); sc->lbufs[i] = NULL; } } /* * Free small buffers */ for (p = 0; p < sc->mbuf_npages; p++) { pg = sc->mbuf_pages[p]; for (i = 0; i < pg->hdr.nchunks; i++) { ch = (struct mbuf_chunk_hdr *) ((char *)pg + i * pg->hdr.chunksize + pg->hdr.hdroff); if (ch->flags & MBUF_CARD) { ch->flags &= ~MBUF_CARD; ch->flags |= MBUF_USED; hatm_ext_free(&sc->mbuf_list[pg->hdr.pool], (struct mbufx_free *)((u_char *)ch - pg->hdr.hdroff)); } } } hatm_stop_tpds(sc); /* * Free all partial reassembled PDUs on any VCC. 
*/ for (cid = 0; cid < HE_MAX_VCCS; cid++) { if (sc->vccs[cid] != NULL) { if (sc->vccs[cid]->chain != NULL) { m_freem(sc->vccs[cid]->chain); sc->vccs[cid]->chain = NULL; sc->vccs[cid]->last = NULL; } if (!(sc->vccs[cid]->vflags & (HE_VCC_RX_OPEN | HE_VCC_TX_OPEN))) { hatm_tx_vcc_closed(sc, cid); uma_zfree(sc->vcc_zone, sc->vccs[cid]); sc->vccs[cid] = NULL; sc->open_vccs--; } else { sc->vccs[cid]->vflags = 0; sc->vccs[cid]->ntpds = 0; } } } if (sc->rbp_s0.size != 0) bzero(sc->rbp_s0.mem.base, sc->rbp_s0.mem.size); if (sc->rbp_l0.size != 0) bzero(sc->rbp_l0.mem.base, sc->rbp_l0.mem.size); if (sc->rbp_s1.size != 0) bzero(sc->rbp_s1.mem.base, sc->rbp_s1.mem.size); if (sc->rbrq_0.size != 0) bzero(sc->rbrq_0.mem.base, sc->rbrq_0.mem.size); if (sc->rbrq_1.size != 0) bzero(sc->rbrq_1.mem.base, sc->rbrq_1.mem.size); bzero(sc->tbrq.mem.base, sc->tbrq.mem.size); bzero(sc->tpdrq.mem.base, sc->tpdrq.mem.size); bzero(sc->hsp_mem.base, sc->hsp_mem.size); } /************************************************************ * * Driver infrastructure */ devclass_t hatm_devclass; static device_method_t hatm_methods[] = { DEVMETHOD(device_probe, hatm_probe), DEVMETHOD(device_attach, hatm_attach), DEVMETHOD(device_detach, hatm_detach), {0,0} }; static driver_t hatm_driver = { "hatm", hatm_methods, sizeof(struct hatm_softc), }; DRIVER_MODULE(hatm, pci, hatm_driver, hatm_devclass, NULL, 0); Index: stable/9/sys/dev/isp =================================================================== --- stable/9/sys/dev/isp (revision 273911) +++ stable/9/sys/dev/isp (revision 273912) Property changes on: stable/9/sys/dev/isp ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/dev/isp:r263710,273377-273378,273423,273455 Index: stable/9/sys/dev/ixgbe/ixgbe.c =================================================================== --- stable/9/sys/dev/ixgbe/ixgbe.c (revision 273911) +++ stable/9/sys/dev/ixgbe/ixgbe.c (revision 273912) @@ 
-1,5828 +1,5828 @@ /****************************************************************************** Copyright (c) 2001-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "ixgbe.h" /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int ixgbe_display_debug_stats = 0; /********************************************************************* * Driver version *********************************************************************/ char ixgbe_driver_version[] = "2.5.15"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixgbe_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixgbe_vendor_info_t ixgbe_vendor_info_array[] = { {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0}, 
{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings *********************************************************************/ static char *ixgbe_strings[] = { "Intel(R) PRO/10GbE PCI-Express Network Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixgbe_probe(device_t); static int ixgbe_attach(device_t); static int ixgbe_detach(device_t); static int ixgbe_shutdown(device_t); #ifdef IXGBE_LEGACY_TX static void ixgbe_start(struct ifnet *); static void ixgbe_start_locked(struct tx_ring *, struct ifnet *); #else /* ! 
IXGBE_LEGACY_TX */ static int ixgbe_mq_start(struct ifnet *, struct mbuf *); static int ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *); static void ixgbe_qflush(struct ifnet *); static void ixgbe_deferred_mq_start(void *, int); #endif /* IXGBE_LEGACY_TX */ static int ixgbe_ioctl(struct ifnet *, u_long, caddr_t); static void ixgbe_init(void *); static void ixgbe_init_locked(struct adapter *); static void ixgbe_stop(void *); static void ixgbe_media_status(struct ifnet *, struct ifmediareq *); static int ixgbe_media_change(struct ifnet *); static void ixgbe_identify_hardware(struct adapter *); static int ixgbe_allocate_pci_resources(struct adapter *); static void ixgbe_get_slot_info(struct ixgbe_hw *); static int ixgbe_allocate_msix(struct adapter *); static int ixgbe_allocate_legacy(struct adapter *); static int ixgbe_allocate_queues(struct adapter *); static int ixgbe_setup_msix(struct adapter *); static void ixgbe_free_pci_resources(struct adapter *); static void ixgbe_local_timer(void *); static int ixgbe_setup_interface(device_t, struct adapter *); static void ixgbe_config_link(struct adapter *); static int ixgbe_allocate_transmit_buffers(struct tx_ring *); static int ixgbe_setup_transmit_structures(struct adapter *); static void ixgbe_setup_transmit_ring(struct tx_ring *); static void ixgbe_initialize_transmit_units(struct adapter *); static void ixgbe_free_transmit_structures(struct adapter *); static void ixgbe_free_transmit_buffers(struct tx_ring *); static int ixgbe_allocate_receive_buffers(struct rx_ring *); static int ixgbe_setup_receive_structures(struct adapter *); static int ixgbe_setup_receive_ring(struct rx_ring *); static void ixgbe_initialize_receive_units(struct adapter *); static void ixgbe_free_receive_structures(struct adapter *); static void ixgbe_free_receive_buffers(struct rx_ring *); static void ixgbe_setup_hw_rsc(struct rx_ring *); static void ixgbe_enable_intr(struct adapter *); static void ixgbe_disable_intr(struct adapter *); 
static void ixgbe_update_stats_counters(struct adapter *); static void ixgbe_txeof(struct tx_ring *); static bool ixgbe_rxeof(struct ix_queue *); static void ixgbe_rx_checksum(u32, struct mbuf *, u32); static void ixgbe_set_promisc(struct adapter *); static void ixgbe_set_multi(struct adapter *); static void ixgbe_update_link_status(struct adapter *); static void ixgbe_refresh_mbufs(struct rx_ring *, int); static int ixgbe_xmit(struct tx_ring *, struct mbuf **); static int ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS); static int ixgbe_set_advertise(SYSCTL_HANDLER_ARGS); static int ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS); static int ixgbe_dma_malloc(struct adapter *, bus_size_t, struct ixgbe_dma_alloc *, int); static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *); static int ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static int ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static void ixgbe_set_ivar(struct adapter *, u8, u8, s8); static void ixgbe_configure_ivars(struct adapter *); static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); static void ixgbe_setup_vlan_hw_support(struct adapter *); static void ixgbe_register_vlan(void *, struct ifnet *, u16); static void ixgbe_unregister_vlan(void *, struct ifnet *, u16); static void ixgbe_add_hw_stats(struct adapter *adapter); static __inline void ixgbe_rx_discard(struct rx_ring *, int); static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, struct mbuf *, u32); static void ixgbe_enable_rx_drop(struct adapter *); static void ixgbe_disable_rx_drop(struct adapter *); /* Support for pluggable optic modules */ static bool ixgbe_sfp_probe(struct adapter *); static void ixgbe_setup_optics(struct adapter *); /* Legacy (single vector interrupt handler */ static void ixgbe_legacy_irq(void *); /* The MSI/X Interrupt handlers */ static void ixgbe_msix_que(void *); static void ixgbe_msix_link(void *); /* Deferred interrupt tasklets */ static void 
ixgbe_handle_que(void *, int); static void ixgbe_handle_link(void *, int); static void ixgbe_handle_msf(void *, int); static void ixgbe_handle_mod(void *, int); #ifdef IXGBE_FDIR static void ixgbe_atr(struct tx_ring *, struct mbuf *); static void ixgbe_reinit_fdir(void *, int); #endif /* Missing shared code prototype */ extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixgbe_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixgbe_probe), DEVMETHOD(device_attach, ixgbe_attach), DEVMETHOD(device_detach, ixgbe_detach), DEVMETHOD(device_shutdown, ixgbe_shutdown), DEVMETHOD_END }; static driver_t ixgbe_driver = { "ix", ixgbe_methods, sizeof(struct adapter), }; devclass_t ixgbe_devclass; DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0); MODULE_DEPEND(ixgbe, pci, 1, 1, 1); MODULE_DEPEND(ixgbe, ether, 1, 1, 1); /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0, "IXGBE driver parameters"); /* ** AIM: Adaptive Interrupt Moderation ** which means that the interrupt rate ** is varied over time based on the ** traffic for that interrupt vector */ static int ixgbe_enable_aim = TRUE; TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim); SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RW, &ixgbe_enable_aim, 0, "Enable adaptive interrupt moderation"); static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate); SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); /* How many packets rxeof tries to clean at a time */ static int ixgbe_rx_process_limit = 256; TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit); SYSCTL_INT(_hw_ix, OID_AUTO, 
rx_process_limit, CTLFLAG_RDTUN, &ixgbe_rx_process_limit, 0, "Maximum number of received packets to process at a time," "-1 means unlimited"); /* How many packets txeof tries to clean at a time */ static int ixgbe_tx_process_limit = 256; TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit); SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, &ixgbe_tx_process_limit, 0, "Maximum number of sent packets to process at a time," "-1 means unlimited"); /* ** Smart speed setting, default to on ** this only works as a compile option ** right now as its during attach, set ** this to 'ixgbe_smart_speed_off' to ** disable. */ static int ixgbe_smart_speed = ixgbe_smart_speed_on; /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. */ static int ixgbe_enable_msix = 1; TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix); SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, "Enable MSI-X interrupts"); /* * Number of Queues, can be set to 0, * it then autoconfigures based on the * number of cpus with a max of 8. This * can be overriden manually here. */ static int ixgbe_num_queues = 0; TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues); SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0, "Number of queues to configure, 0 indicates autoconfigure"); /* ** Number of TX descriptors per ring, ** setting higher than RX as this seems ** the better performing choice. 
*/ static int ixgbe_txd = PERFORM_TXD; TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd); SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0, "Number of transmit descriptors per queue"); /* Number of RX descriptors per ring */ static int ixgbe_rxd = PERFORM_RXD; TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd); SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0, "Number of receive descriptors per queue"); /* ** Defining this on will allow the use ** of unsupported SFP+ modules, note that ** doing so you are on your own :) */ static int allow_unsupported_sfp = FALSE; TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp); /* ** HW RSC control: ** this feature only works with ** IPv4, and only on 82599 and later. ** Also this will cause IP forwarding to ** fail and that can't be controlled by ** the stack as LRO can. For all these ** reasons I've deemed it best to leave ** this off and not bother with a tuneable ** interface, this would need to be compiled ** to enable. */ static bool ixgbe_rsc_enable = FALSE; /* Keep running tab on them for sanity check */ static int ixgbe_total_ports; #ifdef IXGBE_FDIR /* ** For Flow Director: this is the ** number of TX packets we sample ** for the filter pool, this means ** every 20th packet will be probed. ** ** This feature can be disabled by ** setting this to 0. */ static int atr_sample_rate = 20; /* ** Flow Director actually 'steals' ** part of the packet buffer as its ** filter pool, this variable controls ** how much it uses: ** 0 = 64K, 1 = 128K, 2 = 256K */ static int fdir_pballoc = 1; #endif #ifdef DEV_NETMAP /* * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to * be a reference on how to implement netmap support in a driver. * Additional comments are in ixgbe_netmap.h . * * contains functions for netmap support * that extend the standard driver. 
*/ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * ixgbe_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixgbe_probe(device_t dev) { ixgbe_vendor_info_t *ent; u16 pci_vendor_id = 0; u16 pci_device_id = 0; u16 pci_subvendor_id = 0; u16 pci_subdevice_id = 0; char adapter_name[256]; INIT_DEBUGOUT("ixgbe_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixgbe_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(adapter_name, "%s, Version - %s", ixgbe_strings[ent->index], ixgbe_driver_version); device_set_desc_copy(dev, adapter_name); ++ixgbe_total_ports; return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. 
* * return 0 on success, positive on failure *********************************************************************/ static int ixgbe_attach(device_t dev) { struct adapter *adapter; struct ixgbe_hw *hw; int error = 0; u16 csum; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_attach: begin"); /* Allocate, clear, and link in our adapter structure */ adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; hw = &adapter->hw; /* Core Lock Init*/ IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL APIs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW, + OID_AUTO, "enable_aim", CTLFLAG_RW, &ixgbe_enable_aim, 1, "Interrupt Moderation"); /* ** Allow a kind of speed control by forcing the autoneg ** advertised speed list to only a certain value, this ** supports 1G on 82599 devices, and 100Mb on x540. 
*/ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_set_advertise, "I", "Link Speed"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_set_thermal_test, "I", "Thermal Test"); /* Set up the timer callout */ callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware revision */ ixgbe_identify_hardware(adapter); /* Do base PCI setup - map BAR0 */ if (ixgbe_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_out; } /* Do descriptor calc and sanity checks */ if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) { device_printf(dev, "TXD config issue, using default!\n"); adapter->num_tx_desc = DEFAULT_TXD; } else adapter->num_tx_desc = ixgbe_txd; /* ** With many RX rings it is easy to exceed the ** system mbuf allocation. Tuning nmbclusters ** can alleviate this. */ if (nmbclusters > 0 ) { int s; s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports; if (s > nmbclusters) { device_printf(dev, "RX Descriptors exceed " "system mbuf max, using default instead!\n"); ixgbe_rxd = DEFAULT_RXD; } } if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) { device_printf(dev, "RXD config issue, using default!\n"); adapter->num_rx_desc = DEFAULT_RXD; } else adapter->num_rx_desc = ixgbe_rxd; /* Allocate our TX/RX Queues */ if (ixgbe_allocate_queues(adapter)) { error = ENOMEM; goto err_out; } /* Allocate multicast array memory. 
*/ adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Initialize the shared code */ hw->allow_unsupported_sfp = allow_unsupported_sfp; error = ixgbe_init_shared_code(hw); if (error == IXGBE_ERR_SFP_NOT_PRESENT) { /* ** No optics in this port, set up ** so the timer routine will probe ** for later insertion. */ adapter->sfp_probe = TRUE; error = 0; } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev,"Unsupported SFP+ module detected!\n"); error = EIO; goto err_late; } else if (error) { device_printf(dev,"Unable to initialize the shared code\n"); error = EIO; goto err_late; } /* Make sure we have a good EEPROM before we read from it */ if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) { device_printf(dev,"The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } error = ixgbe_init_hw(hw); switch (error) { case IXGBE_ERR_EEPROM_VERSION: device_printf(dev, "This device is a pre-production adapter/" "LOM. 
Please be aware there may be issues associated " "with your hardware.\n If you are experiencing problems " "please contact your Intel or hardware representative " "who provided you with this hardware.\n"); break; case IXGBE_ERR_SFP_NOT_SUPPORTED: device_printf(dev,"Unsupported SFP+ Module\n"); error = EIO; goto err_late; case IXGBE_ERR_SFP_NOT_PRESENT: device_printf(dev,"No SFP+ Module found\n"); /* falls thru */ default: break; } /* Detect and set physical type */ ixgbe_setup_optics(adapter); if ((adapter->msix > 1) && (ixgbe_enable_msix)) error = ixgbe_allocate_msix(adapter); else error = ixgbe_allocate_legacy(adapter); if (error) goto err_late; /* Setup OS specific network interface */ if (ixgbe_setup_interface(dev, adapter) != 0) goto err_late; /* Initialize statistics */ ixgbe_update_stats_counters(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); /* ** Check PCIE slot type/speed/width */ ixgbe_get_slot_info(hw); /* Set an initial default flow control value */ adapter->fc = ixgbe_fc_full; /* let hardware know driver is loaded */ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); ixgbe_add_hw_stats(adapter); #ifdef DEV_NETMAP ixgbe_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("ixgbe_attach: end"); return (0); err_late: ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); err_out: if (adapter->ifp != NULL) if_free(adapter->ifp); ixgbe_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. 
*  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  Detach is refused with EBUSY while a VLAN trunk is still attached
 *  to the interface.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev)
{
	struct adapter	*adapter;
	struct ifnet	*ifp;
	u32		ctrl_ext;

	adapter = device_get_softc(dev);
	ifp = adapter->ifp;

	INIT_DEBUGOUT("ixgbe_detach: begin");

	/* Make sure VLANS are not using driver */
	if (ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	/* Stop the adapter while holding the core lock. */
	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);

	/* Drain and free every per-queue taskqueue that exists. */
	for (int i = 0; i < adapter->num_queues; i++) {
		struct ix_queue *que = &adapter->queues[i];

		if (que->tq == NULL)
			continue;
#ifndef IXGBE_LEGACY_TX
		taskqueue_drain(que->tq, &adapter->tx_rings[i].txq_task);
#endif
		taskqueue_drain(que->tq, &que->que_task);
		taskqueue_free(que->tq);
	}

	/* Drain the Link queue */
	if (adapter->tq != NULL) {
		taskqueue_drain(adapter->tq, &adapter->link_task);
		taskqueue_drain(adapter->tq, &adapter->mod_task);
		taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
		taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
		taskqueue_free(adapter->tq);
	}

	/* let hardware know driver is unloading */
	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(ifp);
	callout_drain(&adapter->timer);
#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */
	ixgbe_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
	free(adapter->mta, M_DEVBUF);

	IXGBE_CORE_LOCK_DESTROY(adapter);
	return (0);
}
/********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixgbe_shutdown(device_t dev) { struct adapter *adapter = device_get_softc(dev); IXGBE_CORE_LOCK(adapter); ixgbe_stop(adapter); IXGBE_CORE_UNLOCK(adapter); return (0); } #ifdef IXGBE_LEGACY_TX /********************************************************************* * Transmit entry point * * ixgbe_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * In case resources are not available stack is notified and * the packet is requeued. **********************************************************************/ static void ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) { struct mbuf *m_head; struct adapter *adapter = txr->adapter; IXGBE_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; if (!adapter->link_active) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (ixgbe_xmit(txr, &m_head)) { if (m_head != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); /* Set watchdog on */ txr->watchdog_time = ticks; txr->queue_status = IXGBE_QUEUE_WORKING; } return; } /* * Legacy TX start - called by the stack, this * always uses the first tx ring, and should * not be used with multiqueue tx enabled. */ static void ixgbe_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_TX_LOCK(txr); ixgbe_start_locked(txr, ifp); IXGBE_TX_UNLOCK(txr); } return; } #else /* ! 
IXGBE_LEGACY_TX */ /* ** Multiqueue Transmit driver ** */ static int ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct ix_queue *que; struct tx_ring *txr; int i, err = 0; /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; else i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; err = drbr_enqueue(ifp, txr->br, m); if (err) return (err); if (IXGBE_TX_TRYLOCK(txr)) { err = ixgbe_mq_start_locked(ifp, txr); IXGBE_TX_UNLOCK(txr); } else taskqueue_enqueue(que->tq, &txr->txq_task); return (err); } static int ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int enqueued = 0, err = 0; if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) return (ENETDOWN); /* Process the queue */ #if __FreeBSD_version < 901504 next = drbr_dequeue(ifp, txr->br); while (next != NULL) { if ((err = ixgbe_xmit(txr, &next)) != 0) { if (next != NULL) err = drbr_enqueue(ifp, txr->br, next); #else while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = ixgbe_xmit(txr, &next)) != 0) { if (next == NULL) { drbr_advance(ifp, txr->br); } else { drbr_putback(ifp, txr->br, next); } #endif break; } #if __FreeBSD_version >= 901504 drbr_advance(ifp, txr->br); #endif enqueued++; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; #if __FreeBSD_version < 901504 next = drbr_dequeue(ifp, txr->br); #endif } if (enqueued > 0) { /* Set watchdog on */ txr->queue_status = IXGBE_QUEUE_WORKING; txr->watchdog_time = ticks; } if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) ixgbe_txeof(txr); return (err); } /* * Called from a taskqueue to drain queued transmit packets. 
*/ static void ixgbe_deferred_mq_start(void *arg, int pending) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; IXGBE_TX_LOCK(txr); if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); IXGBE_TX_UNLOCK(txr); } /* ** Flush all ring buffers */ static void ixgbe_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { IXGBE_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IXGBE_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* IXGBE_LEGACY_TX */ /********************************************************************* * Ioctl entry point * * ixgbe_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ixgbe_hw *hw = &adapter->hw; struct ifreq *ifr = (struct ifreq *) data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
*/ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixgbe_init(adapter); if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); } else error = ether_ioctl(ifp, command, data); #endif break; case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) { error = EINVAL; } else { IXGBE_CORE_LOCK(adapter); ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXGBE_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixgbe_set_promisc(adapter); } } else ixgbe_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_stop(adapter); adapter->if_flags = ifp->if_flags; IXGBE_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_disable_intr(adapter); ixgbe_set_multi(adapter); ixgbe_enable_intr(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & 
IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } VLAN_CAPABILITIES(ifp); break; } case SIOCGI2C: { struct ixgbe_i2c_req i2c; IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (error) break; if ((i2c.dev_addr != 0xA0) || (i2c.dev_addr != 0xA2)){ error = EINVAL; break; } hw->phy.ops.read_i2c_byte(hw, i2c.offset, i2c.dev_addr, i2c.data); error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } default: IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ #define IXGBE_MHADD_MFS_SHIFT 16 static void ixgbe_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; u32 k, txdctl, mhadd, gpie; u32 rxdctl, rxctrl; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixgbe_init_locked: begin"); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); /* reprogram the RAR[0] in case user changed it. 
*/ ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); hw->addr_ctrl.rar_used_count = 1; /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 if (hw->mac.type != ixgbe_mac_82598EB) ifp->if_hwassist |= CSUM_SCTP; #endif } /* Prepare transmit descriptors and buffers */ if (ixgbe_setup_transmit_structures(adapter)) { device_printf(dev,"Could not setup transmit structures\n"); ixgbe_stop(adapter); return; } ixgbe_init_hw(hw); ixgbe_initialize_transmit_units(adapter); /* Setup Multicast table */ ixgbe_set_multi(adapter); /* ** Determine the correct mbuf pool ** for doing jumbo frames */ if (adapter->max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else if (adapter->max_frame_size <= 9216) adapter->rx_mbuf_sz = MJUM9BYTES; else adapter->rx_mbuf_sz = MJUM16BYTES; /* Prepare receive descriptors and buffers */ if (ixgbe_setup_receive_structures(adapter)) { device_printf(dev,"Could not setup receive structures\n"); ixgbe_stop(adapter); return; } /* Configure RX settings */ ixgbe_initialize_receive_units(adapter); gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE); /* Enable Fan Failure Interrupt */ gpie |= IXGBE_SDP1_GPIEN; /* Add for Module detection */ if (hw->mac.type == ixgbe_mac_82599EB) gpie |= IXGBE_SDP2_GPIEN; /* Thermal Failure Detection */ if (hw->mac.type == ixgbe_mac_X540) gpie |= IXGBE_SDP0_GPIEN; if (adapter->msix > 1) { /* Enable Enhanced MSIX mode */ gpie |= IXGBE_GPIE_MSIX_MODE; gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD; } IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); /* Set MTU size */ if 
(ifp->if_mtu > ETHERMTU) { mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } /* Now enable all the queues */ for (int i = 0; i < adapter->num_queues; i++) { txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ txdctl |= (8 << 16); /* * When the internal queue falls below PTHRESH (32), * start prefetching as long as there are at least * HTHRESH (1) buffers ready. The values are taken * from the Intel linux driver 3.8.21. * Prefetching enables tx line rate even with 1 queue. */ txdctl |= (32 << 0) | (1 << 8); IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl); } for (int i = 0; i < adapter->num_queues; i++) { rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); if (hw->mac.type == ixgbe_mac_82598EB) { /* ** PTHRESH = 21 ** HTHRESH = 4 ** WTHRESH = 8 */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; } rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl); for (k = 0; k < 10; k++) { if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) & IXGBE_RXDCTL_ENABLE) break; else msec_delay(1); } wmb(); #ifdef DEV_NETMAP /* * In netmap mode, we must preserve the buffers made * available to userspace before the if_init() * (this is true by default on the TX side, because * init makes all buffers available to userspace). * * netmap_reset() and the device specific routines * (e.g. ixgbe_setup_receive_rings()) map these * buffers at the end of the NIC ring, so here we * must set the RDT (tail) register to make sure * they are not overwritten. * * In this driver the NIC ring starts at RDH = 0, * RDT points to the last slot available for reception (?), * so RDT = num_rx_desc - 1 means the whole ring is available. 
*/ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t); } else #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1); } /* Set up VLAN support and filter */ ixgbe_setup_vlan_hw_support(adapter); /* Enable Receive engine */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); if (hw->mac.type == ixgbe_mac_82598EB) rxctrl |= IXGBE_RXCTRL_DMBYPS; rxctrl |= IXGBE_RXCTRL_RXEN; ixgbe_enable_rx_dma(hw, rxctrl); callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); /* Set up MSI/X routing */ if (ixgbe_enable_msix) { ixgbe_configure_ivars(adapter); /* Set up auto-mask */ if (hw->mac.type == ixgbe_mac_82598EB) IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); else { IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); } } else { /* Simple settings for Legacy/MSI */ ixgbe_set_ivar(adapter, 0, 0, 0); ixgbe_set_ivar(adapter, 0, 0, 1); IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); } #ifdef IXGBE_FDIR /* Init Flow director */ if (hw->mac.type != ixgbe_mac_82598EB) { u32 hdrm = 32 << fdir_pballoc; hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL); ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc); } #endif /* ** Check on any SFP devices that ** need to be kick-started */ if (hw->phy.type == ixgbe_phy_none) { int err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } } /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR); /* Config/Enable Link */ ixgbe_config_link(adapter); /* Hardware Packet Buffer & Flow Control setup */ { u32 rxpb, frame, size, tmp; frame = adapter->max_frame_size; /* Calculate High Water */ if (hw->mac.type == ixgbe_mac_X540) tmp = 
IXGBE_DV_X540(frame, frame); else tmp = IXGBE_DV(frame, frame); size = IXGBE_BT2KB(tmp); rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; hw->fc.high_water[0] = rxpb - size; /* Now calculate Low Water */ if (hw->mac.type == ixgbe_mac_X540) tmp = IXGBE_LOW_DV_X540(frame); else tmp = IXGBE_LOW_DV(frame); hw->fc.low_water[0] = IXGBE_BT2KB(tmp); hw->fc.requested_mode = adapter->fc; hw->fc.pause_time = IXGBE_FC_PAUSE; hw->fc.send_xon = TRUE; } /* Initialize the FC settings */ ixgbe_start_hw(hw); /* And now turn on interrupts */ ixgbe_enable_intr(adapter); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; return; } static void ixgbe_init(void *arg) { struct adapter *adapter = arg; IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); return; } /* ** ** MSIX Interrupt Handlers and Tasklets ** */ static inline void ixgbe_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } } static inline void ixgbe_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); } } static void ixgbe_handle_que(void *context, int pending) { struct ix_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; bool more; if (ifp->if_drv_flags & 
IFF_DRV_RUNNING) { more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifndef IXGBE_LEGACY_TX if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ixgbe_start_locked(txr, ifp); #endif IXGBE_TX_UNLOCK(txr); } /* Reenable this interrupt */ if (que->res != NULL) ixgbe_enable_queue(adapter, que->msix); else ixgbe_enable_intr(adapter); return; } /********************************************************************* * * Legacy Interrupt Service routine * **********************************************************************/ static void ixgbe_legacy_irq(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; bool more; u32 reg_eicr; reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR); ++que->irqs; if (reg_eicr == 0) { ixgbe_enable_intr(adapter); return; } more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifdef IXGBE_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ixgbe_start_locked(txr, ifp); #else if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #endif IXGBE_TX_UNLOCK(txr); /* Check for fan failure */ if ((hw->phy.media_type == ixgbe_media_type_copper) && (reg_eicr & IXGBE_EICR_GPI_SDP1)) { device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! 
" "REPLACE IMMEDIATELY!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1); } /* Link status change */ if (reg_eicr & IXGBE_EICR_LSC) taskqueue_enqueue(adapter->tq, &adapter->link_task); if (more) taskqueue_enqueue(que->tq, &que->que_task); else ixgbe_enable_intr(adapter); return; } /********************************************************************* * * MSIX Queue Interrupt Service routine * **********************************************************************/ void ixgbe_msix_que(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; bool more; u32 newitr = 0; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; ixgbe_disable_queue(adapter, que->msix); ++que->irqs; more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifdef IXGBE_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(ifp->if_snd)) ixgbe_start_locked(txr, ifp); #else if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #endif IXGBE_TX_UNLOCK(txr); /* Do AIM now? */ if (ixgbe_enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. 
*/ if (que->eitr_setting) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); if (adapter->hw.mac.type == ixgbe_mac_82598EB) newitr |= newitr << 16; else newitr |= IXGBE_EITR_CNT_WDIS; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: if (more) taskqueue_enqueue(que->tq, &que->que_task); else ixgbe_enable_queue(adapter, que->msix); return; } static void ixgbe_msix_link(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 reg_eicr; ++adapter->link_irq; /* First get the cause */ reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS); /* Be sure the queue bits are not cleared */ reg_eicr &= ~IXGBE_EICR_RTX_QUEUE; /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr); /* Link status change */ if (reg_eicr & IXGBE_EICR_LSC) taskqueue_enqueue(adapter->tq, &adapter->link_task); if (adapter->hw.mac.type != ixgbe_mac_82598EB) { #ifdef IXGBE_FDIR if (reg_eicr & IXGBE_EICR_FLOW_DIR) { /* This is probably overkill :) */ if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) return; /* Disable the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); taskqueue_enqueue(adapter->tq, &adapter->fdir_task); } else #endif if (reg_eicr & IXGBE_EICR_ECC) { device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! 
" "Please Reboot!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } else if (reg_eicr & IXGBE_EICR_GPI_SDP1) { /* Clear the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); taskqueue_enqueue(adapter->tq, &adapter->msf_task); } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) { /* Clear the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2); taskqueue_enqueue(adapter->tq, &adapter->mod_task); } } /* Check for fan failure */ if ((hw->device_id == IXGBE_DEV_ID_82598AT) && (reg_eicr & IXGBE_EICR_GPI_SDP1)) { device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! " "REPLACE IMMEDIATELY!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); } /* Check for over temp condition */ if ((hw->mac.type == ixgbe_mac_X540) && (reg_eicr & IXGBE_EICR_TS)) { device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! " "PHY IS SHUT DOWN!!\n"); device_printf(adapter->dev, "System shutdown required\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); } IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. 
* **********************************************************************/ static void ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct adapter *adapter = ifp->if_softc; INIT_DEBUGOUT("ixgbe_media_status: begin"); IXGBE_CORE_LOCK(adapter); ixgbe_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IXGBE_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= adapter->optics | IFM_FDX; break; } IXGBE_CORE_UNLOCK(adapter); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int ixgbe_media_change(struct ifnet * ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("ixgbe_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.phy.autoneg_advertised = IXGBE_LINK_SPEED_100_FULL | IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_10GB_FULL; break; default: device_printf(adapter->dev, "Only auto media type\n"); return (EINVAL); } return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors, allowing the * TX engine to transmit the packets. 
* - return 0 on success, positive on failure * **********************************************************************/ static int ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; u32 olinfo_status = 0, cmd_type_len; int i, j, error, nsegs; int first; bool remap = TRUE; struct mbuf *m_head; bus_dma_segment_t segs[adapter->num_segs]; bus_dmamap_t map; struct ixgbe_tx_buf *txbuf; union ixgbe_adv_tx_desc *txd = NULL; m_head = *m_headp; /* Basic descriptor defines */ cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); if (m_head->m_flags & M_VLANTAG) cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; /* * Important to capture the first descriptor * used because it will contain the index of * the one we tell the hardware to report back */ first = txr->next_avail_desc; txbuf = &txr->tx_buffers[first]; map = txbuf->map; /* * Map the packet for DMA. */ retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(error)) { struct mbuf *m; switch (error) { case EFBIG: /* Try it again? 
- one try */ if (remap == TRUE) { remap = FALSE; m = m_defrag(*m_headp, M_NOWAIT); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; goto retry; } else return (error); case ENOMEM: txr->no_tx_dma_setup++; return (error); default: txr->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } /* Make certain there are enough descriptors */ if (nsegs > txr->tx_avail - 2) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* ** Set up the appropriate offload context ** this will consume the first descriptor */ error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); if (__predict_false(error)) { if (error == ENOBUFS) *m_headp = NULL; return (error); } #ifdef IXGBE_FDIR /* Do the flow director magic */ if ((txr->atr_sample) && (!adapter->fdir_reinit)) { ++txr->atr_count; if (txr->atr_count >= atr_sample_rate) { ixgbe_atr(txr, m_head); txr->atr_count = 0; } } #endif i = txr->next_avail_desc; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txbuf = &txr->tx_buffers[i]; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(txr->txd_cmd | cmd_type_len |seglen); txd->read.olinfo_status = htole32(olinfo_status); if (++i == txr->num_desc) i = 0; } txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); txr->tx_avail -= nsegs; txr->next_avail_desc = i; txbuf->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. 
*/ txr->tx_buffers[first].map = txbuf->map; txbuf->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* Set the EOP descriptor that will be marked done */ txbuf = &txr->tx_buffers[first]; txbuf->eop = txd; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. */ ++txr->total_packets; IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i); return (0); } static void ixgbe_set_promisc(struct adapter *adapter) { u_int32_t reg_rctl; struct ifnet *ifp = adapter->ifp; int mcnt = 0; reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); reg_rctl &= (~IXGBE_FCTRL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= IXGBE_FCTRL_MPE; reg_rctl &= ~IXGBE_FCTRL_UPE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); } return; } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. 
*
 *  Copies up to MAX_NUM_MULTICAST_ADDRESSES link-layer multicast
 *  addresses of the interface into adapter->mta, programs the
 *  unicast/multicast promiscuous bits of FCTRL from the interface
 *  flags, and, when the list was not truncated, hands the table to
 *  ixgbe_update_mc_addr_list().
 *
 **********************************************************************/
#define IXGBE_RAR_ENTRIES 16

static void
ixgbe_set_multi(struct adapter *adapter)
{
	u32	fctrl;
	u8	*mta;
	u8	*update_ptr;
	struct	ifmultiaddr *ifma;
	int	mcnt = 0;
	struct ifnet   *ifp = adapter->ifp;

	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");

	/* Start from an all-zero address table */
	mta = adapter->mta;
	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
	    MAX_NUM_MULTICAST_ADDRESSES);

	/* Walk the interface's multicast list under its address lock */
#if __FreeBSD_version < 800000
	IF_ADDR_LOCK(ifp);
#else
	if_maddr_rlock(ifp);
#endif
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		/* Only link-layer addresses are copied */
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		/* Table full: stop, mcnt stays at the maximum */
		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;
		bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
		    IXGBE_ETH_LENGTH_OF_ADDRESS);
		mcnt++;
	}
#if __FreeBSD_version < 800000
	IF_ADDR_UNLOCK(ifp);
#else
	if_maddr_runlock(ifp);
#endif

	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
	/*
	 * NOTE(review): every branch of the chain below explicitly
	 * sets or clears both UPE and MPE, so this initial OR has no
	 * effect on the value finally written.
	 */
	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	if (ifp->if_flags & IFF_PROMISC)
		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
	    ifp->if_flags & IFF_ALLMULTI) {
		/* Table overflow or ALLMULTI: MPE on, UPE off */
		fctrl |= IXGBE_FCTRL_MPE;
		fctrl &= ~IXGBE_FCTRL_UPE;
	} else
		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);

	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);

	/* The table is only passed on when the list fit into it */
	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
		update_ptr = mta;
		ixgbe_update_mc_addr_list(&adapter->hw,
		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
	}

	return;
}

/*
 * This is an iterator function now needed by the multicast
 * shared code. It simply feeds the shared code routine the
 * addresses in the array of ixgbe_set_multi() one by one.
*/ static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { u8 *addr = *update_ptr; u8 *newptr; *vmdq = 0; newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; *update_ptr = newptr; return addr; } /********************************************************************* * Timer routine * * This routine checks for link status,updates statistics, * and runs the watchdog check. * **********************************************************************/ static void ixgbe_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int hung = 0, paused = 0; mtx_assert(&adapter->core_mtx, MA_OWNED); /* Check for pluggable optics */ if (adapter->sfp_probe) if (!ixgbe_sfp_probe(adapter)) goto out; /* Nothing to do */ ixgbe_update_link_status(adapter); ixgbe_update_stats_counters(adapter); /* * If the interface has been paused * then don't do the watchdog check */ if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) paused = 1; /* ** Check the TX queues status ** - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if ((txr->queue_status == IXGBE_QUEUE_HUNG) && (paused == 0)) ++hung; else if (txr->queue_status == IXGBE_QUEUE_WORKING) taskqueue_enqueue(que->tq, &txr->txq_task); } /* Only truely watchdog if all queues show hung */ if (hung == adapter->num_queues) goto watchdog; out: callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); return; watchdog: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)), IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me))); device_printf(dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; 
ixgbe_init_locked(adapter); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ static void ixgbe_update_link_status(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; if (adapter->link_up){ if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev,"Link is up %d Gbps %s \n", ((adapter->link_speed == 128)? 10:1), "Full Duplex"); adapter->link_active = TRUE; /* Update any Flow Control changes */ ixgbe_fc_enable(&adapter->hw); if_link_state_change(ifp, LINK_STATE_UP); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev,"Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); adapter->link_active = FALSE; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void ixgbe_stop(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; ifp = adapter->ifp; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixgbe_stop: begin\n"); ixgbe_disable_intr(adapter); callout_stop(&adapter->timer); /* Let the stack know...*/ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ixgbe_reset_hw(hw); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); if (hw->mac.type == ixgbe_mac_82599EB) ixgbe_stop_mac_link_on_d3_82599(hw); /* Turn off the laser - noop with no optics */ ixgbe_disable_tx_laser(hw); /* Update the stack */ adapter->link_up = FALSE; ixgbe_update_link_status(adapter); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); return; } /********************************************************************* * * Determine hardware revision. 
* **********************************************************************/ static void ixgbe_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; /* Save off the information about this board */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* We need this here to set the num_segs below */ ixgbe_set_mac_type(hw); /* Pick up the 82599 and VF settings */ if (hw->mac.type != ixgbe_mac_82598EB) { hw->phy.smart_speed = ixgbe_smart_speed; adapter->num_segs = IXGBE_82599_SCATTER; } else adapter->num_segs = IXGBE_82598_SCATTER; return; } /********************************************************************* * * Determine optic type * **********************************************************************/ static void ixgbe_setup_optics(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int layer; layer = ixgbe_get_supported_physical_layer(hw); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { adapter->optics = IFM_10G_T; return; } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) { adapter->optics = IFM_1000_T; return; } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) { adapter->optics = IFM_1000_SX; return; } if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR | IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) { adapter->optics = IFM_10G_LR; return; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { adapter->optics = IFM_10G_SR; return; } if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) { adapter->optics = IFM_10G_TWINAX; return; } if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 | IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) { adapter->optics = IFM_10G_CX4; return; } /* If we get here just set the default */ adapter->optics = IFM_ETHER | IFM_AUTO; return; } /********************************************************************* * * Setup the 
Legacy or MSI Interrupt handler * **********************************************************************/
static int
ixgbe_allocate_legacy(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que = adapter->queues;
#ifndef IXGBE_LEGACY_TX
	struct tx_ring	*txr = adapter->tx_rings;
#endif
	int		error, rid;

	/* RID 1 when adapter->msix is 1 (MSI), RID 0 otherwise */
	rid = (adapter->msix == 1) ? 1 : 0;

	/* One interrupt resource serves the whole adapter */
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/*
	 * Deferred-processing contexts for the single queue: the tasks
	 * plus the taskqueue thread that runs them.
	 */
#ifndef IXGBE_LEGACY_TX
	TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
#endif
	TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
	que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
	    taskqueue_thread_enqueue, &que->tq);
	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
	    device_get_nameunit(adapter->dev));

	/* Link, SFP module and multispeed-fiber tasks and their taskqueue */
	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
#ifdef IXGBE_FDIR
	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
#endif
	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
	    device_get_nameunit(adapter->dev));

	error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
	    que, &adapter->tag);
	if (error == 0) {
		/* For simplicity in the handlers */
		adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
		return (0);
	}

	/* Handler registration failed: tear both taskqueues down again */
	device_printf(dev, "Failed to register fast interrupt "
	    "handler: %d\n", error);
	taskqueue_free(que->tq);
	taskqueue_free(adapter->tq);
	que->tq = NULL;
	adapter->tq = NULL;
	return (error);
}

/********************************************************************* * * Setup MSIX Interrupt resources and handlers * **********************************************************************/ static int ixgbe_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int error, rid, vector = 0; for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { rid = vector + 1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register QUE handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; adapter->que_mask |= (u64)(1 << que->msix); /* ** Bind the msix vector, and thus the ** ring to the corresponding cpu. 
*/ if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, i); #ifndef IXGBE_LEGACY_TX TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); #endif TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* and Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate" " bus resource: Link interrupt [%d]\n", rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; /* Tasklets for Link, SFP and Multispeed Fiber */ TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); #ifdef IXGBE_FDIR TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); #endif adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", device_get_nameunit(adapter->dev)); return (0); } /* * Setup Either MSI/X or MSI */ static int ixgbe_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int rid, want, queues, msgs; /* Override by tuneable */ if (ixgbe_enable_msix == 0) goto msi; /* First try MSI/X */ msgs = pci_msix_count(dev); if (msgs == 0) goto msi; rid = PCIR_BAR(MSIX_82598_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { 
rid += 4; /* 82599 maps in higher BAR */ adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); } if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; if (ixgbe_num_queues != 0) queues = ixgbe_num_queues; /* Set max queues to 8 when autoconfiguring */ else if ((ixgbe_num_queues == 0) && (queues > 8)) queues = 8; /* reflect correct sysctl value */ ixgbe_num_queues = queues; /* ** Want one vector (RX/TX pair) per queue ** plus an additional for Link. */ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSIX Configuration Problem, " "%d vectors but %d queues wanted!\n", msgs, want); goto msi; } if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); adapter->num_queues = queues; return (msgs); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rid, adapter->msix_mem); adapter->msix_mem = NULL; } msgs = 1; if (pci_alloc_msi(dev, &msgs) == 0) { device_printf(adapter->dev,"Using an MSI interrupt\n"); return (msgs); } device_printf(adapter->dev,"Using a Legacy interrupt\n"); return (0); } static int ixgbe_allocate_pci_resources(struct adapter *adapter) { int rid; device_t dev = adapter->dev; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev,"Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle; 
/* Legacy defaults */ adapter->num_queues = 1; adapter->hw.back = &adapter->osdep; /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ adapter->msix = ixgbe_setup_msix(adapter); return (0); } static void ixgbe_free_pci_resources(struct adapter * adapter) { struct ix_queue *que = adapter->queues; device_t dev = adapter->dev; int rid, memrid; if (adapter->hw.mac.type == ixgbe_mac_82598EB) memrid = PCIR_BAR(MSIX_82598_BAR); else memrid = PCIR_BAR(MSIX_82599_BAR); /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* ** Release all msix queue resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); return; } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ static int ixgbe_setup_interface(device_t dev, struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp; INIT_DEBUGOUT("ixgbe_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); #if __FreeBSD_version < 1000025 ifp->if_baudrate = 1000000000; #else if_initbaudrate(ifp, IF_Gbps(10)); #endif ifp->if_init = ixgbe_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixgbe_ioctl; #ifndef IXGBE_LEGACY_TX ifp->if_transmit = ixgbe_mq_start; ifp->if_qflush = ixgbe_qflush; #else ifp->if_start = ixgbe_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2; IFQ_SET_READY(&ifp->if_snd); #endif ether_ifattach(ifp, adapter->hw.mac.addr); adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixgbe driver you can ** enable this and get full hardware tag filtering. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change, ixgbe_media_status); ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics); if (hw->device_id == IXGBE_DEV_ID_82598AT) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static void ixgbe_config_link(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; bool sfp, negotiate; sfp = ixgbe_is_sfp(hw); if (sfp) { if (hw->phy.multispeed_fiber) { hw->mac.ops.setup_sfp(hw); ixgbe_enable_tx_laser(hw); taskqueue_enqueue(adapter->tq, &adapter->msf_task); } else taskqueue_enqueue(adapter->tq, &adapter->mod_task); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); if (err) goto out; autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) err = hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (err) goto out; if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } out: return; } /******************************************************************** * Manage DMA'able memory. 
*******************************************************************/ static void ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs->ds_addr; return; } static int ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size, struct ixgbe_dma_alloc *dma, int mapflags) { device_t dev = adapter->dev; int r; r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &dma->dma_tag); if (r != 0) { device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; " "error %u\n", r); goto fail_0; } r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) { device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; " "error %u\n", r); goto fail_1; } r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) { device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; " "error %u\n", r); goto fail_2; } dma->dma_size = size; return (0); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); fail_1: bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (r); } static void ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); } /********************************************************************* * * Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, 
called only once at attach. * **********************************************************************/
static int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	/* Number of TX / RX rings fully set up so far, used for unwinding */
	int txconf = 0, rxconf = 0;

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself: descriptor area size rounded up to DBA_ALIGN */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = i;
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		/*
		 * NOTE(review): txconf has not yet been incremented for
		 * this ring, so a failure from here on appears to leave
		 * this ring's descriptor DMA area out of the err_tx_desc
		 * unwind - confirm.
		 */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = i;
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring*/
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

	/*
	 * Error unwind.  Each label falls through into the ones below it,
	 * so entering higher up releases more: first the descriptor DMA
	 * areas counted by rxconf / txconf, then the ring arrays, then
	 * the queue array.
	 */
err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}

/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 *  Returns 0 on success, otherwise the error from the failing step
 *  (ENOMEM when the tx_buffer array cannot be allocated).
 *
 **********************************************************************/
static int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
			       bus_get_dma_tag(adapter->dev),	/* parent */
			       1, 0,		/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXGBE_TSO_SIZE,		/* maxsize */
			       adapter->num_segs,	/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->txtag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* One zeroed tx_buffer bookkeeping entry per TX descriptor */
	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	/*
	 * NOTE(review): this tears down the transmit state of every
	 * ring on the adapter, not only txr - confirm the caller's
	 * own unwind path copes with that.
	 */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}

/*********************************************************************
 *
 *  Initialize a transmit ring.
* **********************************************************************/ static void ixgbe_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct ixgbe_tx_buf *txbuf; int i; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ /* Clear the old ring contents */ IXGBE_TX_LOCK(txr); #ifdef DEV_NETMAP /* * (under lock): if in netmap mode, do some consistency * checks and set slot to entry 0 of the netmap ring. */ slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < txr->num_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. * Slots in the netmap ring (indexed by "si") are * kring->nkr_hwofs positions "ahead" wrt the * corresponding slot in the NIC ring. In some drivers * (not here) nkr_hwofs can be negative. Function * netmap_idx_n2k() handles wraparounds properly. 
*/ if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si)); } #endif /* DEV_NETMAP */ /* Clear the EOP descriptor pointer */ txbuf->eop = NULL; } #ifdef IXGBE_FDIR /* Set the rate at which we sample packets */ if (adapter->hw.mac.type != ixgbe_mac_82598EB) txr->atr_sample = atr_sample_rate; #endif /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IXGBE_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static int ixgbe_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) ixgbe_setup_transmit_ring(txr); return (0); } /********************************************************************* * * Enable transmit unit. 
* **********************************************************************/
static void
ixgbe_initialize_transmit_units(struct adapter *adapter)
{
	struct ixgbe_hw	*hw = &adapter->hw;
	struct tx_ring	*ring;

	/* Program base, length, head and tail for every TX ring */
	for (int q = 0; q < adapter->num_queues; q++) {
		u64	ring_paddr;
		u32	txctrl;

		ring = &adapter->tx_rings[q];
		ring_paddr = ring->txdma.dma_paddr;

		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(q),
		    (ring_paddr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(q), (ring_paddr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(q),
		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));

		/* Head and tail both start at descriptor 0 */
		IXGBE_WRITE_REG(hw, IXGBE_TDH(q), 0);
		IXGBE_WRITE_REG(hw, IXGBE_TDT(q), 0);

		/* Per-ring software state */
		ring->txd_cmd = IXGBE_TXD_CMD_IFCS;
		ring->queue_status = IXGBE_QUEUE_IDLE;
		ring->process_limit = ixgbe_tx_process_limit;

		/*
		 * Disable Head Writeback.  The 82598 uses one DCA_TXCTRL
		 * register layout; every other MAC uses the 82599 one.
		 */
		if (hw->mac.type == ixgbe_mac_82598EB)
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(q));
		else
			txctrl = IXGBE_READ_REG(hw,
			    IXGBE_DCA_TXCTRL_82599(q));

		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;

		if (hw->mac.type == ixgbe_mac_82598EB)
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(q), txctrl);
		else
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(q),
			    txctrl);
	}

	/* The 82598 needs none of the global setup below */
	if (hw->mac.type == ixgbe_mac_82598EB)
		return;

	{
		u32 dmatxctl, rttdcs;

		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
		dmatxctl |= IXGBE_DMATXCTL_TE;
		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);

		/* The arbiter is disabled while MTQC is written */
		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		rttdcs |= IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
	}
}

/*********************************************************************
 *
 *  Free
all transmit rings. * **********************************************************************/ static void ixgbe_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { IXGBE_TX_LOCK(txr); ixgbe_free_transmit_buffers(txr); ixgbe_dma_free(adapter, &txr->txdma); IXGBE_TX_UNLOCK(txr); IXGBE_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void ixgbe_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct ixgbe_tx_buf *tx_buffer; int i; INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; tx_buffer = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; if (tx_buffer->map != NULL) { bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } else if (tx_buffer->map != NULL) { bus_dmamap_unload(txr->txtag, tx_buffer->map); bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } #ifdef IXGBE_LEGACY_TX if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct 
ixgbe_adv_tx_context_desc *TXD; struct ether_vlan_header *eh; struct ip *ip; struct ip6_hdr *ip6; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; int ehdrlen, ip_hlen = 0; u16 etype; u8 ipproto = 0; int offload = TRUE; int ctxd = txr->next_avail_desc; u16 vtag = 0; /* First check if TSO is to be used */ if (mp->m_pkthdr.csum_flags & CSUM_TSO) return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); } else if (offload == FALSE) /* ... no offload to do */ return (0); /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } /* Set the ether header length */ vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; ipproto = ip->ip_p; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); /* XXX-BZ this will go badly in case of ext hdrs. 
*/ ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; default: offload = FALSE; break; } vlan_macip_lens |= ip_hlen; type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; switch (ipproto) { case IPPROTO_TCP: if (mp->m_pkthdr.csum_flags & CSUM_TCP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: if (mp->m_pkthdr.csum_flags & CSUM_UDP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: if (mp->m_pkthdr.csum_flags & CSUM_SCTP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; break; #endif default: offload = FALSE; break; } if (offload) /* For the TX descriptor setup */ *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(0); /* We've consumed the first desc, adjust counters */ if (++ctxd == txr->num_desc) ctxd = 0; txr->next_avail_desc = ctxd; --txr->tx_avail; return (0); } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct ixgbe_adv_tx_context_desc *TXD; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0, paylen; u16 vtag = 0, eh_type; int ctxd, ehdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif struct tcphdr *th; /* * Determine where frame payload starts. 
* Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; eh_type = eh->evl_proto; } else { ehdrlen = ETHER_HDR_LEN; eh_type = eh->evl_encap_proto; } switch (ntohs(eh_type)) { #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); /* XXX-BZ For now we do not pretend to support ext. hdrs. */ if (ip6->ip6_nxt != IPPROTO_TCP) return (ENXIO); ip_hlen = sizeof(struct ip6_hdr); ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; #endif #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return (ENXIO); ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. 
*/ *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; break; #endif default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(eh_type)); break; } ctxd = txr->next_avail_desc; TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; tcp_hlen = th->th_off << 2; /* This is used in the transmit desc in encap */ paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); if (++ctxd == txr->num_desc) ctxd = 0; txr->tx_avail--; txr->next_avail_desc = ctxd; *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; ++txr->tso_tx; return (0); } #ifdef IXGBE_FDIR /* ** This routine parses packet headers so that Flow ** Director can make a hashed filter table entry ** allowing traffic flows to be identified and kept ** on the same cpu. This would be a performance ** hit, but we only do it at IXGBE_FDIR_RATE of ** packets. 
*/ static void ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) { struct adapter *adapter = txr->adapter; struct ix_queue *que; struct ip *ip; struct tcphdr *th; struct udphdr *uh; struct ether_vlan_header *eh; union ixgbe_atr_hash_dword input = {.dword = 0}; union ixgbe_atr_hash_dword common = {.dword = 0}; int ehdrlen, ip_hlen; u16 etype; eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; etype = eh->evl_proto; } else { ehdrlen = ETHER_HDR_LEN; etype = eh->evl_encap_proto; } /* Only handling IPv4 */ if (etype != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; /* check if we're UDP or TCP */ switch (ip->ip_p) { case IPPROTO_TCP: th = (struct tcphdr *)((caddr_t)ip + ip_hlen); /* src and dst are inverted */ common.port.dst ^= th->th_sport; common.port.src ^= th->th_dport; input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4; break; case IPPROTO_UDP: uh = (struct udphdr *)((caddr_t)ip + ip_hlen); /* src and dst are inverted */ common.port.dst ^= uh->uh_sport; common.port.src ^= uh->uh_dport; input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4; break; default: return; } input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag); if (mp->m_pkthdr.ether_vtag) common.flex_bytes ^= htons(ETHERTYPE_VLAN); else common.flex_bytes ^= etype; common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr; que = &adapter->queues[txr->me]; /* ** This assumes the Rx queue and Tx ** queue are bound to the same CPU */ ixgbe_fdir_add_signature_filter_82599(&adapter->hw, input, common, que->msix); } #endif /* IXGBE_FDIR */ /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. 
* **********************************************************************/ static void ixgbe_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; u32 work, processed = 0; u16 limit = txr->process_limit; struct ixgbe_tx_buf *buf; union ixgbe_adv_tx_desc *txd; mtx_assert(&txr->tx_mtx, MA_OWNED); #ifdef DEV_NETMAP if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = &na->tx_rings[txr->me]; txd = txr->tx_base; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); /* * In netmap mode, all the work is done in the context * of the client thread. Interrupt handlers only wake up * clients, which may be sleeping on individual rings * or on a global resource for all rings. * To implement tx interrupt mitigation, we wake up the client * thread roughly every half ring, even if the NIC interrupts * more frequently. This is implemented as follows: * - ixgbe_txsync() sets kring->nr_kflags with the index of * the slot that should wake up the thread (nkr_num_slots * means the user thread should not be woken up); * - the driver ignores tx interrupts unless netmap_mitigate=0 * or the slot has the DD bit set. 
*/ if (!netmap_mitigate || (kring->nr_kflags < kring->nkr_num_slots && txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { netmap_tx_irq(ifp, txr->me); } return; } #endif /* DEV_NETMAP */ if (txr->tx_avail == txr->num_desc) { txr->queue_status = IXGBE_QUEUE_IDLE; return; } /* Get work starting point */ work = txr->next_to_clean; buf = &txr->tx_buffers[work]; txd = &txr->tx_base[work]; work -= txr->num_desc; /* The distance to ring end */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); do { union ixgbe_adv_tx_desc *eop= buf->eop; if (eop == NULL) /* No work */ break; if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0) break; /* I/O not complete */ if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; buf->map = NULL; } buf->eop = NULL; ++txr->tx_avail; /* We clean the range if multi segment */ while (txd != eop) { ++txd; ++buf; ++work; /* wrap the ring? 
*/ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; buf->map = NULL; } ++txr->tx_avail; buf->eop = NULL; } ++txr->packets; ++processed; ++ifp->if_opackets; txr->watchdog_time = ticks; /* Try the next packet */ ++txd; ++buf; ++work; /* reset with a wrap */ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } prefetch(txd); } while (__predict_true(--limit)); bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); work += txr->num_desc; txr->next_to_clean = work; /* ** Watchdog calculation, we know there's ** work outstanding or the first return ** would have been taken, so none processed ** for too long indicates a hang. */ if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG)) txr->queue_status = IXGBE_QUEUE_HUNG; if (txr->tx_avail == txr->num_desc) txr->queue_status = IXGBE_QUEUE_IDLE; return; } /********************************************************************* * * Refresh mbuf buffers for RX descriptor rings * - now keeps its own state so discards due to resource * exhaustion are unnecessary, if an mbuf cannot be obtained * it just returns, keeping its placeholder, thus it can simply * be recalled to try again. 
*
 **********************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter *adapter = rxr->adapter;
	struct ixgbe_rx_buf *rxbuf;
	bus_dma_segment_t seg[1];
	struct mbuf *m;
	bool refreshed = FALSE;
	int slot, ahead, nsegs, error;

	/*
	 * 'slot' is the descriptor being refilled and 'ahead' is
	 * always one beyond it; the loop is controlled by 'ahead'.
	 */
	slot = rxr->next_to_refresh;
	ahead = slot + 1;
	if (ahead == rxr->num_desc)
		ahead = 0;

	for (; ahead != limit; ) {
		rxbuf = &rxr->rx_buffers[slot];

		/* Reuse the mbuf still parked in the slot, else get one */
		m = rxbuf->buf;
		if (m == NULL) {
			m = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (m == NULL)
				break;
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(m, ETHER_ALIGN);
		}

		m->m_pkthdr.len = m->m_len = rxr->mbuf_sz;

		if ((rxbuf->flags & IXGBE_RX_COPY) != 0) {
			/*
			 * The previous packet was copied out rather than
			 * replaced, so the cached bus address is still
			 * good and busdma can be skipped.
			 */
			rxr->rx_base[slot].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;
		} else {
			/* Get the memory mapping */
			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
			    rxbuf->pmap, m, seg, &nsegs, BUS_DMA_NOWAIT);
			if (error != 0) {
				printf("Refresh mbufs: payload dmamap load"
				    " failure - %d\n", error);
				m_free(m);
				rxbuf->buf = NULL;
				break;
			}
			rxbuf->buf = m;
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[slot].read.pkt_addr =
			    htole64(seg[0].ds_addr);
		}

		refreshed = TRUE;

		/* The successor index was computed ahead of time */
		slot = ahead;
		rxr->next_to_refresh = slot;
		if (++ahead == rxr->num_desc)
			ahead = 0;
	}

	/* Nothing refilled: leave the hardware tail alone */
	if (!refreshed)
		return;

	/* Update hardware tail index */
	IXGBE_WRITE_REG(&adapter->hw,
	    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
	return;
}

/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
* **********************************************************************/ static int ixgbe_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct ixgbe_rx_buf *rxbuf; int i, bsize, error; bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; if (!(rxr->rx_buffers = (struct ixgbe_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM16BYTES, /* maxsize */ 1, /* nsegments */ MJUM16BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } for (i = 0; i < rxr->num_desc; i++, rxbuf++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->ptag, BUS_DMA_NOWAIT, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to create RX dma map\n"); goto fail; } } return (0); fail: /* Frees all, but can handle partial completion */ ixgbe_free_receive_structures(adapter); return (error); } /* ** Used to detect a descriptor that has ** been merged by Hardware RSC. */ static inline u32 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) { return (le32toh(rx->wb.lower.lo_dword.data) & IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; } /********************************************************************* * * Initialize Hardware RSC (LRO) feature on 82599 * for an RX ring, this is toggled by the LRO capability * even though it is transparent to the stack. * * NOTE: since this HW feature only works with IPV4 and * our testing has shown soft LRO to be as effective * I have decided to disable this by default. 
* **********************************************************************/ static void ixgbe_setup_hw_rsc(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct ixgbe_hw *hw = &adapter->hw; u32 rscctrl, rdrxctl; /* If turning LRO/RSC off we need to disable it */ if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); rscctrl &= ~IXGBE_RSCCTL_RSCEN; return; } rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; #ifdef DEV_NETMAP /* crcstrip is optional in netmap */ if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) #endif /* DEV_NETMAP */ rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; rdrxctl |= IXGBE_RDRXCTL_RSCACKC; IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); rscctrl |= IXGBE_RSCCTL_RSCEN; /* ** Limit the total number of descriptors that ** can be combined, so it does not exceed 64K */ if (rxr->mbuf_sz == MCLBYTES) rscctrl |= IXGBE_RSCCTL_MAXDESC_16; else if (rxr->mbuf_sz == MJUMPAGESIZE) rscctrl |= IXGBE_RSCCTL_MAXDESC_8; else if (rxr->mbuf_sz == MJUM9BYTES) rscctrl |= IXGBE_RSCCTL_MAXDESC_4; else /* Using 16K cluster */ rscctrl |= IXGBE_RSCCTL_MAXDESC_1; IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); /* Enable TCP header recognition */ IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR)); /* Disable RSC for ACK packets */ IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); rxr->hw_rsc = TRUE; } static void ixgbe_free_receive_ring(struct rx_ring *rxr) { struct ixgbe_rx_buf *rxbuf; int i; for (i = 0; i < rxr->num_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->buf != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->buf->m_flags |= M_PKTHDR; m_freem(rxbuf->buf); rxbuf->buf = NULL; rxbuf->flags = 0; } } } 
/********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int ixgbe_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct ifnet *ifp; device_t dev; struct ixgbe_rx_buf *rxbuf; bus_dma_segment_t seg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(rxr->adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ adapter = rxr->adapter; ifp = adapter->ifp; dev = adapter->dev; /* Clear the ring contents */ IXGBE_RX_LOCK(rxr); #ifdef DEV_NETMAP /* same as in ixgbe_setup_transmit_ring() */ slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* DEV_NETMAP */ rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* Cache the size */ rxr->mbuf_sz = adapter->rx_mbuf_sz; /* Free current RX buffer structs and their mbufs */ ixgbe_free_receive_ring(rxr); /* Now replenish the mbufs */ for (int j = 0; j != rxr->num_desc; ++j) { struct mbuf *mp; rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP /* * In netmap mode, fill the map and set the buffer * address in the NIC ring, considering the offset * between the netmap and NIC rings (see comment in * ixgbe_setup_transmit_ring() ). 
No need to allocate * an mbuf, so end the block with a continue; */ if (slot) { int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(slot + sj, &paddr); netmap_load_map(rxr->ptag, rxbuf->pmap, addr); /* Update descriptor and the cached value */ rxr->rx_base[j].read.pkt_addr = htole64(paddr); rxbuf->addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ rxbuf->flags = 0; rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->buf == NULL) { error = ENOBUFS; goto fail; } mp = rxbuf->buf; mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); /* Update the descriptor and the cached value */ rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr); rxbuf->addr = htole64(seg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = 0; rxr->lro_enabled = FALSE; rxr->rx_copies = 0; rxr->rx_bytes = 0; rxr->discard = FALSE; rxr->vtag_strip = FALSE; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* ** Now set up the LRO interface: */ if (ixgbe_rsc_enable) ixgbe_setup_hw_rsc(rxr); else if (ifp->if_capenable & IFCAP_LRO) { int err = tcp_lro_init(lro); if (err) { device_printf(dev, "LRO Initialization failed!\n"); goto fail; } INIT_DEBUGOUT("RX Soft LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; } IXGBE_RX_UNLOCK(rxr); return (0); fail: ixgbe_free_receive_ring(rxr); IXGBE_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. 
* **********************************************************************/ static int ixgbe_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int j; for (j = 0; j < adapter->num_queues; j++, rxr++) if (ixgbe_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'j' failed, so its the terminus. */ for (int i = 0; i < j; ++i) { rxr = &adapter->rx_rings[i]; ixgbe_free_receive_ring(rxr); } return (ENOBUFS); } /********************************************************************* * * Setup receive registers and features. * **********************************************************************/ #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 #define BSIZEPKT_ROUNDUP ((1<rx_rings; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 bufsz, rxctrl, fctrl, srrctl, rxcsum; u32 reta, mrqc = 0, hlreg, random[10]; /* * Make sure receives are disabled while * setting up the descriptor ring */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN); /* Enable broadcasts */ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; fctrl |= IXGBE_FCTRL_DPF; fctrl |= IXGBE_FCTRL_PMCF; IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); /* Set for Jumbo Frames? */ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); if (ifp->if_mtu > ETHERMTU) hlreg |= IXGBE_HLREG0_JUMBOEN; else hlreg &= ~IXGBE_HLREG0_JUMBOEN; #ifdef DEV_NETMAP /* crcstrip is conditional in netmap (in RDRXCTL too ?) 
*/ if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) hlreg &= ~IXGBE_HLREG0_RXCRCSTRP; else hlreg |= IXGBE_HLREG0_RXCRCSTRP; #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); bufsz = (adapter->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 rdba = rxr->rxdma.dma_paddr; /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i), adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); /* Set up the SRRCTL register */ srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; srrctl |= bufsz; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0); IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0); /* Set the processing limit */ rxr->process_limit = ixgbe_rx_process_limit; } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { u32 psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); } rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); /* Setup RSS */ if (adapter->num_queues > 1) { int i, j; reta = 0; /* set up random bits */ arc4rand(&random, sizeof(random), 0); /* Set up the redirection table */ for (i = 0, j = 0; i < 128; i++, j++) { if (j == adapter->num_queues) j = 0; reta = (reta << 8) | (j * 0x11); if ((i & 3) == 3) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); } /* Now fill our hash function seeds */ for (int i = 0; i < 10; i++) IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]); /* Perform hash on these packet types */ mrqc = IXGBE_MRQC_RSSEN | IXGBE_MRQC_RSS_FIELD_IPV4 | IXGBE_MRQC_RSS_FIELD_IPV4_TCP | IXGBE_MRQC_RSS_FIELD_IPV4_UDP | 
IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | IXGBE_MRQC_RSS_FIELD_IPV6_EX | IXGBE_MRQC_RSS_FIELD_IPV6 | IXGBE_MRQC_RSS_FIELD_IPV6_TCP | IXGBE_MRQC_RSS_FIELD_IPV6_UDP | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); /* RSS and RX IPP Checksum are mutually exclusive */ rxcsum |= IXGBE_RXCSUM_PCSD; } if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= IXGBE_RXCSUM_PCSD; if (!(rxcsum & IXGBE_RXCSUM_PCSD)) rxcsum |= IXGBE_RXCSUM_IPPCSE; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); return; } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void ixgbe_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; ixgbe_free_receive_buffers(rxr); /* Free LRO memory */ tcp_lro_free(lro); /* Free the ring memory as well */ ixgbe_dma_free(adapter, &rxr->rxdma); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void ixgbe_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct ixgbe_rx_buf *rxbuf; INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); /* Cleanup any existing buffers */ if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->buf != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->buf->m_flags |= M_PKTHDR; m_freem(rxbuf->buf); } rxbuf->buf = NULL; if (rxbuf->pmap != NULL) { bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); rxbuf->pmap = NULL; } } if (rxr->rx_buffers != NULL) { free(rxr->rx_buffers, M_DEVBUF); 
rxr->rx_buffers = NULL; } } if (rxr->ptag != NULL) { bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; } return; } static __inline void ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) { /* * ATM LRO is only for IP/TCP packets and TCP checksum of the packet * should be computed by hardware. Also it should not have VLAN tag in * ethernet header. In case of IPv6 we do not yet support ext. hdrs. */ if (rxr->lro_enabled && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) || (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) == (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* * Send to the stack if: ** - LRO not enabled, or ** - no LRO resources, or ** - lro enqueue fails */ if (rxr->lro.lro_cnt != 0) if (tcp_lro_rx(&rxr->lro, m, 0) == 0) return; } IXGBE_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, m); IXGBE_RX_LOCK(rxr); } static __inline void ixgbe_rx_discard(struct rx_ring *rxr, int i) { struct ixgbe_rx_buf *rbuf; rbuf = &rxr->rx_buffers[i]; if (rbuf->fmp != NULL) {/* Partial chain ? */ rbuf->fmp->m_flags |= M_PKTHDR; m_freem(rbuf->fmp); rbuf->fmp = NULL; } /* ** With advanced descriptors the writeback ** clobbers the buffer addrs, so its easier ** to just free the existing mbufs and take ** the normal refresh path to get new buffers ** and mapping. */ if (rbuf->buf) { m_free(rbuf->buf); rbuf->buf = NULL; } rbuf->flags = 0; return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. 
* * Return TRUE for more work, FALSE for all clean. *********************************************************************/ static bool ixgbe_rxeof(struct ix_queue *que) { struct adapter *adapter = que->adapter; struct rx_ring *rxr = que->rxr; struct ifnet *ifp = adapter->ifp; struct lro_ctrl *lro = &rxr->lro; struct lro_entry *queued; int i, nextp, processed = 0; u32 staterr = 0; u16 count = rxr->process_limit; union ixgbe_adv_rx_desc *cur; struct ixgbe_rx_buf *rbuf, *nbuf; IXGBE_RX_LOCK(rxr); #ifdef DEV_NETMAP /* Same as the txeof routine: wakeup clients on intr. */ if (netmap_rx_irq(ifp, rxr->me, &processed)) { IXGBE_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mp; u32 rsc, ptype; u16 len; u16 vtag = 0; bool eop; /* Sync the ring. */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); cur = &rxr->rx_base[i]; staterr = le32toh(cur->wb.upper.status_error); if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; count--; sendmp = NULL; nbuf = NULL; rsc = 0; cur->wb.upper.status_error = 0; rbuf = &rxr->rx_buffers[i]; mp = rbuf->buf; len = le16toh(cur->wb.upper.length); ptype = le32toh(cur->wb.lower.lo_dword.data) & IXGBE_RXDADV_PKTTYPE_MASK; eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); /* Make sure bad packets are discarded */ if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) || (rxr->discard)) { rxr->rx_discarded++; if (eop) rxr->discard = FALSE; else rxr->discard = TRUE; ixgbe_rx_discard(rxr, i); goto next_desc; } /* ** On 82599 which supports a hardware ** LRO (called HW RSC), packets need ** not be fragmented across sequential ** descriptors, rather the next descriptor ** is indicated in bits of the descriptor. 
** This also means that we might proceses ** more than one packet at a time, something ** that has never been true before, it ** required eliminating global chain pointers ** in favor of what we are doing here. -jfv */ if (!eop) { /* ** Figure out the next descriptor ** of this frame. */ if (rxr->hw_rsc == TRUE) { rsc = ixgbe_rsc_count(cur); rxr->rsc_num += (rsc - 1); } if (rsc) { /* Get hardware index */ nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >> IXGBE_RXDADV_NEXTP_SHIFT); } else { /* Just sequential */ nextp = i + 1; if (nextp == adapter->num_rx_desc) nextp = 0; } nbuf = &rxr->rx_buffers[nextp]; prefetch(nbuf); } /* ** Rather than using the fmp/lmp global pointers ** we now keep the head of a packet chain in the ** buffer struct and pass this along from one ** descriptor to the next, until we get EOP. */ mp->m_len = len; /* ** See if there is a stored head ** that determines what we are */ sendmp = rbuf->fmp; if (sendmp != NULL) { /* secondary frag */ rbuf->buf = rbuf->fmp = NULL; mp->m_flags &= ~M_PKTHDR; sendmp->m_pkthdr.len += mp->m_len; } else { /* * Optimize. This might be a small packet, * maybe just a TCP ACK. Do a fast copy that * is cache aligned into a new mbuf, and * leave the old mbuf+cluster for re-use. 
*/ if (eop && len <= IXGBE_RX_COPY_LEN) { sendmp = m_gethdr(M_NOWAIT, MT_DATA); if (sendmp != NULL) { sendmp->m_data += IXGBE_RX_COPY_ALIGN; ixgbe_bcopy(mp->m_data, sendmp->m_data, len); sendmp->m_len = len; rxr->rx_copies++; rbuf->flags |= IXGBE_RX_COPY; } } if (sendmp == NULL) { rbuf->buf = rbuf->fmp = NULL; sendmp = mp; } /* first desc of a non-ps chain */ sendmp->m_flags |= M_PKTHDR; sendmp->m_pkthdr.len = mp->m_len; } ++processed; /* Pass the head pointer on */ if (eop == 0) { nbuf->fmp = sendmp; sendmp = NULL; mp->m_next = nbuf->buf; } else { /* Sending this frame */ sendmp->m_pkthdr.rcvif = ifp; ifp->if_ipackets++; rxr->rx_packets++; /* capture data for AIM */ rxr->bytes += sendmp->m_pkthdr.len; rxr->rx_bytes += sendmp->m_pkthdr.len; /* Process vlan info */ if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP)) vtag = le16toh(cur->wb.upper.vlan); if (vtag) { sendmp->m_pkthdr.ether_vtag = vtag; sendmp->m_flags |= M_VLANTAG; } if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) ixgbe_rx_checksum(staterr, sendmp, ptype); #if __FreeBSD_version >= 800000 sendmp->m_pkthdr.flowid = que->msix; sendmp->m_flags |= M_FLOWID; #endif } next_desc: bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Advance our pointers to the next descriptor. */ if (++i == rxr->num_desc) i = 0; /* Now send to the stack or do LRO */ if (sendmp != NULL) { rxr->next_to_check = i; ixgbe_rx_input(rxr, ifp, sendmp, ptype); i = rxr->next_to_check; } /* Every 8 descriptors we go to refresh mbufs */ if (processed == 8) { ixgbe_refresh_mbufs(rxr, i); processed = 0; } } /* Refresh any remaining buf structs */ if (ixgbe_rx_unrefreshed(rxr)) ixgbe_refresh_mbufs(rxr, i); rxr->next_to_check = i; /* * Flush any outstanding LRO work */ while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } IXGBE_RX_UNLOCK(rxr); /* ** Still have cleaning to do? 
*/ if ((staterr & IXGBE_RXD_STAT_DD) != 0) return (TRUE); else return (FALSE); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype) { u16 status = (u16) staterr; u8 errors = (u8) (staterr >> 24); bool sctp = FALSE; if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) sctp = TRUE; if (status & IXGBE_RXD_STAT_IPCS) { if (!(errors & IXGBE_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else mp->m_pkthdr.csum_flags = 0; } if (status & IXGBE_RXD_STAT_L4CS) { u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) type = CSUM_SCTP_VALID; #endif if (!(errors & IXGBE_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= type; if (!sctp) mp->m_pkthdr.csum_data = htons(0xffff); } } return; } /* ** This routine is run via an vlan config EVENT, ** it enables us to use the HW Filter table since ** we can get the vlan id. This just creates the ** entry in the soft version of the VFTA, init will ** repopulate the real table. */ static void ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } /* ** This routine is run via an vlan ** unconfig EVENT, remove our entry ** in the soft vfta. 
*/ static void ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } static void ixgbe_setup_vlan_hw_support(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 ctrl; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < IXGBE_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), adapter->shadow_vfta[i]); ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* Enable the Filter Table if enabled */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { ctrl &= ~IXGBE_VLNCTRL_CFIEN; ctrl |= IXGBE_VLNCTRL_VFE; } if (hw->mac.type == ixgbe_mac_82598EB) ctrl |= IXGBE_VLNCTRL_VME; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); /* Setup the queues for vlans */ for (int i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; /* On 82599 the VLAN enable is per/queue in RXDCTL */ if (hw->mac.type != ixgbe_mac_82598EB) { ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl); } rxr->vtag_strip = TRUE; } } static void ixgbe_enable_intr(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ix_queue *que = adapter->queues; u32 mask, fwsm; mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); /* Enable Fan Failure detection */ if (hw->device_id == IXGBE_DEV_ID_82598AT) mask |= 
IXGBE_EIMS_GPI_SDP1; switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: mask |= IXGBE_EIMS_ECC; mask |= IXGBE_EIMS_GPI_SDP0; mask |= IXGBE_EIMS_GPI_SDP1; mask |= IXGBE_EIMS_GPI_SDP2; #ifdef IXGBE_FDIR mask |= IXGBE_EIMS_FLOW_DIR; #endif break; case ixgbe_mac_X540: mask |= IXGBE_EIMS_ECC; /* Detect if Thermal Sensor is enabled */ fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); if (fwsm & IXGBE_FWSM_TS_ENABLED) mask |= IXGBE_EIMS_TS; #ifdef IXGBE_FDIR mask |= IXGBE_EIMS_FLOW_DIR; #endif /* falls through */ default: break; } IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); /* With RSS we use auto clear */ if (adapter->msix_mem) { mask = IXGBE_EIMS_ENABLE_MASK; /* Don't autoclear Link */ mask &= ~IXGBE_EIMS_OTHER; mask &= ~IXGBE_EIMS_LSC; IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); } /* ** Now enable all queues, this is done separately to ** allow for handling the extended (beyond 32) MSIX ** vectors that can be used by 82599 */ for (int i = 0; i < adapter->num_queues; i++, que++) ixgbe_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); return; } static void ixgbe_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); if (adapter->hw.mac.type == ixgbe_mac_82598EB) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); } else { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); } IXGBE_WRITE_FLUSH(&adapter->hw); return; } u16 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg) { u16 value; value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev, reg, 2); return (value); } void ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value) { pci_write_config(((struct ixgbe_osdep *)hw->back)->dev, reg, value, 2); return; } /* ** Get the width and transaction speed of ** the slot this adapter is plugged into. 
*/ static void ixgbe_get_slot_info(struct ixgbe_hw *hw) { device_t dev = ((struct ixgbe_osdep *)hw->back)->dev; struct ixgbe_mac_info *mac = &hw->mac; u16 link; u32 offset; /* For most devices simply call the shared code routine */ if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { ixgbe_get_bus_info(hw); goto display; } /* ** For the Quad port adapter we need to parse back ** up the PCI tree to find the speed of the expansion ** slot into which this adapter is plugged. A bit more work. */ dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif /* Now get the PCI Express Capabilities offset */ pci_find_cap(dev, PCIY_EXPRESS, &offset); /* ...and read the Link Status Register */ link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); switch (link & IXGBE_PCI_LINK_WIDTH) { case IXGBE_PCI_LINK_WIDTH_1: hw->bus.width = ixgbe_bus_width_pcie_x1; break; case IXGBE_PCI_LINK_WIDTH_2: hw->bus.width = ixgbe_bus_width_pcie_x2; break; case IXGBE_PCI_LINK_WIDTH_4: hw->bus.width = ixgbe_bus_width_pcie_x4; break; case IXGBE_PCI_LINK_WIDTH_8: hw->bus.width = ixgbe_bus_width_pcie_x8; break; default: hw->bus.width = ixgbe_bus_width_unknown; break; } switch (link & IXGBE_PCI_LINK_SPEED) { case IXGBE_PCI_LINK_SPEED_2500: hw->bus.speed = ixgbe_bus_speed_2500; break; case IXGBE_PCI_LINK_SPEED_5000: hw->bus.speed = ixgbe_bus_speed_5000; break; case IXGBE_PCI_LINK_SPEED_8000: hw->bus.speed = ixgbe_bus_speed_8000; break; default: hw->bus.speed = ixgbe_bus_speed_unknown; break; } mac->ops.set_lan_id(hw); display: device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == ixgbe_bus_speed_5000) ? 
"5.0GT/s": (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && (hw->bus.speed == ixgbe_bus_speed_2500))) { device_printf(dev, "PCI-Express bandwidth available" " for this card\n is not sufficient for" " optimal performance.\n"); device_printf(dev, "For optimal performance a x8 " "PCIE, or x4 PCIE Gen2 slot is required.\n"); } if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && (hw->bus.speed < ixgbe_bus_speed_8000))) { device_printf(dev, "PCI-Express bandwidth available" " for this card\n is not sufficient for" " optimal performance.\n"); device_printf(dev, "For optimal performance a x8 " "PCIE Gen3 slot is required.\n"); } return; } /* ** Setup the correct IVAR register for a particular MSIX interrupt ** (yes this is all very magic and confusing :) ** - entry is the register array entry ** - vector is the MSIX vector for this queue ** - type is RX/TX/MISC */ static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; switch (hw->mac.type) { case ixgbe_mac_82598EB: if (type == -1) entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; else entry += (type * 64); index = (entry >> 2) & 0x1F; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); ivar &= ~(0xFF << (8 * (entry & 0x3))); ivar |= (vector << (8 * (entry & 0x3))); IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: if (type == -1) { /* MISC IVAR */ index = (entry & 1) * 8; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); } else { /* 
RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); } default: break; } } static void ixgbe_configure_ivars(struct adapter *adapter) { struct ix_queue *que = adapter->queues; u32 newitr; if (ixgbe_max_interrupt_rate > 0) newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; else newitr = 0; for (int i = 0; i < adapter->num_queues; i++, que++) { /* First the RX queue entry */ ixgbe_set_ivar(adapter, i, que->msix, 0); /* ... and the TX */ ixgbe_set_ivar(adapter, i, que->msix, 1); /* Set an Initial EITR value */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), newitr); } /* For the Link interrupt */ ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1); } /* ** ixgbe_sfp_probe - called in the local timer to ** determine if a port had optics inserted. */ static bool ixgbe_sfp_probe(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; bool result = FALSE; if ((hw->phy.type == ixgbe_phy_nl) && (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { s32 ret = hw->phy.ops.identify_sfp(hw); if (ret) goto out; ret = hw->phy.ops.reset(hw); if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev,"Unsupported SFP+ module detected!"); printf(" Reload driver with supported module.\n"); adapter->sfp_probe = FALSE; goto out; } else device_printf(dev,"SFP+ module detected!\n"); /* We now have supported optics */ adapter->sfp_probe = FALSE; /* Set the optics type so system reports correctly */ ixgbe_setup_optics(adapter); result = TRUE; } out: return (result); } /* ** Tasklet handler for MSIX Link interrupts ** - do outside interrupt since it might sleep */ static void ixgbe_handle_link(void *context, int pending) { struct adapter *adapter = context; ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, 0); ixgbe_update_link_status(adapter); } /* ** Tasklet for handling 
SFP module interrupts */ static void ixgbe_handle_mod(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; u32 err; err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } err = hw->mac.ops.setup_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); return; } taskqueue_enqueue(adapter->tq, &adapter->msf_task); return; } /* ** Tasklet for handling MSF (multispeed fiber) interrupts */ static void ixgbe_handle_msf(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; u32 autoneg; bool negotiate; autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, autoneg, TRUE); return; } #ifdef IXGBE_FDIR /* ** Tasklet for reinitializing the Flow Director filter table */ static void ixgbe_reinit_fdir(void *context, int pending) { struct adapter *adapter = context; struct ifnet *ifp = adapter->ifp; if (adapter->fdir_reinit != 1) /* Shouldn't happen */ return; ixgbe_reinit_fdir_tables_82599(&adapter->hw); adapter->fdir_reinit = 0; /* re-enable flow director interrupts */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); /* Restart the interface */ ifp->if_drv_flags |= IFF_DRV_RUNNING; return; } #endif /********************************************************************** * * Update the board statistics counters. 
*
 *  Every hardware counter read below is added onto the matching
 *  running total in adapter->stats; the totals are then copied
 *  into the ifnet counters at the end.
 *
 **********************************************************************/
static void
ixgbe_update_stats_counters(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		missed_rx = 0, bprc, lxon, lxoff, total;
	u64		total_missed_rx = 0;

	adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
	adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
	adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
	adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);

	/*
	** Note: these are for the 8 possible traffic classes,
	**	 which in current implementation is unused,
	**	 therefore only 0 should read real data.
	*/
	for (int i = 0; i < 8; i++) {
		u32 mp;
		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
		/* missed_rx tallies misses for the gprc workaround */
		missed_rx += mp;
		/* global total per queue */
		adapter->stats.mpc[i] += mp;
		/* Running comprehensive total for stats display */
		total_missed_rx += adapter->stats.mpc[i];
		/* 82598 reads PXONRXC; other MACs read PXONRXCNT instead */
		if (hw->mac.type == ixgbe_mac_82598EB) {
			adapter->stats.rnbc[i] +=
			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
			adapter->stats.qbtc[i] +=
			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
			adapter->stats.qbrc[i] +=
			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
			adapter->stats.pxonrxc[i] +=
			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
		} else
			adapter->stats.pxonrxc[i] +=
			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
		adapter->stats.pxontxc[i] +=
		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
		adapter->stats.pxofftxc[i] +=
		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
		adapter->stats.pxoffrxc[i] +=
		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
		adapter->stats.pxon2offc[i] +=
		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
	}
	/* Per-queue packet counters, 16 entries */
	for (int i = 0; i < 16; i++) {
		adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
		adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
		adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
	}
	adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
	adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
	adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);

	/* Hardware workaround, gprc counts missed packets */
	adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
	adapter->stats.gprc -= missed_rx;

	if (hw->mac.type != ixgbe_mac_82598EB) {
		/* Octet counters are combined from a low and a high register */
		adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
		adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
		adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
		adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
		adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
	} else {
		adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
		adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
		/* 82598 only has a counter in the high register */
		adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
		adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
		adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
	}

	/*
	 * Workaround: mprc hardware is incorrectly counting
	 * broadcasts, so for now we subtract those.
	 */
	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
	adapter->stats.bprc += bprc;
	adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
	/* The subtraction is applied on 82598 only */
	if (hw->mac.type == ixgbe_mac_82598EB)
		adapter->stats.mprc -= bprc;

	adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
	adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
	adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
	adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
	adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
	adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);

	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
	adapter->stats.lxontxc += lxon;
	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
	adapter->stats.lxofftxc += lxoff;
	total = lxon + lxoff;

	/*
	 * The XON/XOFF frames sent in this interval ("total") are
	 * subtracted from the transmit packet counters, and from the
	 * octet counter at ETHER_MIN_LEN bytes each.
	 */
	adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
	adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
	adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
	adapter->stats.gptc -= total;
	adapter->stats.mptc -= total;
	adapter->stats.ptc64 -= total;
	adapter->stats.gotc -= total * ETHER_MIN_LEN;

	adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
	adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
	adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
	adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
	adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
	adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
	adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
	adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
	adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
	adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
	adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
	adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
	adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
	adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
	adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
	adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
	adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
	adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
	/* Only read FCOE on 82599 */
	if (hw->mac.type != ixgbe_mac_82598EB) {
		adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
		adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
		adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
		adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
		adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
	}

	/* Fill out the OS statistics structure */
	ifp->if_ipackets = adapter->stats.gprc;
	ifp->if_opackets = adapter->stats.gptc;
	ifp->if_ibytes = adapter->stats.gorc;
	ifp->if_obytes = adapter->stats.gotc;
	ifp->if_imcasts = adapter->stats.mprc;
	ifp->if_omcasts = adapter->stats.mptc;
	ifp->if_collisions = 0;

	/* Rx Errors */
	ifp->if_iqdrops = total_missed_rx;
	ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
}

/** ixgbe_sysctl_tdh_handler - Handler function
 *  Retrieves the TDH value from the hardware
 *
 *  oid_arg1 is the tx_ring to query; returns 0 when it is NULL,
 *  otherwise the result of sysctl_handle_int() on error or read,
 *  and 0 after a write request (the written value is not used).
 */
static int
ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
{
	int error;

	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
	if (!txr) return 0;

	unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return error;
	return 0;
}

/** ixgbe_sysctl_tdt_handler - Handler function
 *  Retrieves the TDT value from the hardware
 *
 *  oid_arg1 is the tx_ring to query; returns 0 when it is NULL,
 *  otherwise the result of sysctl_handle_int() on error or read,
 *  and 0 after a write request (the written value is not used).
 */
static int
ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
{
	int error;

	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
	if (!txr) return 0;

	unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return error;
	return 0;
}

/** ixgbe_sysctl_rdh_handler - Handler function
 *  Retrieves the RDH value from the hardware
 *
 *  oid_arg1 is the rx_ring to query; returns 0 when it is NULL,
 *  otherwise the result of sysctl_handle_int() on error or read,
 *  and 0 after a write request (the written value is not used).
 */
static int
ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
{
	int error;

	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
	if (!rxr) return 0;

	unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return error;
	return 0;
}

/** ixgbe_sysctl_rdt_handler - Handler function
 *  Retrieves the RDT value from the hardware
 *
 *  oid_arg1 is the rx_ring to query; returns 0 when it is NULL,
 *  otherwise the result of sysctl_handle_int() on error or read,
 *  and 0 after a write request (the written value is not used).
 */
static int
ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
{
	int error;

	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
	if (!rxr) return 0;

	unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return error;
	return 0;
}

/** ixgbe_sysctl_interrupt_rate_handler - Handler function
 *  Reports, and on write updates, the interrupt rate of one queue.
 *
 *  The rate is derived from bits 3-11 of the queue's EITR register
 *  (rate = 500000 / field, or 0 when the field is 0). On write the
 *  low 12 bits of EITR are cleared; a new rate in 1..499999 is
 *  raised to at least 1000, stored in the global
 *  ixgbe_max_interrupt_rate and encoded back into EITR. Any other
 *  value leaves the field cleared and resets the global to 0.
 */
static int
ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
	unsigned int reg, usec, rate;

	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
	usec = ((reg & 0x0FF8) >> 3);
	if (usec > 0)
		rate = 500000 / usec;
	else
		rate = 0;
	error = sysctl_handle_int(oidp, &rate, 0, req);
	if (error || !req->newptr)
		return error;
	reg &= ~0xfff; /* default, no limitation */
	ixgbe_max_interrupt_rate = 0;
	if (rate > 0 && rate < 500000) {
		if (rate < 1000)
			rate = 1000;
		ixgbe_max_interrupt_rate = rate;
		reg |= ((4000000/rate) & 0xff8 );
	}
	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
	return 0;
}

/*
 * Add sysctl variables, one per statistic, to the system.
*/ static void ixgbe_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbe_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "m_defrag() failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); for (int i = 0; i < adapter->num_queues; i++, txr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i], sizeof(&adapter->queues[i]), ixgbe_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(adapter->queues[i].irqs), "irqs on this queue"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdh_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdt_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &txr->tso_tx, "TSO"); 
SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txr->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); } for (int i = 0; i < adapter->num_queues; i++, rxr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdh_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdt_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", CTLFLAG_RD, &rxr->rx_copies, "Copied RX Frames"); SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get the own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC Errors"); 
SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", CTLFLAG_RD, &stats->illerrc, "Illegal Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", CTLFLAG_RD, &stats->errbc, "Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", CTLFLAG_RD, &stats->mspdc, "MAC Short Packets Discarded"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", CTLFLAG_RD, &stats->mlfc, "MAC Local Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", CTLFLAG_RD, &stats->mrfc, "MAC Remote Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", CTLFLAG_RD, &stats->rlec, "Receive Length Errors"); /* Flow Control stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->lxontxc, "Link XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->lxonrxc, "Link XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->lxofftxc, "Link XOFF Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->lxoffrxc, "Link XOFF Received"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", CTLFLAG_RD, &stats->tor, "Total Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, 
"rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized", CTLFLAG_RD, &stats->ruc, "Receive Undersized"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", CTLFLAG_RD, &stats->roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", CTLFLAG_RD, &stats->rjc, "Received Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", CTLFLAG_RD, &stats->mngprc, "Management Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", CTLFLAG_RD, &stats->mngptc, "Management Packets Dropped"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", CTLFLAG_RD, &stats->xec, "Checksum Errors"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast 
Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", CTLFLAG_RD, &stats->mngptc, "Management Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error, last; struct adapter *adapter = (struct adapter *) arg1; last = adapter->fc; error = sysctl_handle_int(oidp, &adapter->fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Don't bother if it's not changed */ if (adapter->fc == last) return (0); switch (adapter->fc) { case ixgbe_fc_rx_pause: case ixgbe_fc_tx_pause: case ixgbe_fc_full: adapter->hw.fc.requested_mode = adapter->fc; if (adapter->num_queues > 1) ixgbe_disable_rx_drop(adapter); break; case ixgbe_fc_none: adapter->hw.fc.requested_mode = ixgbe_fc_none; if (adapter->num_queues > 1) ixgbe_enable_rx_drop(adapter); break; default: adapter->fc = last; return (EINVAL); } /* Don't autoneg if forcing a value */ adapter->hw.fc.disable_fc_autoneg = TRUE; ixgbe_fc_enable(&adapter->hw); return error; } /* ** Control link advertise speed: ** 1 - advertise only 1G ** 2 - advertise 100Mb ** 3 - advertise normal */ static int 
ixgbe_set_advertise(SYSCTL_HANDLER_ARGS) { int error = 0; struct adapter *adapter; device_t dev; struct ixgbe_hw *hw; ixgbe_link_speed speed, last; adapter = (struct adapter *) arg1; dev = adapter->dev; hw = &adapter->hw; last = adapter->advertise; error = sysctl_handle_int(oidp, &adapter->advertise, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (adapter->advertise == last) /* no change */ return (0); if (!((hw->phy.media_type == ixgbe_media_type_copper) || (hw->phy.multispeed_fiber))) return (EINVAL); if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) { device_printf(dev, "Set Advertise: 100Mb on X540 only\n"); return (EINVAL); } if (adapter->advertise == 1) speed = IXGBE_LINK_SPEED_1GB_FULL; else if (adapter->advertise == 2) speed = IXGBE_LINK_SPEED_100_FULL; else if (adapter->advertise == 3) speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_10GB_FULL; else { /* bogus value */ adapter->advertise = last; return (EINVAL); } hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); return (error); } /* ** Thermal Shutdown Trigger ** - cause a Thermal Overtemp IRQ ** - this now requires firmware enabling */ static int ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS) { int error, fire = 0; struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; if (hw->mac.type != ixgbe_mac_X540) return (0); error = sysctl_handle_int(oidp, &fire, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (fire) { u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS); reg |= IXGBE_EICR_TS; IXGBE_WRITE_REG(hw, IXGBE_EICS, reg); } return (0); } /* ** Enable the hardware to drop packets when the buffer is ** full. This is useful when multiqueue,so that no single ** queue being full stalls the entire RX engine. We only ** enable this when Multiqueue AND when Flow Control is ** disabled. 
*/ static void ixgbe_enable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; for (int i = 0; i < adapter->num_queues; i++) { u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); srrctl |= IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); } } static void ixgbe_disable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; for (int i = 0; i < adapter->num_queues; i++) { u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); srrctl &= ~IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); } } Index: stable/9/sys/dev/ixgbe/ixv.c =================================================================== --- stable/9/sys/dev/ixgbe/ixv.c (revision 273911) +++ stable/9/sys/dev/ixgbe/ixv.c (revision 273912) @@ -1,4007 +1,4007 @@ /****************************************************************************** Copyright (c) 2001-2013, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#include "opt_inet.h"
#include "opt_inet6.h"
#include "ixv.h"

/*********************************************************************
 *  Driver version
 *
 *  Appended to the device description string by ixv_probe().
 *********************************************************************/
char ixv_driver_version[] = "1.1.4";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixv_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *
 *  A SubVendor/SubDevice ID of 0 is treated as a wildcard by ixv_probe().
 *********************************************************************/

static ixv_vendor_info_t ixv_vendor_info_array[] =
{
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *
 *  Indexed by the last field of each ixv_vendor_info_array entry.
 *********************************************************************/

static char    *ixv_strings[] = {
	"Intel(R) PRO/10GbE Virtual Function Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixv_probe(device_t);
static int      ixv_attach(device_t);
static int      ixv_detach(device_t);
static int      ixv_shutdown(device_t);
/* Transmit entry points: legacy if_start before 8.0, multiqueue after. */
#if __FreeBSD_version < 800000
static void     ixv_start(struct ifnet *);
static void     ixv_start_locked(struct tx_ring *, struct ifnet *);
#else
static int	ixv_mq_start(struct ifnet *, struct mbuf *);
static int	ixv_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	ixv_qflush(struct ifnet *);
#endif
static int      ixv_ioctl(struct ifnet *, u_long, caddr_t);
static void	ixv_init(void *);
static void	ixv_init_locked(struct adapter *);
static void     ixv_stop(void *);
static void     ixv_media_status(struct ifnet *, struct ifmediareq *);
static int      ixv_media_change(struct ifnet *);
static void     ixv_identify_hardware(struct adapter *);
static int      ixv_allocate_pci_resources(struct adapter *);
static int      ixv_allocate_msix(struct adapter *);
static int	ixv_allocate_queues(struct adapter *);
static int	ixv_setup_msix(struct adapter *);
static void	ixv_free_pci_resources(struct adapter *);
static void     ixv_local_timer(void *);
static void     ixv_setup_interface(device_t, struct adapter *);
static void     ixv_config_link(struct adapter *);

static int      ixv_allocate_transmit_buffers(struct tx_ring *);
static int	ixv_setup_transmit_structures(struct adapter *);
static void	ixv_setup_transmit_ring(struct tx_ring *);
static void     ixv_initialize_transmit_units(struct adapter *);
static void     ixv_free_transmit_structures(struct adapter *);
static void     ixv_free_transmit_buffers(struct tx_ring *);

static int      ixv_allocate_receive_buffers(struct rx_ring *);
static int      ixv_setup_receive_structures(struct adapter *);
static int	ixv_setup_receive_ring(struct rx_ring *);
static void     ixv_initialize_receive_units(struct adapter *);
static void     ixv_free_receive_structures(struct adapter *);
static void     ixv_free_receive_buffers(struct rx_ring *);

static void     ixv_enable_intr(struct adapter *);
static void     ixv_disable_intr(struct adapter *);
static bool	ixv_txeof(struct tx_ring *);
static bool	ixv_rxeof(struct ix_queue *, int);
static void	ixv_rx_checksum(u32, struct mbuf *, u32);
static void     ixv_set_multi(struct adapter *);
static void     ixv_update_link_status(struct adapter *);
static void	ixv_refresh_mbufs(struct rx_ring *, int);
static int      ixv_xmit(struct tx_ring *, struct mbuf **);
static int	ixv_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	ixv_sysctl_debug(SYSCTL_HANDLER_ARGS);
static int	ixv_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	ixv_dma_malloc(struct adapter *, bus_size_t,
		    struct ixv_dma_alloc *, int);
static void     ixv_dma_free(struct adapter *, struct ixv_dma_alloc *);
static void	ixv_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static bool	ixv_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	ixv_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	ixv_set_ivar(struct adapter *, u8, u8, s8);
static void	ixv_configure_ivars(struct adapter *);
static u8 *	ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void	ixv_setup_vlan_support(struct adapter *);
static void	ixv_register_vlan(void *, struct ifnet *, u16);
static void	ixv_unregister_vlan(void *, struct ifnet *, u16);

static void	ixv_save_stats(struct adapter *);
static void	ixv_init_stats(struct adapter *);
static void	ixv_update_stats(struct adapter *);

static __inline void ixv_rx_discard(struct rx_ring *, int);
static __inline void ixv_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);

/* The MSI/X Interrupt handlers */
static void	ixv_msix_que(void *);
static void	ixv_msix_mbx(void *);

/* Deferred interrupt tasklets */
static void	ixv_handle_que(void *, int);
static void	ixv_handle_mbx(void *, int);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *
 *  Binds the newbus probe/attach/detach/shutdown methods to the
 *  ixv_* routines of this file.
 *********************************************************************/

static device_method_t ixv_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ixv_probe),
	DEVMETHOD(device_attach, ixv_attach),
	DEVMETHOD(device_detach, ixv_detach),
	DEVMETHOD(device_shutdown, ixv_shutdown),
	DEVMETHOD_END
};
/* Driver description: name, method table and per-device softc size. */
static driver_t ixv_driver = {
	"ix", ixv_methods, sizeof(struct adapter),
};

/* The devclass object is defined outside this file. */
extern devclass_t ixgbe_devclass;
DRIVER_MODULE(ixv, pci, ixv_driver, ixgbe_devclass, 0, 0);
MODULE_DEPEND(ixv, pci, 1, 1, 1);
MODULE_DEPEND(ixv, ether, 1, 1, 1);

/*
** TUNEABLE PARAMETERS:
*/

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int ixv_enable_aim = FALSE;
TUNABLE_INT("hw.ixv.enable_aim", &ixv_enable_aim);

/* How many packets rxeof tries to clean at a time */
static int ixv_rx_process_limit = 128;
TUNABLE_INT("hw.ixv.rx_process_limit", &ixv_rx_process_limit);

/* Flow control setting, default to full */
static int ixv_flow_control = ixgbe_fc_full;
TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control);

/*
 * Header split: this causes the hardware to DMA
 * the header into a separate mbuf from the payload,
 * it can be a performance win in some workloads, but
 * in others it actually hurts, it's off by default.
 */
static int ixv_header_split = FALSE;
TUNABLE_INT("hw.ixv.hdr_split", &ixv_header_split);

/*
** Number of TX descriptors per ring,
** setting higher than RX as this seems
** the better performing choice.
*/
static int ixv_txd = DEFAULT_TXD;
TUNABLE_INT("hw.ixv.txd", &ixv_txd);

/* Number of RX descriptors per ring */
static int ixv_rxd = DEFAULT_RXD;
TUNABLE_INT("hw.ixv.rxd", &ixv_rxd);

/*
** Shadow VFTA table, this is needed because
** the real filter table gets cleared during
** a soft reset and we need to repopulate it.
*/
static u32 ixv_shadow_vfta[VFTA_SIZE];

/*********************************************************************
 *  Device identification routine
 *
 *  ixv_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
*
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
ixv_probe(device_t dev)
{
	ixv_vendor_info_t *ent;

	u16	pci_vendor_id = 0;
	u16	pci_device_id = 0;
	u16	pci_subvendor_id = 0;
	u16	pci_subdevice_id = 0;
	char	adapter_name[256];

	/* Only Intel parts are listed in the table; bail out early. */
	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	/*
	 * Walk the table up to its all-zero terminator.  A subvendor or
	 * subdevice id of 0 in an entry matches any value.
	 */
	for (ent = ixv_vendor_info_array; ent->vendor_id != 0; ent++) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		     (ent->subvendor_id == 0)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		     (ent->subdevice_id == 0))) {
			/* Bounded formatting: never write past the buffer. */
			snprintf(adapter_name, sizeof(adapter_name),
			    "%s, Version - %s",
			    ixv_strings[ent->index],
			    ixv_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
	}
	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
*
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixv_attach(device_t dev)
{
	struct adapter *adapter;
	struct ixgbe_hw *hw;
	int             error = 0;

	INIT_DEBUGOUT("ixv_attach: begin");

	/* Allocate, clear, and link in our adapter structure */
	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;

	/* Core Lock Init*/
	IXV_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL APIs */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW,
			adapter, 0, ixv_sysctl_stats, "I", "Statistics");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW,
			adapter, 0, ixv_sysctl_debug, "I", "Debug Info");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
			OID_AUTO, "flow_control", CTLTYPE_INT | CTLFLAG_RW,
			adapter, 0, ixv_set_flowcntl, "I", "Flow Control");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
-			OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
+			OID_AUTO, "enable_aim", CTLFLAG_RW,
			&ixv_enable_aim, 1, "Interrupt Moderation");

	/* Set up the timer callout */
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware revision */
	ixv_identify_hardware(adapter);

	/* Do base PCI setup - map BAR0 */
	if (ixv_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_out;
	}

	/*
	 * Do descriptor calc and sanity checks: a tunable value that is
	 * misaligned or out of range is replaced by the default.
	 */
	if (((ixv_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
	    ixv_txd < MIN_TXD || ixv_txd > MAX_TXD) {
		device_printf(dev, "TXD config issue, using default!\n");
		adapter->num_tx_desc = DEFAULT_TXD;
	} else
		adapter->num_tx_desc = ixv_txd;

	if (((ixv_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
	    ixv_rxd < MIN_RXD || ixv_rxd > MAX_RXD) {
		device_printf(dev, "RXD config issue, using default!\n");
		adapter->num_rx_desc = DEFAULT_RXD;
	} else
		adapter->num_rx_desc = ixv_rxd;

	/* Allocate our TX/RX Queues */
	if (ixv_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_out;
	}

	/*
	** Initialize the shared code: its
	** at this point the mac type is set.
	*/
	error = ixgbe_init_shared_code(hw);
	if (error) {
		device_printf(dev,"Shared Code Initialization Failure\n");
		error = EIO;
		goto err_late;
	}

	/* Setup the mailbox */
	ixgbe_init_mbx_params_vf(hw);

	/* NOTE(review): the result of ixgbe_reset_hw() is not checked here. */
	ixgbe_reset_hw(hw);

	/* Get Hardware Flow Control setting */
	hw->fc.requested_mode = ixgbe_fc_full;
	hw->fc.pause_time = IXV_FC_PAUSE;
	hw->fc.low_water[0] = IXV_FC_LO;
	hw->fc.high_water[0] = IXV_FC_HI;
	hw->fc.send_xon = TRUE;

	error = ixgbe_init_hw(hw);
	if (error) {
		device_printf(dev,"Hardware Initialization Failure\n");
		error = EIO;
		goto err_late;
	}

	error = ixv_allocate_msix(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	ixv_setup_interface(dev, adapter);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	ixv_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    ixv_rx_process_limit);

	/* Do the stats setup */
	ixv_save_stats(adapter);
	ixv_init_stats(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    ixv_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    ixv_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	INIT_DEBUGOUT("ixv_attach: end");
	return (0);

	/*
	 * Error unwinding: err_late also frees the TX/RX structures,
	 * err_out only releases the PCI resources.
	 * NOTE(review): neither path destroys the core lock initialised
	 * at the top of this function -- confirm whether that is intended.
	 */
err_late:
	ixv_free_transmit_structures(adapter);
	ixv_free_receive_structures(adapter);
err_out:
	ixv_free_pci_resources(adapter);
	return (error);

}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
* This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int ixv_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ix_queue *que = adapter->queues; INIT_DEBUGOUT("ixv_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } IXV_CORE_LOCK(adapter); ixv_stop(adapter); IXV_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++, que++) { if (que->tq) { taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } /* Drain the Link queue */ if (adapter->tq) { taskqueue_drain(adapter->tq, &adapter->mbx_task); taskqueue_free(adapter->tq); } /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); ixv_free_pci_resources(adapter); bus_generic_detach(dev); if_free(adapter->ifp); ixv_free_transmit_structures(adapter); ixv_free_receive_structures(adapter); IXV_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixv_shutdown(device_t dev) { struct adapter *adapter = device_get_softc(dev); IXV_CORE_LOCK(adapter); ixv_stop(adapter); IXV_CORE_UNLOCK(adapter); return (0); } #if __FreeBSD_version < 800000 /********************************************************************* * Transmit entry point * * ixv_start is called by the stack to initiate a transmit. 
*  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 **********************************************************************/
static void
ixv_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*pkt;

	IXV_TX_LOCK_ASSERT(txr);

	/* Nothing to do unless running, not flow-blocked, and link is up. */
	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	for (;;) {
		if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
		if (pkt == NULL)
			break;

		if (ixv_xmit(txr, &pkt)) {
			/*
			 * Transmit failed.  If the mbuf is still ours,
			 * mark the interface busy and put it back at the
			 * head of the send queue.
			 */
			if (pkt != NULL) {
				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
				IFQ_DRV_PREPEND(&ifp->if_snd, pkt);
			}
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, pkt);

		/* Set watchdog on */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
*/ static void ixv_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXV_TX_LOCK(txr); ixv_start_locked(txr, ifp); IXV_TX_UNLOCK(txr); } return; } #else /* ** Multiqueue Transmit driver ** */ static int ixv_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct ix_queue *que; struct tx_ring *txr; int i = 0, err = 0; /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; if (IXV_TX_TRYLOCK(txr)) { err = ixv_mq_start_locked(ifp, txr, m); IXV_TX_UNLOCK(txr); } else { err = drbr_enqueue(ifp, txr->br, m); taskqueue_enqueue(que->tq, &que->que_task); } return (err); } static int ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) { struct adapter *adapter = txr->adapter; struct mbuf *next; int enqueued, err = 0; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || adapter->link_active == 0) { if (m != NULL) err = drbr_enqueue(ifp, txr->br, m); return (err); } /* Do a clean if descriptors are low */ if (txr->tx_avail <= IXV_TX_CLEANUP_THRESHOLD) ixv_txeof(txr); enqueued = 0; if (m != NULL) { err = drbr_enqueue(ifp, txr->br, m); if (err) { return (err); } } /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = ixv_xmit(txr, &next)) != 0) { if (next == NULL) { drbr_advance(ifp, txr->br); } else { drbr_putback(ifp, txr->br, next); } break; } drbr_advance(ifp, txr->br); enqueued++; ifp->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) ifp->if_omcasts++; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; if (txr->tx_avail <= IXV_TX_OP_THRESHOLD) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } } if (enqueued > 0) { /* Set watchdog on */ txr->watchdog_check 
= TRUE; txr->watchdog_time = ticks; } return (err); } /* ** Flush all ring buffers */ static void ixv_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { IXV_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IXV_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /********************************************************************* * Ioctl entry point * * ixv_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixv_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *) data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
*/ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixv_init(adapter); if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXV_MAX_FRAME_SIZE - ETHER_HDR_LEN) { error = EINVAL; } else { IXV_CORE_LOCK(adapter); ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ixv_init_locked(adapter); IXV_CORE_UNLOCK(adapter); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXV_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ixv_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixv_stop(adapter); adapter->if_flags = ifp->if_flags; IXV_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXV_CORE_LOCK(adapter); ixv_disable_intr(adapter); ixv_set_multi(adapter); ixv_enable_intr(adapter); IXV_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXV_CORE_LOCK(adapter); ixv_init_locked(adapter); IXV_CORE_UNLOCK(adapter); } VLAN_CAPABILITIES(ifp); break; } default: IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } 
/********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ #define IXGBE_MHADD_MFS_SHIFT 16 static void ixv_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; u32 mhadd, gpie; INIT_DEBUGOUT("ixv_init: begin"); mtx_assert(&adapter->core_mtx, MA_OWNED); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); hw->addr_ctrl.rar_used_count = 1; /* Prepare transmit descriptors and buffers */ if (ixv_setup_transmit_structures(adapter)) { device_printf(dev,"Could not setup transmit structures\n"); ixv_stop(adapter); return; } ixgbe_reset_hw(hw); ixv_initialize_transmit_units(adapter); /* Setup Multicast table */ ixv_set_multi(adapter); /* ** Determine the correct mbuf pool ** for doing jumbo/headersplit */ if (ifp->if_mtu > ETHERMTU) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MCLBYTES; /* Prepare receive descriptors and buffers */ if (ixv_setup_receive_structures(adapter)) { device_printf(dev,"Could not setup receive structures\n"); ixv_stop(adapter); return; } /* Configure RX settings */ ixv_initialize_receive_units(adapter); /* Enable Enhanced MSIX mode */ gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE); gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_EIAME; gpie |= IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD; 
IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 ifp->if_hwassist |= CSUM_SCTP; #endif } /* Set MTU size */ if (ifp->if_mtu > ETHERMTU) { mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } /* Set up VLAN offload and filter */ ixv_setup_vlan_support(adapter); callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); /* Set up MSI/X routing */ ixv_configure_ivars(adapter); /* Set up auto-mask */ IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_EICS_RTX_QUEUE); /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_VTEITR(adapter->mbxvec), IXV_LINK_ITR); /* Stats init */ ixv_init_stats(adapter); /* Config/Enable Link */ ixv_config_link(adapter); /* And now turn on interrupts */ ixv_enable_intr(adapter); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } static void ixv_init(void *arg) { struct adapter *adapter = arg; IXV_CORE_LOCK(adapter); ixv_init_locked(adapter); IXV_CORE_UNLOCK(adapter); return; } /* ** ** MSIX Interrupt Handlers and Tasklets ** */ static inline void ixv_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u32 queue = 1 << vector; u32 mask; mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); } static inline void ixv_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, mask); } static inline void ixv_rearm_queues(struct adapter *adapter, u64 queues) { u32 mask = (IXGBE_EIMS_RTX_QUEUE & queues); 
IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, mask); } static void ixv_handle_que(void *context, int pending) { struct ix_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixv_rxeof(que, adapter->rx_process_limit); IXV_TX_LOCK(txr); ixv_txeof(txr); #if __FreeBSD_version >= 800000 if (!drbr_empty(ifp, txr->br)) ixv_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ixv_start_locked(txr, ifp); #endif IXV_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(que->tq, &que->que_task); return; } } /* Reenable this interrupt */ ixv_enable_queue(adapter, que->msix); return; } /********************************************************************* * * MSI Queue Interrupt Service routine * **********************************************************************/ void ixv_msix_que(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; bool more_tx, more_rx; u32 newitr = 0; ixv_disable_queue(adapter, que->msix); ++que->irqs; more_rx = ixv_rxeof(que, adapter->rx_process_limit); IXV_TX_LOCK(txr); more_tx = ixv_txeof(txr); /* ** Make certain that if the stack ** has anything queued the task gets ** scheduled to handle it. */ #if __FreeBSD_version < 800000 if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd)) #else if (!drbr_empty(adapter->ifp, txr->br)) #endif more_tx = 1; IXV_TX_UNLOCK(txr); more_rx = ixv_rxeof(que, adapter->rx_process_limit); /* Do AIM now? */ if (ixv_enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. 
*/ if (que->eitr_setting) IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); newitr |= newitr << 16; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: if (more_tx || more_rx) taskqueue_enqueue(que->tq, &que->que_task); else /* Reenable this interrupt */ ixv_enable_queue(adapter, que->msix); return; } static void ixv_msix_mbx(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 reg; ++adapter->mbx_irq; /* First get the cause */ reg = IXGBE_READ_REG(hw, IXGBE_VTEICS); /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_VTEICR, reg); /* Link status change */ if (reg & IXGBE_EICR_LSC) taskqueue_enqueue(adapter->tq, &adapter->mbx_task); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_EIMS_OTHER); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. 
* **********************************************************************/ static void ixv_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct adapter *adapter = ifp->if_softc; INIT_DEBUGOUT("ixv_media_status: begin"); IXV_CORE_LOCK(adapter); ixv_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IXV_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_FDX; break; } IXV_CORE_UNLOCK(adapter); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int ixv_media_change(struct ifnet * ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("ixv_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; default: device_printf(adapter->dev, "Only auto media type\n"); return (EINVAL); } return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors, allowing the * TX engine to transmit the packets. 
*  	- return 0 on success, positive on failure
 *
 *  On failure *m_headp is either left for the caller to requeue or
 *  has been freed and set to NULL.
 *
 **********************************************************************/

static int
ixv_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter  *adapter = txr->adapter;
	u32		olinfo_status = 0, cmd_type_len;
	u32		paylen = 0;
	int             i, j, error, nsegs;
	int		first, last = 0;
	struct mbuf	*m_head;
	bus_dma_segment_t segs[32];
	bus_dmamap_t	map;
	struct ixv_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;

	m_head = *m_headp;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];
	map = txbuf->map;

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		/* Too many segments: defragment the chain and retry once. */
		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		/* Mbuf is kept so the caller may retry. */
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->tx_avail - 2) {
		txr->no_desc_avail++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/*
	** Set up the appropriate offload context
	** this becomes the first descriptor of
	** a packet.
	*/
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (ixv_tso_setup(txr, m_head, &paylen)) {
			cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
			olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
			olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
			++adapter->tso_tx;
		} else {
			/*
			 * The mbuf is already loaded into 'map'; leave
			 * through the unload path rather than returning
			 * with the mapping still in place.
			 */
			error = ENXIO;
			goto xmit_fail;
		}
	} else if (ixv_tx_ctx_setup(txr, m_head))
		olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

	/* Record payload length */
	if (paylen == 0)
		olinfo_status |= m_head->m_pkthdr.len <<
		    IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* One data descriptor per DMA segment, wrapping at ring end. */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len |seglen);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i; /* descriptor that will get completion IRQ */

		if (++i == adapter->num_tx_desc)
			i = 0;

		txbuf->m_head = NULL;
		txbuf->eop_index = -1;
	}

	txd->read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	/*
	 * The last buffer used owns the mbuf and the loaded map; the
	 * first buffer gets the last buffer's map in exchange.
	 */
	txbuf->m_head = m_head;
	txr->tx_buffers[first].map = txbuf->map;
	txbuf->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];
	txbuf->eop_index = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(txr->me), i);

	return (0);

xmit_fail:
	/* 'map' is the map loaded above; no descriptor refers to it yet. */
	bus_dmamap_unload(txr->txtag, map);
	return (error);

}


/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever multicast address list is updated.
* **********************************************************************/ #define IXGBE_RAR_ENTRIES 16 static void ixv_set_multi(struct adapter *adapter) { u8 mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS]; u8 *update_ptr; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = adapter->ifp; IOCTL_DEBUGOUT("ixv_set_multi: begin"); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS], IXGBE_ETH_LENGTH_OF_ADDRESS); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif update_ptr = mta; ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixv_mc_array_itr, TRUE); return; } /* * This is an iterator function now needed by the multicast * shared code. It simply feeds the shared code routine the * addresses in the array of ixv_set_multi() one by one. */ static u8 * ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { u8 *addr = *update_ptr; u8 *newptr; *vmdq = 0; newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; *update_ptr = newptr; return addr; } /********************************************************************* * Timer routine * * This routine checks for link status,updates statistics, * and runs the watchdog check. 
* **********************************************************************/ static void ixv_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; int i; mtx_assert(&adapter->core_mtx, MA_OWNED); ixv_update_link_status(adapter); /* Stats Update */ ixv_update_stats(adapter); /* * If the interface has been paused * then don't do the watchdog check */ if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) goto out; /* ** Check for time since any descriptor was cleaned */ for (i = 0; i < adapter->num_queues; i++, txr++) { IXV_TX_LOCK(txr); if (txr->watchdog_check == FALSE) { IXV_TX_UNLOCK(txr); continue; } if ((ticks - txr->watchdog_time) > IXV_WATCHDOG) goto hung; IXV_TX_UNLOCK(txr); } out: ixv_rearm_queues(adapter, adapter->que_mask); callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); return; hung: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, IXGBE_READ_REG(&adapter->hw, IXGBE_VFTDH(i)), IXGBE_READ_REG(&adapter->hw, IXGBE_VFTDT(i))); device_printf(dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; IXV_TX_UNLOCK(txr); ixv_init_locked(adapter); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ static void ixv_update_link_status(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; device_t dev = adapter->dev; if (adapter->link_up){ if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev,"Link is up %d Gbps %s \n", ((adapter->link_speed == 128)? 
10:1), "Full Duplex"); adapter->link_active = TRUE; if_link_state_change(ifp, LINK_STATE_UP); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev,"Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); adapter->link_active = FALSE; for (int i = 0; i < adapter->num_queues; i++, txr++) txr->watchdog_check = FALSE; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void ixv_stop(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; ifp = adapter->ifp; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixv_stop: begin\n"); ixv_disable_intr(adapter); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); ixgbe_reset_hw(hw); adapter->hw.adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); return; } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void ixv_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; u16 pci_cmd_word; /* ** Make sure BUSMASTER is set, on a VM under ** KVM it may not be and will break things. 
*/ pci_enable_busmaster(dev); pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); return; } /********************************************************************* * * Setup MSIX Interrupt resources and handlers * **********************************************************************/ static int ixv_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; int error, rid, vector = 0; for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { rid = vector + 1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixv_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register QUE handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; adapter->que_mask |= (u64)(1 << que->msix); /* ** Bind the msix vector, and thus the ** ring to the corresponding cpu. 
*/ if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, i); TASK_INIT(&que->que_task, 0, ixv_handle_que, que); que->tq = taskqueue_create_fast("ixv_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* and Mailbox */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate" " bus resource: MBX interrupt [%d]\n", rid); return (ENXIO); } /* Set the mbx handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixv_msix_mbx, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "mbx"); #endif adapter->mbxvec = vector; /* Tasklets for Mailbox */ TASK_INIT(&adapter->mbx_task, 0, ixv_handle_mbx, adapter); adapter->tq = taskqueue_create_fast("ixv_mbx", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s mbxq", device_get_nameunit(adapter->dev)); /* ** Due to a broken design QEMU will fail to properly ** enable the guest for MSIX unless the vectors in ** the table are all set up, so we must rewrite the ** ENABLE in the MSIX control register again at this ** point to cause it to successfully initialize us. */ if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { int msix_ctrl; pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } return (0); } /* * Setup MSIX resources, note that the VF * device MUST use MSIX, there is no fallback. 
*/ static int ixv_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int rid, want; /* First try MSI/X */ rid = PCIR_BAR(3); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { device_printf(adapter->dev, "Unable to map MSIX table \n"); goto out; } /* ** Want two vectors: one for a queue, ** plus an additional for mailbox. */ want = 2; if ((pci_alloc_msix(dev, &want) == 0) && (want == 2)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", want); return (want); } /* Release in case alloc was insufficient */ pci_release_msi(dev); out: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rid, adapter->msix_mem); adapter->msix_mem = NULL; } device_printf(adapter->dev,"MSIX config error\n"); return (ENXIO); } static int ixv_allocate_pci_resources(struct adapter *adapter) { int rid; device_t dev = adapter->dev; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev,"Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle; adapter->num_queues = 1; adapter->hw.back = &adapter->osdep; /* ** Now setup MSI/X, should ** return us the number of ** configured vectors. 
*/ adapter->msix = ixv_setup_msix(adapter); if (adapter->msix == ENXIO) return (ENXIO); else return (0); } static void ixv_free_pci_resources(struct adapter * adapter) { struct ix_queue *que = adapter->queues; device_t dev = adapter->dev; int rid, memrid; memrid = PCIR_BAR(MSIX_BAR); /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* ** Release all msix queue resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->mbxvec) /* we are doing MSIX */ rid = adapter->mbxvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); return; } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ static void ixv_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("ixv_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: can not if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = 1000000000; ifp->if_init = ixv_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixv_ioctl; #if __FreeBSD_version >= 800000 ifp->if_transmit = ixv_mq_start; ifp->if_qflush = ixv_qflush; #else ifp->if_start = ixv_start; #endif ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2; ether_ifattach(ifp, adapter->hw.mac.addr); adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; /* Don't enable LRO by default */ ifp->if_capabilities |= IFCAP_LRO; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixv_media_change, ixv_media_status); ifmedia_add(&adapter->media, IFM_ETHER | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return; } static void ixv_config_link(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; if (hw->mac.ops.check_link) err = hw->mac.ops.check_link(hw, &autoneg, &adapter->link_up, FALSE); if (err) goto out; if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); out: return; } 
/******************************************************************** * Manage DMA'able memory. *******************************************************************/ static void ixv_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs->ds_addr; return; } static int ixv_dma_malloc(struct adapter *adapter, bus_size_t size, struct ixv_dma_alloc *dma, int mapflags) { device_t dev = adapter->dev; int r; r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &dma->dma_tag); if (r != 0) { device_printf(dev,"ixv_dma_malloc: bus_dma_tag_create failed; " "error %u\n", r); goto fail_0; } r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) { device_printf(dev,"ixv_dma_malloc: bus_dmamem_alloc failed; " "error %u\n", r); goto fail_1; } r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, ixv_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) { device_printf(dev,"ixv_dma_malloc: bus_dmamap_load failed; " "error %u\n", r); goto fail_2; } dma->dma_size = size; return (0); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); fail_1: bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (r); } static void ixv_dma_free(struct adapter *adapter, struct ixv_dma_alloc *dma) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); } /********************************************************************* * * Allocate memory for the 
transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 *  Returns 0 on success or ENOMEM.  The error path frees the
 *  descriptor DMA areas of the rings counted in txconf/rxconf and
 *  then the three top-level arrays.
 *
 **********************************************************************/
static int
ixv_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int		tx_bytes, rx_bytes;
	int		error = 0;
	int		txconf = 0, rxconf = 0;	/* rings fully set up so far */

	/* First allocate the top level queue structs */
	adapter->queues = (struct ix_queue *) malloc(
	    sizeof(struct ix_queue) * adapter->num_queues,
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->queues == NULL) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* First allocate the TX ring struct memory */
	adapter->tx_rings = (struct tx_ring *) malloc(
	    sizeof(struct tx_ring) * adapter->num_queues,
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->tx_rings == NULL) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	adapter->rx_rings = (struct rx_ring *) malloc(
	    sizeof(struct rx_ring) * adapter->num_queues,
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->rx_rings == NULL) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself */
	tx_bytes = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (txconf = 0; txconf < adapter->num_queues; txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[txconf];
		txr->adapter = adapter;
		txr->me = txconf;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixv_dma_malloc(adapter, tx_bytes,
			&txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tx_bytes);

		/* Now allocate transmit buffers for the ring */
		if (ixv_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#if __FreeBSD_version >= 800000
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXV_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

	/*
	 * Next the RX queues...
	 */
	rx_bytes = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (rxconf = 0; rxconf < adapter->num_queues; rxconf++) {
		rxr = &adapter->rx_rings[rxconf];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = rxconf;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixv_dma_malloc(adapter, rx_bytes,
			&rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rx_bytes);

		/* Allocate receive buffers for the ring*/
		if (ixv_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int q = 0; q < adapter->num_queues; q++) {
		que = &adapter->queues[q];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[q];
		que->rxr = &adapter->rx_rings[q];
	}

	return (0);

err_rx_desc:
	/* Descriptor areas of the first rxconf RX rings. */
	for (int k = 0; k < rxconf; k++)
		ixv_dma_free(adapter, &adapter->rx_rings[k].rxdma);
err_tx_desc:
	/* Descriptor areas of the first txconf TX rings. */
	for (int k = 0; k < txconf; k++)
		ixv_dma_free(adapter, &adapter->tx_rings[k].txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
/********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int ixv_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct ixv_tx_buf *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create( bus_get_dma_tag(adapter->dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ IXV_TSO_SIZE, /* maxsize */ 32, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct ixv_tx_buf *) malloc(sizeof(struct ixv_tx_buf) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ ixv_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. 
* **********************************************************************/ static void ixv_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct ixv_tx_buf *txbuf; int i; /* Clear the old ring contents */ IXV_TX_LOCK(txr); bzero((void *)txr->tx_base, (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } /* Clear the EOP index */ txbuf->eop_index = -1; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IXV_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static int ixv_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) ixv_setup_transmit_ring(txr); return (0); } /********************************************************************* * * Enable transmit unit. 
* **********************************************************************/ static void ixv_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct ixgbe_hw *hw = &adapter->hw; for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 tdba = txr->txdma.dma_paddr; u32 txctrl, txdctl; /* Set WTHRESH to 8, burst writeback */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); txdctl |= (8 << 16); IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); /* Now enable */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); txdctl |= IXGBE_TXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); /* Set the HW Tx Head and Tail indices */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(i), 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(i), 0); /* Setup Transmit Descriptor Cmd Settings */ txr->txd_cmd = IXGBE_TXD_CMD_IFCS; txr->watchdog_check = FALSE; /* Set Ring parameters */ IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i), adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc)); txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(i)); txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i), txctrl); break; } return; } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void ixv_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { IXV_TX_LOCK(txr); ixv_free_transmit_buffers(txr); ixv_dma_free(adapter, &txr->txdma); IXV_TX_UNLOCK(txr); IXV_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. 
* **********************************************************************/ static void ixv_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct ixv_tx_buf *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; tx_buffer = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; if (tx_buffer->map != NULL) { bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } else if (tx_buffer->map != NULL) { bus_dmamap_unload(txr->txtag, tx_buffer->map); bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } #if __FreeBSD_version >= 800000 if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************* * * Advanced Context Descriptor setup for VLAN or CSUM * **********************************************************************/ static bool ixv_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp) { struct adapter *adapter = txr->adapter; struct ixgbe_adv_tx_context_desc *TXD; struct ixv_tx_buf *tx_buffer; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; struct ether_vlan_header *eh; struct ip *ip; struct ip6_hdr *ip6; int ehdrlen, ip_hlen = 0; u16 etype; u8 ipproto = 0; bool offload = TRUE; int ctxd = txr->next_avail_desc; u16 vtag = 0; if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; tx_buffer = &txr->tx_buffers[ctxd]; TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; /* ** In advanced descriptors the vlan tag must ** be placed into the descriptor itself. 
*/ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); } else if (offload == FALSE) return FALSE; /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } /* Set the ether header length */ vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; if (mp->m_len < ehdrlen + ip_hlen) return (FALSE); ipproto = ip->ip_p; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); if (mp->m_len < ehdrlen + ip_hlen) return (FALSE); ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; default: offload = FALSE; break; } vlan_macip_lens |= ip_hlen; type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; switch (ipproto) { case IPPROTO_TCP: if (mp->m_pkthdr.csum_flags & CSUM_TCP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: if (mp->m_pkthdr.csum_flags & CSUM_UDP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: if (mp->m_pkthdr.csum_flags & CSUM_SCTP) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; break; #endif default: offload = FALSE; break; } /* Now copy bits into descriptor */ TXD->vlan_macip_lens |= htole32(vlan_macip_lens); TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(0); tx_buffer->m_head = NULL; tx_buffer->eop_index = -1; /* We've consumed the first desc, adjust counters */ if (++ctxd == adapter->num_tx_desc) ctxd = 0; 
txr->next_avail_desc = ctxd; --txr->tx_avail; return (offload); } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static bool ixv_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen) { struct adapter *adapter = txr->adapter; struct ixgbe_adv_tx_context_desc *TXD; struct ixv_tx_buf *tx_buffer; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0; u16 vtag = 0; int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; struct ip *ip; struct tcphdr *th; /* * Determine where frame payload starts. * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ehdrlen = ETHER_HDR_LEN; /* Ensure we have at least the IP+TCP header in the first mbuf. 
*/ if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr)) return FALSE; ctxd = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[ctxd]; TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; ip = (struct ip *)(mp->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return FALSE; /* 0 */ ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); tcp_hlen = th->th_off << 2; hdrlen = ehdrlen + ip_hlen + tcp_hlen; /* This is used in the transmit desc in encap */ *paylen = mp->m_pkthdr.len - hdrlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= ip_hlen; TXD->vlan_macip_lens |= htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); tx_buffer->m_head = NULL; tx_buffer->eop_index = -1; if (++ctxd == adapter->num_tx_desc) ctxd = 0; txr->tx_avail--; txr->next_avail_desc = ctxd; return TRUE; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. 
*
 *  Must be called with txr->tx_mtx held (asserted below).
 *
 *  Returns FALSE when the ring is already completely free, when the
 *  slot at next_to_clean has no end-of-packet index recorded, or when
 *  cleaning left the ring completely free (in which case
 *  watchdog_check is also cleared).  Returns TRUE otherwise.
 *
 **********************************************************************/
static bool
ixv_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;
	u32	first, last, done;
	struct ixv_tx_buf *tx_buffer;
	struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

	/* Nothing outstanding: every descriptor is available */
	if (txr->tx_avail == adapter->num_tx_desc)
		return FALSE;

	first = txr->next_to_clean;
	tx_buffer = &txr->tx_buffers[first];
	/* For cleanup we just use legacy struct */
	tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
	last = tx_buffer->eop_index;
	/* 'last' is unsigned, so this compares against the converted -1 */
	if (last == -1)
		return FALSE;
	eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == adapter->num_tx_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** Only the EOP descriptor of a packet now has the DD
	** bit set, this is what we look for...
	*/
	while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			/* Zero the whole descriptor and hand it back */
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;

			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
				tx_buffer->map = NULL;
			}
			tx_buffer->eop_index = -1;
			/* Progress was made: refresh the watchdog timestamp */
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc =
			    (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
		}
		++ifp->if_opackets;
		/* See if there is more work now */
		last = tx_buffer->eop_index;
		if (last != -1) {
			eop_desc =
			    (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
			/* Get next done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack that
	 * it is OK to send packets. If there are no pending descriptors,
	 * clear the timeout. Otherwise, if some descriptors have been freed,
	 * restart the timeout.
	 */
	if (txr->tx_avail > IXV_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		if (txr->tx_avail == adapter->num_tx_desc) {
			txr->watchdog_check = FALSE;
			return FALSE;
		}
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
* **********************************************************************/ static void ixv_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; bus_dma_segment_t hseg[1]; bus_dma_segment_t pseg[1]; struct ixv_rx_buf *rxbuf; struct mbuf *mh, *mp; int i, j, nsegs, error; bool refreshed = FALSE; i = j = rxr->next_to_refresh; /* Get the control variable, one beyond refresh point */ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; if ((rxbuf->m_head == NULL) && (rxr->hdr_split)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) goto update; mh->m_pkthdr.len = mh->m_len = MHLEN; mh->m_len = MHLEN; mh->m_flags |= M_PKTHDR; m_adj(mh, ETHER_ALIGN); /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("GET BUF: dmamap load" " failure - %d\n", error); m_free(mh); goto update; } rxbuf->m_head = mh; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr); } if (rxbuf->m_pack == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (mp == NULL) goto update; } else mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("GET BUF: dmamap load" " failure - %d\n", error); m_free(mp); rxbuf->m_pack = NULL; goto update; } rxbuf->m_pack = mp; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr); refreshed = TRUE; rxr->next_to_refresh = i = j; /* Calculate next index */ if (++j == adapter->num_rx_desc) j = 0; } update: if (refreshed) /* update tail index */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFRDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * 
Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int ixv_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct ixv_rx_buf *rxbuf; int i, bsize, error; bsize = sizeof(struct ixv_rx_buf) * adapter->num_rx_desc; if (!(rxr->rx_buffers = (struct ixv_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSIZE, /* maxsize */ 1, /* nsegments */ MSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->htag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUMPAGESIZE, /* maxsize */ 1, /* nsegments */ MJUMPAGESIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->htag, BUS_DMA_NOWAIT, &rxbuf->hmap); if (error) { device_printf(dev, "Unable to create RX head map\n"); goto fail; } error = bus_dmamap_create(rxr->ptag, BUS_DMA_NOWAIT, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to create RX pkt map\n"); goto fail; } } return (0); fail: /* Frees all, but can handle partial completion */ 
ixv_free_receive_structures(adapter); return (error); } static void ixv_free_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct ixv_rx_buf *rxbuf; int i; adapter = rxr->adapter; for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; } } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int ixv_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct ifnet *ifp; device_t dev; struct ixv_rx_buf *rxbuf; bus_dma_segment_t pseg[1], hseg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; adapter = rxr->adapter; ifp = adapter->ifp; dev = adapter->dev; /* Clear the ring contents */ IXV_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* Free current RX buffer structs and their mbufs */ ixv_free_receive_ring(rxr); /* Configure header split? 
*/ if (ixv_header_split) rxr->hdr_split = TRUE; /* Now replenish the mbufs */ for (int j = 0; j != adapter->num_rx_desc; ++j) { struct mbuf *mh, *mp; rxbuf = &rxr->rx_buffers[j]; /* ** Dont allocate mbufs if not ** doing header split, its wasteful */ if (rxr->hdr_split == FALSE) goto skip_head; /* First the header */ rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } m_adj(rxbuf->m_head, ETHER_ALIGN); mh = rxbuf->m_head; mh->m_len = mh->m_pkthdr.len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, rxbuf->m_head, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) /* Nothing elegant to do here */ goto fail; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); skip_head: /* Now the payload cluster */ rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_pack == NULL) { error = ENOBUFS; goto fail; } mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = 0; rxr->lro_enabled = FALSE; rxr->rx_split_packets = 0; rxr->rx_bytes = 0; rxr->discard = FALSE; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* ** Now set up the LRO interface: */ if (ifp->if_capenable & IFCAP_LRO) { int err = tcp_lro_init(lro); if (err) { device_printf(dev, "LRO Initialization failed!\n"); goto fail; } INIT_DEBUGOUT("RX Soft LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; } IXV_RX_UNLOCK(rxr); 
return (0); fail: ixv_free_receive_ring(rxr); IXV_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int ixv_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int j; for (j = 0; j < adapter->num_queues; j++, rxr++) if (ixv_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'j' failed, so its the terminus. */ for (int i = 0; i < j; ++i) { rxr = &adapter->rx_rings[i]; ixv_free_receive_ring(rxr); } return (ENOBUFS); } /********************************************************************* * * Setup receive registers and features. * **********************************************************************/ #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 static void ixv_initialize_receive_units(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 bufsz, fctrl, rxcsum, hlreg; /* Enable broadcasts */ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; fctrl |= IXGBE_FCTRL_DPF; fctrl |= IXGBE_FCTRL_PMCF; IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); /* Set for Jumbo Frames? 
*/ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); if (ifp->if_mtu > ETHERMTU) { hlreg |= IXGBE_HLREG0_JUMBOEN; bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; } else { hlreg &= ~IXGBE_HLREG0_JUMBOEN; bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; } IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 rdba = rxr->rxdma.dma_paddr; u32 reg, rxdctl; /* Do the queue enabling first */ rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); for (int k = 0; k < 10; k++) { if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & IXGBE_RXDCTL_ENABLE) break; else msec_delay(1); } wmb(); /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i), adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); /* Set up the SRRCTL register */ reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(i)); reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; reg |= bufsz; if (rxr->hdr_split) { /* Use a standard mbuf for the header */ reg |= ((IXV_RX_HDR << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) & IXGBE_SRRCTL_BSIZEHDR_MASK); reg |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; } else reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), reg); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), adapter->num_rx_desc - 1); } rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= IXGBE_RXCSUM_PCSD; if (!(rxcsum & IXGBE_RXCSUM_PCSD)) rxcsum |= IXGBE_RXCSUM_IPPCSE; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); return; } /********************************************************************* * * Free all receive rings. 
* **********************************************************************/ static void ixv_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; ixv_free_receive_buffers(rxr); /* Free LRO memory */ tcp_lro_free(lro); /* Free the ring memory as well */ ixv_dma_free(adapter, &rxr->rxdma); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void ixv_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct ixv_rx_buf *rxbuf; INIT_DEBUGOUT("free_receive_structures: begin"); /* Cleanup any existing buffers */ if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; if (rxbuf->hmap != NULL) { bus_dmamap_destroy(rxr->htag, rxbuf->hmap); rxbuf->hmap = NULL; } if (rxbuf->pmap != NULL) { bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); rxbuf->pmap = NULL; } } if (rxr->rx_buffers != NULL) { free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; } } if (rxr->htag != NULL) { bus_dma_tag_destroy(rxr->htag); rxr->htag = NULL; } if (rxr->ptag != NULL) { bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; } return; } static __inline void ixv_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) { /* * ATM LRO is only for IPv4/TCP packets and TCP checksum 
of the packet * should be computed by hardware. Also it should not have VLAN tag in * ethernet header. */ if (rxr->lro_enabled && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* * Send to the stack if: ** - LRO not enabled, or ** - no LRO resources, or ** - lro enqueue fails */ if (rxr->lro.lro_cnt != 0) if (tcp_lro_rx(&rxr->lro, m, 0) == 0) return; } IXV_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, m); IXV_RX_LOCK(rxr); } static __inline void ixv_rx_discard(struct rx_ring *rxr, int i) { struct ixv_rx_buf *rbuf; rbuf = &rxr->rx_buffers[i]; if (rbuf->fmp != NULL) {/* Partial chain ? */ rbuf->fmp->m_flags |= M_PKTHDR; m_freem(rbuf->fmp); rbuf->fmp = NULL; } /* ** With advanced descriptors the writeback ** clobbers the buffer addrs, so its easier ** to just free the existing mbufs and take ** the normal refresh path to get new buffers ** and mapping. */ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; } if (rbuf->m_pack) { m_free(rbuf->m_pack); rbuf->m_pack = NULL; } return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * Return TRUE for more work, FALSE for all clean. 
*********************************************************************/
static bool
ixv_rxeof(struct ix_queue *que, int count)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	union ixgbe_adv_rx_desc	*cur;
	struct ixv_rx_buf	*rbuf, *nbuf;

	IXV_RX_LOCK(rxr);

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mh, *mp;
		u32		ptype;
		u16		hlen, plen, hdr, vtag;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);

		/* Stop at the first descriptor the hardware has not completed */
		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;

		plen = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
		vtag = le16toh(cur->wb.upper.vlan);
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure all parts of a bad packet are discarded */
		if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
		    (rxr->discard)) {
			ifp->if_ierrors++;
			rxr->rx_discarded++;
			/* Keep discarding until the packet's last descriptor */
			if (!eop)
				rxr->discard = TRUE;
			else
				rxr->discard = FALSE;
			ixv_rx_discard(rxr, i);
			goto next_desc;
		}

		if (!eop) {
			nextp = i + 1;
			if (nextp == adapter->num_rx_desc)
				nextp = 0;
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, ie, both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			/* This must be an initial descriptor */
			hlen = (hdr & IXGBE_RXDADV_HDRBUFLEN_MASK) >>
			    IXGBE_RXDADV_HDRBUFLEN_SHIFT;
			if (hlen > IXV_RX_HDR)
				hlen = IXV_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length, this
			** could be zero if its a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->rx_split_packets++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we wont have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if ((adapter->num_vlans) &&
				    (staterr & IXGBE_RXD_STAT_VP)) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
				if (staterr & IXGBE_RXD_STAT_VP) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			ifp->if_ipackets++;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixv_rx_checksum(staterr, sendmp, ptype);
#if __FreeBSD_version >= 800000
			sendmp->m_pkthdr.flowid = que->msix;
			sendmp->m_flags |= M_FLOWID;
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL)
			ixv_rx_input(rxr, ifp, sendmp, ptype);

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixv_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixv_rx_unrefreshed(rxr))
		ixv_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	IXV_RX_UNLOCK(rxr);

	/*
	** We still have cleaning to do?
	** Schedule another interrupt if so.
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
		/*
		 * Widen to 64 bits before shifting: shifting a plain int
		 * and casting afterwards is wrong for vector numbers of 31
		 * and above.
		 */
		ixv_rearm_queues(adapter, ((u64)1 << que->msix));
		return (TRUE);
	}

	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
* *********************************************************************/ static void ixv_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype) { u16 status = (u16) staterr; u8 errors = (u8) (staterr >> 24); bool sctp = FALSE; if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) sctp = TRUE; if (status & IXGBE_RXD_STAT_IPCS) { if (!(errors & IXGBE_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else mp->m_pkthdr.csum_flags = 0; } if (status & IXGBE_RXD_STAT_L4CS) { u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) type = CSUM_SCTP_VALID; #endif if (!(errors & IXGBE_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= type; if (!sctp) mp->m_pkthdr.csum_data = htons(0xffff); } } return; } static void ixv_setup_vlan_support(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 ctrl, vid, vfta, retry; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* Enable the queues */ for (int i = 0; i < adapter->num_queues; i++) { ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), ctrl); } /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < VFTA_SIZE; i++) { if (ixv_shadow_vfta[i] == 0) continue; vfta = ixv_shadow_vfta[i]; /* ** Reconstruct the vlan id's ** based on the bits set in each ** of the array ints. */ for ( int j = 0; j < 32; j++) { retry = 0; if ((vfta & (1 << j)) == 0) continue; vid = (i * 32) + j; /* Call the shared code mailbox routine */ while (ixgbe_set_vfta(hw, vid, 0, TRUE)) { if (++retry > 5) break; } } } } /* ** This routine is run via an vlan config EVENT, ** it enables us to use the HW Filter table since ** we can get the vlan id. 
This just creates the ** entry in the soft version of the VFTA, init will ** repopulate the real table. */ static void ixv_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXV_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ ixv_init_locked(adapter); IXV_CORE_UNLOCK(adapter); } /* ** This routine is run via an vlan ** unconfig EVENT, remove our entry ** in the soft vfta. */ static void ixv_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXV_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ ixv_init_locked(adapter); IXV_CORE_UNLOCK(adapter); } static void ixv_enable_intr(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ix_queue *que = adapter->queues; u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); mask = IXGBE_EIMS_ENABLE_MASK; mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask); for (int i = 0; i < adapter->num_queues; i++, que++) ixv_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); return; } static void ixv_disable_intr(struct adapter *adapter) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIAC, 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIMC, ~0); IXGBE_WRITE_FLUSH(&adapter->hw); return; } /* ** Setup the correct IVAR register for a particular MSIX interrupt ** - entry is the register array entry ** - vector is the MSIX vector for this queue ** - type is RX/TX/MISC */ static void ixv_set_ivar(struct adapter 
*adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; if (type == -1) { /* MISC IVAR */ ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC); ivar &= ~0xFF; ivar |= vector; IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(entry >> 1), ivar); } } static void ixv_configure_ivars(struct adapter *adapter) { struct ix_queue *que = adapter->queues; for (int i = 0; i < adapter->num_queues; i++, que++) { /* First the RX queue entry */ ixv_set_ivar(adapter, i, que->msix, 0); /* ... and the TX */ ixv_set_ivar(adapter, i, que->msix, 1); /* Set an initial value in EITR */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEITR(que->msix), IXV_EITR_DEFAULT); } /* For the Link interrupt */ ixv_set_ivar(adapter, 1, adapter->mbxvec, -1); } /* ** Tasklet handler for MSIX MBX interrupts ** - do outside interrupt since it might sleep */ static void ixv_handle_mbx(void *context, int pending) { struct adapter *adapter = context; ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, 0); ixv_update_link_status(adapter); } /* ** The VF stats registers never have a truely virgin ** starting point, so this routine tries to make an ** artificial one, marking ground zero on attach as ** it were. 
*/ static void ixv_save_stats(struct adapter *adapter) { if (adapter->stats.vfgprc || adapter->stats.vfgptc) { adapter->stats.saved_reset_vfgprc += adapter->stats.vfgprc - adapter->stats.base_vfgprc; adapter->stats.saved_reset_vfgptc += adapter->stats.vfgptc - adapter->stats.base_vfgptc; adapter->stats.saved_reset_vfgorc += adapter->stats.vfgorc - adapter->stats.base_vfgorc; adapter->stats.saved_reset_vfgotc += adapter->stats.vfgotc - adapter->stats.base_vfgotc; adapter->stats.saved_reset_vfmprc += adapter->stats.vfmprc - adapter->stats.base_vfmprc; } } static void ixv_init_stats(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; adapter->stats.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); adapter->stats.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); adapter->stats.last_vfgorc |= (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); adapter->stats.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); adapter->stats.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); adapter->stats.last_vfgotc |= (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); adapter->stats.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); adapter->stats.base_vfgprc = adapter->stats.last_vfgprc; adapter->stats.base_vfgorc = adapter->stats.last_vfgorc; adapter->stats.base_vfgptc = adapter->stats.last_vfgptc; adapter->stats.base_vfgotc = adapter->stats.last_vfgotc; adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; } #define UPDATE_STAT_32(reg, last, count) \ { \ u32 current = IXGBE_READ_REG(hw, reg); \ if (current < last) \ count += 0x100000000LL; \ last = current; \ count &= 0xFFFFFFFF00000000LL; \ count |= current; \ } #define UPDATE_STAT_36(lsb, msb, last, count) \ { \ u64 cur_lsb = IXGBE_READ_REG(hw, lsb); \ u64 cur_msb = IXGBE_READ_REG(hw, msb); \ u64 current = ((cur_msb << 32) | cur_lsb); \ if (current < last) \ count += 0x1000000000LL; \ last = current; \ count &= 0xFFFFFFF000000000LL; \ count |= current; \ } /* ** ixv_update_stats - Update the board statistics 
counters. */ void ixv_update_stats(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.last_vfgprc, adapter->stats.vfgprc); UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.last_vfgptc, adapter->stats.vfgptc); UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, adapter->stats.last_vfgorc, adapter->stats.vfgorc); UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, adapter->stats.last_vfgotc, adapter->stats.vfgotc); UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.last_vfmprc, adapter->stats.vfmprc); } /********************************************************************** * * This routine is called only when ixgbe_display_debug_stats is enabled. * This routine provides a way to take a look at important statistics * maintained by the driver and hardware. * **********************************************************************/ static void ixv_print_hw_stats(struct adapter * adapter) { device_t dev = adapter->dev; device_printf(dev,"Std Mbuf Failed = %lu\n", adapter->mbuf_defrag_failed); device_printf(dev,"Driver dropped packets = %lu\n", adapter->dropped_pkts); device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events); device_printf(dev,"Good Packets Rcvd = %llu\n", (long long)adapter->stats.vfgprc); device_printf(dev,"Good Packets Xmtd = %llu\n", (long long)adapter->stats.vfgptc); device_printf(dev,"TSO Transmissions = %lu\n", adapter->tso_tx); } /********************************************************************** * * This routine is called only when em_display_debug_stats is enabled. * This routine provides a way to take a look at important statistics * maintained by the driver and hardware. 
*
 *  Prints the error byte count register, then per-queue interrupt,
 *  receive, LRO and transmit counters, and finally the mailbox
 *  interrupt count.
 *
 **********************************************************************/
static void
ixv_print_debug_info(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct ixgbe_hw         *hw = &adapter->hw;
        struct ix_queue         *que = adapter->queues;
        struct rx_ring          *rxr;
        struct tx_ring          *txr;
        struct lro_ctrl         *lro;

        device_printf(dev,"Error Byte Count = %u \n",
            IXGBE_READ_REG(hw, IXGBE_ERRBC));

        for (int i = 0; i < adapter->num_queues; i++, que++) {
                txr = que->txr;
                rxr = que->rxr;
                lro = &rxr->lro;
                device_printf(dev,"QUE(%d) IRQs Handled: %lu\n",
                    que->msix, (long)que->irqs);
                device_printf(dev,"RX(%d) Packets Received: %lld\n",
                    rxr->me, (long long)rxr->rx_packets);
                device_printf(dev,"RX(%d) Split RX Packets: %lld\n",
                    rxr->me, (long long)rxr->rx_split_packets);
                device_printf(dev,"RX(%d) Bytes Received: %lu\n",
                    rxr->me, (long)rxr->rx_bytes);
                device_printf(dev,"RX(%d) LRO Queued= %d\n",
                    rxr->me, lro->lro_queued);
                device_printf(dev,"RX(%d) LRO Flushed= %d\n",
                    rxr->me, lro->lro_flushed);
                device_printf(dev,"TX(%d) Packets Sent: %lu\n",
                    txr->me, (long)txr->total_packets);
                device_printf(dev,"TX(%d) NO Desc Avail: %lu\n",
                    txr->me, (long)txr->no_desc_avail);
        }

        device_printf(dev,"MBX IRQ Handled: %lu\n",
            (long)adapter->mbx_irq);
        return;
}

/*
 * Sysctl handler: writing the value 1 prints the hardware statistics
 * of the adapter passed as arg1.  Reads, and writes of any other
 * value, print nothing.
 */
static int
ixv_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
	int             error;
	int             result;
	struct adapter *adapter;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	/* Stop on error, or when no new value was supplied */
	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *) arg1;
		ixv_print_hw_stats(adapter);
	}
	return error;
}

/*
 * Sysctl handler: writing the value 1 prints the driver debug
 * information of the adapter passed as arg1.
 */
static int
ixv_sysctl_debug(SYSCTL_HANDLER_ARGS)
{
	int error, result;
	struct adapter *adapter;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	/* Stop on error, or when no new value was supplied */
	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *) arg1;
		ixv_print_debug_info(adapter);
	}
	return error;
}

/*
** Set flow control using sysctl:
** Flow control values:
** 	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
**
** Any other value is treated as "off".
** NOTE(review): req->newptr is not checked here, so the mode is
** re-applied on reads as well - confirm that is intended.
*/
static int
ixv_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct adapter *adapter;

	error = sysctl_handle_int(oidp, &ixv_flow_control, 0, req);

	if (error)
		return (error);

	adapter = (struct adapter *) arg1;
	switch (ixv_flow_control) {
		case ixgbe_fc_rx_pause:
		case ixgbe_fc_tx_pause:
		case ixgbe_fc_full:
			adapter->hw.fc.requested_mode = ixv_flow_control;
			break;
		case ixgbe_fc_none:
		default:
			adapter->hw.fc.requested_mode = ixgbe_fc_none;
	}

	ixgbe_fc_enable(&adapter->hw);
	return error;
}

/*
 * Store 'value' in *limit and register *limit as a read/write integer
 * sysctl named 'name' under the device's sysctl tree.
 */
static void
ixv_add_rx_process_limit(struct adapter *adapter, const char *name,
        const char *description, int *limit, int value)
{
        *limit = value;
        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
-            OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
+            OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}

Index: stable/9/sys/dev/ixgbe
===================================================================
--- stable/9/sys/dev/ixgbe	(revision 273911)
+++ stable/9/sys/dev/ixgbe	(revision 273912)

Property changes on: stable/9/sys/dev/ixgbe
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/dev/ixgbe:r263710,273377-273378,273423,273455
Index: stable/9/sys/dev/mpr/mpr.c
===================================================================
--- stable/9/sys/dev/mpr/mpr.c	(revision 273911)
+++ stable/9/sys/dev/mpr/mpr.c	(revision 273912)
@@ -1,2795 +1,2795 @@
 /*-
  * Copyright (c) 2009 Yahoo! Inc.
  * Copyright (c) 2012-2014 LSI Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* Communications core for LSI MPT2 */ /* TODO Move headers to mprvar */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mpr_diag_reset(struct mpr_softc *sc, int sleep_flag); static int mpr_init_queues(struct mpr_softc *sc); static int mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag); static int mpr_transition_operational(struct mpr_softc *sc); static int mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching); static void mpr_iocfacts_free(struct mpr_softc *sc); static void mpr_startup(void *arg); static int mpr_send_iocinit(struct mpr_softc *sc); static int mpr_alloc_queues(struct mpr_softc *sc); static int mpr_alloc_replies(struct mpr_softc *sc); static int 
mpr_alloc_requests(struct mpr_softc *sc); static int mpr_attach_log(struct mpr_softc *sc); static __inline void mpr_complete_command(struct mpr_softc *sc, struct mpr_command *cm); static void mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply); static void mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm); static void mpr_periodic(void *); static int mpr_reregister_events(struct mpr_softc *sc); static void mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm); static int mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts); static int mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag); SYSCTL_NODE(_hw, OID_AUTO, mpr, CTLFLAG_RD, 0, "MPR Driver Parameters"); MALLOC_DEFINE(M_MPR, "mpr", "mpr driver memory"); /* * Do a "Diagnostic Reset" aka a hard reset. This should get the chip out of * any state and back to its initialization state machine. */ static char mpt2_reset_magic[] = { 0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d }; /* * Added this union to smoothly convert le64toh cm->cm_desc.Words. * Compiler only supports unint64_t to be passed as an argument. * Otherwise it will through this error: * "aggregate value used where an integer was expected" */ typedef union _reply_descriptor { u64 word; struct { u32 low; u32 high; } u; }reply_descriptor,address_descriptor; /* Rate limit chain-fail messages to 1 per minute */ static struct timeval mpr_chainfail_interval = { 60, 0 }; /* * sleep_flag can be either CAN_SLEEP or NO_SLEEP. * If this function is called from process context, it can sleep * and there is no harm to sleep, in case if this fuction is called * from Interrupt handler, we can not sleep and need NO_SLEEP flag set. * based on sleep flags driver will call either msleep, pause or DELAY. * msleep and pause are of same variant, but pause is used when mpr_mtx * is not hold by driver. 
*/ static int mpr_diag_reset(struct mpr_softc *sc,int sleep_flag) { uint32_t reg; int i, error, tries = 0; uint8_t first_wait_done = FALSE; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* Clear any pending interrupts */ mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* * Force NO_SLEEP for threads prohibited to sleep * e.a Thread from interrupt handler are prohibited to sleep. */ #if __FreeBSD_version >= 1000029 if (curthread->td_no_sleeping) #else //__FreeBSD_version < 1000029 if (curthread->td_pflags & TDP_NOSLEEPING) #endif //__FreeBSD_version >= 1000029 sleep_flag = NO_SLEEP; /* Push the magic sequence */ error = ETIMEDOUT; while (tries++ < 20) { for (i = 0; i < sizeof(mpt2_reset_magic); i++) mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, mpt2_reset_magic[i]); /* wait 100 msec */ if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdiag", hz/10); else if (sleep_flag == CAN_SLEEP) pause("mprdiag", hz/10); else DELAY(100 * 1000); reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) { error = 0; break; } } if (error) return (error); /* Send the actual reset. XXX need to refresh the reg? */ mpr_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET, reg | MPI2_DIAG_RESET_ADAPTER); /* Wait up to 300 seconds in 50ms intervals */ error = ETIMEDOUT; for (i = 0; i < 6000; i++) { /* * Wait 50 msec. If this is the first time through, wait 256 * msec to satisfy Diag Reset timing requirements. */ if (first_wait_done) { if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdiag", hz/20); else if (sleep_flag == CAN_SLEEP) pause("mprdiag", hz/20); else DELAY(50 * 1000); } else { DELAY(256 * 1000); first_wait_done = TRUE; } /* * Check for the RESET_ADAPTER bit to be cleared first, then * wait for the RESET state to be cleared, which takes a little * longer. 
*/ reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_RESET_ADAPTER) { continue; } reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) { error = 0; break; } } if (error) return (error); mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0); return (0); } static int mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag) { MPR_FUNCTRACE(sc); mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET << MPI2_DOORBELL_FUNCTION_SHIFT); if (mpr_wait_db_ack(sc, 5, sleep_flag) != 0) { mpr_dprint(sc, MPR_FAULT, "Doorbell handshake failed : <%s>\n", __func__); return (ETIMEDOUT); } return (0); } static int mpr_transition_ready(struct mpr_softc *sc) { uint32_t reg, state; int error, tries = 0; int sleep_flags; MPR_FUNCTRACE(sc); /* If we are in attach call, do not sleep */ sleep_flags = (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE) ? CAN_SLEEP : NO_SLEEP; error = 0; while (tries++ < 1200) { reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); mpr_dprint(sc, MPR_INIT, "Doorbell= 0x%x\n", reg); /* * Ensure the IOC is ready to talk. If it's not, try * resetting it. */ if (reg & MPI2_DOORBELL_USED) { mpr_diag_reset(sc, sleep_flags); DELAY(50000); continue; } /* Is the adapter owned by another peer? */ if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) == (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) { device_printf(sc->mpr_dev, "IOC is under the control " "of another peer host, aborting initialization.\n"); return (ENXIO); } state = reg & MPI2_IOC_STATE_MASK; if (state == MPI2_IOC_STATE_READY) { /* Ready to go! 
*/ error = 0; break; } else if (state == MPI2_IOC_STATE_FAULT) { mpr_dprint(sc, MPR_FAULT, "IOC in fault state 0x%x\n", state & MPI2_DOORBELL_FAULT_CODE_MASK); mpr_diag_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_OPERATIONAL) { /* Need to take ownership */ mpr_message_unit_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_RESET) { /* Wait a bit, IOC might be in transition */ mpr_dprint(sc, MPR_FAULT, "IOC in unexpected reset state\n"); } else { mpr_dprint(sc, MPR_FAULT, "IOC in unknown state 0x%x\n", state); error = EINVAL; break; } /* Wait 50ms for things to settle down. */ DELAY(50000); } if (error) device_printf(sc->mpr_dev, "Cannot transition IOC to ready\n"); return (error); } static int mpr_transition_operational(struct mpr_softc *sc) { uint32_t reg, state; int error; MPR_FUNCTRACE(sc); error = 0; reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); mpr_dprint(sc, MPR_INIT, "Doorbell= 0x%x\n", reg); state = reg & MPI2_IOC_STATE_MASK; if (state != MPI2_IOC_STATE_READY) { if ((error = mpr_transition_ready(sc)) != 0) { mpr_dprint(sc, MPR_FAULT, "%s failed to transition ready\n", __func__); return (error); } } error = mpr_send_iocinit(sc); return (error); } /* * This is called during attach and when re-initializing due to a Diag Reset. * IOC Facts is used to allocate many of the structures needed by the driver. * If called from attach, de-allocation is not required because the driver has * not allocated any structures yet, but if called from a Diag Reset, previously * allocated structures based on IOC Facts will need to be freed and re- * allocated bases on the latest IOC Facts. 
*/ static int mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching) { int error, i; Mpi2IOCFactsReply_t saved_facts; uint8_t saved_mode, reallocating; struct mprsas_lun *lun, *lun_tmp; struct mprsas_target *targ; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* Save old IOC Facts and then only reallocate if Facts have changed */ if (!attaching) { bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY)); } /* * Get IOC Facts. In all cases throughout this function, panic if doing * a re-initialization and only return the error if attaching so the OS * can handle it. */ if ((error = mpr_get_iocfacts(sc, sc->facts)) != 0) { if (attaching) { mpr_dprint(sc, MPR_FAULT, "%s failed to get IOC Facts " "with error %d\n", __func__, error); return (error); } else { panic("%s failed to get IOC Facts with error %d\n", __func__, error); } } mpr_print_iocfacts(sc, sc->facts); snprintf(sc->fw_version, sizeof(sc->fw_version), "%02d.%02d.%02d.%02d", sc->facts->FWVersion.Struct.Major, sc->facts->FWVersion.Struct.Minor, sc->facts->FWVersion.Struct.Unit, sc->facts->FWVersion.Struct.Dev); mpr_printf(sc, "Firmware: %s, Driver: %s\n", sc->fw_version, MPR_DRIVER_VERSION); mpr_printf(sc, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities, "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf" "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR" "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc"); /* * If the chip doesn't support event replay then a hard reset will be * required to trigger a full discovery. Do the reset here then * retransition to Ready. A hard reset might have already been done, * but it doesn't hurt to do it again. Only do this if attaching, not * for a Diag Reset. 
*/ if (attaching) { if ((sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0) { mpr_diag_reset(sc, NO_SLEEP); if ((error = mpr_transition_ready(sc)) != 0) { mpr_dprint(sc, MPR_FAULT, "%s failed to " "transition to ready with error %d\n", __func__, error); return (error); } } } /* * Set flag if IR Firmware is loaded. If the RAID Capability has * changed from the previous IOC Facts, log a warning, but only if * checking this after a Diag Reset and not during attach. */ saved_mode = sc->ir_firmware; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID) sc->ir_firmware = 1; if (!attaching) { if (sc->ir_firmware != saved_mode) { mpr_dprint(sc, MPR_FAULT, "%s new IR/IT mode in IOC " "Facts does not match previous mode\n", __func__); } } /* Only deallocate and reallocate if relevant IOC Facts have changed */ reallocating = FALSE; if ((!attaching) && ((saved_facts.MsgVersion != sc->facts->MsgVersion) || (saved_facts.HeaderVersion != sc->facts->HeaderVersion) || (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) || (saved_facts.RequestCredit != sc->facts->RequestCredit) || (saved_facts.ProductID != sc->facts->ProductID) || (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) || (saved_facts.IOCRequestFrameSize != sc->facts->IOCRequestFrameSize) || (saved_facts.MaxTargets != sc->facts->MaxTargets) || (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) || (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) || (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) || (saved_facts.MaxReplyDescriptorPostQueueDepth != sc->facts->MaxReplyDescriptorPostQueueDepth) || (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) || (saved_facts.MaxVolumes != sc->facts->MaxVolumes) || (saved_facts.MaxPersistentEntries != sc->facts->MaxPersistentEntries))) { reallocating = TRUE; } /* * Some things should be done if attaching or re-allocating after a Diag * Reset, but are not needed after a Diag Reset if the FW 
has not * changed. */ if (attaching || reallocating) { /* * Check if controller supports FW diag buffers and set flag to * enable each type. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED]. enabled = TRUE; /* * Set flag if EEDP is supported and if TLR is supported. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP) sc->eedp_enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR) sc->control_TLR = TRUE; /* * Size the queues. Since the reply queues always need one free * entry, we'll just deduct one reply message here. */ sc->num_reqs = MIN(MPR_REQ_FRAMES, sc->facts->RequestCredit); sc->num_replies = MIN(MPR_REPLY_FRAMES + MPR_EVT_REPLY_FRAMES, sc->facts->MaxReplyDescriptorPostQueueDepth) - 1; /* * Initialize all Tail Queues */ TAILQ_INIT(&sc->req_list); TAILQ_INIT(&sc->high_priority_req_list); TAILQ_INIT(&sc->chain_list); TAILQ_INIT(&sc->tm_list); } /* * If doing a Diag Reset and the FW is significantly different * (reallocating will be set above in IOC Facts comparison), then all * buffers based on the IOC Facts will need to be freed before they are * reallocated. */ if (reallocating) { mpr_iocfacts_free(sc); /* * The number of targets is based on IOC Facts, so free all of * the allocated LUNs for each target and then the target buffer * itself. 
*/ for (i=0; i< saved_facts.MaxTargets; i++) { targ = &sc->sassc->targets[i]; SLIST_FOREACH_SAFE(lun, &targ->luns, lun_link, lun_tmp) { free(lun, M_MPR); } } free(sc->sassc->targets, M_MPR); sc->sassc->targets = malloc(sizeof(struct mprsas_target) * sc->facts->MaxTargets, M_MPR, M_WAITOK|M_ZERO); if (!sc->sassc->targets) { panic("%s failed to alloc targets with error %d\n", __func__, ENOMEM); } } /* * Any deallocation has been completed. Now start reallocating * if needed. Will only need to reallocate if attaching or if the new * IOC Facts are different from the previous IOC Facts after a Diag * Reset. Targets have already been allocated above if needed. */ if (attaching || reallocating) { if (((error = mpr_alloc_queues(sc)) != 0) || ((error = mpr_alloc_replies(sc)) != 0) || ((error = mpr_alloc_requests(sc)) != 0)) { if (attaching ) { mpr_dprint(sc, MPR_FAULT, "%s failed to alloc " "queues with error %d\n", __func__, error); mpr_free(sc); return (error); } else { panic("%s failed to alloc queues with error " "%d\n", __func__, error); } } } /* Always initialize the queues */ bzero(sc->free_queue, sc->fqdepth * 4); mpr_init_queues(sc); /* * Always get the chip out of the reset state, but only panic if not * attaching. If attaching and there is an error, that is handled by * the OS. */ error = mpr_transition_operational(sc); if (error != 0) { if (attaching) { mpr_printf(sc, "%s failed to transition to " "operational with error %d\n", __func__, error); mpr_free(sc); return (error); } else { panic("%s failed to transition to operational with " "error %d\n", __func__, error); } } /* * Finish the queue initialization. * These are set here instead of in mpr_init_queues() because the * IOC resets these values during the state transition in * mpr_transition_operational(). The free index is set to 1 * because the corresponding index in the IOC is set to 0, and the * IOC treats the queues as full if both are set to the same value. 
* Hence the reason that the queue can't hold all of the possible * replies. */ sc->replypostindex = 0; mpr_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex); mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0); /* * Attach the subsystems so they can prepare their event masks. */ /* XXX Should be dynamic so that IM/IR and user modules can attach */ if (attaching) { if (((error = mpr_attach_log(sc)) != 0) || ((error = mpr_attach_sas(sc)) != 0) || ((error = mpr_attach_user(sc)) != 0)) { mpr_printf(sc, "%s failed to attach all subsystems: " "error %d\n", __func__, error); mpr_free(sc); return (error); } if ((error = mpr_pci_setup_interrupts(sc)) != 0) { mpr_printf(sc, "%s failed to setup interrupts\n", __func__); mpr_free(sc); return (error); } } return (error); } /* * This is called if memory is being free (during detach for example) and when * buffers need to be reallocated due to a Diag Reset. */ static void mpr_iocfacts_free(struct mpr_softc *sc) { struct mpr_command *cm; int i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if (sc->free_busaddr != 0) bus_dmamap_unload(sc->queues_dmat, sc->queues_map); if (sc->free_queue != NULL) bus_dmamem_free(sc->queues_dmat, sc->free_queue, sc->queues_map); if (sc->queues_dmat != NULL) bus_dma_tag_destroy(sc->queues_dmat); if (sc->chain_busaddr != 0) bus_dmamap_unload(sc->chain_dmat, sc->chain_map); if (sc->chain_frames != NULL) bus_dmamem_free(sc->chain_dmat, sc->chain_frames, sc->chain_map); if (sc->chain_dmat != NULL) bus_dma_tag_destroy(sc->chain_dmat); if (sc->sense_busaddr != 0) bus_dmamap_unload(sc->sense_dmat, sc->sense_map); if (sc->sense_frames != NULL) bus_dmamem_free(sc->sense_dmat, sc->sense_frames, sc->sense_map); if (sc->sense_dmat != NULL) bus_dma_tag_destroy(sc->sense_dmat); if (sc->reply_busaddr != 0) bus_dmamap_unload(sc->reply_dmat, sc->reply_map); if (sc->reply_frames != NULL) bus_dmamem_free(sc->reply_dmat, sc->reply_frames, sc->reply_map); if (sc->reply_dmat != NULL) 
bus_dma_tag_destroy(sc->reply_dmat); if (sc->req_busaddr != 0) bus_dmamap_unload(sc->req_dmat, sc->req_map); if (sc->req_frames != NULL) bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map); if (sc->req_dmat != NULL) bus_dma_tag_destroy(sc->req_dmat); if (sc->chains != NULL) free(sc->chains, M_MPR); if (sc->commands != NULL) { for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; bus_dmamap_destroy(sc->buffer_dmat, cm->cm_dmamap); } free(sc->commands, M_MPR); } if (sc->buffer_dmat != NULL) bus_dma_tag_destroy(sc->buffer_dmat); } /* * The terms diag reset and hard reset are used interchangeably in the MPI * docs to mean resetting the controller chip. In this code diag reset * cleans everything up, and the hard reset function just sends the reset * sequence to the chip. This should probably be refactored so that every * subsystem gets a reset notification of some sort, and can clean up * appropriately. */ int mpr_reinit(struct mpr_softc *sc) { int error; struct mprsas_softc *sassc; sassc = sc->sassc; MPR_FUNCTRACE(sc); mtx_assert(&sc->mpr_mtx, MA_OWNED); if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) { mpr_dprint(sc, MPR_INIT, "%s reset already in progress\n", __func__); return 0; } mpr_dprint(sc, MPR_INFO, "Reinitializing controller,\n"); /* make sure the completion callbacks can recognize they're getting * a NULL cm_reply due to a reset. */ sc->mpr_flags |= MPR_FLAGS_DIAGRESET; /* * Mask interrupts here. */ mpr_dprint(sc, MPR_INIT, "%s mask interrupts\n", __func__); mpr_mask_intr(sc); error = mpr_diag_reset(sc, CAN_SLEEP); if (error != 0) { panic("%s hard reset failed with error %d\n", __func__, error); } /* Restore the PCI state, including the MSI-X registers */ mpr_pci_restore(sc); /* Give the I/O subsystem special priority to get itself prepared */ mprsas_handle_reinit(sc); /* * Get IOC Facts and allocate all structures based on this information. * The attach function will also call mpr_iocfacts_allocate at startup. 
* If relevant values have changed in IOC Facts, this function will free * all of the memory based on IOC Facts and reallocate that memory. */ if ((error = mpr_iocfacts_allocate(sc, FALSE)) != 0) { panic("%s IOC Facts based allocation failed with error %d\n", __func__, error); } /* * Mapping structures will be re-allocated after getting IOC Page8, so * free these structures here. */ mpr_mapping_exit(sc); /* * The static page function currently read is IOC Page8. Others can be * added in future. It's possible that the values in IOC Page8 have * changed after a Diag Reset due to user modification, so always read * these. Interrupts are masked, so unmask them before getting config * pages. */ mpr_unmask_intr(sc); sc->mpr_flags &= ~MPR_FLAGS_DIAGRESET; mpr_base_static_config_pages(sc); /* * Some mapping info is based in IOC Page8 data, so re-initialize the * mapping tables. */ mpr_mapping_initialize(sc); /* * Restart will reload the event masks clobbered by the reset, and * then enable the port. */ mpr_reregister_events(sc); /* the end of discovery will release the simq, so we're done. */ mpr_dprint(sc, MPR_INFO, "%s finished sc %p post %u free %u\n", __func__, sc, sc->replypostindex, sc->replyfreeindex); mprsas_release_simq_reinit(sassc); return 0; } /* Wait for the chip to ACK a word that we've put into its FIFO * Wait for seconds. In single loop wait for busy loop * for 500 microseconds. * Total is [ 0.5 * (2000 * ) ] in miliseconds. * */ static int mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag) { u32 cntdn, count; u32 int_status; u32 doorbell; count = 0; cntdn = (sleep_flag == CAN_SLEEP) ? 
1000*timeout : 2000*timeout; do { int_status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) { mpr_dprint(sc, MPR_INIT, "%s: successful count(%d), " "timeout(%d)\n", __func__, count, timeout); return 0; } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) { doorbell = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mpr_dprint(sc, MPR_FAULT, "fault_state(0x%04x)!\n", doorbell); return (EFAULT); } } else if (int_status == 0xFFFFFFFF) goto out; /* * If it can sleep, sleep for 1 milisecond, else busy loop for * 0.5 milisecond */ if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdba", hz/1000); else if (sleep_flag == CAN_SLEEP) pause("mprdba", hz/1000); else DELAY(500); count++; } while (--cntdn); out: mpr_dprint(sc, MPR_FAULT, "%s: failed due to timeout count(%d), " "int_status(%x)!\n", __func__, count, int_status); return (ETIMEDOUT); } /* Wait for the chip to signal that the next word in its FIFO can be fetched */ static int mpr_wait_db_int(struct mpr_softc *sc) { int retry; for (retry = 0; retry < MPR_DB_MAX_WAIT; retry++) { if ((mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) & MPI2_HIS_IOC2SYS_DB_STATUS) != 0) return (0); DELAY(2000); } return (ETIMEDOUT); } /* Step through the synchronous command state machine, i.e. 
"Doorbell mode" */
/*
 * Parameters:
 *   sc        controller softc
 *   req       request message, sent as (req_sz + 3) / 4 32-bit words
 *   reply     buffer that receives the reply, filled in 16-bit words
 *   req_sz    size of the request in bytes
 *   reply_sz  size of the reply buffer in bytes
 *   timeout   not referenced in this function; the doorbell ACK waits below
 *             pass a fixed value of 5 to mpr_wait_db_ack()
 *
 * Returns 0 on success, EBUSY if the doorbell is already in use, or ENXIO if
 * any step of the handshake fails or times out.
 */
static int
mpr_request_sync(struct mpr_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply,
    int req_sz, int reply_sz, int timeout)
{
	uint32_t *data32;
	uint16_t *data16;
	int i, count, ioc_sz, residual;
	int sleep_flags = CAN_SLEEP;

	/* Downgrade to NO_SLEEP when the current thread must not sleep. */
#if __FreeBSD_version >= 1000029
	if (curthread->td_no_sleeping)
#else //__FreeBSD_version < 1000029
	if (curthread->td_pflags & TDP_NOSLEEPING)
#endif //__FreeBSD_version >= 1000029
		sleep_flags = NO_SLEEP;

	/* Step 1 */
	mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

	/* Step 2 */
	if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
		return (EBUSY);

	/* Step 3
	 * Announce that a message is coming through the doorbell.  Messages
	 * are pushed at 32bit words, so round up if needed.
	 */
	count = (req_sz + 3) / 4;
	mpr_regwrite(sc, MPI2_DOORBELL_OFFSET,
	    (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) |
	    (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT));

	/* Step 4 */
	if (mpr_wait_db_int(sc) ||
	    (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) {
		mpr_dprint(sc, MPR_FAULT, "Doorbell failed to activate\n");
		return (ENXIO);
	}
	mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
	if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) {
		mpr_dprint(sc, MPR_FAULT, "Doorbell handshake failed\n");
		return (ENXIO);
	}

	/* Step 5 */
	/* Clock out the message data synchronously in 32-bit dwords*/
	data32 = (uint32_t *)req;
	for (i = 0; i < count; i++) {
		mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i]));
		if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) {
			mpr_dprint(sc, MPR_FAULT,
			    "Timeout while writing doorbell\n");
			return (ENXIO);
		}
	}

	/* Step 6 */
	/* Clock in the reply in 16-bit words.  The total length of the
	 * message is always in the 4th byte, so clock out the first 2 words
	 * manually, then loop the rest.
	 */
	data16 = (uint16_t *)reply;
	if (mpr_wait_db_int(sc) != 0) {
		mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 0\n");
		return (ENXIO);
	}
	data16[0] =
	    mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK;
	mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
	if (mpr_wait_db_int(sc) != 0) {
		mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 1\n");
		return (ENXIO);
	}
	data16[1] =
	    mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK;
	mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

	/* Number of 32bit words in the message */
	ioc_sz = reply->MsgLength;

	/*
	 * Figure out how many 16bit words to clock in without overrunning.
	 * The precision loss with dividing reply_sz can safely be
	 * ignored because the messages can only be multiples of 32bits.
	 */
	residual = 0;
	count = MIN((reply_sz / 4), ioc_sz) * 2;
	if (count < ioc_sz * 2) {
		residual = ioc_sz * 2 - count;
		mpr_dprint(sc, MPR_ERROR, "Driver error, throwing away %d "
		    "residual message words\n", residual);
	}

	/* Words 0 and 1 were read above, so continue from index 2. */
	for (i = 2; i < count; i++) {
		if (mpr_wait_db_int(sc) != 0) {
			mpr_dprint(sc, MPR_FAULT,
			    "Timeout reading doorbell %d\n", i);
			return (ENXIO);
		}
		data16[i] = mpr_regread(sc, MPI2_DOORBELL_OFFSET) &
		    MPI2_DOORBELL_DATA_MASK;
		mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
	}

	/*
	 * Pull out residual words that won't fit into the provided buffer.
	 * This keeps the chip from hanging due to a driver programming
	 * error.
	 */
	while (residual--) {
		if (mpr_wait_db_int(sc) != 0) {
			mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell\n");
			return (ENXIO);
		}
		(void)mpr_regread(sc, MPI2_DOORBELL_OFFSET);
		mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
	}

	/* Step 7 */
	if (mpr_wait_db_int(sc) != 0) {
		mpr_dprint(sc, MPR_FAULT, "Timeout waiting to exit doorbell\n");
		return (ENXIO);
	}
	/* A doorbell that is still marked used is logged but not treated as an error. */
	if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
		mpr_dprint(sc, MPR_FAULT, "Warning, doorbell still active\n");
	mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

	return (0);
}

/*
 * Post a command's request descriptor to the controller.
 *
 * Increments the active command counter (raising the high-water mark when it
 * is exceeded), then writes the low and high halves of the descriptor to the
 * request descriptor post registers.
 */
static void
mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm)
{
	reply_descriptor rd;

	MPR_FUNCTRACE(sc);
	mpr_dprint(sc, MPR_TRACE, "%s SMID %u cm %p ccb %p\n", __func__,
	    cm->cm_desc.Default.SMID, cm, cm->cm_ccb);

	/* The lock is only asserted once attach is done and before shutdown. */
	if (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE && !(sc->mpr_flags &
	    MPR_FLAGS_SHUTDOWN))
		mtx_assert(&sc->mpr_mtx, MA_OWNED);

	if (++sc->io_cmds_active > sc->io_cmds_highwater)
		sc->io_cmds_highwater++;

	/* Go through the union so the 64-bit descriptor is byte-swapped as one value. */
	rd.u.low = cm->cm_desc.Words.Low;
	rd.u.high = cm->cm_desc.Words.High;
	rd.word = htole64(rd.word);
	/* TODO-We may need to make below regwrite atomic */
	mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET,
	    rd.u.low);
	mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET,
	    rd.u.high);
}

/*
 * Just the FACTS, ma'am.
*/ static int mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts) { MPI2_DEFAULT_REPLY *reply; MPI2_IOC_FACTS_REQUEST request; int error, req_sz, reply_sz; MPR_FUNCTRACE(sc); req_sz = sizeof(MPI2_IOC_FACTS_REQUEST); reply_sz = sizeof(MPI2_IOC_FACTS_REPLY); reply = (MPI2_DEFAULT_REPLY *)facts; bzero(&request, req_sz); request.Function = MPI2_FUNCTION_IOC_FACTS; error = mpr_request_sync(sc, &request, reply, req_sz, reply_sz, 5); return (error); } static int mpr_send_iocinit(struct mpr_softc *sc) { MPI2_IOC_INIT_REQUEST init; MPI2_DEFAULT_REPLY reply; int req_sz, reply_sz, error; struct timeval now; uint64_t time_in_msec; MPR_FUNCTRACE(sc); req_sz = sizeof(MPI2_IOC_INIT_REQUEST); reply_sz = sizeof(MPI2_IOC_INIT_REPLY); bzero(&init, req_sz); bzero(&reply, reply_sz); /* * Fill in the init block. Note that most addresses are * deliberately in the lower 32bits of memory. This is a micro- * optimzation for PCI/PCIX, though it's not clear if it helps PCIe. */ init.Function = MPI2_FUNCTION_IOC_INIT; init.WhoInit = MPI2_WHOINIT_HOST_DRIVER; init.MsgVersion = htole16(MPI2_VERSION); init.HeaderVersion = htole16(MPI2_HEADER_VERSION); init.SystemRequestFrameSize = htole16(sc->facts->IOCRequestFrameSize); init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth); init.ReplyFreeQueueDepth = htole16(sc->fqdepth); init.SenseBufferAddressHigh = 0; init.SystemReplyAddressHigh = 0; init.SystemRequestFrameBaseAddress.High = 0; init.SystemRequestFrameBaseAddress.Low = htole32((uint32_t)sc->req_busaddr); init.ReplyDescriptorPostQueueAddress.High = 0; init.ReplyDescriptorPostQueueAddress.Low = htole32((uint32_t)sc->post_busaddr); init.ReplyFreeQueueAddress.High = 0; init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr); getmicrotime(&now); time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000); init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF); init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF); error = mpr_request_sync(sc, &init, &reply, 
req_sz, reply_sz, 5); if ((reply.IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; mpr_dprint(sc, MPR_INIT, "IOCInit status= 0x%x\n", reply.IOCStatus); return (error); } void mpr_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int mpr_alloc_queues(struct mpr_softc *sc) { bus_addr_t queues_busaddr; uint8_t *queues; int qsize, fqsize, pqsize; /* * The reply free queue contains 4 byte entries in multiples of 16 and * aligned on a 16 byte boundary. There must always be an unused entry. * This queue supplies fresh reply frames for the firmware to use. * * The reply descriptor post queue contains 8 byte entries in * multiples of 16 and aligned on a 16 byte boundary. This queue * contains filled-in reply frames sent from the firmware to the host. * * These two queues are allocated together for simplicity. */ sc->fqdepth = roundup2((sc->num_replies + 1), 16); sc->pqdepth = roundup2((sc->num_replies + 1), 16); fqsize= sc->fqdepth * 4; pqsize = sc->pqdepth * 8; qsize = fqsize + pqsize; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ qsize, /* maxsize */ 1, /* nsegments */ qsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->queues_dmat)) { device_printf(sc->mpr_dev, "Cannot allocate queues DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT, &sc->queues_map)) { device_printf(sc->mpr_dev, "Cannot allocate queues memory\n"); return (ENOMEM); } bzero(queues, qsize); bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize, mpr_memaddr_cb, &queues_busaddr, 0); sc->free_queue = (uint32_t *)queues; sc->free_busaddr = queues_busaddr; sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize); sc->post_busaddr = queues_busaddr + fqsize; 
return (0); } static int mpr_alloc_replies(struct mpr_softc *sc) { int rsize, num_replies; /* * sc->num_replies should be one less than sc->fqdepth. We need to * allocate space for sc->fqdepth replies, but only sc->num_replies * replies can be used at once. */ num_replies = max(sc->fqdepth, sc->num_replies); rsize = sc->facts->ReplyFrameSize * num_replies * 4; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 4, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->reply_dmat)) { device_printf(sc->mpr_dev, "Cannot allocate replies DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames, BUS_DMA_NOWAIT, &sc->reply_map)) { device_printf(sc->mpr_dev, "Cannot allocate replies memory\n"); return (ENOMEM); } bzero(sc->reply_frames, rsize); bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize, mpr_memaddr_cb, &sc->reply_busaddr, 0); return (0); } static int mpr_alloc_requests(struct mpr_softc *sc) { struct mpr_command *cm; struct mpr_chain *chain; int i, rsize, nsegs; rsize = sc->facts->IOCRequestFrameSize * sc->num_reqs * 4; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->req_dmat)) { device_printf(sc->mpr_dev, "Cannot allocate request DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames, BUS_DMA_NOWAIT, &sc->req_map)) { device_printf(sc->mpr_dev, "Cannot allocate request memory\n"); return (ENOMEM); } bzero(sc->req_frames, rsize); bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize, 
mpr_memaddr_cb, &sc->req_busaddr, 0); rsize = sc->facts->IOCRequestFrameSize * sc->max_chains * 4; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->chain_dmat)) { device_printf(sc->mpr_dev, "Cannot allocate chain DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames, BUS_DMA_NOWAIT, &sc->chain_map)) { device_printf(sc->mpr_dev, "Cannot allocate chain memory\n"); return (ENOMEM); } bzero(sc->chain_frames, rsize); bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize, mpr_memaddr_cb, &sc->chain_busaddr, 0); rsize = MPR_SENSE_LEN * sc->num_reqs; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->sense_dmat)) { device_printf(sc->mpr_dev, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames, BUS_DMA_NOWAIT, &sc->sense_map)) { device_printf(sc->mpr_dev, "Cannot allocate sense memory\n"); return (ENOMEM); } bzero(sc->sense_frames, rsize); bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize, mpr_memaddr_cb, &sc->sense_busaddr, 0); sc->chains = malloc(sizeof(struct mpr_chain) * sc->max_chains, M_MPR, M_WAITOK | M_ZERO); if (!sc->chains) { device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n", __func__, __LINE__); return (ENOMEM); } for (i = 0; i < sc->max_chains; i++) { chain = &sc->chains[i]; chain->chain = (MPI2_SGE_IO_UNION *)(sc->chain_frames + i * sc->facts->IOCRequestFrameSize * 4); chain->chain_busaddr 
= sc->chain_busaddr + i * sc->facts->IOCRequestFrameSize * 4; mpr_free_chain(sc, chain); sc->chain_free_lowwater++; } /* XXX Need to pick a more precise value */ nsegs = (MAXPHYS / PAGE_SIZE) + 1; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ nsegs, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->mpr_mtx, /* lockarg */ &sc->buffer_dmat)) { device_printf(sc->mpr_dev, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * SMID 0 cannot be used as a free command per the firmware spec. * Just drop that command instead of risking accounting bugs. */ sc->commands = malloc(sizeof(struct mpr_command) * sc->num_reqs, M_MPR, M_WAITOK | M_ZERO); if (!sc->commands) { device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n", __func__, __LINE__); return (ENOMEM); } for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; cm->cm_req = sc->req_frames + i * sc->facts->IOCRequestFrameSize * 4; cm->cm_req_busaddr = sc->req_busaddr + i * sc->facts->IOCRequestFrameSize * 4; cm->cm_sense = &sc->sense_frames[i]; cm->cm_sense_busaddr = sc->sense_busaddr + i * MPR_SENSE_LEN; cm->cm_desc.Default.SMID = i; cm->cm_sc = sc; TAILQ_INIT(&cm->cm_chain_list); callout_init_mtx(&cm->cm_callout, &sc->mpr_mtx, 0); /* XXX Is a failure here a critical problem? 
*/ if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap) == 0) if (i <= sc->facts->HighPriorityCredit) mpr_free_high_priority_command(sc, cm); else mpr_free_command(sc, cm); else { panic("failed to allocate command %d\n", i); sc->num_reqs = i; break; } } return (0); } static int mpr_init_queues(struct mpr_softc *sc) { int i; memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8); /* * According to the spec, we need to use one less reply than we * have space for on the queue. So sc->num_replies (the number we * use) should be less than sc->fqdepth (allocated size). */ if (sc->num_replies >= sc->fqdepth) return (EINVAL); /* * Initialize all of the free queue entries. */ for (i = 0; i < sc->fqdepth; i++) sc->free_queue[i] = sc->reply_busaddr + (i * sc->facts->ReplyFrameSize * 4); sc->replyfreeindex = sc->num_replies; return (0); } /* Get the driver parameter tunables. Lowest priority are the driver defaults. * Next are the global settings, if they exist. Highest are the per-unit * settings, if they exist. */ static void mpr_get_tunables(struct mpr_softc *sc) { char tmpstr[80]; /* XXX default to some debugging for now */ sc->mpr_debug = MPR_INFO | MPR_FAULT; sc->disable_msix = 0; sc->disable_msi = 0; sc->max_chains = MPR_CHAIN_FRAMES; /* * Grab the global variables. 
*/ TUNABLE_INT_FETCH("hw.mpr.debug_level", &sc->mpr_debug); TUNABLE_INT_FETCH("hw.mpr.disable_msix", &sc->disable_msix); TUNABLE_INT_FETCH("hw.mpr.disable_msi", &sc->disable_msi); TUNABLE_INT_FETCH("hw.mpr.max_chains", &sc->max_chains); /* Grab the unit-instance variables */ snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.debug_level", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->mpr_debug); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msix", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msi", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_chains", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_chains); bzero(sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.exclude_ids", device_get_unit(sc->mpr_dev)); TUNABLE_STR_FETCH(tmpstr, sc->exclude_ids, sizeof(sc->exclude_ids)); } static void mpr_setup_sysctl(struct mpr_softc *sc) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; char tmpstr[80], tmpstr2[80]; /* * Setup the sysctl variable so the user can change the debug level * on the fly. 
*/ snprintf(tmpstr, sizeof(tmpstr), "MPR controller %d", device_get_unit(sc->mpr_dev)); snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mpr_dev)); sysctl_ctx = device_get_sysctl_ctx(sc->mpr_dev); if (sysctl_ctx != NULL) sysctl_tree = device_get_sysctl_tree(sc->mpr_dev); if (sysctl_tree == NULL) { sysctl_ctx_init(&sc->sysctl_ctx); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_mpr), OID_AUTO, tmpstr2, CTLFLAG_RD, 0, tmpstr); if (sc->sysctl_tree == NULL) return; sysctl_ctx = &sc->sysctl_ctx; sysctl_tree = sc->sysctl_tree; } SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "debug_level", CTLFLAG_RW, &sc->mpr_debug, 0, "mpr debug level"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0, "Disable the use of MSI-X interrupts"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msi", CTLFLAG_RD, &sc->disable_msi, 0, "Disable the use of MSI interrupts"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), - OID_AUTO, "firmware_version", CTLFLAG_RW, &sc->fw_version, + OID_AUTO, "firmware_version", CTLFLAG_RW, sc->fw_version, strlen(sc->fw_version), "firmware version"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "driver_version", CTLFLAG_RW, MPR_DRIVER_VERSION, strlen(MPR_DRIVER_VERSION), "driver version"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_active", CTLFLAG_RD, &sc->io_cmds_active, 0, "number of currently active commands"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_highwater", CTLFLAG_RD, &sc->io_cmds_highwater, 0, "maximum active commands seen"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free", CTLFLAG_RD, &sc->chain_free, 0, "number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free_lowwater", CTLFLAG_RD, 
&sc->chain_free_lowwater, 0,"lowest number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_chains", CTLFLAG_RD, &sc->max_chains, 0,"maximum chain frames that will be allocated"); #if __FreeBSD_version >= 900030 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_alloc_fail", CTLFLAG_RD, &sc->chain_alloc_fail, "chain allocation failures"); #endif //FreeBSD_version >= 900030 } int mpr_attach(struct mpr_softc *sc) { int error; mpr_get_tunables(sc); MPR_FUNCTRACE(sc); mtx_init(&sc->mpr_mtx, "MPR lock", NULL, MTX_DEF); callout_init_mtx(&sc->periodic, &sc->mpr_mtx, 0); TAILQ_INIT(&sc->event_list); timevalclear(&sc->lastfail); if ((error = mpr_transition_ready(sc)) != 0) { mpr_printf(sc, "%s failed to transition ready\n", __func__); return (error); } sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPR, M_ZERO|M_NOWAIT); if (!sc->facts) { device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n", __func__, __LINE__); return (ENOMEM); } /* * Get IOC Facts and allocate all structures based on this information. * A Diag Reset will also call mpr_iocfacts_allocate and re-read the IOC * Facts. If relevant values have changed in IOC Facts, this function * will free all of the memory based on IOC Facts and reallocate that * memory. If this fails, any allocated memory should already be freed. */ if ((error = mpr_iocfacts_allocate(sc, TRUE)) != 0) { mpr_dprint(sc, MPR_FAULT, "%s IOC Facts based allocation " "failed with error %d\n", __func__, error); return (error); } /* Start the periodic watchdog check on the IOC Doorbell */ mpr_periodic(sc); /* * The portenable will kick off discovery events that will drive the * rest of the initialization process. The CAM/SAS module will * hold up the boot sequence until discovery is complete. 
*/ sc->mpr_ich.ich_func = mpr_startup; sc->mpr_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mpr_ich) != 0) { mpr_dprint(sc, MPR_ERROR, "Cannot establish MPR config hook\n"); error = EINVAL; } /* * Allow IR to shutdown gracefully when shutdown occurs. */ sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final, mprsas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT); if (sc->shutdown_eh == NULL) mpr_dprint(sc, MPR_ERROR, "shutdown event registration " "failed\n"); mpr_setup_sysctl(sc); sc->mpr_flags |= MPR_FLAGS_ATTACH_DONE; return (error); } /* Run through any late-start handlers. */ static void mpr_startup(void *arg) { struct mpr_softc *sc; sc = (struct mpr_softc *)arg; mpr_lock(sc); mpr_unmask_intr(sc); /* initialize device mapping tables */ mpr_base_static_config_pages(sc); mpr_mapping_initialize(sc); mprsas_startup(sc); mpr_unlock(sc); } /* Periodic watchdog. Is called with the driver lock already held. */ static void mpr_periodic(void *arg) { struct mpr_softc *sc; uint32_t db; sc = (struct mpr_softc *)arg; if (sc->mpr_flags & MPR_FLAGS_SHUTDOWN) return; db = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { if ((db & MPI2_DOORBELL_FAULT_CODE_MASK) == IFAULT_IOP_OVER_TEMP_THRESHOLD_EXCEEDED) { panic("TEMPERATURE FAULT: STOPPING."); } mpr_dprint(sc, MPR_FAULT, "IOC Fault 0x%08x, Resetting\n", db); mpr_reinit(sc); } callout_reset(&sc->periodic, MPR_PERIODIC_DELAY * hz, mpr_periodic, sc); } static void mpr_log_evt_handler(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *event) { MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry; mpr_print_event(sc, event); switch (event->Event) { case MPI2_EVENT_LOG_DATA: mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_DATA:\n"); if (sc->mpr_debug & MPR_EVENT) hexdump(event->EventData, event->EventDataLength, NULL, 0); break; case MPI2_EVENT_LOG_ENTRY_ADDED: entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData; mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event " "0x%x 
Sequence %d:\n", entry->LogEntryQualifier, entry->LogSequence); break; default: break; } return; } static int mpr_attach_log(struct mpr_softc *sc) { uint8_t events[16]; bzero(events, 16); setbit(events, MPI2_EVENT_LOG_DATA); setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED); mpr_register_events(sc, events, mpr_log_evt_handler, NULL, &sc->mpr_log_eh); return (0); } static int mpr_detach_log(struct mpr_softc *sc) { if (sc->mpr_log_eh != NULL) mpr_deregister_events(sc, sc->mpr_log_eh); return (0); } /* * Free all of the driver resources and detach submodules. Should be called * without the lock held. */ int mpr_free(struct mpr_softc *sc) { int error; /* Turn off the watchdog */ mpr_lock(sc); sc->mpr_flags |= MPR_FLAGS_SHUTDOWN; mpr_unlock(sc); /* Lock must not be held for this */ callout_drain(&sc->periodic); if (((error = mpr_detach_log(sc)) != 0) || ((error = mpr_detach_sas(sc)) != 0)) return (error); mpr_detach_user(sc); /* Put the IOC back in the READY state. */ mpr_lock(sc); if ((error = mpr_transition_ready(sc)) != 0) { mpr_unlock(sc); return (error); } mpr_unlock(sc); if (sc->facts != NULL) free(sc->facts, M_MPR); /* * Free all buffers that are based on IOC Facts. A Diag Reset may need * to free these buffers too. 
*/ mpr_iocfacts_free(sc); if (sc->sysctl_tree != NULL) sysctl_ctx_free(&sc->sysctl_ctx); /* Deregister the shutdown function */ if (sc->shutdown_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh); mtx_destroy(&sc->mpr_mtx); return (0); } static __inline void mpr_complete_command(struct mpr_softc *sc, struct mpr_command *cm) { MPR_FUNCTRACE(sc); if (cm == NULL) { mpr_dprint(sc, MPR_ERROR, "Completing NULL command\n"); return; } if (cm->cm_flags & MPR_CM_FLAGS_POLLED) cm->cm_flags |= MPR_CM_FLAGS_COMPLETE; if (cm->cm_complete != NULL) { mpr_dprint(sc, MPR_TRACE, "%s cm %p calling cm_complete %p data %p reply %p\n", __func__, cm, cm->cm_complete, cm->cm_complete_data, cm->cm_reply); cm->cm_complete(sc, cm); } if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) { mpr_dprint(sc, MPR_TRACE, "waking up %p\n", cm); wakeup(cm); } if (sc->io_cmds_active != 0) { sc->io_cmds_active--; } else { mpr_dprint(sc, MPR_ERROR, "Warning: io_cmds_active is " "out of sync - resynching to 0\n"); } } static void mpr_sas_log_info(struct mpr_softc *sc , u32 log_info) { union loginfo_type { u32 loginfo; struct { u32 subcode:16; u32 code:8; u32 originator:4; u32 bus_type:4; } dw; }; union loginfo_type sas_loginfo; char *originator_str = NULL; sas_loginfo.loginfo = log_info; if (sas_loginfo.dw.bus_type != 3 /*SAS*/) return; /* each nexus loss loginfo */ if (log_info == 0x31170000) return; /* eat the loginfos associated with task aborts */ if ((log_info == 30050000) || (log_info == 0x31140000) || (log_info == 0x31130000)) return; switch (sas_loginfo.dw.originator) { case 0: originator_str = "IOP"; break; case 1: originator_str = "PL"; break; case 2: originator_str = "IR"; break; } mpr_dprint(sc, MPR_INFO, "log_info(0x%08x): originator(%s), " "code(0x%02x), sub_code(0x%04x)\n", log_info, originator_str, sas_loginfo.dw.code, sas_loginfo.dw.subcode); } static void mpr_display_reply_info(struct mpr_softc *sc, uint8_t *reply) { MPI2DefaultReply_t *mpi_reply; u16 sc_status; mpi_reply = 
(MPI2DefaultReply_t*)reply; sc_status = le16toh(mpi_reply->IOCStatus); if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) mpr_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo)); } void mpr_intr(void *data) { struct mpr_softc *sc; uint32_t status; sc = (struct mpr_softc *)data; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* * Check interrupt status register to flush the bus. This is * needed for both INTx interrupts and driver-driven polling */ status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0) return; mpr_lock(sc); mpr_intr_locked(data); mpr_unlock(sc); return; } /* * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the * chip. Hopefully this theory is correct. */ void mpr_intr_msi(void *data) { struct mpr_softc *sc; sc = (struct mpr_softc *)data; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); mpr_lock(sc); mpr_intr_locked(data); mpr_unlock(sc); return; } /* * The locking is overly broad and simplistic, but easy to deal with for now. 
*/ void mpr_intr_locked(void *data) { MPI2_REPLY_DESCRIPTORS_UNION *desc; struct mpr_softc *sc; struct mpr_command *cm = NULL; uint8_t flags; u_int pq; MPI2_DIAG_RELEASE_REPLY *rel_rep; mpr_fw_diagnostic_buffer_t *pBuffer; sc = (struct mpr_softc *)data; pq = sc->replypostindex; mpr_dprint(sc, MPR_TRACE, "%s sc %p starting with replypostindex %u\n", __func__, sc, sc->replypostindex); for ( ;; ) { cm = NULL; desc = &sc->post_queue[sc->replypostindex]; flags = desc->Default.ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) || (le32toh(desc->Words.High) == 0xffffffff)) break; /* increment the replypostindex now, so that event handlers * and cm completion handlers which decide to do a diag * reset can zero it without it getting incremented again * afterwards, and we break out of this loop on the next * iteration since the reply post queue has been cleared to * 0xFF and all descriptors look unused (which they are). */ if (++sc->replypostindex >= sc->pqdepth) sc->replypostindex = 0; switch (flags) { case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS: case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: { uint32_t baddr; uint8_t *reply; /* * Re-compose the reply address from the address * sent back from the chip. The ReplyFrameAddress * is the lower 32 bits of the physical address of * particular reply frame. Convert that address to * host format, and then use that to provide the * offset against the virtual address base * (sc->reply_frames). */ baddr = le32toh(desc->AddressReply.ReplyFrameAddress); reply = sc->reply_frames + (baddr - ((uint32_t)sc->reply_busaddr)); /* * Make sure the reply we got back is in a valid * range. If not, go ahead and panic here, since * we'll probably panic as soon as we deference the * reply pointer anyway. 
*/ if ((reply < sc->reply_frames) || (reply > (sc->reply_frames + (sc->fqdepth * sc->facts->ReplyFrameSize * 4)))) { printf("%s: WARNING: reply %p out of range!\n", __func__, reply); printf("%s: reply_frames %p, fqdepth %d, " "frame size %d\n", __func__, sc->reply_frames, sc->fqdepth, sc->facts->ReplyFrameSize * 4); printf("%s: baddr %#x,\n", __func__, baddr); /* LSI-TODO. See Linux Code for Graceful exit */ panic("Reply address out of range"); } if (le16toh(desc->AddressReply.SMID) == 0) { if (((MPI2_DEFAULT_REPLY *)reply)->Function == MPI2_FUNCTION_DIAG_BUFFER_POST) { /* * If SMID is 0 for Diag Buffer Post, * this implies that the reply is due to * a release function with a status that * the buffer has been released. Set * the buffer flags accordingly. */ rel_rep = (MPI2_DIAG_RELEASE_REPLY *)reply; if (le16toh(rel_rep->IOCStatus) == MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED) { pBuffer = &sc->fw_diag_buffer_list[ rel_rep->BufferType]; pBuffer->valid_data = TRUE; pBuffer->owned_by_firmware = FALSE; pBuffer->immediate = FALSE; } } else mpr_dispatch_event(sc, baddr, (MPI2_EVENT_NOTIFICATION_REPLY *) reply); } else { cm = &sc->commands[ le16toh(desc->AddressReply.SMID)]; cm->cm_reply = reply; cm->cm_reply_data = le32toh(desc->AddressReply. 
ReplyFrameAddress); } break; } case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS: case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER: case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS: default: /* Unhandled */ mpr_dprint(sc, MPR_ERROR, "Unhandled reply 0x%x\n", desc->Default.ReplyFlags); cm = NULL; break; } if (cm != NULL) { // Print Error reply frame if (cm->cm_reply) mpr_display_reply_info(sc,cm->cm_reply); mpr_complete_command(sc, cm); } desc->Words.Low = 0xffffffff; desc->Words.High = 0xffffffff; } if (pq != sc->replypostindex) { mpr_dprint(sc, MPR_TRACE, "%s sc %p writing postindex %d\n", __func__, sc, sc->replypostindex); mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex); } return; } static void mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply) { struct mpr_event_handle *eh; int event, handled = 0; event = le16toh(reply->Event); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { if (isset(eh->mask, event)) { eh->callback(sc, data, reply); handled++; } } if (handled == 0) mpr_dprint(sc, MPR_EVENT, "Unhandled event 0x%x\n", le16toh(event)); /* * This is the only place that the event/reply should be freed. * Anything wanting to hold onto the event data should have * already copied it into their own storage. */ mpr_free_reply(sc, data); } static void mpr_reregister_events_complete(struct mpr_softc *sc, struct mpr_command *cm) { mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if (cm->cm_reply) mpr_print_event(sc, (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply); mpr_free_command(sc, cm); /* next, send a port enable */ mprsas_startup(sc); } /* * For both register_events and update_events, the caller supplies a bitmap * of events that it _wants_. These functions then turn that into a bitmask * suitable for the controller. 
*/ int mpr_register_events(struct mpr_softc *sc, uint8_t *mask, mpr_evt_callback_t *cb, void *data, struct mpr_event_handle **handle) { struct mpr_event_handle *eh; int error = 0; eh = malloc(sizeof(struct mpr_event_handle), M_MPR, M_WAITOK|M_ZERO); if (!eh) { device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n", __func__, __LINE__); return (ENOMEM); } eh->callback = cb; eh->data = data; TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list); if (mask != NULL) error = mpr_update_events(sc, eh, mask); *handle = eh; return (error); } int mpr_update_events(struct mpr_softc *sc, struct mpr_event_handle *handle, uint8_t *mask) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; MPI2_EVENT_NOTIFICATION_REPLY *reply; struct mpr_command *cm; struct mpr_event_handle *eh; int error, i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if ((mask != NULL) && (handle != NULL)) bcopy(mask, &handle->mask[0], 16); memset(sc->event_mask, 0xff, 16); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < 16; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mpr_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPR_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16); } #else bcopy(sc->event_mask, (uint8_t *)&evtreq->EventMasks, 16); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; error = mpr_request_polled(sc, cm); reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply; if ((reply == NULL) || (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; if(reply) mpr_print_event(sc, reply); mpr_dprint(sc, MPR_TRACE, "%s finished error %d\n", __func__, error); mpr_free_command(sc, cm); return (error); } static int mpr_reregister_events(struct mpr_softc *sc) { 
MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; struct mpr_command *cm; struct mpr_event_handle *eh; int error, i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* first, reregister events */ memset(sc->event_mask, 0xff, 16); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < 16; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mpr_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPR_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16); } #else bcopy(sc->event_mask, (uint8_t *)&evtreq->EventMasks, 16); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; cm->cm_complete = mpr_reregister_events_complete; error = mpr_map_command(sc, cm); mpr_dprint(sc, MPR_TRACE, "%s finished with error %d\n", __func__, error); return (error); } int mpr_deregister_events(struct mpr_softc *sc, struct mpr_event_handle *handle) { TAILQ_REMOVE(&sc->event_list, handle, eh_list); free(handle, M_MPR); return (mpr_update_events(sc, NULL, NULL)); } /* * Add a chain element as the next SGE for the specified command. * Reset cm_sge and cm_sgesize to indicate all the available space. Chains are * only required for IEEE commands. Therefore there is no code for commands * that have the MPR_CM_FLAGS_SGE_SIMPLE flag set (and those commands shouldn't * be requesting chains). */ static int mpr_add_chain(struct mpr_command *cm, int segsleft) { struct mpr_softc *sc = cm->cm_sc; MPI2_REQUEST_HEADER *req; MPI25_IEEE_SGE_CHAIN64 *ieee_sgc; struct mpr_chain *chain; int space, sgc_size, current_segs, rem_segs, segs_per_frame; uint8_t next_chain_offset = 0; /* * Fail if a command is requesting a chain for SIMPLE SGE's. For SAS3 * only IEEE commands should be requesting chains. Return some error * code other than 0. 
*/ if (cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE) { mpr_dprint(sc, MPR_ERROR, "A chain element cannot be added to " "an MPI SGL.\n"); return(ENOBUFS); } sgc_size = sizeof(MPI25_IEEE_SGE_CHAIN64); if (cm->cm_sglsize < sgc_size) panic("MPR: Need SGE Error Code\n"); chain = mpr_alloc_chain(cm->cm_sc); if (chain == NULL) return (ENOBUFS); space = (int)cm->cm_sc->facts->IOCRequestFrameSize * 4; /* * Note: a double-linked list is used to make it easier to walk for * debugging. */ TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link); /* * Need to know if the number of frames left is more than 1 or not. If * more than 1 frame is required, NextChainOffset will need to be set, * which will just be the last segment of the frame. */ rem_segs = 0; if (cm->cm_sglsize < (sgc_size * segsleft)) { /* * rem_segs is the number of segements remaining after the * segments that will go into the current frame. Since it is * known that at least one more frame is required, account for * the chain element. To know if more than one more frame is * required, just check if there will be a remainder after using * the current frame (with this chain) and the next frame. If * so the NextChainOffset must be the last element of the next * frame. */ current_segs = (cm->cm_sglsize / sgc_size) - 1; rem_segs = segsleft - current_segs; segs_per_frame = space / sgc_size; if (rem_segs > segs_per_frame) { next_chain_offset = segs_per_frame - 1; } } ieee_sgc = &((MPI25_SGE_IO_UNION *)cm->cm_sge)->IeeeChain; ieee_sgc->Length = next_chain_offset ? 
htole32((uint32_t)space) : htole32((uint32_t)rem_segs * (uint32_t)sgc_size); ieee_sgc->NextChainOffset = next_chain_offset; ieee_sgc->Flags = (MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); ieee_sgc->Address.Low = htole32(chain->chain_busaddr); ieee_sgc->Address.High = htole32(chain->chain_busaddr >> 32); cm->cm_sge = &((MPI25_SGE_IO_UNION *)chain->chain)->IeeeSimple; req = (MPI2_REQUEST_HEADER *)cm->cm_req; req->ChainOffset = ((sc->facts->IOCRequestFrameSize * 4) - sgc_size) >> 4; cm->cm_sglsize = space; return (0); } /* * Add one scatter-gather element to the scatter-gather list for a command. * Maintain cm_sglsize and cm_sge as the remaining size and pointer to the next * SGE to fill in, respectively. In Gen3, the MPI SGL does not have a chain, * so don't consider any chain additions. */ int mpr_push_sge(struct mpr_command *cm, MPI2_SGE_SIMPLE64 *sge, size_t len, int segsleft) { uint32_t saved_buf_len, saved_address_low, saved_address_high; u32 sge_flags; /* * case 1: >=1 more segment, no room for anything (error) * case 2: 1 more segment and enough room for it */ if (cm->cm_sglsize < (segsleft * sizeof(MPI2_SGE_SIMPLE64))) { mpr_dprint(cm->cm_sc, MPR_ERROR, "%s: warning: Not enough room for MPI SGL in frame.\n", __func__); return(ENOBUFS); } KASSERT(segsleft == 1, ("segsleft cannot be more than 1 for an MPI SGL; segsleft = %d\n", segsleft)); /* * There is one more segment left to add for the MPI SGL and there is * enough room in the frame to add it. This is the normal case because * MPI SGL's don't have chains, otherwise something is wrong. * * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. 
Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). */ saved_buf_len = sge->FlagsLength & 0x00FFFFFF; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->FlagsLength = cm->cm_out_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sge, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); } sge->FlagsLength = saved_buf_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_LIST | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); if (cm->cm_flags & MPR_CM_FLAGS_DATAIN) { sge->FlagsLength |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) << MPI2_SGE_FLAGS_SHIFT); } else { sge->FlagsLength |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) << MPI2_SGE_FLAGS_SHIFT); } sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sge, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (0); } /* * Add one IEEE scatter-gather element (chain or simple) to the IEEE scatter- * gather list for a command. Maintain cm_sglsize and cm_sge as the * remaining size and pointer to the next SGE to fill in, respectively. 
*/ int mpr_push_ieee_sge(struct mpr_command *cm, void *sgep, int segsleft) { MPI2_IEEE_SGE_SIMPLE64 *sge = sgep; int error, ieee_sge_size = sizeof(MPI25_SGE_IO_UNION); uint32_t saved_buf_len, saved_address_low, saved_address_high; uint32_t sge_length; /* * case 1: No room for chain or segment (error). * case 2: Two or more segments left but only room for chain. * case 3: Last segment and room for it, so set flags. */ /* * There should be room for at least one element, or there is a big * problem. */ if (cm->cm_sglsize < ieee_sge_size) panic("MPR: Need SGE Error Code\n"); if ((segsleft >= 2) && (cm->cm_sglsize < (ieee_sge_size * 2))) { if ((error = mpr_add_chain(cm, segsleft)) != 0) return (error); } if (segsleft == 1) { /* * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). 
*/ saved_buf_len = sge->Length; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->Length = cm->cm_out_len; sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); cm->cm_sglsize -= ieee_sge_size; /* Endian Safe code */ sge_length = sge->Length; sge->Length = htole32(sge_length); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sgep, cm->cm_sge, ieee_sge_size); cm->cm_sge = (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + ieee_sge_size); } sge->Length = saved_buf_len; sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR | MPI25_IEEE_SGE_FLAGS_END_OF_LIST); sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; } cm->cm_sglsize -= ieee_sge_size; /* Endian Safe code */ sge_length = sge->Length; sge->Length = htole32(sge_length); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sgep, cm->cm_sge, ieee_sge_size); cm->cm_sge = (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + ieee_sge_size); return (0); } /* * Add one dma segment to the scatter-gather list for a command. */ int mpr_add_dmaseg(struct mpr_command *cm, vm_paddr_t pa, size_t len, u_int flags, int segsleft) { MPI2_SGE_SIMPLE64 sge; MPI2_IEEE_SGE_SIMPLE64 ieee_sge; if (!(cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE)) { ieee_sge.Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); ieee_sge.Length = len; mpr_from_u64(pa, &ieee_sge.Address); return (mpr_push_ieee_sge(cm, &ieee_sge, segsleft)); } else { /* * This driver always uses 64-bit address elements for * simplicity. 
*/ flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING; /* Set Endian safe macro in mpr_push_sge */ sge.FlagsLength = len | (flags << MPI2_SGE_FLAGS_SHIFT); mpr_from_u64(pa, &sge.Address); return (mpr_push_sge(cm, &sge, sizeof sge, segsleft)); } } static void mpr_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mpr_softc *sc; struct mpr_command *cm; u_int i, dir, sflags; cm = (struct mpr_command *)arg; sc = cm->cm_sc; /* * In this case, just print out a warning and let the chip tell the * user they did the wrong thing. */ if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) { mpr_dprint(sc, MPR_ERROR, "%s: warning: busdma returned %d segments, " "more than the %d allowed\n", __func__, nsegs, cm->cm_max_segs); } /* * Set up DMA direction flags. Bi-directional requests are also handled * here. In that case, both direction flags will be set. */ sflags = 0; if (cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) { /* * We have to add a special case for SMP passthrough, there * is no easy way to generically handle it. The first * S/G element is used for the command (therefore the * direction bit needs to be set). The second one is used * for the reply. We'll leave it to the caller to make * sure we only have two buffers. */ /* * Even though the busdma man page says it doesn't make * sense to have both direction flags, it does in this case. * We have one s/g element being accessed in each direction. */ dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD; /* * Set the direction flag on the first buffer in the SMP * passthrough request. We'll clear it for the second one. 
*/ sflags |= MPI2_SGE_FLAGS_DIRECTION | MPI2_SGE_FLAGS_END_OF_BUFFER; } else if (cm->cm_flags & MPR_CM_FLAGS_DATAOUT) { sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC; dir = BUS_DMASYNC_PREWRITE; } else dir = BUS_DMASYNC_PREREAD; for (i = 0; i < nsegs; i++) { if ((cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) && (i != 0)) { sflags &= ~MPI2_SGE_FLAGS_DIRECTION; } error = mpr_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len, sflags, nsegs - i); if (error != 0) { /* Resource shortage, roll back! */ if (ratecheck(&sc->lastfail, &mpr_chainfail_interval)) mpr_dprint(sc, MPR_INFO, "Out of chain frames, " "consider increasing hw.mpr.max_chains.\n"); cm->cm_flags |= MPR_CM_FLAGS_CHAIN_FAILED; mpr_complete_command(sc, cm); return; } } bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir); mpr_enqueue_request(sc, cm); return; } static void mpr_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { mpr_data_cb(arg, segs, nsegs, error); } /* * This is the routine to enqueue commands ansynchronously. * Note that the only error path here is from bus_dmamap_load(), which can * return EINPROGRESS if it is waiting for resources. Other than this, it's * assumed that if you have a command in-hand, then you have enough credits * to use it. 
*/ int mpr_map_command(struct mpr_softc *sc, struct mpr_command *cm) { int error = 0; if (cm->cm_flags & MPR_CM_FLAGS_USE_UIO) { error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap, &cm->cm_uio, mpr_data_cb2, cm, 0); } else if (cm->cm_flags & MPR_CM_FLAGS_USE_CCB) { error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, mpr_data_cb, cm, 0); } else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) { error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_length, mpr_data_cb, cm, 0); } else { /* Add a zero-length element as needed */ if (cm->cm_sge != NULL) mpr_add_dmaseg(cm, 0, 0, 0, 1); mpr_enqueue_request(sc, cm); } return (error); } /* * This is the routine to enqueue commands synchronously. An error of * EINPROGRESS from mpr_map_command() is ignored since the command will * be executed and enqueued automatically. Other errors come from msleep(). */ int mpr_wait_command(struct mpr_softc *sc, struct mpr_command *cm, int timeout, int sleep_flag) { int error, rc; struct timeval cur_time, start_time; if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) return EBUSY; cm->cm_complete = NULL; cm->cm_flags |= (MPR_CM_FLAGS_WAKEUP + MPR_CM_FLAGS_POLLED); error = mpr_map_command(sc, cm); if ((error != 0) && (error != EINPROGRESS)) return (error); // Check for context and wait for 50 mSec at a time until time has // expired or the command has finished. If msleep can't be used, need // to poll. 
#if __FreeBSD_version >= 1000029 if (curthread->td_no_sleeping) #else //__FreeBSD_version < 1000029 if (curthread->td_pflags & TDP_NOSLEEPING) #endif //__FreeBSD_version >= 1000029 sleep_flag = NO_SLEEP; getmicrotime(&start_time); if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) { error = msleep(cm, &sc->mpr_mtx, 0, "mprwait", timeout*hz); } else { while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) { mpr_intr_locked(sc); if (sleep_flag == CAN_SLEEP) pause("mprwait", hz/20); else DELAY(50000); getmicrotime(&cur_time); if ((cur_time.tv_sec - start_time.tv_sec) > timeout) { error = EWOULDBLOCK; break; } } } if (error == EWOULDBLOCK) { mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s\n", __func__); rc = mpr_reinit(sc); mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); error = ETIMEDOUT; } return (error); } /* * This is the routine to enqueue a command synchonously and poll for * completion. Its use should be rare. */ int mpr_request_polled(struct mpr_softc *sc, struct mpr_command *cm) { int error, timeout = 0, rc; struct timeval cur_time, start_time; error = 0; cm->cm_flags |= MPR_CM_FLAGS_POLLED; cm->cm_complete = NULL; mpr_map_command(sc, cm); getmicrotime(&start_time); while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) { mpr_intr_locked(sc); if (mtx_owned(&sc->mpr_mtx)) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprpoll", hz/20); else pause("mprpoll", hz/20); /* * Check for real-time timeout and fail if more than 60 seconds. */ getmicrotime(&cur_time); timeout = cur_time.tv_sec - start_time.tv_sec; if (timeout > 60) { mpr_dprint(sc, MPR_FAULT, "polling failed\n"); error = ETIMEDOUT; break; } } if(error) { mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s\n", __func__); rc = mpr_reinit(sc); mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); } return (error); } /* * The MPT driver had a verbose interface for config pages. In this driver, * reduce it to much simplier terms, similar to the Linux driver. 
*/ int mpr_read_config_page(struct mpr_softc *sc, struct mpr_config_params *params) { MPI2_CONFIG_REQUEST *req; struct mpr_command *cm; int error; if (sc->mpr_flags & MPR_FLAGS_BUSY) { return (EBUSY); } cm = mpr_alloc_command(sc); if (cm == NULL) { return (EBUSY); } req = (MPI2_CONFIG_REQUEST *)cm->cm_req; req->Function = MPI2_FUNCTION_CONFIG; req->Action = params->action; req->SGLFlags = 0; req->ChainOffset = 0; req->PageAddress = params->page_address; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr; hdr = ¶ms->hdr.Ext; req->ExtPageType = hdr->ExtPageType; req->ExtPageLength = hdr->ExtPageLength; req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED; req->Header.PageLength = 0; /* Must be set to zero */ req->Header.PageNumber = hdr->PageNumber; req->Header.PageVersion = hdr->PageVersion; } else { MPI2_CONFIG_PAGE_HEADER *hdr; hdr = ¶ms->hdr.Struct; req->Header.PageType = hdr->PageType; req->Header.PageNumber = hdr->PageNumber; req->Header.PageLength = hdr->PageLength; req->Header.PageVersion = hdr->PageVersion; } cm->cm_data = params->buffer; cm->cm_length = params->length; cm->cm_sge = &req->PageBufferSGE; cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION); cm->cm_flags = MPR_CM_FLAGS_SGE_SIMPLE | MPR_CM_FLAGS_DATAIN; cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_complete_data = params; if (params->callback != NULL) { cm->cm_complete = mpr_config_complete; return (mpr_map_command(sc, cm)); } else { error = mpr_wait_command(sc, cm, 0, CAN_SLEEP); if (error) { mpr_dprint(sc, MPR_FAULT, "Error %d reading config page\n", error); mpr_free_command(sc, cm); return (error); } mpr_config_complete(sc, cm); } return (0); } int mpr_write_config_page(struct mpr_softc *sc, struct mpr_config_params *params) { return (EINVAL); } static void mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm) { MPI2_CONFIG_REPLY *reply; struct mpr_config_params *params; MPR_FUNCTRACE(sc); params = 
cm->cm_complete_data; if (cm->cm_data != NULL) { bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap); } /* * XXX KDM need to do more error recovery? This results in the * device in question not getting probed. */ if ((cm->cm_flags & MPR_CM_FLAGS_ERROR_MASK) != 0) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } reply = (MPI2_CONFIG_REPLY *)cm->cm_reply; if (reply == NULL) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } params->status = reply->IOCStatus; if (params->hdr.Ext.ExtPageType != 0) { params->hdr.Ext.ExtPageType = reply->ExtPageType; params->hdr.Ext.ExtPageLength = reply->ExtPageLength; } else { params->hdr.Struct.PageType = reply->Header.PageType; params->hdr.Struct.PageNumber = reply->Header.PageNumber; params->hdr.Struct.PageLength = reply->Header.PageLength; params->hdr.Struct.PageVersion = reply->Header.PageVersion; } done: mpr_free_command(sc, cm); if (params->callback != NULL) params->callback(sc, params); return; } Index: stable/9/sys/dev/mps/mps.c =================================================================== --- stable/9/sys/dev/mps/mps.c (revision 273911) +++ stable/9/sys/dev/mps/mps.c (revision 273912) @@ -1,2702 +1,2702 @@ /*- * Copyright (c) 2009 Yahoo! Inc. * Copyright (c) 2012 LSI Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * LSI MPT-Fusion Host Adapter FreeBSD * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* Communications core for LSI MPT2 */ /* TODO Move headers to mpsvar */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mps_diag_reset(struct mps_softc *sc, int sleep_flag); static int mps_init_queues(struct mps_softc *sc); static int mps_message_unit_reset(struct mps_softc *sc, int sleep_flag); static int mps_transition_operational(struct mps_softc *sc); static int mps_iocfacts_allocate(struct mps_softc *sc, uint8_t attaching); static void mps_iocfacts_free(struct mps_softc *sc); static void mps_startup(void *arg); static int mps_send_iocinit(struct mps_softc *sc); static int mps_alloc_queues(struct mps_softc *sc); static int mps_alloc_replies(struct mps_softc *sc); static int mps_alloc_requests(struct mps_softc *sc); static int mps_attach_log(struct mps_softc *sc); static __inline void mps_complete_command(struct mps_softc *sc, struct 
mps_command *cm); static void mps_dispatch_event(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply); static void mps_config_complete(struct mps_softc *sc, struct mps_command *cm); static void mps_periodic(void *); static int mps_reregister_events(struct mps_softc *sc); static void mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm); static int mps_get_iocfacts(struct mps_softc *sc, MPI2_IOC_FACTS_REPLY *facts); static int mps_wait_db_ack(struct mps_softc *sc, int timeout, int sleep_flag); SYSCTL_NODE(_hw, OID_AUTO, mps, CTLFLAG_RD, 0, "MPS Driver Parameters"); MALLOC_DEFINE(M_MPT2, "mps", "mpt2 driver memory"); /* * Do a "Diagnostic Reset" aka a hard reset. This should get the chip out of * any state and back to its initialization state machine. */ static char mpt2_reset_magic[] = { 0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d }; /* Added this union to smoothly convert le64toh cm->cm_desc.Words. * Compiler only support unint64_t to be passed as argument. * Otherwise it will through below error * "aggregate value used where an integer was expected" */ typedef union _reply_descriptor { u64 word; struct { u32 low; u32 high; } u; }reply_descriptor,address_descriptor; /* * sleep_flag can be either CAN_SLEEP or NO_SLEEP. * If this function is called from process context, it can sleep * and there is no harm to sleep, in case if this fuction is called * from Interrupt handler, we can not sleep and need NO_SLEEP flag set. * based on sleep flags driver will call either msleep, pause or DELAY. * msleep and pause are of same variant, but pause is used when mps_mtx * is not hold by driver. * */ static int mps_diag_reset(struct mps_softc *sc,int sleep_flag) { uint32_t reg; int i, error, tries = 0; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); /* Clear any pending interrupts */ mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /*Force NO_SLEEP for threads prohibited to sleep * e.a Thread from interrupt handler are prohibited to sleep. 
*/ if(curthread->td_pflags & TDP_NOSLEEPING) sleep_flag = NO_SLEEP; /* Push the magic sequence */ error = ETIMEDOUT; while (tries++ < 20) { for (i = 0; i < sizeof(mpt2_reset_magic); i++) mps_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, mpt2_reset_magic[i]); /* wait 100 msec */ if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0, "mpsdiag", hz/10); else if (sleep_flag == CAN_SLEEP) pause("mpsdiag", hz/10); else DELAY(100 * 1000); reg = mps_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) { error = 0; break; } } if (error) return (error); /* Send the actual reset. XXX need to refresh the reg? */ mps_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET, reg | MPI2_DIAG_RESET_ADAPTER); /* Wait up to 300 seconds in 50ms intervals */ error = ETIMEDOUT; for (i = 0; i < 60000; i++) { /* wait 50 msec */ if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0, "mpsdiag", hz/20); else if (sleep_flag == CAN_SLEEP) pause("mpsdiag", hz/20); else DELAY(50 * 1000); reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) { error = 0; break; } } if (error) return (error); mps_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0); return (0); } static int mps_message_unit_reset(struct mps_softc *sc, int sleep_flag) { MPS_FUNCTRACE(sc); mps_regwrite(sc, MPI2_DOORBELL_OFFSET, MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET << MPI2_DOORBELL_FUNCTION_SHIFT); if (mps_wait_db_ack(sc, 5, sleep_flag) != 0) { mps_dprint(sc, MPS_FAULT, "Doorbell handshake failed : <%s>\n", __func__); return (ETIMEDOUT); } return (0); } static int mps_transition_ready(struct mps_softc *sc) { uint32_t reg, state; int error, tries = 0; int sleep_flags; MPS_FUNCTRACE(sc); /* If we are in attach call, do not sleep */ sleep_flags = (sc->mps_flags & MPS_FLAGS_ATTACH_DONE) ? 
CAN_SLEEP:NO_SLEEP; error = 0; while (tries++ < 5) { reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_dprint(sc, MPS_INIT, "Doorbell= 0x%x\n", reg); /* * Ensure the IOC is ready to talk. If it's not, try * resetting it. */ if (reg & MPI2_DOORBELL_USED) { mps_diag_reset(sc, sleep_flags); DELAY(50000); continue; } /* Is the adapter owned by another peer? */ if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) == (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) { device_printf(sc->mps_dev, "IOC is under the control " "of another peer host, aborting initialization.\n"); return (ENXIO); } state = reg & MPI2_IOC_STATE_MASK; if (state == MPI2_IOC_STATE_READY) { /* Ready to go! */ error = 0; break; } else if (state == MPI2_IOC_STATE_FAULT) { mps_dprint(sc, MPS_FAULT, "IOC in fault state 0x%x, resetting\n", state & MPI2_DOORBELL_FAULT_CODE_MASK); mps_diag_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_OPERATIONAL) { /* Need to take ownership */ mps_message_unit_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_RESET) { /* Wait a bit, IOC might be in transition */ mps_dprint(sc, MPS_FAULT, "IOC in unexpected reset state\n"); } else { mps_dprint(sc, MPS_FAULT, "IOC in unknown state 0x%x\n", state); error = EINVAL; break; } /* Wait 50ms for things to settle down. */ DELAY(50000); } if (error) device_printf(sc->mps_dev, "Cannot transition IOC to ready\n"); return (error); } static int mps_transition_operational(struct mps_softc *sc) { uint32_t reg, state; int error; MPS_FUNCTRACE(sc); error = 0; reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_dprint(sc, MPS_INIT, "Doorbell= 0x%x\n", reg); state = reg & MPI2_IOC_STATE_MASK; if (state != MPI2_IOC_STATE_READY) { if ((error = mps_transition_ready(sc)) != 0) { mps_dprint(sc, MPS_FAULT, "%s failed to transition ready\n", __func__); return (error); } } error = mps_send_iocinit(sc); return (error); } /* * This is called during attach and when re-initializing due to a Diag Reset. 
* IOC Facts is used to allocate many of the structures needed by the driver.
 * If called from attach, de-allocation is not required because the driver has
 * not allocated any structures yet, but if called from a Diag Reset, previously
 * allocated structures based on IOC Facts will need to be freed and re-
 * allocated based on the latest IOC Facts.
 *
 * attaching is non-zero when called from attach and zero when called for a
 * re-initialization.  When attaching, failures are returned to the caller
 * (after mps_free() once allocation has started); when re-initializing,
 * failures panic.  Returns 0 on success.
 */
static int
mps_iocfacts_allocate(struct mps_softc *sc, uint8_t attaching)
{
	int error, i;
	Mpi2IOCFactsReply_t saved_facts;
	uint8_t saved_mode, reallocating;
	struct mpssas_lun *lun, *lun_tmp;
	struct mpssas_target *targ;

	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);

	/* Save old IOC Facts and then only reallocate if Facts have changed */
	if (!attaching) {
		bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY));
	}

	/*
	 * Get IOC Facts.  In all cases throughout this function, panic if doing
	 * a re-initialization and only return the error if attaching so the OS
	 * can handle it.
	 */
	if ((error = mps_get_iocfacts(sc, sc->facts)) != 0) {
		if (attaching) {
			mps_dprint(sc, MPS_FAULT, "%s failed to get IOC Facts "
			    "with error %d\n", __func__, error);
			return (error);
		} else {
			panic("%s failed to get IOC Facts with error %d\n",
			    __func__, error);
		}
	}

	mps_print_iocfacts(sc, sc->facts);

	/* Cache the firmware version as a printable string. */
	snprintf(sc->fw_version, sizeof(sc->fw_version),
	    "%02d.%02d.%02d.%02d",
	    sc->facts->FWVersion.Struct.Major,
	    sc->facts->FWVersion.Struct.Minor,
	    sc->facts->FWVersion.Struct.Unit,
	    sc->facts->FWVersion.Struct.Dev);

	mps_printf(sc, "Firmware: %s, Driver: %s\n", sc->fw_version,
	    MPS_DRIVER_VERSION);
	mps_printf(sc, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities,
	    "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf"
	    "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR"
	    "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc");

	/*
	 * If the chip doesn't support event replay then a hard reset will be
	 * required to trigger a full discovery.  Do the reset here then
	 * retransition to Ready.  A hard reset might have already been done,
	 * but it doesn't hurt to do it again.  Only do this if attaching, not
	 * for a Diag Reset.
	 */
	if (attaching) {
		if ((sc->facts->IOCCapabilities &
		    MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0) {
			mps_diag_reset(sc, NO_SLEEP);
			if ((error = mps_transition_ready(sc)) != 0) {
				mps_dprint(sc, MPS_FAULT, "%s failed to "
				    "transition to ready with error %d\n",
				    __func__, error);
				return (error);
			}
		}
	}

	/*
	 * Set flag if IR Firmware is loaded.  If the RAID Capability has
	 * changed from the previous IOC Facts, log a warning, but only if
	 * checking this after a Diag Reset and not during attach.
	 */
	saved_mode = sc->ir_firmware;
	if (sc->facts->IOCCapabilities &
	    MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID)
		sc->ir_firmware = 1;
	if (!attaching) {
		if (sc->ir_firmware != saved_mode) {
			mps_dprint(sc, MPS_FAULT, "%s new IR/IT mode in IOC "
			    "Facts does not match previous mode\n", __func__);
		}
	}

	/* Only deallocate and reallocate if relevant IOC Facts have changed */
	reallocating = FALSE;
	if ((!attaching) &&
	    ((saved_facts.MsgVersion != sc->facts->MsgVersion) ||
	    (saved_facts.HeaderVersion != sc->facts->HeaderVersion) ||
	    (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) ||
	    (saved_facts.RequestCredit != sc->facts->RequestCredit) ||
	    (saved_facts.ProductID != sc->facts->ProductID) ||
	    (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) ||
	    (saved_facts.IOCRequestFrameSize !=
	    sc->facts->IOCRequestFrameSize) ||
	    (saved_facts.MaxTargets != sc->facts->MaxTargets) ||
	    (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) ||
	    (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) ||
	    (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) ||
	    (saved_facts.MaxReplyDescriptorPostQueueDepth !=
	    sc->facts->MaxReplyDescriptorPostQueueDepth) ||
	    (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) ||
	    (saved_facts.MaxVolumes != sc->facts->MaxVolumes) ||
	    (saved_facts.MaxPersistentEntries !=
	    sc->facts->MaxPersistentEntries))) {
		reallocating = TRUE;
	}

	/*
	 * Some things should be done if attaching or re-allocating after a Diag
	 * Reset, but are not needed after a Diag Reset if the FW has not
	 * changed.
	 */
	if (attaching || reallocating) {
		/*
		 * Check if controller supports FW diag buffers and set flag to
		 * enable each type.
		 */
		if (sc->facts->IOCCapabilities &
		    MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER)
			sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE].
			    enabled = TRUE;
		if (sc->facts->IOCCapabilities &
		    MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER)
			sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT].
			    enabled = TRUE;
		if (sc->facts->IOCCapabilities &
		    MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER)
			sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED].
			    enabled = TRUE;

		/*
		 * Set flag if EEDP is supported and if TLR is supported.
		 */
		if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP)
			sc->eedp_enabled = TRUE;
		if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR)
			sc->control_TLR = TRUE;

		/*
		 * Size the queues. Since the reply queues always need one free
		 * entry, we'll just deduct one reply message here.
		 */
		sc->num_reqs = MIN(MPS_REQ_FRAMES, sc->facts->RequestCredit);
		sc->num_replies = MIN(MPS_REPLY_FRAMES + MPS_EVT_REPLY_FRAMES,
		    sc->facts->MaxReplyDescriptorPostQueueDepth) - 1;

		/*
		 * Initialize all Tail Queues
		 */
		TAILQ_INIT(&sc->req_list);
		TAILQ_INIT(&sc->high_priority_req_list);
		TAILQ_INIT(&sc->chain_list);
		TAILQ_INIT(&sc->tm_list);
	}

	/*
	 * If doing a Diag Reset and the FW is significantly different
	 * (reallocating will be set above in IOC Facts comparison), then all
	 * buffers based on the IOC Facts will need to be freed before they are
	 * reallocated.
	 *
	 * NOTE(review): sc->num_reqs was already updated from the new facts
	 * above, before mps_iocfacts_free() runs - confirm the free routine
	 * does not depend on the previous value.
	 */
	if (reallocating) {
		mps_iocfacts_free(sc);

		/*
		 * The number of targets is based on IOC Facts, so free all of
		 * the allocated LUNs for each target and then the target buffer
		 * itself.
		 */
		for (i=0; i< saved_facts.MaxTargets; i++) {
			targ = &sc->sassc->targets[i];
			SLIST_FOREACH_SAFE(lun, &targ->luns, lun_link,
			    lun_tmp) {
				free(lun, M_MPT2);
			}
		}
		free(sc->sassc->targets, M_MPT2);

		/* New target array sized from the new MaxTargets, zeroed. */
		sc->sassc->targets = malloc(sizeof(struct mpssas_target) *
		    sc->facts->MaxTargets, M_MPT2, M_WAITOK|M_ZERO);
		if (!sc->sassc->targets) {
			panic("%s failed to alloc targets with error %d\n",
			    __func__, ENOMEM);
		}
	}

	/*
	 * Any deallocation has been completed.  Now start reallocating
	 * if needed.  Will only need to reallocate if attaching or if the new
	 * IOC Facts are different from the previous IOC Facts after a Diag
	 * Reset. Targets have already been allocated above if needed.
	 */
	if (attaching || reallocating) {
		if (((error = mps_alloc_queues(sc)) != 0) ||
		    ((error = mps_alloc_replies(sc)) != 0) ||
		    ((error = mps_alloc_requests(sc)) != 0)) {
			if (attaching ) {
				mps_dprint(sc, MPS_FAULT, "%s failed to alloc "
				    "queues with error %d\n", __func__, error);
				mps_free(sc);
				return (error);
			} else {
				panic("%s failed to alloc queues with error "
				    "%d\n", __func__, error);
			}
		}
	}

	/* Always initialize the queues */
	bzero(sc->free_queue, sc->fqdepth * 4);
	mps_init_queues(sc);

	/*
	 * Always get the chip out of the reset state, but only panic if not
	 * attaching.  If attaching and there is an error, that is handled by
	 * the OS.
	 */
	error = mps_transition_operational(sc);
	if (error != 0) {
		if (attaching) {
			mps_printf(sc, "%s failed to transition to operational "
			    "with error %d\n", __func__, error);
			mps_free(sc);
			return (error);
		} else {
			panic("%s failed to transition to operational with "
			    "error %d\n", __func__, error);
		}
	}

	/*
	 * Finish the queue initialization.
	 * These are set here instead of in mps_init_queues() because the
	 * IOC resets these values during the state transition in
	 * mps_transition_operational().  The free index is set to 1
	 * because the corresponding index in the IOC is set to 0, and the
	 * IOC treats the queues as full if both are set to the same value.
	 * Hence the reason that the queue can't hold all of the possible
	 * replies.
	 */
	sc->replypostindex = 0;
	mps_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex);
	mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0);

	/*
	 * Attach the subsystems so they can prepare their event masks.
	 */
	/* XXX Should be dynamic so that IM/IR and user modules can attach */
	if (attaching) {
		if (((error = mps_attach_log(sc)) != 0) ||
		    ((error = mps_attach_sas(sc)) != 0) ||
		    ((error = mps_attach_user(sc)) != 0)) {
			mps_printf(sc, "%s failed to attach all subsystems: "
			    "error %d\n", __func__, error);
			mps_free(sc);
			return (error);
		}

		if ((error = mps_pci_setup_interrupts(sc)) != 0) {
			mps_printf(sc, "%s failed to setup interrupts\n",
			    __func__);
			mps_free(sc);
			return (error);
		}
	}

	/*
	 * Set flag if this is a WD controller.  This shouldn't ever change, but
	 * reset it after a Diag Reset, just in case.
	 */
	sc->WD_available = FALSE;
	if (pci_get_device(sc->mps_dev) == MPI2_MFGPAGE_DEVID_SSS6200)
		sc->WD_available = TRUE;

	return (error);
}

/*
 * This is called if memory is being free (during detach for example) and when
 * buffers need to be reallocated due to a Diag Reset.
*/
static void
mps_iocfacts_free(struct mps_softc *sc)
{
	int idx;

	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);

	/*
	 * Each DMA area below is torn down in the same three steps, each
	 * guarded by its own "was it set up" check: unload the map, free the
	 * memory, destroy the tag.
	 */

	/* Reply free/post queues. */
	if (sc->post_busaddr) {
		bus_dmamap_unload(sc->queues_dmat, sc->queues_map);
	}
	if (sc->free_queue) {
		bus_dmamem_free(sc->queues_dmat, sc->free_queue,
		    sc->queues_map);
	}
	if (sc->queues_dmat) {
		bus_dma_tag_destroy(sc->queues_dmat);
	}

	/* Chain frames. */
	if (sc->chain_busaddr) {
		bus_dmamap_unload(sc->chain_dmat, sc->chain_map);
	}
	if (sc->chain_frames) {
		bus_dmamem_free(sc->chain_dmat, sc->chain_frames,
		    sc->chain_map);
	}
	if (sc->chain_dmat) {
		bus_dma_tag_destroy(sc->chain_dmat);
	}

	/* Sense buffers. */
	if (sc->sense_busaddr) {
		bus_dmamap_unload(sc->sense_dmat, sc->sense_map);
	}
	if (sc->sense_frames) {
		bus_dmamem_free(sc->sense_dmat, sc->sense_frames,
		    sc->sense_map);
	}
	if (sc->sense_dmat) {
		bus_dma_tag_destroy(sc->sense_dmat);
	}

	/* Reply frames. */
	if (sc->reply_busaddr) {
		bus_dmamap_unload(sc->reply_dmat, sc->reply_map);
	}
	if (sc->reply_frames) {
		bus_dmamem_free(sc->reply_dmat, sc->reply_frames,
		    sc->reply_map);
	}
	if (sc->reply_dmat) {
		bus_dma_tag_destroy(sc->reply_dmat);
	}

	/* Request frames. */
	if (sc->req_busaddr) {
		bus_dmamap_unload(sc->req_dmat, sc->req_map);
	}
	if (sc->req_frames) {
		bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map);
	}
	if (sc->req_dmat) {
		bus_dma_tag_destroy(sc->req_dmat);
	}

	/* Host-side bookkeeping arrays. */
	if (sc->chains) {
		free(sc->chains, M_MPT2);
	}
	if (sc->commands) {
		/* The walk starts at index 1; entry 0 is not touched. */
		for (idx = 1; idx < sc->num_reqs; idx++) {
			bus_dmamap_destroy(sc->buffer_dmat,
			    sc->commands[idx].cm_dmamap);
		}
		free(sc->commands, M_MPT2);
	}

	/* The data-buffer tag goes last, after the per-command maps. */
	if (sc->buffer_dmat) {
		bus_dma_tag_destroy(sc->buffer_dmat);
	}
}

/*
 * The terms diag reset and hard reset are used interchangeably in the MPI
 * docs to mean resetting the controller chip.  In this code diag reset
 * cleans everything up, and the hard reset function just sends the reset
 * sequence to the chip.  This should probably be refactored so that every
 * subsystem gets a reset notification of some sort, and can clean up
 * appropriately.
*/
/*
 * Reinitialize the controller after a fault or timeout.  The caller must
 * hold mps_mtx (asserted below).  Returns 0, including when a reset is
 * already in progress; failures of the hard reset or of the IOC Facts based
 * allocation panic.
 */
int
mps_reinit(struct mps_softc *sc)
{
	int error;

	MPS_FUNCTRACE(sc);

	mtx_assert(&sc->mps_mtx, MA_OWNED);

	/* Nothing to do if another reset is already under way. */
	if (sc->mps_flags & MPS_FLAGS_DIAGRESET) {
		mps_dprint(sc, MPS_INIT, "%s reset already in progress\n",
		    __func__);
		return 0;
	}

	mps_dprint(sc, MPS_INFO, "Reinitializing controller,\n");
	/* make sure the completion callbacks can recognize they're getting
	 * a NULL cm_reply due to a reset.
	 */
	sc->mps_flags |= MPS_FLAGS_DIAGRESET;

	/*
	 * Mask interrupts here.
	 */
	mps_dprint(sc, MPS_INIT, "%s mask interrupts\n", __func__);
	mps_mask_intr(sc);

	error = mps_diag_reset(sc, CAN_SLEEP);
	if (error != 0) {
		/* XXXSL No need to panic here */
		panic("%s hard reset failed with error %d\n",
		    __func__, error);
	}

	/* Restore the PCI state, including the MSI-X registers */
	mps_pci_restore(sc);

	/* Give the I/O subsystem special priority to get itself prepared */
	mpssas_handle_reinit(sc);

	/*
	 * Get IOC Facts and allocate all structures based on this information.
	 * The attach function will also call mps_iocfacts_allocate at startup.
	 * If relevant values have changed in IOC Facts, this function will free
	 * all of the memory based on IOC Facts and reallocate that memory.
	 */
	if ((error = mps_iocfacts_allocate(sc, FALSE)) != 0) {
		panic("%s IOC Facts based allocation failed with error %d\n",
		    __func__, error);
	}

	/*
	 * Mapping structures will be re-allocated after getting IOC Page8, so
	 * free these structures here.
	 */
	mps_mapping_exit(sc);

	/*
	 * The static page function currently read is IOC Page8.  Others can be
	 * added in future.  It's possible that the values in IOC Page8 have
	 * changed after a Diag Reset due to user modification, so always read
	 * these.  Interrupts are masked, so unmask them before getting config
	 * pages.
	 */
	mps_unmask_intr(sc);
	sc->mps_flags &= ~MPS_FLAGS_DIAGRESET;
	mps_base_static_config_pages(sc);

	/*
	 * Some mapping info is based in IOC Page8 data, so re-initialize the
	 * mapping tables.
	 */
	mps_mapping_initialize(sc);

	/*
	 * Restart will reload the event masks clobbered by the reset, and
	 * then enable the port.
	 */
	mps_reregister_events(sc);

	/* the end of discovery will release the simq, so we're done. */
	mps_dprint(sc, MPS_INFO, "%s finished sc %p post %u free %u\n",
	    __func__, sc, sc->replypostindex, sc->replyfreeindex);

	return 0;
}

/* Wait for the chip to ACK a word that we've put into its FIFO.
 * Wait for <timeout> seconds.  Each pass through the loop either sleeps for
 * about 1 millisecond (CAN_SLEEP) or busy-waits for 500 microseconds.
 * Total is [ 0.5 * (2000 * <timeout>) ] in milliseconds.
 *
 * Returns 0 once MPI2_HIS_SYS2IOC_DB_STATUS is clear, EFAULT if the doorbell
 * reports the IOC in the FAULT state, and ETIMEDOUT otherwise.
 */
static int
mps_wait_db_ack(struct mps_softc *sc, int timeout, int sleep_flag)
{

	u32 cntdn, count;
	u32 int_status;
	u32 doorbell;

	count = 0;
	/* Number of loop passes: 1000 per second sleeping, 2000 polling. */
	cntdn = (sleep_flag == CAN_SLEEP) ? 1000*timeout : 2000*timeout;
	do {
		int_status = mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
		if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) {
			mps_dprint(sc, MPS_INIT,
			    "%s: successfull count(%d), timeout(%d)\n",
			    __func__, count, timeout);
			return 0;
		} else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
			doorbell = mps_regread(sc, MPI2_DOORBELL_OFFSET);
			if ((doorbell & MPI2_IOC_STATE_MASK) ==
			    MPI2_IOC_STATE_FAULT) {
				mps_dprint(sc, MPS_FAULT,
				    "fault_state(0x%04x)!\n", doorbell);
				return (EFAULT);
			}
		} else if (int_status == 0xFFFFFFFF)
			goto out;

		/* If it can sleep, sleep for 1 millisecond, else busy loop for
		 * 0.5 millisecond */
		if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP)
			msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0,
			    "mpsdba", hz/1000);
		else if (sleep_flag == CAN_SLEEP)
			pause("mpsdba", hz/1000);
		else
			DELAY(500);
		count++;
	} while (--cntdn);

out:
	mps_dprint(sc, MPS_FAULT, "%s: failed due to timeout count(%d), "
	    "int_status(%x)!\n", __func__, count, int_status);
	return (ETIMEDOUT);

}

/*
 * Wait for the chip to signal that the next word in its FIFO can be fetched.
 * Polls up to MPS_DB_MAX_WAIT times with a 2000 microsecond delay between
 * reads; returns 0 when MPI2_HIS_IOC2SYS_DB_STATUS is seen, else ETIMEDOUT.
 */
static int
mps_wait_db_int(struct mps_softc *sc)
{
	int retry;

	for (retry = 0; retry < MPS_DB_MAX_WAIT; retry++) {
		if ((mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) &
		    MPI2_HIS_IOC2SYS_DB_STATUS) != 0)
			return (0);
		DELAY(2000);
	}
	return (ETIMEDOUT);
}

/* Step through the synchronous command state machine, i.e. "Doorbell mode" */
static int
mps_request_sync(struct mps_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply,
    int req_sz, int reply_sz, int timeout)
{
	uint32_t *data32;
	uint16_t *data16;
	int i, count, ioc_sz, residual;
	int sleep_flags = CAN_SLEEP;

	if(curthread->td_pflags & TDP_NOSLEEPING)
		sleep_flags = NO_SLEEP;

	/* Step 1 */
	mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

	/* Step 2 */
	if (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
		return (EBUSY);

	/* Step 3
	 * Announce that a message is coming through the doorbell.  Messages
	 * are pushed at 32bit words, so round up if needed.
	 */
	count = (req_sz + 3) / 4;
	mps_regwrite(sc, MPI2_DOORBELL_OFFSET,
	    (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) |
	    (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT));

	/* Step 4 */
	if (mps_wait_db_int(sc) ||
	    (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) {
		mps_dprint(sc, MPS_FAULT, "Doorbell failed to activate\n");
		return (ENXIO);
	}
	mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
	if (mps_wait_db_ack(sc, 5, sleep_flags) != 0) {
		mps_dprint(sc, MPS_FAULT, "Doorbell handshake failed\n");
		return (ENXIO);
	}

	/* Step 5 */
	/* Clock out the message data synchronously in 32-bit dwords*/
	data32 = (uint32_t *)req;
	for (i = 0; i < count; i++) {
		mps_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i]));
		if (mps_wait_db_ack(sc, 5, sleep_flags) != 0) {
			mps_dprint(sc, MPS_FAULT,
			    "Timeout while writing doorbell\n");
			return (ENXIO);
		}
	}

	/* Step 6 */
	/* Clock in the reply in 16-bit words.  The total length of the
	 * message is always in the 4th byte, so clock out the first 2 words
	 * manually, then loop the rest.
*/ data16 = (uint16_t *)reply; if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell 0\n"); return (ENXIO); } data16[0] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell 1\n"); return (ENXIO); } data16[1] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Number of 32bit words in the message */ ioc_sz = reply->MsgLength; /* * Figure out how many 16bit words to clock in without overrunning. * The precision loss with dividing reply_sz can safely be * ignored because the messages can only be multiples of 32bits. */ residual = 0; count = MIN((reply_sz / 4), ioc_sz) * 2; if (count < ioc_sz * 2) { residual = ioc_sz * 2 - count; mps_dprint(sc, MPS_ERROR, "Driver error, throwing away %d " "residual message words\n", residual); } for (i = 2; i < count; i++) { if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell %d\n", i); return (ENXIO); } data16[i] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* * Pull out residual words that won't fit into the provided buffer. * This keeps the chip from hanging due to a driver programming * error. 
*/ while (residual--) { if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell\n"); return (ENXIO); } (void)mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* Step 7 */ if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout waiting to exit doorbell\n"); return (ENXIO); } if (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) mps_dprint(sc, MPS_FAULT, "Warning, doorbell still active\n"); mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); return (0); } static void mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm) { reply_descriptor rd; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_TRACE, "SMID %u cm %p ccb %p\n", cm->cm_desc.Default.SMID, cm, cm->cm_ccb); if (sc->mps_flags & MPS_FLAGS_ATTACH_DONE && !(sc->mps_flags & MPS_FLAGS_SHUTDOWN)) mtx_assert(&sc->mps_mtx, MA_OWNED); if (++sc->io_cmds_active > sc->io_cmds_highwater) sc->io_cmds_highwater++; rd.u.low = cm->cm_desc.Words.Low; rd.u.high = cm->cm_desc.Words.High; rd.word = htole64(rd.word); /* TODO-We may need to make below regwrite atomic */ mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET, rd.u.low); mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET, rd.u.high); } /* * Just the FACTS, ma'am. 
*/ static int mps_get_iocfacts(struct mps_softc *sc, MPI2_IOC_FACTS_REPLY *facts) { MPI2_DEFAULT_REPLY *reply; MPI2_IOC_FACTS_REQUEST request; int error, req_sz, reply_sz; MPS_FUNCTRACE(sc); req_sz = sizeof(MPI2_IOC_FACTS_REQUEST); reply_sz = sizeof(MPI2_IOC_FACTS_REPLY); reply = (MPI2_DEFAULT_REPLY *)facts; bzero(&request, req_sz); request.Function = MPI2_FUNCTION_IOC_FACTS; error = mps_request_sync(sc, &request, reply, req_sz, reply_sz, 5); return (error); } static int mps_send_iocinit(struct mps_softc *sc) { MPI2_IOC_INIT_REQUEST init; MPI2_DEFAULT_REPLY reply; int req_sz, reply_sz, error; struct timeval now; uint64_t time_in_msec; MPS_FUNCTRACE(sc); req_sz = sizeof(MPI2_IOC_INIT_REQUEST); reply_sz = sizeof(MPI2_IOC_INIT_REPLY); bzero(&init, req_sz); bzero(&reply, reply_sz); /* * Fill in the init block. Note that most addresses are * deliberately in the lower 32bits of memory. This is a micro- * optimzation for PCI/PCIX, though it's not clear if it helps PCIe. */ init.Function = MPI2_FUNCTION_IOC_INIT; init.WhoInit = MPI2_WHOINIT_HOST_DRIVER; init.MsgVersion = htole16(MPI2_VERSION); init.HeaderVersion = htole16(MPI2_HEADER_VERSION); init.SystemRequestFrameSize = htole16(sc->facts->IOCRequestFrameSize); init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth); init.ReplyFreeQueueDepth = htole16(sc->fqdepth); init.SenseBufferAddressHigh = 0; init.SystemReplyAddressHigh = 0; init.SystemRequestFrameBaseAddress.High = 0; init.SystemRequestFrameBaseAddress.Low = htole32((uint32_t)sc->req_busaddr); init.ReplyDescriptorPostQueueAddress.High = 0; init.ReplyDescriptorPostQueueAddress.Low = htole32((uint32_t)sc->post_busaddr); init.ReplyFreeQueueAddress.High = 0; init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr); getmicrotime(&now); time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000); init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF); init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF); error = mps_request_sync(sc, &init, &reply, 
req_sz, reply_sz, 5); if ((reply.IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; mps_dprint(sc, MPS_INIT, "IOCInit status= 0x%x\n", reply.IOCStatus); return (error); } void mps_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int mps_alloc_queues(struct mps_softc *sc) { bus_addr_t queues_busaddr; uint8_t *queues; int qsize, fqsize, pqsize; /* * The reply free queue contains 4 byte entries in multiples of 16 and * aligned on a 16 byte boundary. There must always be an unused entry. * This queue supplies fresh reply frames for the firmware to use. * * The reply descriptor post queue contains 8 byte entries in * multiples of 16 and aligned on a 16 byte boundary. This queue * contains filled-in reply frames sent from the firmware to the host. * * These two queues are allocated together for simplicity. */ sc->fqdepth = roundup2((sc->num_replies + 1), 16); sc->pqdepth = roundup2((sc->num_replies + 1), 16); fqsize= sc->fqdepth * 4; pqsize = sc->pqdepth * 8; qsize = fqsize + pqsize; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ qsize, /* maxsize */ 1, /* nsegments */ qsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->queues_dmat)) { device_printf(sc->mps_dev, "Cannot allocate queues DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT, &sc->queues_map)) { device_printf(sc->mps_dev, "Cannot allocate queues memory\n"); return (ENOMEM); } bzero(queues, qsize); bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize, mps_memaddr_cb, &queues_busaddr, 0); sc->free_queue = (uint32_t *)queues; sc->free_busaddr = queues_busaddr; sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize); sc->post_busaddr = queues_busaddr + fqsize; 
return (0); } static int mps_alloc_replies(struct mps_softc *sc) { int rsize, num_replies; /* * sc->num_replies should be one less than sc->fqdepth. We need to * allocate space for sc->fqdepth replies, but only sc->num_replies * replies can be used at once. */ num_replies = max(sc->fqdepth, sc->num_replies); rsize = sc->facts->ReplyFrameSize * num_replies * 4; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 4, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->reply_dmat)) { device_printf(sc->mps_dev, "Cannot allocate replies DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames, BUS_DMA_NOWAIT, &sc->reply_map)) { device_printf(sc->mps_dev, "Cannot allocate replies memory\n"); return (ENOMEM); } bzero(sc->reply_frames, rsize); bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize, mps_memaddr_cb, &sc->reply_busaddr, 0); return (0); } static int mps_alloc_requests(struct mps_softc *sc) { struct mps_command *cm; struct mps_chain *chain; int i, rsize, nsegs; rsize = sc->facts->IOCRequestFrameSize * sc->num_reqs * 4; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->req_dmat)) { device_printf(sc->mps_dev, "Cannot allocate request DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames, BUS_DMA_NOWAIT, &sc->req_map)) { device_printf(sc->mps_dev, "Cannot allocate request memory\n"); return (ENOMEM); } bzero(sc->req_frames, rsize); bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize, 
mps_memaddr_cb, &sc->req_busaddr, 0); rsize = sc->facts->IOCRequestFrameSize * sc->max_chains * 4; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->chain_dmat)) { device_printf(sc->mps_dev, "Cannot allocate chain DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames, BUS_DMA_NOWAIT, &sc->chain_map)) { device_printf(sc->mps_dev, "Cannot allocate chain memory\n"); return (ENOMEM); } bzero(sc->chain_frames, rsize); bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize, mps_memaddr_cb, &sc->chain_busaddr, 0); rsize = MPS_SENSE_LEN * sc->num_reqs; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->sense_dmat)) { device_printf(sc->mps_dev, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames, BUS_DMA_NOWAIT, &sc->sense_map)) { device_printf(sc->mps_dev, "Cannot allocate sense memory\n"); return (ENOMEM); } bzero(sc->sense_frames, rsize); bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize, mps_memaddr_cb, &sc->sense_busaddr, 0); sc->chains = malloc(sizeof(struct mps_chain) * sc->max_chains, M_MPT2, M_WAITOK | M_ZERO); if(!sc->chains) { device_printf(sc->mps_dev, "Cannot allocate chains memory %s %d\n", __func__, __LINE__); return (ENOMEM); } for (i = 0; i < sc->max_chains; i++) { chain = &sc->chains[i]; chain->chain = (MPI2_SGE_IO_UNION *)(sc->chain_frames + i * sc->facts->IOCRequestFrameSize * 4); 
chain->chain_busaddr = sc->chain_busaddr + i * sc->facts->IOCRequestFrameSize * 4; mps_free_chain(sc, chain); sc->chain_free_lowwater++; } /* XXX Need to pick a more precise value */ nsegs = (MAXPHYS / PAGE_SIZE) + 1; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ nsegs, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->mps_mtx, /* lockarg */ &sc->buffer_dmat)) { device_printf(sc->mps_dev, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * SMID 0 cannot be used as a free command per the firmware spec. * Just drop that command instead of risking accounting bugs. */ sc->commands = malloc(sizeof(struct mps_command) * sc->num_reqs, M_MPT2, M_WAITOK | M_ZERO); if(!sc->commands) { device_printf(sc->mps_dev, "Cannot allocate memory %s %d\n", __func__, __LINE__); return (ENOMEM); } for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; cm->cm_req = sc->req_frames + i * sc->facts->IOCRequestFrameSize * 4; cm->cm_req_busaddr = sc->req_busaddr + i * sc->facts->IOCRequestFrameSize * 4; cm->cm_sense = &sc->sense_frames[i]; cm->cm_sense_busaddr = sc->sense_busaddr + i * MPS_SENSE_LEN; cm->cm_desc.Default.SMID = i; cm->cm_sc = sc; TAILQ_INIT(&cm->cm_chain_list); callout_init_mtx(&cm->cm_callout, &sc->mps_mtx, 0); /* XXX Is a failure here a critical problem? 
*/ if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap) == 0) if (i <= sc->facts->HighPriorityCredit) mps_free_high_priority_command(sc, cm); else mps_free_command(sc, cm); else { panic("failed to allocate command %d\n", i); sc->num_reqs = i; break; } } return (0); } static int mps_init_queues(struct mps_softc *sc) { int i; memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8); /* * According to the spec, we need to use one less reply than we * have space for on the queue. So sc->num_replies (the number we * use) should be less than sc->fqdepth (allocated size). */ if (sc->num_replies >= sc->fqdepth) return (EINVAL); /* * Initialize all of the free queue entries. */ for (i = 0; i < sc->fqdepth; i++) sc->free_queue[i] = sc->reply_busaddr + (i * sc->facts->ReplyFrameSize * 4); sc->replyfreeindex = sc->num_replies; return (0); } /* Get the driver parameter tunables. Lowest priority are the driver defaults. * Next are the global settings, if they exist. Highest are the per-unit * settings, if they exist. */ static void mps_get_tunables(struct mps_softc *sc) { char tmpstr[80]; /* XXX default to some debugging for now */ sc->mps_debug = MPS_INFO|MPS_FAULT; sc->disable_msix = 0; sc->disable_msi = 0; sc->max_chains = MPS_CHAIN_FRAMES; /* * Grab the global variables. 
*/ TUNABLE_INT_FETCH("hw.mps.debug_level", &sc->mps_debug); TUNABLE_INT_FETCH("hw.mps.disable_msix", &sc->disable_msix); TUNABLE_INT_FETCH("hw.mps.disable_msi", &sc->disable_msi); TUNABLE_INT_FETCH("hw.mps.max_chains", &sc->max_chains); /* Grab the unit-instance variables */ snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.debug_level", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->mps_debug); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.disable_msix", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.disable_msi", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_chains", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_chains); } static void mps_setup_sysctl(struct mps_softc *sc) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; char tmpstr[80], tmpstr2[80]; /* * Setup the sysctl variable so the user can change the debug level * on the fly. 
*/ snprintf(tmpstr, sizeof(tmpstr), "MPS controller %d", device_get_unit(sc->mps_dev)); snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mps_dev)); sysctl_ctx = device_get_sysctl_ctx(sc->mps_dev); if (sysctl_ctx != NULL) sysctl_tree = device_get_sysctl_tree(sc->mps_dev); if (sysctl_tree == NULL) { sysctl_ctx_init(&sc->sysctl_ctx); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_mps), OID_AUTO, tmpstr2, CTLFLAG_RD, 0, tmpstr); if (sc->sysctl_tree == NULL) return; sysctl_ctx = &sc->sysctl_ctx; sysctl_tree = sc->sysctl_tree; } SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "debug_level", CTLFLAG_RW, &sc->mps_debug, 0, "mps debug level"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0, "Disable the use of MSI-X interrupts"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msi", CTLFLAG_RD, &sc->disable_msi, 0, "Disable the use of MSI interrupts"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), - OID_AUTO, "firmware_version", CTLFLAG_RW, &sc->fw_version, + OID_AUTO, "firmware_version", CTLFLAG_RW, sc->fw_version, strlen(sc->fw_version), "firmware version"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "driver_version", CTLFLAG_RW, MPS_DRIVER_VERSION, strlen(MPS_DRIVER_VERSION), "driver version"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_active", CTLFLAG_RD, &sc->io_cmds_active, 0, "number of currently active commands"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_highwater", CTLFLAG_RD, &sc->io_cmds_highwater, 0, "maximum active commands seen"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free", CTLFLAG_RD, &sc->chain_free, 0, "number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free_lowwater", CTLFLAG_RD, 
&sc->chain_free_lowwater, 0,"lowest number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_chains", CTLFLAG_RD, &sc->max_chains, 0,"maximum chain frames that will be allocated"); #if __FreeBSD_version >= 900030 SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_alloc_fail", CTLFLAG_RD, &sc->chain_alloc_fail, "chain allocation failures"); #endif //FreeBSD_version >= 900030 } int mps_attach(struct mps_softc *sc) { int error; mps_get_tunables(sc); MPS_FUNCTRACE(sc); mtx_init(&sc->mps_mtx, "MPT2SAS lock", NULL, MTX_DEF); callout_init_mtx(&sc->periodic, &sc->mps_mtx, 0); TAILQ_INIT(&sc->event_list); if ((error = mps_transition_ready(sc)) != 0) { mps_printf(sc, "%s failed to transition ready\n", __func__); return (error); } sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPT2, M_ZERO|M_NOWAIT); if(!sc->facts) { device_printf(sc->mps_dev, "Cannot allocate memory %s %d\n", __func__, __LINE__); return (ENOMEM); } /* * Get IOC Facts and allocate all structures based on this information. * A Diag Reset will also call mps_iocfacts_allocate and re-read the IOC * Facts. If relevant values have changed in IOC Facts, this function * will free all of the memory based on IOC Facts and reallocate that * memory. If this fails, any allocated memory should already be freed. */ if ((error = mps_iocfacts_allocate(sc, TRUE)) != 0) { mps_dprint(sc, MPS_FAULT, "%s IOC Facts based allocation " "failed with error %d\n", __func__, error); return (error); } /* Start the periodic watchdog check on the IOC Doorbell */ mps_periodic(sc); /* * The portenable will kick off discovery events that will drive the * rest of the initialization process. The CAM/SAS module will * hold up the boot sequence until discovery is complete. 
*/ sc->mps_ich.ich_func = mps_startup; sc->mps_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mps_ich) != 0) { mps_dprint(sc, MPS_ERROR, "Cannot establish MPS config hook\n"); error = EINVAL; } /* * Allow IR to shutdown gracefully when shutdown occurs. */ sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final, mpssas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT); if (sc->shutdown_eh == NULL) mps_dprint(sc, MPS_ERROR, "shutdown event registration " "failed\n"); mps_setup_sysctl(sc); sc->mps_flags |= MPS_FLAGS_ATTACH_DONE; return (error); } /* Run through any late-start handlers. */ static void mps_startup(void *arg) { struct mps_softc *sc; sc = (struct mps_softc *)arg; mps_lock(sc); mps_unmask_intr(sc); /* initialize device mapping tables */ mps_base_static_config_pages(sc); mps_mapping_initialize(sc); mpssas_startup(sc); mps_unlock(sc); } /* Periodic watchdog. Is called with the driver lock already held. */ static void mps_periodic(void *arg) { struct mps_softc *sc; uint32_t db; sc = (struct mps_softc *)arg; if (sc->mps_flags & MPS_FLAGS_SHUTDOWN) return; db = mps_regread(sc, MPI2_DOORBELL_OFFSET); if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mps_dprint(sc, MPS_FAULT, "IOC Fault 0x%08x, Resetting\n", db); mps_reinit(sc); } callout_reset(&sc->periodic, MPS_PERIODIC_DELAY * hz, mps_periodic, sc); } static void mps_log_evt_handler(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *event) { MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry; mps_print_event(sc, event); switch (event->Event) { case MPI2_EVENT_LOG_DATA: mps_dprint(sc, MPS_EVENT, "MPI2_EVENT_LOG_DATA:\n"); if (sc->mps_debug & MPS_EVENT) hexdump(event->EventData, event->EventDataLength, NULL, 0); break; case MPI2_EVENT_LOG_ENTRY_ADDED: entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData; mps_dprint(sc, MPS_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event " "0x%x Sequence %d:\n", entry->LogEntryQualifier, entry->LogSequence); break; default: break; } return; } static int 
mps_attach_log(struct mps_softc *sc) { u32 events[MPI2_EVENT_NOTIFY_EVENTMASK_WORDS]; bzero(events, 16); setbit(events, MPI2_EVENT_LOG_DATA); setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED); mps_register_events(sc, events, mps_log_evt_handler, NULL, &sc->mps_log_eh); return (0); } static int mps_detach_log(struct mps_softc *sc) { if (sc->mps_log_eh != NULL) mps_deregister_events(sc, sc->mps_log_eh); return (0); } /* * Free all of the driver resources and detach submodules. Should be called * without the lock held. */ int mps_free(struct mps_softc *sc) { int error; /* Turn off the watchdog */ mps_lock(sc); sc->mps_flags |= MPS_FLAGS_SHUTDOWN; mps_unlock(sc); /* Lock must not be held for this */ callout_drain(&sc->periodic); if (((error = mps_detach_log(sc)) != 0) || ((error = mps_detach_sas(sc)) != 0)) return (error); mps_detach_user(sc); /* Put the IOC back in the READY state. */ mps_lock(sc); if ((error = mps_transition_ready(sc)) != 0) { mps_unlock(sc); return (error); } mps_unlock(sc); if (sc->facts != NULL) free(sc->facts, M_MPT2); /* * Free all buffers that are based on IOC Facts. A Diag Reset may need * to free these buffers too. 
*/ mps_iocfacts_free(sc); if (sc->sysctl_tree != NULL) sysctl_ctx_free(&sc->sysctl_ctx); /* Deregister the shutdown function */ if (sc->shutdown_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh); mtx_destroy(&sc->mps_mtx); return (0); } static __inline void mps_complete_command(struct mps_softc *sc, struct mps_command *cm) { MPS_FUNCTRACE(sc); if (cm == NULL) { mps_dprint(sc, MPS_ERROR, "Completing NULL command\n"); return; } if (cm->cm_flags & MPS_CM_FLAGS_POLLED) cm->cm_flags |= MPS_CM_FLAGS_COMPLETE; if (cm->cm_complete != NULL) { mps_dprint(sc, MPS_TRACE, "%s cm %p calling cm_complete %p data %p reply %p\n", __func__, cm, cm->cm_complete, cm->cm_complete_data, cm->cm_reply); cm->cm_complete(sc, cm); } if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) { mps_dprint(sc, MPS_TRACE, "waking up %p\n", cm); wakeup(cm); } if (cm->cm_sc->io_cmds_active != 0) { cm->cm_sc->io_cmds_active--; } else { mps_dprint(sc, MPS_ERROR, "Warning: io_cmds_active is " "out of sync - resynching to 0\n"); } } static void mps_sas_log_info(struct mps_softc *sc , u32 log_info) { union loginfo_type { u32 loginfo; struct { u32 subcode:16; u32 code:8; u32 originator:4; u32 bus_type:4; } dw; }; union loginfo_type sas_loginfo; char *originator_str = NULL; sas_loginfo.loginfo = log_info; if (sas_loginfo.dw.bus_type != 3 /*SAS*/) return; /* each nexus loss loginfo */ if (log_info == 0x31170000) return; /* eat the loginfos associated with task aborts */ if ((log_info == 30050000 || log_info == 0x31140000 || log_info == 0x31130000)) return; switch (sas_loginfo.dw.originator) { case 0: originator_str = "IOP"; break; case 1: originator_str = "PL"; break; case 2: originator_str = "IR"; break; } mps_dprint(sc, MPS_LOG, "log_info(0x%08x): originator(%s), " "code(0x%02x), sub_code(0x%04x)\n", log_info, originator_str, sas_loginfo.dw.code, sas_loginfo.dw.subcode); } static void mps_display_reply_info(struct mps_softc *sc, uint8_t *reply) { MPI2DefaultReply_t *mpi_reply; u16 sc_status; mpi_reply = 
(MPI2DefaultReply_t*)reply; sc_status = le16toh(mpi_reply->IOCStatus); if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) mps_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo)); } void mps_intr(void *data) { struct mps_softc *sc; uint32_t status; sc = (struct mps_softc *)data; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); /* * Check interrupt status register to flush the bus. This is * needed for both INTx interrupts and driver-driven polling */ status = mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0) return; mps_lock(sc); mps_intr_locked(data); mps_unlock(sc); return; } /* * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the * chip. Hopefully this theory is correct. */ void mps_intr_msi(void *data) { struct mps_softc *sc; sc = (struct mps_softc *)data; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); mps_lock(sc); mps_intr_locked(data); mps_unlock(sc); return; } /* * The locking is overly broad and simplistic, but easy to deal with for now. 
*/ void mps_intr_locked(void *data) { MPI2_REPLY_DESCRIPTORS_UNION *desc; struct mps_softc *sc; struct mps_command *cm = NULL; uint8_t flags; u_int pq; MPI2_DIAG_RELEASE_REPLY *rel_rep; mps_fw_diagnostic_buffer_t *pBuffer; sc = (struct mps_softc *)data; pq = sc->replypostindex; mps_dprint(sc, MPS_TRACE, "%s sc %p starting with replypostindex %u\n", __func__, sc, sc->replypostindex); for ( ;; ) { cm = NULL; desc = &sc->post_queue[sc->replypostindex]; flags = desc->Default.ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) || (le32toh(desc->Words.High) == 0xffffffff)) break; /* increment the replypostindex now, so that event handlers * and cm completion handlers which decide to do a diag * reset can zero it without it getting incremented again * afterwards, and we break out of this loop on the next * iteration since the reply post queue has been cleared to * 0xFF and all descriptors look unused (which they are). */ if (++sc->replypostindex >= sc->pqdepth) sc->replypostindex = 0; switch (flags) { case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: { uint32_t baddr; uint8_t *reply; /* * Re-compose the reply address from the address * sent back from the chip. The ReplyFrameAddress * is the lower 32 bits of the physical address of * particular reply frame. Convert that address to * host format, and then use that to provide the * offset against the virtual address base * (sc->reply_frames). */ baddr = le32toh(desc->AddressReply.ReplyFrameAddress); reply = sc->reply_frames + (baddr - ((uint32_t)sc->reply_busaddr)); /* * Make sure the reply we got back is in a valid * range. If not, go ahead and panic here, since * we'll probably panic as soon as we deference the * reply pointer anyway. 
*/ if ((reply < sc->reply_frames) || (reply > (sc->reply_frames + (sc->fqdepth * sc->facts->ReplyFrameSize * 4)))) { printf("%s: WARNING: reply %p out of range!\n", __func__, reply); printf("%s: reply_frames %p, fqdepth %d, " "frame size %d\n", __func__, sc->reply_frames, sc->fqdepth, sc->facts->ReplyFrameSize * 4); printf("%s: baddr %#x,\n", __func__, baddr); /* LSI-TODO. See Linux Code. Need Gracefull exit*/ panic("Reply address out of range"); } if (le16toh(desc->AddressReply.SMID) == 0) { if (((MPI2_DEFAULT_REPLY *)reply)->Function == MPI2_FUNCTION_DIAG_BUFFER_POST) { /* * If SMID is 0 for Diag Buffer Post, * this implies that the reply is due to * a release function with a status that * the buffer has been released. Set * the buffer flags accordingly. */ rel_rep = (MPI2_DIAG_RELEASE_REPLY *)reply; if (le16toh(rel_rep->IOCStatus) == MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED) { pBuffer = &sc->fw_diag_buffer_list[ rel_rep->BufferType]; pBuffer->valid_data = TRUE; pBuffer->owned_by_firmware = FALSE; pBuffer->immediate = FALSE; } } else mps_dispatch_event(sc, baddr, (MPI2_EVENT_NOTIFICATION_REPLY *) reply); } else { cm = &sc->commands[le16toh(desc->AddressReply.SMID)]; cm->cm_reply = reply; cm->cm_reply_data = le32toh(desc->AddressReply.ReplyFrameAddress); } break; } case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS: case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER: case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS: default: /* Unhandled */ mps_dprint(sc, MPS_ERROR, "Unhandled reply 0x%x\n", desc->Default.ReplyFlags); cm = NULL; break; } if (cm != NULL) { // Print Error reply frame if (cm->cm_reply) mps_display_reply_info(sc,cm->cm_reply); mps_complete_command(sc, cm); } desc->Words.Low = 0xffffffff; desc->Words.High = 0xffffffff; } if (pq != sc->replypostindex) { mps_dprint(sc, MPS_TRACE, "%s sc %p writing postindex %d\n", __func__, sc, sc->replypostindex); mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex); } return; } static void 
mps_dispatch_event(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply) { struct mps_event_handle *eh; int event, handled = 0; event = le16toh(reply->Event); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { if (isset(eh->mask, event)) { eh->callback(sc, data, reply); handled++; } } if (handled == 0) mps_dprint(sc, MPS_EVENT, "Unhandled event 0x%x\n", le16toh(event)); /* * This is the only place that the event/reply should be freed. * Anything wanting to hold onto the event data should have * already copied it into their own storage. */ mps_free_reply(sc, data); } static void mps_reregister_events_complete(struct mps_softc *sc, struct mps_command *cm) { mps_dprint(sc, MPS_TRACE, "%s\n", __func__); if (cm->cm_reply) mps_print_event(sc, (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply); mps_free_command(sc, cm); /* next, send a port enable */ mpssas_startup(sc); } /* * For both register_events and update_events, the caller supplies a bitmap * of events that it _wants_. These functions then turn that into a bitmask * suitable for the controller. 
*/ int mps_register_events(struct mps_softc *sc, u32 *mask, mps_evt_callback_t *cb, void *data, struct mps_event_handle **handle) { struct mps_event_handle *eh; int error = 0; eh = malloc(sizeof(struct mps_event_handle), M_MPT2, M_WAITOK|M_ZERO); if(!eh) { device_printf(sc->mps_dev, "Cannot allocate memory %s %d\n", __func__, __LINE__); return (ENOMEM); } eh->callback = cb; eh->data = data; TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list); if (mask != NULL) error = mps_update_events(sc, eh, mask); *handle = eh; return (error); } int mps_update_events(struct mps_softc *sc, struct mps_event_handle *handle, u32 *mask) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; MPI2_EVENT_NOTIFICATION_REPLY *reply; struct mps_command *cm; int error, i; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); if ((mask != NULL) && (handle != NULL)) bcopy(mask, &handle->mask[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] = -1; for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] &= ~handle->mask[i]; if ((cm = mps_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPS_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, &evtreq->EventMasks[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); } #else for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) evtreq->EventMasks[i] = htole32(sc->event_mask[i]); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; error = mps_request_polled(sc, cm); reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply; if ((reply == NULL) || (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; mps_print_event(sc, reply); mps_dprint(sc, MPS_TRACE, "%s finished error %d\n", __func__, error); 
mps_free_command(sc, cm); return (error); } static int mps_reregister_events(struct mps_softc *sc) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; struct mps_command *cm; struct mps_event_handle *eh; int error, i; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); /* first, reregister events */ for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] = -1; TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mps_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPS_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, &evtreq->EventMasks[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); } #else for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) evtreq->EventMasks[i] = htole32(sc->event_mask[i]); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; cm->cm_complete = mps_reregister_events_complete; error = mps_map_command(sc, cm); mps_dprint(sc, MPS_TRACE, "%s finished with error %d\n", __func__, error); return (error); } void mps_deregister_events(struct mps_softc *sc, struct mps_event_handle *handle) { TAILQ_REMOVE(&sc->event_list, handle, eh_list); free(handle, M_MPT2); } /* * Add a chain element as the next SGE for the specified command. * Reset cm_sge and cm_sgesize to indicate all the available space. */ static int mps_add_chain(struct mps_command *cm) { MPI2_SGE_CHAIN32 *sgc; struct mps_chain *chain; int space; if (cm->cm_sglsize < MPS_SGC_SIZE) panic("MPS: Need SGE Error Code\n"); chain = mps_alloc_chain(cm->cm_sc); if (chain == NULL) return (ENOBUFS); space = (int)cm->cm_sc->facts->IOCRequestFrameSize * 4; /* * Note: a double-linked list is used to make it easier to * walk for debugging. 
*/ TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link); sgc = (MPI2_SGE_CHAIN32 *)&cm->cm_sge->MpiChain; sgc->Length = htole16(space); sgc->NextChainOffset = 0; /* TODO Looks like bug in Setting sgc->Flags. * sgc->Flags = ( MPI2_SGE_FLAGS_CHAIN_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING | * MPI2_SGE_FLAGS_SYSTEM_ADDRESS) << MPI2_SGE_FLAGS_SHIFT * This is fine.. because we are not using simple element. In case of * MPI2_SGE_CHAIN32, we have seperate Length and Flags feild. */ sgc->Flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT; sgc->Address = htole32(chain->chain_busaddr); cm->cm_sge = (MPI2_SGE_IO_UNION *)&chain->chain->MpiSimple; cm->cm_sglsize = space; return (0); } /* * Add one scatter-gather element (chain, simple, transaction context) * to the scatter-gather list for a command. Maintain cm_sglsize and * cm_sge as the remaining size and pointer to the next SGE to fill * in, respectively. */ int mps_push_sge(struct mps_command *cm, void *sgep, size_t len, int segsleft) { MPI2_SGE_TRANSACTION_UNION *tc = sgep; MPI2_SGE_SIMPLE64 *sge = sgep; int error, type; uint32_t saved_buf_len, saved_address_low, saved_address_high; u32 sge_flags; type = (tc->Flags & MPI2_SGE_FLAGS_ELEMENT_MASK); #ifdef INVARIANTS switch (type) { case MPI2_SGE_FLAGS_TRANSACTION_ELEMENT: { if (len != tc->DetailsLength + 4) panic("TC %p length %u or %zu?", tc, tc->DetailsLength + 4, len); } break; case MPI2_SGE_FLAGS_CHAIN_ELEMENT: /* Driver only uses 32-bit chain elements */ if (len != MPS_SGC_SIZE) panic("CHAIN %p length %u or %zu?", sgep, MPS_SGC_SIZE, len); break; case MPI2_SGE_FLAGS_SIMPLE_ELEMENT: /* Driver only uses 64-bit SGE simple elements */ sge = sgep; if (len != MPS_SGE64_SIZE) panic("SGE simple %p length %u or %zu?", sge, MPS_SGE64_SIZE, len); if (((sge->FlagsLength >> MPI2_SGE_FLAGS_SHIFT) & MPI2_SGE_FLAGS_ADDRESS_SIZE) == 0) panic("SGE simple %p flags %02x not marked 64-bit?", sge, sge->FlagsLength >> MPI2_SGE_FLAGS_SHIFT); break; default: panic("Unexpected SGE %p, flags %02x", tc, 
tc->Flags); } #endif /* * case 1: 1 more segment, enough room for it * case 2: 2 more segments, enough room for both * case 3: >=2 more segments, only enough room for 1 and a chain * case 4: >=1 more segment, enough room for only a chain * case 5: >=1 more segment, no room for anything (error) */ /* * There should be room for at least a chain element, or this * code is buggy. Case (5). */ if (cm->cm_sglsize < MPS_SGC_SIZE) panic("MPS: Need SGE Error Code\n"); if (segsleft >= 2 && cm->cm_sglsize < len + MPS_SGC_SIZE + MPS_SGE64_SIZE) { /* * There are 2 or more segments left to add, and only * enough room for 1 and a chain. Case (3). * * Mark as last element in this chain if necessary. */ if (type == MPI2_SGE_FLAGS_SIMPLE_ELEMENT) { sge->FlagsLength |= (MPI2_SGE_FLAGS_LAST_ELEMENT << MPI2_SGE_FLAGS_SHIFT); } /* * Add the item then a chain. Do the chain now, * rather than on the next iteration, to simplify * understanding the code. */ cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sgep, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (mps_add_chain(cm)); } if (segsleft >= 1 && cm->cm_sglsize < len + MPS_SGC_SIZE) { /* * 1 or more segment, enough room for only a chain. * Hope the previous element wasn't a Simple entry * that needed to be marked with * MPI2_SGE_FLAGS_LAST_ELEMENT. Case (4). */ if ((error = mps_add_chain(cm)) != 0) return (error); } #ifdef INVARIANTS /* Case 1: 1 more segment, enough room for it. */ if (segsleft == 1 && cm->cm_sglsize < len) panic("1 seg left and no room? %u versus %zu", cm->cm_sglsize, len); /* Case 2: 2 more segments, enough room for both */ if (segsleft == 2 && cm->cm_sglsize < len + MPS_SGE64_SIZE) panic("2 segs left and no room? 
%u versus %zu", cm->cm_sglsize, len); #endif if (segsleft == 1 && type == MPI2_SGE_FLAGS_SIMPLE_ELEMENT) { /* * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). */ saved_buf_len = sge->FlagsLength & 0x00FFFFFF; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->FlagsLength = cm->cm_out_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sgep, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); } sge->FlagsLength = saved_buf_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_LIST | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); if (cm->cm_flags & MPS_CM_FLAGS_DATAIN) { sge->FlagsLength |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) << MPI2_SGE_FLAGS_SHIFT); } else { sge->FlagsLength |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) << MPI2_SGE_FLAGS_SHIFT); } sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; } cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sgep, cm->cm_sge, 
len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (0); } /* * Add one dma segment to the scatter-gather list for a command. */ int mps_add_dmaseg(struct mps_command *cm, vm_paddr_t pa, size_t len, u_int flags, int segsleft) { MPI2_SGE_SIMPLE64 sge; /* * This driver always uses 64-bit address elements for simplicity. */ flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING; /* Set Endian safe macro in mps_push_sge */ sge.FlagsLength = len | (flags << MPI2_SGE_FLAGS_SHIFT); mps_from_u64(pa, &sge.Address); return (mps_push_sge(cm, &sge, sizeof sge, segsleft)); } static void mps_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mps_softc *sc; struct mps_command *cm; u_int i, dir, sflags; cm = (struct mps_command *)arg; sc = cm->cm_sc; /* * In this case, just print out a warning and let the chip tell the * user they did the wrong thing. */ if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) { mps_dprint(sc, MPS_ERROR, "%s: warning: busdma returned %d segments, " "more than the %d allowed\n", __func__, nsegs, cm->cm_max_segs); } /* * Set up DMA direction flags. Bi-directional requests are also handled * here. In that case, both direction flags will be set. */ sflags = 0; if (cm->cm_flags & MPS_CM_FLAGS_SMP_PASS) { /* * We have to add a special case for SMP passthrough, there * is no easy way to generically handle it. The first * S/G element is used for the command (therefore the * direction bit needs to be set). The second one is used * for the reply. We'll leave it to the caller to make * sure we only have two buffers. */ /* * Even though the busdma man page says it doesn't make * sense to have both direction flags, it does in this case. * We have one s/g element being accessed in each direction. */ dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD; /* * Set the direction flag on the first buffer in the SMP * passthrough request. We'll clear it for the second one. 
*/ sflags |= MPI2_SGE_FLAGS_DIRECTION | MPI2_SGE_FLAGS_END_OF_BUFFER; } else if (cm->cm_flags & MPS_CM_FLAGS_DATAOUT) { sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC; dir = BUS_DMASYNC_PREWRITE; } else dir = BUS_DMASYNC_PREREAD; for (i = 0; i < nsegs; i++) { if ((cm->cm_flags & MPS_CM_FLAGS_SMP_PASS) && (i != 0)) { sflags &= ~MPI2_SGE_FLAGS_DIRECTION; } error = mps_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len, sflags, nsegs - i); if (error != 0) { /* Resource shortage, roll back! */ mps_dprint(sc, MPS_INFO, "Out of chain frames, " "consider increasing hw.mps.max_chains.\n"); cm->cm_flags |= MPS_CM_FLAGS_CHAIN_FAILED; mps_complete_command(sc, cm); return; } } bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir); mps_enqueue_request(sc, cm); return; } static void mps_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { mps_data_cb(arg, segs, nsegs, error); } /* * This is the routine to enqueue commands ansynchronously. * Note that the only error path here is from bus_dmamap_load(), which can * return EINPROGRESS if it is waiting for resources. Other than this, it's * assumed that if you have a command in-hand, then you have enough credits * to use it. 
*/ int mps_map_command(struct mps_softc *sc, struct mps_command *cm) { MPI2_SGE_SIMPLE32 *sge; int error = 0; if (cm->cm_flags & MPS_CM_FLAGS_USE_UIO) { error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap, &cm->cm_uio, mps_data_cb2, cm, 0); } else if (cm->cm_flags & MPS_CM_FLAGS_USE_CCB) { error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, mps_data_cb, cm, 0); } else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) { error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_length, mps_data_cb, cm, 0); } else { /* Add a zero-length element as needed */ if (cm->cm_sge != NULL) { sge = (MPI2_SGE_SIMPLE32 *)cm->cm_sge; sge->FlagsLength = htole32((MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_END_OF_LIST | MPI2_SGE_FLAGS_SIMPLE_ELEMENT) << MPI2_SGE_FLAGS_SHIFT); sge->Address = 0; } mps_enqueue_request(sc, cm); } return (error); } /* * This is the routine to enqueue commands synchronously. An error of * EINPROGRESS from mps_map_command() is ignored since the command will * be executed and enqueued automatically. Other errors come from msleep(). */ int mps_wait_command(struct mps_softc *sc, struct mps_command *cm, int timeout, int sleep_flag) { int error, rc; struct timeval cur_time, start_time; if (sc->mps_flags & MPS_FLAGS_DIAGRESET) return EBUSY; cm->cm_complete = NULL; cm->cm_flags |= (MPS_CM_FLAGS_WAKEUP + MPS_CM_FLAGS_POLLED); error = mps_map_command(sc, cm); if ((error != 0) && (error != EINPROGRESS)) return (error); // Check for context and wait for 50 mSec at a time until time has // expired or the command has finished. If msleep can't be used, need // to poll. 
if (curthread->td_pflags & TDP_NOSLEEPING) sleep_flag = NO_SLEEP; getmicrotime(&start_time); if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) { error = msleep(cm, &sc->mps_mtx, 0, "mpswait", timeout*hz); } else { while ((cm->cm_flags & MPS_CM_FLAGS_COMPLETE) == 0) { mps_intr_locked(sc); if (sleep_flag == CAN_SLEEP) pause("mpswait", hz/20); else DELAY(50000); getmicrotime(&cur_time); if ((cur_time.tv_sec - start_time.tv_sec) > timeout) { error = EWOULDBLOCK; break; } } } if (error == EWOULDBLOCK) { mps_dprint(sc, MPS_FAULT, "Calling Reinit from %s\n", __func__); rc = mps_reinit(sc); mps_dprint(sc, MPS_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); error = ETIMEDOUT; } return (error); } /* * This is the routine to enqueue a command synchonously and poll for * completion. Its use should be rare. */ int mps_request_polled(struct mps_softc *sc, struct mps_command *cm) { int error, timeout = 0, rc; error = 0; cm->cm_flags |= MPS_CM_FLAGS_POLLED; cm->cm_complete = NULL; mps_map_command(sc, cm); while ((cm->cm_flags & MPS_CM_FLAGS_COMPLETE) == 0) { mps_intr_locked(sc); DELAY(50 * 1000); if (timeout++ > 1000) { mps_dprint(sc, MPS_FAULT, "polling failed\n"); error = ETIMEDOUT; break; } } if (error) { mps_dprint(sc, MPS_FAULT, "Calling Reinit from %s\n", __func__); rc = mps_reinit(sc); mps_dprint(sc, MPS_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); } return (error); } /* * The MPT driver had a verbose interface for config pages. In this driver, * reduce it to much simplier terms, similar to the Linux driver. 
*/ int mps_read_config_page(struct mps_softc *sc, struct mps_config_params *params) { MPI2_CONFIG_REQUEST *req; struct mps_command *cm; int error; if (sc->mps_flags & MPS_FLAGS_BUSY) { return (EBUSY); } cm = mps_alloc_command(sc); if (cm == NULL) { return (EBUSY); } req = (MPI2_CONFIG_REQUEST *)cm->cm_req; req->Function = MPI2_FUNCTION_CONFIG; req->Action = params->action; req->SGLFlags = 0; req->ChainOffset = 0; req->PageAddress = params->page_address; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr; hdr = ¶ms->hdr.Ext; req->ExtPageType = hdr->ExtPageType; req->ExtPageLength = hdr->ExtPageLength; req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED; req->Header.PageLength = 0; /* Must be set to zero */ req->Header.PageNumber = hdr->PageNumber; req->Header.PageVersion = hdr->PageVersion; } else { MPI2_CONFIG_PAGE_HEADER *hdr; hdr = ¶ms->hdr.Struct; req->Header.PageType = hdr->PageType; req->Header.PageNumber = hdr->PageNumber; req->Header.PageLength = hdr->PageLength; req->Header.PageVersion = hdr->PageVersion; } cm->cm_data = params->buffer; cm->cm_length = params->length; cm->cm_sge = &req->PageBufferSGE; cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION); cm->cm_flags = MPS_CM_FLAGS_SGE_SIMPLE | MPS_CM_FLAGS_DATAIN; cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_complete_data = params; if (params->callback != NULL) { cm->cm_complete = mps_config_complete; return (mps_map_command(sc, cm)); } else { error = mps_wait_command(sc, cm, 0, CAN_SLEEP); if (error) { mps_dprint(sc, MPS_FAULT, "Error %d reading config page\n", error); mps_free_command(sc, cm); return (error); } mps_config_complete(sc, cm); } return (0); } int mps_write_config_page(struct mps_softc *sc, struct mps_config_params *params) { return (EINVAL); } static void mps_config_complete(struct mps_softc *sc, struct mps_command *cm) { MPI2_CONFIG_REPLY *reply; struct mps_config_params *params; MPS_FUNCTRACE(sc); params = 
cm->cm_complete_data; if (cm->cm_data != NULL) { bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap); } /* * XXX KDM need to do more error recovery? This results in the * device in question not getting probed. */ if ((cm->cm_flags & MPS_CM_FLAGS_ERROR_MASK) != 0) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } reply = (MPI2_CONFIG_REPLY *)cm->cm_reply; if (reply == NULL) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } params->status = reply->IOCStatus; if (params->hdr.Ext.ExtPageType != 0) { params->hdr.Ext.ExtPageType = reply->ExtPageType; params->hdr.Ext.ExtPageLength = reply->ExtPageLength; } else { params->hdr.Struct.PageType = reply->Header.PageType; params->hdr.Struct.PageNumber = reply->Header.PageNumber; params->hdr.Struct.PageLength = reply->Header.PageLength; params->hdr.Struct.PageVersion = reply->Header.PageVersion; } done: mps_free_command(sc, cm); if (params->callback != NULL) params->callback(sc, params); return; } Index: stable/9/sys/dev/mrsas/mrsas.c =================================================================== --- stable/9/sys/dev/mrsas/mrsas.c (revision 273911) +++ stable/9/sys/dev/mrsas/mrsas.c (revision 273912) @@ -1,3672 +1,3672 @@ /* * Copyright (c) 2014, LSI Corp. * All rights reserved. * Author: Marian Choy * Support: freebsdraid@lsi.com * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * 3. 
Neither the name of the nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation * are those of the authors and should not be interpreted as representing * official policies,either expressed or implied, of the FreeBSD Project. 
* * Send feedback to: * Mail to: LSI Corporation, 1621 Barber Lane, Milpitas, CA 95035 * ATTN: MegaRaid FreeBSD * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* * Function prototypes */ static d_open_t mrsas_open; static d_close_t mrsas_close; static d_read_t mrsas_read; static d_write_t mrsas_write; static d_ioctl_t mrsas_ioctl; static struct mrsas_ident *mrsas_find_ident(device_t); static void mrsas_shutdown_ctlr(struct mrsas_softc *sc, u_int32_t opcode); static void mrsas_flush_cache(struct mrsas_softc *sc); static void mrsas_reset_reply_desc(struct mrsas_softc *sc); static void mrsas_ocr_thread(void *arg); static int mrsas_get_map_info(struct mrsas_softc *sc); static int mrsas_get_ld_map_info(struct mrsas_softc *sc); static int mrsas_sync_map_info(struct mrsas_softc *sc); static int mrsas_get_pd_list(struct mrsas_softc *sc); static int mrsas_get_ld_list(struct mrsas_softc *sc); static int mrsas_setup_irq(struct mrsas_softc *sc); static int mrsas_alloc_mem(struct mrsas_softc *sc); static int mrsas_init_fw(struct mrsas_softc *sc); static int mrsas_setup_raidmap(struct mrsas_softc *sc); static int mrsas_complete_cmd(struct mrsas_softc *sc); static int mrsas_clear_intr(struct mrsas_softc *sc); static int mrsas_get_ctrl_info(struct mrsas_softc *sc, struct mrsas_ctrl_info *ctrl_info); static int mrsas_issue_blocked_abort_cmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd_to_abort); u_int32_t mrsas_read_reg(struct mrsas_softc *sc, int offset); u_int8_t mrsas_build_mptmfi_passthru(struct mrsas_softc *sc, struct mrsas_mfi_cmd *mfi_cmd); int mrsas_transition_to_ready(struct mrsas_softc *sc, int ocr); int mrsas_init_adapter(struct mrsas_softc *sc); int mrsas_alloc_mpt_cmds(struct mrsas_softc *sc); int mrsas_alloc_ioc_cmd(struct mrsas_softc *sc); int mrsas_alloc_ctlr_info_cmd(struct mrsas_softc *sc); int mrsas_ioc_init(struct mrsas_softc *sc); int mrsas_bus_scan(struct mrsas_softc *sc); int 
mrsas_issue_dcmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd); int mrsas_issue_polled(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd); int mrsas_reset_ctrl(struct mrsas_softc *sc); int mrsas_wait_for_outstanding(struct mrsas_softc *sc); int mrsas_issue_blocked_cmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd); int mrsas_alloc_tmp_dcmd(struct mrsas_softc *sc, struct mrsas_tmp_dcmd *tcmd, int size); void mrsas_release_mfi_cmd(struct mrsas_mfi_cmd *cmd); void mrsas_wakeup(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd); void mrsas_complete_aen(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd); void mrsas_complete_abort(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd); void mrsas_disable_intr(struct mrsas_softc *sc); void mrsas_enable_intr(struct mrsas_softc *sc); void mrsas_free_ioc_cmd(struct mrsas_softc *sc); void mrsas_free_mem(struct mrsas_softc *sc); void mrsas_free_tmp_dcmd(struct mrsas_tmp_dcmd *tmp); void mrsas_isr(void *arg); void mrsas_teardown_intr(struct mrsas_softc *sc); void mrsas_addr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error); void mrsas_kill_hba (struct mrsas_softc *sc); void mrsas_aen_handler(struct mrsas_softc *sc); void mrsas_write_reg(struct mrsas_softc *sc, int offset, u_int32_t value); void mrsas_fire_cmd(struct mrsas_softc *sc, u_int32_t req_desc_lo, u_int32_t req_desc_hi); void mrsas_free_ctlr_info_cmd(struct mrsas_softc *sc); void mrsas_complete_mptmfi_passthru(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd, u_int8_t status); void mrsas_map_mpt_cmd_status(struct mrsas_mpt_cmd *cmd, u_int8_t status, u_int8_t extStatus); struct mrsas_mfi_cmd* mrsas_get_mfi_cmd(struct mrsas_softc *sc); MRSAS_REQUEST_DESCRIPTOR_UNION * mrsas_build_mpt_cmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd); extern int mrsas_cam_attach(struct mrsas_softc *sc); extern void mrsas_cam_detach(struct mrsas_softc *sc); extern void mrsas_cmd_done(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd); extern void 
mrsas_free_frame(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd); extern int mrsas_alloc_mfi_cmds(struct mrsas_softc *sc); extern void mrsas_release_mpt_cmd(struct mrsas_mpt_cmd *cmd); extern struct mrsas_mpt_cmd *mrsas_get_mpt_cmd(struct mrsas_softc *sc); extern int mrsas_passthru(struct mrsas_softc *sc, void *arg); extern uint8_t MR_ValidateMapInfo(struct mrsas_softc *sc); extern u_int16_t MR_GetLDTgtId(u_int32_t ld, MR_FW_RAID_MAP_ALL *map); extern MR_LD_RAID *MR_LdRaidGet(u_int32_t ld, MR_FW_RAID_MAP_ALL *map); extern void mrsas_xpt_freeze(struct mrsas_softc *sc); extern void mrsas_xpt_release(struct mrsas_softc *sc); extern MRSAS_REQUEST_DESCRIPTOR_UNION *mrsas_get_request_desc(struct mrsas_softc *sc, u_int16_t index); extern int mrsas_bus_scan_sim(struct mrsas_softc *sc, struct cam_sim *sim); static int mrsas_alloc_evt_log_info_cmd(struct mrsas_softc *sc); static void mrsas_free_evt_log_info_cmd(struct mrsas_softc *sc); SYSCTL_NODE(_hw, OID_AUTO, mrsas, CTLFLAG_RD, 0, "MRSAS Driver Parameters"); /** * PCI device struct and table * */ typedef struct mrsas_ident { uint16_t vendor; uint16_t device; uint16_t subvendor; uint16_t subdevice; const char *desc; } MRSAS_CTLR_ID; MRSAS_CTLR_ID device_table[] = { {0x1000, MRSAS_TBOLT, 0xffff, 0xffff, "LSI Thunderbolt SAS Controller"}, {0x1000, MRSAS_INVADER, 0xffff, 0xffff, "LSI Invader SAS Controller"}, {0x1000, MRSAS_FURY, 0xffff, 0xffff, "LSI Fury SAS Controller"}, {0, 0, 0, 0, NULL} }; /** * Character device entry points * */ static struct cdevsw mrsas_cdevsw = { .d_version = D_VERSION, .d_open = mrsas_open, .d_close = mrsas_close, .d_read = mrsas_read, .d_write = mrsas_write, .d_ioctl = mrsas_ioctl, .d_name = "mrsas", }; MALLOC_DEFINE(M_MRSAS, "mrsasbuf", "Buffers for the MRSAS driver"); /** * In the cdevsw routines, we find our softc by using the si_drv1 member * of struct cdev. We set this variable to point to our softc in our * attach routine when we create the /dev entry. 
*/
int mrsas_open(struct cdev *dev, int oflags, int devtype, d_thread_t *td)
{
    struct mrsas_softc *sc;

    /* The softc is fetched from the cdev but not used; open always succeeds. */
    sc = dev->si_drv1;
    return (0);
}

/*
 * mrsas_close: character-device close entry point.
 * Fetches the softc and returns 0 without doing any other work.
 */
int mrsas_close(struct cdev *dev, int fflag, int devtype, d_thread_t *td)
{
    struct mrsas_softc *sc;

    sc = dev->si_drv1;
    return (0);
}

/*
 * mrsas_read: character-device read entry point.
 * No data is transferred; the uio is left untouched and 0 is returned.
 */
int mrsas_read(struct cdev *dev, struct uio *uio, int ioflag)
{
    struct mrsas_softc *sc;

    sc = dev->si_drv1;
    return (0);
}

/*
 * mrsas_write: character-device write entry point.
 * No data is consumed; the uio is left untouched and 0 is returned.
 */
int mrsas_write(struct cdev *dev, struct uio *uio, int ioflag)
{
    struct mrsas_softc *sc;

    sc = dev->si_drv1;
    return (0);
}

/**
 * Register Read/Write Functions
 *
 */

/*
 * mrsas_write_reg: write a 32-bit value to the controller register at
 * byte offset 'offset', using the bus tag/handle stored in the softc.
 */
void mrsas_write_reg(struct mrsas_softc *sc, int offset, u_int32_t value)
{
    bus_space_tag_t bus_tag = sc->bus_tag;
    bus_space_handle_t bus_handle = sc->bus_handle;

    bus_space_write_4(bus_tag, bus_handle, offset, value);
}

/*
 * mrsas_read_reg: read and return the 32-bit controller register at byte
 * offset 'offset', using the bus tag/handle stored in the softc.
 */
u_int32_t mrsas_read_reg(struct mrsas_softc *sc, int offset)
{
    bus_space_tag_t bus_tag = sc->bus_tag;
    bus_space_handle_t bus_handle = sc->bus_handle;

    return((u_int32_t)bus_space_read_4(bus_tag, bus_handle, offset));
}

/**
 * Interrupt Disable/Enable/Clear Functions
 *
 */

/*
 * mrsas_disable_intr: write all-ones to the outbound interrupt mask
 * register, then read it back.  The read value is not used.
 */
void mrsas_disable_intr(struct mrsas_softc *sc)
{
    u_int32_t mask = 0xFFFFFFFF;
    u_int32_t status;

    mrsas_write_reg(sc, offsetof(mrsas_reg_set, outbound_intr_mask), mask);
    /* Dummy read to force pci flush */
    status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_mask));
}

/*
 * mrsas_enable_intr: write all-ones to the outbound interrupt status
 * register, then program the interrupt mask register with the complement
 * of MFI_FUSION_ENABLE_INTERRUPT_MASK.  Each write is followed by a
 * read-back whose value is not used.
 */
void mrsas_enable_intr(struct mrsas_softc *sc)
{
    u_int32_t mask = MFI_FUSION_ENABLE_INTERRUPT_MASK;
    u_int32_t status;

    mrsas_write_reg(sc, offsetof(mrsas_reg_set, outbound_intr_status), ~0);
    status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_status));

    mrsas_write_reg(sc, offsetof(mrsas_reg_set, outbound_intr_mask), ~mask);
    status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_mask));
}

/*
 * mrsas_clear_intr: classify (and, for state changes, acknowledge) a
 * pending interrupt.
 *
 * Returns 1 when the status register shows a FW state-change interrupt or
 * a reply interrupt, 0 when neither bit is set (not our interrupt).
 */
static int mrsas_clear_intr(struct mrsas_softc *sc)
{
    u_int32_t status, fw_status, fw_state;

    /* Read received interrupt */
    status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_status));

    /* If FW state change interrupt is received, write to it again to clear */
    if (status & MRSAS_FW_STATE_CHNG_INTERRUPT) {
        fw_status = mrsas_read_reg(sc, offsetof(mrsas_reg_set,
                                   outbound_scratch_pad));
        fw_state = fw_status & MFI_STATE_MASK;
        if (fw_state == MFI_STATE_FAULT) {
            device_printf(sc->mrsas_dev, "FW is in FAULT state!\n");
            /* Wake the OCR thread, if it is running, so it sees the fault. */
            if(sc->ocr_thread_active)
                wakeup(&sc->ocr_chan);
        }
        mrsas_write_reg(sc, offsetof(mrsas_reg_set, outbound_intr_status), status);
        /* Read back after the write; the value is discarded. */
        mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_intr_status));
        return(1);
    }

    /* Not our interrupt, so just return */
    if (!(status & MFI_FUSION_ENABLE_INTERRUPT_MASK))
        return(0);

    /* We got a reply interrupt */
    return(1);
}

/**
 * PCI Support Functions
 *
 */

/*
 * mrsas_find_ident: look the device up in device_table.
 *
 * Walks device_table until the entry whose vendor is 0.  An entry matches
 * when vendor and device IDs are equal and the sub-vendor / sub-device IDs
 * are either equal or 0xffff in the table (wildcard).  Returns the matching
 * entry, or NULL when none matches.
 */
static struct mrsas_ident * mrsas_find_ident(device_t dev)
{
    struct mrsas_ident *pci_device;

    for (pci_device=device_table; pci_device->vendor != 0; pci_device++)
    {
        if ((pci_device->vendor == pci_get_vendor(dev)) &&
            (pci_device->device == pci_get_device(dev)) &&
            ((pci_device->subvendor == pci_get_subvendor(dev)) ||
            (pci_device->subvendor == 0xffff)) &&
            ((pci_device->subdevice == pci_get_subdevice(dev)) ||
            (pci_device->subdevice == 0xffff)))
            return (pci_device);
    }
    return (NULL);
}

/*
 * mrsas_probe: PCI probe entry point.
 *
 * When the device is found in device_table, sets the device description,
 * prints the driver version banner once (guarded by a function-static
 * flag) and returns -30; otherwise returns ENXIO.
 */
static int mrsas_probe(device_t dev)
{
    static u_int8_t first_ctrl = 1;
    struct mrsas_ident *id;

    if ((id = mrsas_find_ident(dev)) != NULL) {
        if (first_ctrl) {
            printf("LSI MegaRAID SAS FreeBSD mrsas driver version: %s\n", MRSAS_VERSION);
            first_ctrl = 0;
        }
        device_set_desc(dev, id->desc);
        /* between BUS_PROBE_DEFAULT and BUS_PROBE_LOW_PRIORITY */
        return (-30);
    }
    return (ENXIO);
}

/**
 * mrsas_setup_sysctl:  setup sysctl values for mrsas
 * input:               Adapter instance soft state
 *
 * Setup sysctl entries for mrsas driver.  The device's own sysctl context
 * and tree are used when available; otherwise a node named after the unit
 * number is created under hw.mrsas in the softc's private context.
 */
static void mrsas_setup_sysctl(struct mrsas_softc *sc)
{
    struct sysctl_ctx_list *sysctl_ctx = NULL;
    struct sysctl_oid *sysctl_tree = NULL;
    char tmpstr[80], tmpstr2[80];

    /*
     * Setup the sysctl variable so the user can change the debug level
     * on the fly.
     */
    snprintf(tmpstr, sizeof(tmpstr), "MRSAS controller %d",
    device_get_unit(sc->mrsas_dev));
    snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mrsas_dev));

    sysctl_ctx = device_get_sysctl_ctx(sc->mrsas_dev);
    if (sysctl_ctx != NULL)
        sysctl_tree = device_get_sysctl_tree(sc->mrsas_dev);

    /* Fall back to a driver-private context and node. */
    if (sysctl_tree == NULL) {
        sysctl_ctx_init(&sc->sysctl_ctx);
        sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
            SYSCTL_STATIC_CHILDREN(_hw_mrsas), OID_AUTO, tmpstr2,
            CTLFLAG_RD, 0, tmpstr);
        if (sc->sysctl_tree == NULL)
             return;
        sysctl_ctx = &sc->sysctl_ctx;
        sysctl_tree = sc->sysctl_tree;
    }
    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "disable_ocr", CTLFLAG_RW, &sc->disableOnlineCtrlReset, 0,
        "Disable the use of OCR");

    SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "driver_version", CTLFLAG_RD, MRSAS_VERSION,
        strlen(MRSAS_VERSION), "driver version");

    SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "reset_count", CTLFLAG_RD,
        &sc->reset_count, 0, "number of ocr from start of the day");

    SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "fw_outstanding", CTLFLAG_RD,
-        &sc->fw_outstanding, 0, "FW outstanding commands");
+        &sc->fw_outstanding.val_rdonly, 0, "FW outstanding commands");

    SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "io_cmds_highwater", CTLFLAG_RD,
        &sc->io_cmds_highwater, 0, "Max FW outstanding commands");

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "mrsas_debug", CTLFLAG_RW, &sc->mrsas_debug, 0,
        "Driver debug level");

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "mrsas_io_timeout", CTLFLAG_RW, &sc->mrsas_io_timeout,
        0, "Driver IO timeout value in mili-second.");

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "mrsas_fw_fault_check_delay", CTLFLAG_RW,
        &sc->mrsas_fw_fault_check_delay,
        0, "FW fault check thread delay in seconds. ");

    SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
        OID_AUTO, "reset_in_progress", CTLFLAG_RD,
        &sc->reset_in_progress, 0, "ocr in progress status");

}

/**
 * mrsas_get_tunables:  get tunable parameters.
 * input:               Adapter instance soft state
 *
 * Get tunable parameters. This will help to debug driver at boot time.
 * Defaults are assigned first; the debug level is then overridden by the
 * global "hw.mrsas.debug_level" tunable and, after that, by the per-unit
 * "dev.mrsas.<unit>.debug_level" tunable, so the per-unit value wins.
 */
static void mrsas_get_tunables(struct mrsas_softc *sc)
{
    char tmpstr[80];

    /* XXX default to some debugging for now */
    sc->mrsas_debug = MRSAS_FAULT;
    sc->mrsas_io_timeout = MRSAS_IO_TIMEOUT;
    sc->mrsas_fw_fault_check_delay = 1;
    sc->reset_count = 0;
    sc->reset_in_progress = 0;

    /*
     * Grab the global variables.
     */
    TUNABLE_INT_FETCH("hw.mrsas.debug_level", &sc->mrsas_debug);

    /* Grab the unit-instance variables */
    snprintf(tmpstr, sizeof(tmpstr), "dev.mrsas.%d.debug_level",
        device_get_unit(sc->mrsas_dev));
    TUNABLE_INT_FETCH(tmpstr, &sc->mrsas_debug);
}

/**
 * mrsas_alloc_evt_log_info cmd: Allocates memory to get event log information.
 *                               Used to get sequence number at driver load time.
 * input:                        Adapter soft state
 *
 * Allocates DMAable memory for the event log info internal command.
*/ int mrsas_alloc_evt_log_info_cmd(struct mrsas_softc *sc) { int el_info_size; /* Allocate get event log info command */ el_info_size = sizeof(struct mrsas_evt_log_info); if (bus_dma_tag_create( sc->mrsas_parent_tag, // parent 1, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg el_info_size, // maxsize 1, // msegments el_info_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->el_info_tag)) { device_printf(sc->mrsas_dev, "Cannot allocate event log info tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->el_info_tag, (void **)&sc->el_info_mem, BUS_DMA_NOWAIT, &sc->el_info_dmamap)) { device_printf(sc->mrsas_dev, "Cannot allocate event log info cmd mem\n"); return (ENOMEM); } if (bus_dmamap_load(sc->el_info_tag, sc->el_info_dmamap, sc->el_info_mem, el_info_size, mrsas_addr_cb, &sc->el_info_phys_addr, BUS_DMA_NOWAIT)) { device_printf(sc->mrsas_dev, "Cannot load event log info cmd mem\n"); return (ENOMEM); } memset(sc->el_info_mem, 0, el_info_size); return (0); } /** * mrsas_free_evt_info_cmd: Free memory for Event log info command * input: Adapter soft state * * Deallocates memory for the event log info internal command. */ void mrsas_free_evt_log_info_cmd(struct mrsas_softc *sc) { if (sc->el_info_phys_addr) bus_dmamap_unload(sc->el_info_tag, sc->el_info_dmamap); if (sc->el_info_mem != NULL) bus_dmamem_free(sc->el_info_tag, sc->el_info_mem, sc->el_info_dmamap); if (sc->el_info_tag != NULL) bus_dma_tag_destroy(sc->el_info_tag); } /** * mrsas_get_seq_num: Get latest event sequence number * @sc: Adapter soft state * @eli: Firmware event log sequence number information. * Firmware maintains a log of all events in a non-volatile area. * Driver get the sequence number using DCMD * "MR_DCMD_CTRL_EVENT_GET_INFO" at driver load time. 
*/ static int mrsas_get_seq_num(struct mrsas_softc *sc, struct mrsas_evt_log_info *eli) { struct mrsas_mfi_cmd *cmd; struct mrsas_dcmd_frame *dcmd; cmd = mrsas_get_mfi_cmd(sc); if (!cmd) { device_printf(sc->mrsas_dev, "Failed to get a free cmd\n"); return -ENOMEM; } dcmd = &cmd->frame->dcmd; if (mrsas_alloc_evt_log_info_cmd(sc) != SUCCESS) { device_printf(sc->mrsas_dev, "Cannot allocate evt log info cmd\n"); mrsas_release_mfi_cmd(cmd); return -ENOMEM; } memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0x0; dcmd->sge_count = 1; dcmd->flags = MFI_FRAME_DIR_READ; dcmd->timeout = 0; dcmd->pad_0 = 0; dcmd->data_xfer_len = sizeof(struct mrsas_evt_log_info); dcmd->opcode = MR_DCMD_CTRL_EVENT_GET_INFO; dcmd->sgl.sge32[0].phys_addr = sc->el_info_phys_addr; dcmd->sgl.sge32[0].length = sizeof(struct mrsas_evt_log_info); mrsas_issue_blocked_cmd(sc, cmd); /* * Copy the data back into callers buffer */ memcpy(eli, sc->el_info_mem, sizeof(struct mrsas_evt_log_info)); mrsas_free_evt_log_info_cmd(sc); mrsas_release_mfi_cmd(cmd); return 0; } /** * mrsas_register_aen: Register for asynchronous event notification * @sc: Adapter soft state * @seq_num: Starting sequence number * @class_locale: Class of the event * This function subscribes for events beyond the @seq_num * and type @class_locale. * * */ static int mrsas_register_aen(struct mrsas_softc *sc, u_int32_t seq_num, u_int32_t class_locale_word) { int ret_val; struct mrsas_mfi_cmd *cmd; struct mrsas_dcmd_frame *dcmd; union mrsas_evt_class_locale curr_aen; union mrsas_evt_class_locale prev_aen; /* * If there an AEN pending already (aen_cmd), check if the * class_locale of that pending AEN is inclusive of the new * AEN request we currently have. If it is, then we don't have * to do anything. In other words, whichever events the current * AEN request is subscribing to, have already been subscribed * to. 
* If the old_cmd is _not_ inclusive, then we have to abort * that command, form a class_locale that is superset of both * old and current and re-issue to the FW * */ curr_aen.word = class_locale_word; if (sc->aen_cmd) { prev_aen.word = sc->aen_cmd->frame->dcmd.mbox.w[1]; /* * A class whose enum value is smaller is inclusive of all * higher values. If a PROGRESS (= -1) was previously * registered, then a new registration requests for higher * classes need not be sent to FW. They are automatically * included. * Locale numbers don't have such hierarchy. They are bitmap values */ if ((prev_aen.members.class <= curr_aen.members.class) && !((prev_aen.members.locale & curr_aen.members.locale) ^ curr_aen.members.locale)) { /* * Previously issued event registration includes * current request. Nothing to do. */ return 0; } else { curr_aen.members.locale |= prev_aen.members.locale; if (prev_aen.members.class < curr_aen.members.class) curr_aen.members.class = prev_aen.members.class; sc->aen_cmd->abort_aen = 1; ret_val = mrsas_issue_blocked_abort_cmd(sc, sc->aen_cmd); if (ret_val) { printf("mrsas: Failed to abort " "previous AEN command\n"); return ret_val; } } } cmd = mrsas_get_mfi_cmd(sc); if (!cmd) return -ENOMEM; dcmd = &cmd->frame->dcmd; memset(sc->evt_detail_mem, 0, sizeof(struct mrsas_evt_detail)); /* * Prepare DCMD for aen registration */ memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0x0; dcmd->sge_count = 1; dcmd->flags = MFI_FRAME_DIR_READ; dcmd->timeout = 0; dcmd->pad_0 = 0; dcmd->data_xfer_len = sizeof(struct mrsas_evt_detail); dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT; dcmd->mbox.w[0] = seq_num; sc->last_seq_num = seq_num; dcmd->mbox.w[1] = curr_aen.word; dcmd->sgl.sge32[0].phys_addr = (u_int32_t) sc->evt_detail_phys_addr; dcmd->sgl.sge32[0].length = sizeof(struct mrsas_evt_detail); if (sc->aen_cmd != NULL) { mrsas_release_mfi_cmd(cmd); return 0; } /* * Store reference to the cmd used to register for AEN. 
When an * application wants us to register for AEN, we have to abort this * cmd and re-register with a new EVENT LOCALE supplied by that app */ sc->aen_cmd = cmd; /* Issue the aen registration frame */ if (mrsas_issue_dcmd(sc, cmd)){ device_printf(sc->mrsas_dev, "Cannot issue AEN DCMD command.\n"); return(1); } return 0; } /** * mrsas_start_aen - Subscribes to AEN during driver load time * @instance: Adapter soft state */ static int mrsas_start_aen(struct mrsas_softc *sc) { struct mrsas_evt_log_info eli; union mrsas_evt_class_locale class_locale; /* Get the latest sequence number from FW*/ memset(&eli, 0, sizeof(eli)); if (mrsas_get_seq_num(sc, &eli)) return -1; /* Register AEN with FW for latest sequence number plus 1*/ class_locale.members.reserved = 0; class_locale.members.locale = MR_EVT_LOCALE_ALL; class_locale.members.class = MR_EVT_CLASS_DEBUG; return mrsas_register_aen(sc, eli.newest_seq_num + 1, class_locale.word); } /** * mrsas_attach: PCI entry point * input: device struct pointer * * Performs setup of PCI and registers, initializes mutexes and * linked lists, registers interrupts and CAM, and initializes * the adapter/controller to its proper state. */ static int mrsas_attach(device_t dev) { struct mrsas_softc *sc = device_get_softc(dev); uint32_t cmd, bar, error; /* Look up our softc and initialize its fields. */ sc->mrsas_dev = dev; sc->device_id = pci_get_device(dev); mrsas_get_tunables(sc); /* * Set up PCI and registers */ cmd = pci_read_config(dev, PCIR_COMMAND, 2); if ( (cmd & PCIM_CMD_PORTEN) == 0) { return (ENXIO); } /* Force the busmaster enable bit on. 
*/ cmd |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, cmd, 2); //bar = pci_read_config(dev, MRSAS_PCI_BAR0, 4); bar = pci_read_config(dev, MRSAS_PCI_BAR1, 4); sc->reg_res_id = MRSAS_PCI_BAR1; /* BAR1 offset */ if ((sc->reg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &(sc->reg_res_id), 0, ~0, 1, RF_ACTIVE)) == NULL) { device_printf(dev, "Cannot allocate PCI registers\n"); goto attach_fail; } sc->bus_tag = rman_get_bustag(sc->reg_res); sc->bus_handle = rman_get_bushandle(sc->reg_res); /* Intialize mutexes */ mtx_init(&sc->sim_lock, "mrsas_sim_lock", NULL, MTX_DEF); mtx_init(&sc->pci_lock, "mrsas_pci_lock", NULL, MTX_DEF); mtx_init(&sc->io_lock, "mrsas_io_lock", NULL, MTX_DEF); mtx_init(&sc->aen_lock, "mrsas_aen_lock", NULL, MTX_DEF); mtx_init(&sc->ioctl_lock, "mrsas_ioctl_lock", NULL, MTX_SPIN); mtx_init(&sc->mpt_cmd_pool_lock, "mrsas_mpt_cmd_pool_lock", NULL, MTX_DEF); mtx_init(&sc->mfi_cmd_pool_lock, "mrsas_mfi_cmd_pool_lock", NULL, MTX_DEF); mtx_init(&sc->raidmap_lock, "mrsas_raidmap_lock", NULL, MTX_DEF); /* Intialize linked list */ TAILQ_INIT(&sc->mrsas_mpt_cmd_list_head); TAILQ_INIT(&sc->mrsas_mfi_cmd_list_head); atomic_set(&sc->fw_outstanding,0); sc->io_cmds_highwater = 0; /* Create a /dev entry for this device. 
*/ sc->mrsas_cdev = make_dev(&mrsas_cdevsw, device_get_unit(dev), UID_ROOT, GID_OPERATOR, (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP), "mrsas%u", device_get_unit(dev)); if (sc->mrsas_cdev) sc->mrsas_cdev->si_drv1 = sc; sc->adprecovery = MRSAS_HBA_OPERATIONAL; sc->UnevenSpanSupport = 0; /* Initialize Firmware */ if (mrsas_init_fw(sc) != SUCCESS) { goto attach_fail_fw; } /* Register SCSI mid-layer */ if ((mrsas_cam_attach(sc) != SUCCESS)) { goto attach_fail_cam; } /* Register IRQs */ if (mrsas_setup_irq(sc) != SUCCESS) { goto attach_fail_irq; } /* Enable Interrupts */ mrsas_enable_intr(sc); error = mrsas_kproc_create(mrsas_ocr_thread, sc, &sc->ocr_thread, 0, 0, "mrsas_ocr%d", device_get_unit(sc->mrsas_dev)); if (error) { printf("Error %d starting rescan thread\n", error); goto attach_fail_irq; } mrsas_setup_sysctl(sc); /* Initiate AEN (Asynchronous Event Notification)*/ if (mrsas_start_aen(sc)) { printf("Error: start aen failed\n"); goto fail_start_aen; } return (0); fail_start_aen: attach_fail_irq: mrsas_teardown_intr(sc); attach_fail_cam: mrsas_cam_detach(sc); attach_fail_fw: //attach_fail_raidmap: mrsas_free_mem(sc); mtx_destroy(&sc->sim_lock); mtx_destroy(&sc->aen_lock); mtx_destroy(&sc->pci_lock); mtx_destroy(&sc->io_lock); mtx_destroy(&sc->ioctl_lock); mtx_destroy(&sc->mpt_cmd_pool_lock); mtx_destroy(&sc->mfi_cmd_pool_lock); mtx_destroy(&sc->raidmap_lock); attach_fail: destroy_dev(sc->mrsas_cdev); if (sc->reg_res){ bus_release_resource(sc->mrsas_dev, SYS_RES_MEMORY, sc->reg_res_id, sc->reg_res); } return (ENXIO); } /** * mrsas_detach: De-allocates and teardown resources * input: device struct pointer * * This function is the entry point for device disconnect and detach. It * performs memory de-allocations, shutdown of the controller and various * teardown and destroy resource functions. 
*/
static int mrsas_detach(device_t dev)
{
    struct mrsas_softc *sc = device_get_softc(dev);
    int waited;

    /* Flag the removal and kick the OCR thread if it is running. */
    sc->remove_in_progress = 1;
    if (sc->ocr_thread_active)
        wakeup(&sc->ocr_chan);

    /* Wait, one pause of hz ticks at a time, for a running reset to end. */
    for (waited = 1; sc->reset_in_progress; waited++) {
        if ((waited % MRSAS_RESET_NOTICE_INTERVAL) == 0) {
            mrsas_dprint(sc, MRSAS_INFO,
                "[%2d]waiting for ocr to be finished\n",waited);
        }
        pause("mr_shutdown", hz);
    }

    /* Then wait the same way for the OCR thread itself to exit. */
    for (waited = 1; sc->ocr_thread_active; waited++) {
        if ((waited % MRSAS_RESET_NOTICE_INTERVAL) == 0) {
            mrsas_dprint(sc, MRSAS_INFO,
                "[%2d]waiting for "
                "mrsas_ocr thread to quit ocr %d\n",waited,
                sc->ocr_thread_active);
        }
        pause("mr_shutdown", hz);
    }

    /* Quiesce the controller before tearing the driver state down. */
    mrsas_flush_cache(sc);
    mrsas_shutdown_ctlr(sc, MR_DCMD_CTRL_SHUTDOWN);
    mrsas_disable_intr(sc);

    mrsas_cam_detach(sc);
    mrsas_teardown_intr(sc);
    mrsas_free_mem(sc);

    mtx_destroy(&sc->sim_lock);
    mtx_destroy(&sc->aen_lock);
    mtx_destroy(&sc->pci_lock);
    mtx_destroy(&sc->io_lock);
    mtx_destroy(&sc->ioctl_lock);
    mtx_destroy(&sc->mpt_cmd_pool_lock);
    mtx_destroy(&sc->mfi_cmd_pool_lock);
    mtx_destroy(&sc->raidmap_lock);

    if (sc->reg_res != NULL) {
        bus_release_resource(sc->mrsas_dev, SYS_RES_MEMORY,
            sc->reg_res_id, sc->reg_res);
    }
    destroy_dev(sc->mrsas_cdev);

    /* The private sysctl context only exists if the fallback node was made. */
    if (sc->sysctl_tree != NULL)
        sysctl_ctx_free(&sc->sysctl_ctx);

    return (0);
}

/**
 * mrsas_free_mem:          Frees allocated memory
 * input:                   Adapter instance soft state
 *
 * This function is called from mrsas_detach() to free previously allocated
 * memory.
*/ void mrsas_free_mem(struct mrsas_softc *sc) { int i; u_int32_t max_cmd; struct mrsas_mfi_cmd *mfi_cmd; struct mrsas_mpt_cmd *mpt_cmd; /* * Free RAID map memory */ for (i=0; i < 2; i++) { if (sc->raidmap_phys_addr[i]) bus_dmamap_unload(sc->raidmap_tag[i], sc->raidmap_dmamap[i]); if (sc->raidmap_mem[i] != NULL) bus_dmamem_free(sc->raidmap_tag[i], sc->raidmap_mem[i], sc->raidmap_dmamap[i]); if (sc->raidmap_tag[i] != NULL) bus_dma_tag_destroy(sc->raidmap_tag[i]); } /* * Free version buffer memroy */ if (sc->verbuf_phys_addr) bus_dmamap_unload(sc->verbuf_tag, sc->verbuf_dmamap); if (sc->verbuf_mem != NULL) bus_dmamem_free(sc->verbuf_tag, sc->verbuf_mem, sc->verbuf_dmamap); if (sc->verbuf_tag != NULL) bus_dma_tag_destroy(sc->verbuf_tag); /* * Free sense buffer memory */ if (sc->sense_phys_addr) bus_dmamap_unload(sc->sense_tag, sc->sense_dmamap); if (sc->sense_mem != NULL) bus_dmamem_free(sc->sense_tag, sc->sense_mem, sc->sense_dmamap); if (sc->sense_tag != NULL) bus_dma_tag_destroy(sc->sense_tag); /* * Free chain frame memory */ if (sc->chain_frame_phys_addr) bus_dmamap_unload(sc->chain_frame_tag, sc->chain_frame_dmamap); if (sc->chain_frame_mem != NULL) bus_dmamem_free(sc->chain_frame_tag, sc->chain_frame_mem, sc->chain_frame_dmamap); if (sc->chain_frame_tag != NULL) bus_dma_tag_destroy(sc->chain_frame_tag); /* * Free IO Request memory */ if (sc->io_request_phys_addr) bus_dmamap_unload(sc->io_request_tag, sc->io_request_dmamap); if (sc->io_request_mem != NULL) bus_dmamem_free(sc->io_request_tag, sc->io_request_mem, sc->io_request_dmamap); if (sc->io_request_tag != NULL) bus_dma_tag_destroy(sc->io_request_tag); /* * Free Reply Descriptor memory */ if (sc->reply_desc_phys_addr) bus_dmamap_unload(sc->reply_desc_tag, sc->reply_desc_dmamap); if (sc->reply_desc_mem != NULL) bus_dmamem_free(sc->reply_desc_tag, sc->reply_desc_mem, sc->reply_desc_dmamap); if (sc->reply_desc_tag != NULL) bus_dma_tag_destroy(sc->reply_desc_tag); /* * Free event detail memory */ if 
(sc->evt_detail_phys_addr) bus_dmamap_unload(sc->evt_detail_tag, sc->evt_detail_dmamap); if (sc->evt_detail_mem != NULL) bus_dmamem_free(sc->evt_detail_tag, sc->evt_detail_mem, sc->evt_detail_dmamap); if (sc->evt_detail_tag != NULL) bus_dma_tag_destroy(sc->evt_detail_tag); /* * Free MFI frames */ if (sc->mfi_cmd_list) { for (i = 0; i < MRSAS_MAX_MFI_CMDS; i++) { mfi_cmd = sc->mfi_cmd_list[i]; mrsas_free_frame(sc, mfi_cmd); } } if (sc->mficmd_frame_tag != NULL) bus_dma_tag_destroy(sc->mficmd_frame_tag); /* * Free MPT internal command list */ max_cmd = sc->max_fw_cmds; if (sc->mpt_cmd_list) { for (i = 0; i < max_cmd; i++) { mpt_cmd = sc->mpt_cmd_list[i]; bus_dmamap_destroy(sc->data_tag, mpt_cmd->data_dmamap); free(sc->mpt_cmd_list[i], M_MRSAS); } free(sc->mpt_cmd_list, M_MRSAS); sc->mpt_cmd_list = NULL; } /* * Free MFI internal command list */ if (sc->mfi_cmd_list) { for (i = 0; i < MRSAS_MAX_MFI_CMDS; i++) { free(sc->mfi_cmd_list[i], M_MRSAS); } free(sc->mfi_cmd_list, M_MRSAS); sc->mfi_cmd_list = NULL; } /* * Free request descriptor memory */ free(sc->req_desc, M_MRSAS); sc->req_desc = NULL; /* * Destroy parent tag */ if (sc->mrsas_parent_tag != NULL) bus_dma_tag_destroy(sc->mrsas_parent_tag); } /** * mrsas_teardown_intr: Teardown interrupt * input: Adapter instance soft state * * This function is called from mrsas_detach() to teardown and release * bus interrupt resourse. */ void mrsas_teardown_intr(struct mrsas_softc *sc) { if (sc->intr_handle) bus_teardown_intr(sc->mrsas_dev, sc->mrsas_irq, sc->intr_handle); if (sc->mrsas_irq != NULL) bus_release_resource(sc->mrsas_dev, SYS_RES_IRQ, sc->irq_id, sc->mrsas_irq); sc->intr_handle = NULL; } /** * mrsas_suspend: Suspend entry point * input: Device struct pointer * * This function is the entry point for system suspend from the OS. 
*/ static int mrsas_suspend(device_t dev) { struct mrsas_softc *sc; sc = device_get_softc(dev); return (0); } /** * mrsas_resume: Resume entry point * input: Device struct pointer * * This function is the entry point for system resume from the OS. */ static int mrsas_resume(device_t dev) { struct mrsas_softc *sc; sc = device_get_softc(dev); return (0); } /** * mrsas_ioctl: IOCtl commands entry point. * * This function is the entry point for IOCtls from the OS. It calls the * appropriate function for processing depending on the command received. */ static int mrsas_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, d_thread_t *td) { struct mrsas_softc *sc; int ret = 0, i = 0; sc = (struct mrsas_softc *)(dev->si_drv1); if (sc->remove_in_progress) { mrsas_dprint(sc, MRSAS_INFO, "Driver remove or shutdown called.\n"); return ENOENT; } mtx_lock_spin(&sc->ioctl_lock); if (!sc->reset_in_progress) { mtx_unlock_spin(&sc->ioctl_lock); goto do_ioctl; } /* Release ioclt_lock, and wait for OCR * to be finished */ mtx_unlock_spin(&sc->ioctl_lock); while(sc->reset_in_progress){ i++; if (!(i % MRSAS_RESET_NOTICE_INTERVAL)) { mrsas_dprint(sc, MRSAS_INFO, "[%2d]waiting for " "OCR to be finished %d\n",i, sc->ocr_thread_active); } pause("mr_ioctl", hz); } do_ioctl: switch (cmd) { case MRSAS_IOC_FIRMWARE_PASS_THROUGH: ret = mrsas_passthru(sc, (void *)arg); break; case MRSAS_IOC_SCAN_BUS: ret = mrsas_bus_scan(sc); break; } return (ret); } /** * mrsas_setup_irq: Set up interrupt. * input: Adapter instance soft state * * This function sets up interrupts as a bus resource, with flags indicating * resource permitting contemporaneous sharing and for resource to activate * atomically. 
*/ static int mrsas_setup_irq(struct mrsas_softc *sc) { sc->irq_id = 0; sc->mrsas_irq = bus_alloc_resource_any(sc->mrsas_dev, SYS_RES_IRQ, &sc->irq_id, RF_SHAREABLE | RF_ACTIVE); if (sc->mrsas_irq == NULL){ device_printf(sc->mrsas_dev, "Cannot allocate interrupt\n"); return (FAIL); } if (bus_setup_intr(sc->mrsas_dev, sc->mrsas_irq, INTR_MPSAFE|INTR_TYPE_CAM, NULL, mrsas_isr, sc, &sc->intr_handle)) { device_printf(sc->mrsas_dev, "Cannot set up interrupt\n"); return (FAIL); } return (0); } /* * mrsas_isr: ISR entry point * input: argument pointer * * This function is the interrupt service routine entry point. There * are two types of interrupts, state change interrupt and response * interrupt. If an interrupt is not ours, we just return. */ void mrsas_isr(void *arg) { struct mrsas_softc *sc = (struct mrsas_softc *)arg; int status; /* Clear FW state change interrupt */ status = mrsas_clear_intr(sc); /* Not our interrupt */ if (!status) return; /* If we are resetting, bail */ if (test_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags)) { printf(" Entered into ISR when OCR is going active. \n"); mrsas_clear_intr(sc); return; } /* Process for reply request and clear response interrupt */ if (mrsas_complete_cmd(sc) != SUCCESS) mrsas_clear_intr(sc); return; } /* * mrsas_complete_cmd: Process reply request * input: Adapter instance soft state * * This function is called from mrsas_isr() to process reply request and * clear response interrupt. Processing of the reply request entails * walking through the reply descriptor array for the command request * pended from Firmware. We look at the Function field to determine * the command type and perform the appropriate action. Before we * return, we clear the response interrupt. 
*/
static int mrsas_complete_cmd(struct mrsas_softc *sc)
{
    Mpi2ReplyDescriptorsUnion_t *desc;
    MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR *reply_desc;
    MRSAS_RAID_SCSI_IO_REQUEST  *scsi_io_req;
    struct mrsas_mpt_cmd *cmd_mpt;
    struct mrsas_mfi_cmd *cmd_mfi;
    u_int8_t arm, reply_descript_type;
    u_int16_t smid, num_completed;
    u_int8_t status, extStatus;
    union desc_value desc_val;
    PLD_LOAD_BALANCE_INFO lbinfo;
    u_int32_t device_id;
    int threshold_reply_count = 0;

    /*
     * Returns DONE when the adapter is in critical error or when no reply
     * was consumed, 0 after at least one reply has been processed.
     */

    /* If we have a hardware error, not need to continue */
    if (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR)
        return (DONE);

    /* Start at the descriptor where the previous pass stopped. */
    desc = sc->reply_desc_mem;
    desc += sc->last_reply_idx;

    reply_desc = (MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR *)desc;

    desc_val.word = desc->Words;
    num_completed = 0;

    reply_descript_type = reply_desc->ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;

    /* Find our reply descriptor for the command and process */
    while((desc_val.u.low != 0xFFFFFFFF) && (desc_val.u.high != 0xFFFFFFFF))
    {
        /* SMID is 1-based; the command list is indexed from 0. */
        smid = reply_desc->SMID;
        cmd_mpt = sc->mpt_cmd_list[smid -1];
        scsi_io_req = (MRSAS_RAID_SCSI_IO_REQUEST *)cmd_mpt->io_request;

        status = scsi_io_req->RaidContext.status;
        extStatus = scsi_io_req->RaidContext.exStatus;

        switch (scsi_io_req->Function)
        {
            case MPI2_FUNCTION_SCSI_IO_REQUEST :  /*Fast Path IO.*/
                device_id = cmd_mpt->ccb_ptr->ccb_h.target_id;
                lbinfo = &sc->load_balance_info[device_id];
                /* Drop the pending count of the arm this IO was sent to. */
                if (cmd_mpt->load_balance == MRSAS_LOAD_BALANCE_FLAG) {
                    arm = lbinfo->raid1DevHandle[0] == scsi_io_req->DevHandle ? 0 : 1;
                    atomic_dec(&lbinfo->scsi_pending_cmds[arm]);
                    cmd_mpt->load_balance &= ~MRSAS_LOAD_BALANCE_FLAG;
                }
                //Fall thru and complete IO
            case MRSAS_MPI2_FUNCTION_LD_IO_REQUEST:
                mrsas_map_mpt_cmd_status(cmd_mpt, status, extStatus);
                mrsas_cmd_done(sc, cmd_mpt);
                /* Reset the status fields of the request frame. */
                scsi_io_req->RaidContext.status = 0;
                scsi_io_req->RaidContext.exStatus = 0;
                atomic_dec(&sc->fw_outstanding);
                break;
            case MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST: /*MFI command */
                cmd_mfi = sc->mfi_cmd_list[cmd_mpt->sync_cmd_idx];
                mrsas_complete_mptmfi_passthru(sc, cmd_mfi, status);
                cmd_mpt->flags = 0;
                mrsas_release_mpt_cmd(cmd_mpt);
                break;
        }

        /* Advance the consumer index, wrapping at the queue depth. */
        sc->last_reply_idx++;
        if (sc->last_reply_idx >= sc->reply_q_depth)
            sc->last_reply_idx = 0;

        desc->Words = ~((uint64_t)0x00); /* set it back to all 0xFFFFFFFFs */
        num_completed++;
        threshold_reply_count++;

        /* Get the next reply descriptor */
        if (!sc->last_reply_idx)
            desc = sc->reply_desc_mem;
        else
            desc++;

        reply_desc = (MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR *)desc;
        desc_val.word = desc->Words;

        reply_descript_type = reply_desc->ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;

        if(reply_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED)
            break;

        /*
         * Write to reply post index after completing threshold reply count
         * and still there are more replies in reply queue pending to be
         * completed.
         */
        if (threshold_reply_count >= THRESHOLD_REPLY_COUNT) {
            mrsas_write_reg(sc, offsetof(mrsas_reg_set, reply_post_host_index),
                            sc->last_reply_idx);
            threshold_reply_count = 0;
        }
    }

    /* No match, just return */
    if (num_completed == 0)
        return (DONE);

    /* Clear response interrupt */
    mrsas_write_reg(sc, offsetof(mrsas_reg_set, reply_post_host_index),sc->last_reply_idx);

    return(0);
}

/*
 * mrsas_map_mpt_cmd_status:  Map firmware command status to CAM status.
 * input:                     MPT command, firmware status, extended status
 *
 * This function is called from mrsas_complete_cmd(), for LD IO and FastPath IO.
 * It checks the command status and maps the appropriate CAM status for the CCB.
*/
void mrsas_map_mpt_cmd_status(struct mrsas_mpt_cmd *cmd, u_int8_t status, u_int8_t extStatus)
{
    u_int8_t *sense_buf;

    if (status == MFI_STAT_OK) {
        cmd->ccb_ptr->ccb_h.status = CAM_REQ_CMP;
    } else if (status == MFI_STAT_SCSI_IO_FAILED ||
               status == MFI_STAT_SCSI_DONE_WITH_ERROR) {
        cmd->ccb_ptr->ccb_h.status = CAM_SCSI_STATUS_ERROR;
        /* The address of an embedded member is never NULL. */
        sense_buf = (u_int8_t *)&cmd->ccb_ptr->csio.sense_data;
        /* For now just copy 18 bytes back */
        memcpy(sense_buf, cmd->sense, 18);
        cmd->ccb_ptr->csio.sense_len = 18;
        cmd->ccb_ptr->ccb_h.status |= CAM_AUTOSNS_VALID;
    } else if (status == MFI_STAT_LD_OFFLINE ||
               status == MFI_STAT_DEVICE_NOT_FOUND) {
        /* A non-zero LUN is reported as invalid LUN, LUN 0 as no device. */
        cmd->ccb_ptr->ccb_h.status |= cmd->ccb_ptr->ccb_h.target_lun ?
            CAM_LUN_INVALID : CAM_DEV_NOT_THERE;
    } else if (status == MFI_STAT_CONFIG_SEQ_MISMATCH) {
        /*
         * Send status to CAM layer to retry sending command without
         * decrementing retry counter.
         */
        cmd->ccb_ptr->ccb_h.status |= CAM_REQUEUE_REQ;
    } else {
        /* Anything else is logged and passed up as a generic error. */
        device_printf(cmd->sc->mrsas_dev, "FW cmd complete status %x\n", status);
        cmd->ccb_ptr->ccb_h.status = CAM_REQ_CMP_ERR;
        cmd->ccb_ptr->csio.scsi_status = status;
    }
}

/*
 * mrsas_alloc_mem:  Allocate DMAable memory.
 * input:            Adapter instance soft state
 *
 * This function creates the parent DMA tag and allocates DMAable memory.
 * DMA tag describes constraints of DMA mapping. Memory allocated is mapped
 * into Kernel virtual address. Callback argument is physical memory address.
*/ static int mrsas_alloc_mem(struct mrsas_softc *sc) { u_int32_t verbuf_size, io_req_size, reply_desc_size, sense_size, chain_frame_size, evt_detail_size; /* * Allocate parent DMA tag */ if (bus_dma_tag_create(NULL, /* parent */ 1, /* alignment */ 0, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MRSAS_MAX_IO_SIZE,/* maxsize */ MRSAS_MAX_SGL, /* nsegments */ MRSAS_MAX_IO_SIZE,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mrsas_parent_tag /* tag */ )) { device_printf(sc->mrsas_dev, "Cannot allocate parent DMA tag\n"); return(ENOMEM); } /* * Allocate for version buffer */ verbuf_size = MRSAS_MAX_NAME_LENGTH*(sizeof(bus_addr_t)); if (bus_dma_tag_create(sc->mrsas_parent_tag, // parent 1, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg verbuf_size, // maxsize 1, // msegments verbuf_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->verbuf_tag)) { device_printf(sc->mrsas_dev, "Cannot allocate verbuf DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->verbuf_tag, (void **)&sc->verbuf_mem, BUS_DMA_NOWAIT, &sc->verbuf_dmamap)) { device_printf(sc->mrsas_dev, "Cannot allocate verbuf memory\n"); return (ENOMEM); } bzero(sc->verbuf_mem, verbuf_size); if (bus_dmamap_load(sc->verbuf_tag, sc->verbuf_dmamap, sc->verbuf_mem, verbuf_size, mrsas_addr_cb, &sc->verbuf_phys_addr, BUS_DMA_NOWAIT)){ device_printf(sc->mrsas_dev, "Cannot load verbuf DMA map\n"); return(ENOMEM); } /* * Allocate IO Request Frames */ io_req_size = sc->io_frames_alloc_sz; if (bus_dma_tag_create( sc->mrsas_parent_tag, // parent 16, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg io_req_size, // maxsize 1, // msegments io_req_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->io_request_tag)) { 
device_printf(sc->mrsas_dev, "Cannot create IO request tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->io_request_tag, (void **)&sc->io_request_mem, BUS_DMA_NOWAIT, &sc->io_request_dmamap)) { device_printf(sc->mrsas_dev, "Cannot alloc IO request memory\n"); return (ENOMEM); } bzero(sc->io_request_mem, io_req_size); if (bus_dmamap_load(sc->io_request_tag, sc->io_request_dmamap, sc->io_request_mem, io_req_size, mrsas_addr_cb, &sc->io_request_phys_addr, BUS_DMA_NOWAIT)) { device_printf(sc->mrsas_dev, "Cannot load IO request memory\n"); return (ENOMEM); } /* * Allocate Chain Frames */ chain_frame_size = sc->chain_frames_alloc_sz; if (bus_dma_tag_create( sc->mrsas_parent_tag, // parent 4, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg chain_frame_size, // maxsize 1, // msegments chain_frame_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->chain_frame_tag)) { device_printf(sc->mrsas_dev, "Cannot create chain frame tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->chain_frame_tag, (void **)&sc->chain_frame_mem, BUS_DMA_NOWAIT, &sc->chain_frame_dmamap)) { device_printf(sc->mrsas_dev, "Cannot alloc chain frame memory\n"); return (ENOMEM); } bzero(sc->chain_frame_mem, chain_frame_size); if (bus_dmamap_load(sc->chain_frame_tag, sc->chain_frame_dmamap, sc->chain_frame_mem, chain_frame_size, mrsas_addr_cb, &sc->chain_frame_phys_addr, BUS_DMA_NOWAIT)) { device_printf(sc->mrsas_dev, "Cannot load chain frame memory\n"); return (ENOMEM); } /* * Allocate Reply Descriptor Array */ reply_desc_size = sc->reply_alloc_sz; if (bus_dma_tag_create( sc->mrsas_parent_tag, // parent 16, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg reply_desc_size, // maxsize 1, // msegments reply_desc_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->reply_desc_tag)) { 
device_printf(sc->mrsas_dev, "Cannot create reply descriptor tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->reply_desc_tag, (void **)&sc->reply_desc_mem, BUS_DMA_NOWAIT, &sc->reply_desc_dmamap)) { device_printf(sc->mrsas_dev, "Cannot alloc reply descriptor memory\n"); return (ENOMEM); } if (bus_dmamap_load(sc->reply_desc_tag, sc->reply_desc_dmamap, sc->reply_desc_mem, reply_desc_size, mrsas_addr_cb, &sc->reply_desc_phys_addr, BUS_DMA_NOWAIT)) { device_printf(sc->mrsas_dev, "Cannot load reply descriptor memory\n"); return (ENOMEM); } /* * Allocate Sense Buffer Array. Keep in lower 4GB */ sense_size = sc->max_fw_cmds * MRSAS_SENSE_LEN; if (bus_dma_tag_create(sc->mrsas_parent_tag, // parent 64, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg sense_size, // maxsize 1, // nsegments sense_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->sense_tag)) { device_printf(sc->mrsas_dev, "Cannot allocate sense buf tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->sense_tag, (void **)&sc->sense_mem, BUS_DMA_NOWAIT, &sc->sense_dmamap)) { device_printf(sc->mrsas_dev, "Cannot allocate sense buf memory\n"); return (ENOMEM); } if (bus_dmamap_load(sc->sense_tag, sc->sense_dmamap, sc->sense_mem, sense_size, mrsas_addr_cb, &sc->sense_phys_addr, BUS_DMA_NOWAIT)){ device_printf(sc->mrsas_dev, "Cannot load sense buf memory\n"); return (ENOMEM); } /* * Allocate for Event detail structure */ evt_detail_size = sizeof(struct mrsas_evt_detail); if (bus_dma_tag_create( sc->mrsas_parent_tag, // parent 1, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg evt_detail_size, // maxsize 1, // msegments evt_detail_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->evt_detail_tag)) { device_printf(sc->mrsas_dev, "Cannot create Event detail tag\n"); return (ENOMEM); } if 
(bus_dmamem_alloc(sc->evt_detail_tag, (void **)&sc->evt_detail_mem, BUS_DMA_NOWAIT, &sc->evt_detail_dmamap)) { device_printf(sc->mrsas_dev, "Cannot alloc Event detail buffer memory\n"); return (ENOMEM); } bzero(sc->evt_detail_mem, evt_detail_size); if (bus_dmamap_load(sc->evt_detail_tag, sc->evt_detail_dmamap, sc->evt_detail_mem, evt_detail_size, mrsas_addr_cb, &sc->evt_detail_phys_addr, BUS_DMA_NOWAIT)) { device_printf(sc->mrsas_dev, "Cannot load Event detail buffer memory\n"); return (ENOMEM); } /* * Create a dma tag for data buffers; size will be the maximum * possible I/O size (280kB). */ if (bus_dma_tag_create(sc->mrsas_parent_tag, // parent 1, // alignment 0, // boundary BUS_SPACE_MAXADDR, // lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg MRSAS_MAX_IO_SIZE, // maxsize MRSAS_MAX_SGL, // nsegments MRSAS_MAX_IO_SIZE, // maxsegsize BUS_DMA_ALLOCNOW, // flags busdma_lock_mutex, // lockfunc &sc->io_lock, // lockfuncarg &sc->data_tag)) { device_printf(sc->mrsas_dev, "Cannot create data dma tag\n"); return(ENOMEM); } return(0); } /* * mrsas_addr_cb: Callback function of bus_dmamap_load() * input: callback argument, * machine dependent type that describes DMA segments, * number of segments, * error code. * * This function is for the driver to receive mapping information resultant * of the bus_dmamap_load(). The information is actually not being used, * but the address is saved anyway. */ void mrsas_addr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } /* * mrsas_setup_raidmap: Set up RAID map. * input: Adapter instance soft state * * Allocate DMA memory for the RAID maps and perform setup. 
*/ static int mrsas_setup_raidmap(struct mrsas_softc *sc) { sc->map_sz = sizeof(MR_FW_RAID_MAP) + (sizeof(MR_LD_SPAN_MAP) * (MAX_LOGICAL_DRIVES - 1)); for (int i=0; i < 2; i++) { if (bus_dma_tag_create(sc->mrsas_parent_tag, // parent 4, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg sc->map_sz, // maxsize 1, // nsegments sc->map_sz, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->raidmap_tag[i])) { device_printf(sc->mrsas_dev, "Cannot allocate raid map tag.\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->raidmap_tag[i], (void **)&sc->raidmap_mem[i], BUS_DMA_NOWAIT, &sc->raidmap_dmamap[i])) { device_printf(sc->mrsas_dev, "Cannot allocate raidmap memory.\n"); return (ENOMEM); } if (bus_dmamap_load(sc->raidmap_tag[i], sc->raidmap_dmamap[i], sc->raidmap_mem[i], sc->map_sz, mrsas_addr_cb, &sc->raidmap_phys_addr[i], BUS_DMA_NOWAIT)){ device_printf(sc->mrsas_dev, "Cannot load raidmap memory.\n"); return (ENOMEM); } if (!sc->raidmap_mem[i]) { device_printf(sc->mrsas_dev, "Cannot allocate memory for raid map.\n"); return (ENOMEM); } } if (!mrsas_get_map_info(sc)) mrsas_sync_map_info(sc); return (0); } /** * mrsas_init_fw: Initialize Firmware * input: Adapter soft state * * Calls transition_to_ready() to make sure Firmware is in operational * state and calls mrsas_init_adapter() to send IOC_INIT command to * Firmware. It issues internal commands to get the controller info * after the IOC_INIT command response is received by Firmware. * Note: code relating to get_pdlist, get_ld_list and max_sectors * are currently not being used, it is left here as placeholder. 
*/
static int mrsas_init_fw(struct mrsas_softc *sc)
{
    struct mrsas_ctrl_info *ctrl_info;
    u_int32_t strip_limit;
    u_int32_t request_limit;
    u_int32_t fw_limit = 0;
    int ret;

    /* Firmware must reach the ready state first (not an OCR, hence 0). */
    ret = mrsas_transition_to_ready(sc, 0);
    if (ret != SUCCESS)
        return(ret);

    /* Get operational params, sge flags, send init cmd to ctlr */
    if (mrsas_init_adapter(sc) != SUCCESS) {
        device_printf(sc->mrsas_dev, "Adapter initialize Fail.\n");
        return(1);
    }

    /* Allocate internal commands for pass-thru */
    if (mrsas_alloc_mfi_cmds(sc) != SUCCESS) {
        device_printf(sc->mrsas_dev, "Allocate MFI cmd failed.\n");
        return(1);
    }

    if (mrsas_setup_raidmap(sc) != SUCCESS) {
        device_printf(sc->mrsas_dev, "Set up RAID map failed.\n");
        return(1);
    }

    /* For pass-thru, get PD/LD list and controller info */
    memset(sc->pd_list, 0, MRSAS_MAX_PD * sizeof(struct mrsas_pd_list));
    mrsas_get_pd_list(sc);

    memset(sc->ld_ids, 0xff, MRSAS_MAX_LD);
    mrsas_get_ld_list(sc);

    ctrl_info = malloc(sizeof(struct mrsas_ctrl_info), M_MRSAS, M_NOWAIT);

    /*
     * Compute the max allowed sectors per IO: The controller info has two
     * limits on max sectors.  Driver should use the minimum of these two.
     *
     * 1 << stripe_sz_ops.min = max sectors per strip
     *
     * Note that older firmwares ( < FW ver 30) didn't report information
     * to calculate max_sectors_1.  So the number ended up as zero always.
     */
    if (ctrl_info && !mrsas_get_ctrl_info(sc, ctrl_info)) {
        strip_limit = (1 << ctrl_info->stripe_sz_ops.min) *
            ctrl_info->max_strips_per_io;
        request_limit = ctrl_info->max_request_size;
        fw_limit = min(strip_limit, request_limit);

        sc->disableOnlineCtrlReset =
            ctrl_info->properties.OnOffProperties.disableOnlineCtrlReset;
        sc->UnevenSpanSupport =
            ctrl_info->adapterOperations2.supportUnevenSpans;

        if (sc->UnevenSpanSupport) {
            device_printf(sc->mrsas_dev, "FW supports: UnevenSpanSupport=%x\n",
                sc->UnevenSpanSupport);
            /* Fast path is enabled only when the RAID map validates. */
            sc->fast_path_io = MR_ValidateMapInfo(sc) ? 1 : 0;
        }
    }

    /* Start from the driver's SGE-based limit, then clamp to the FW limit. */
    sc->max_sectors_per_req = sc->max_num_sge * MRSAS_PAGE_SIZE / 512;
    if (fw_limit && (sc->max_sectors_per_req > fw_limit))
        sc->max_sectors_per_req = fw_limit;

    if (ctrl_info)
        free(ctrl_info, M_MRSAS);

    return(0);
}

/**
 * mrsas_init_adapter:     Initializes the adapter/controller
 * input:                  Adapter soft state
 *
 * Prepares for the issuing of the IOC Init cmd to FW for initializing the
 * ROC/controller.  The FW register is read to determined the number of
 * commands that is supported.  All memory allocations for IO is based on
 * max_cmd.  Appropriate calculations are performed in this function.
*/
int mrsas_init_adapter(struct mrsas_softc *sc)
{
    uint32_t scratch_pad;
    u_int32_t max_cmd;
    int ret;

    /* Read FW status register */
    scratch_pad = mrsas_read_reg(sc,
        offsetof(mrsas_reg_set, outbound_scratch_pad));

    /* Get operational params from status register */
    sc->max_fw_cmds = scratch_pad & MRSAS_FWSTATE_MAXCMD_MASK;

    /* Decrement the max supported by 1, to correlate with FW */
    sc->max_fw_cmds = sc->max_fw_cmds - 1;
    max_cmd = sc->max_fw_cmds;

    /* Determine allocation size of command frames */
    sc->reply_q_depth = ((max_cmd * 2 + 1 + 15) / 16 * 16);
    sc->request_alloc_sz =
        sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION) * max_cmd;
    sc->reply_alloc_sz =
        sizeof(MPI2_REPLY_DESCRIPTORS_UNION) * (sc->reply_q_depth);
    sc->io_frames_alloc_sz = MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE +
        (MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE * (max_cmd + 1));
    sc->chain_frames_alloc_sz = 1024 * max_cmd;

    /* SGE capacity of the main message and of one chain frame. */
    sc->max_sge_in_main_msg = (MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE -
        offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL)) / 16;
    sc->max_sge_in_chain =
        MRSAS_MAX_SZ_CHAIN_FRAME / sizeof(MPI2_SGE_IO_UNION);
    sc->max_num_sge = sc->max_sge_in_main_msg + sc->max_sge_in_chain - 2;

    /* Used for pass thru MFI frame (DCMD) */
    sc->chain_offset_mfi_pthru =
        offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL) / 16;

    sc->chain_offset_io_request = (MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE -
        sizeof(MPI2_SGE_IO_UNION)) / 16;

    sc->last_reply_idx = 0;

    /* Allocate DMA memory, then the MPT commands, then send IOC INIT. */
    if ((ret = mrsas_alloc_mem(sc)) != SUCCESS)
        return(ret);

    if ((ret = mrsas_alloc_mpt_cmds(sc)) != SUCCESS)
        return(ret);

    if ((ret = mrsas_ioc_init(sc)) != SUCCESS)
        return(ret);

    return(0);
}

/**
 * mrsas_alloc_ioc_cmd:   Allocates memory for IOC Init command
 * input:                 Adapter soft state
 *
 * Allocates for the IOC Init cmd to FW to initialize the ROC/controller.
*/ int mrsas_alloc_ioc_cmd(struct mrsas_softc *sc) { int ioc_init_size; /* Allocate IOC INIT command */ ioc_init_size = 1024 + sizeof(MPI2_IOC_INIT_REQUEST); if (bus_dma_tag_create( sc->mrsas_parent_tag, // parent 1, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg ioc_init_size, // maxsize 1, // msegments ioc_init_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->ioc_init_tag)) { device_printf(sc->mrsas_dev, "Cannot allocate ioc init tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->ioc_init_tag, (void **)&sc->ioc_init_mem, BUS_DMA_NOWAIT, &sc->ioc_init_dmamap)) { device_printf(sc->mrsas_dev, "Cannot allocate ioc init cmd mem\n"); return (ENOMEM); } bzero(sc->ioc_init_mem, ioc_init_size); if (bus_dmamap_load(sc->ioc_init_tag, sc->ioc_init_dmamap, sc->ioc_init_mem, ioc_init_size, mrsas_addr_cb, &sc->ioc_init_phys_mem, BUS_DMA_NOWAIT)) { device_printf(sc->mrsas_dev, "Cannot load ioc init cmd mem\n"); return (ENOMEM); } return (0); } /** * mrsas_free_ioc_cmd: Allocates memory for IOC Init command * input: Adapter soft state * * Deallocates memory of the IOC Init cmd. */ void mrsas_free_ioc_cmd(struct mrsas_softc *sc) { if (sc->ioc_init_phys_mem) bus_dmamap_unload(sc->ioc_init_tag, sc->ioc_init_dmamap); if (sc->ioc_init_mem != NULL) bus_dmamem_free(sc->ioc_init_tag, sc->ioc_init_mem, sc->ioc_init_dmamap); if (sc->ioc_init_tag != NULL) bus_dma_tag_destroy(sc->ioc_init_tag); } /** * mrsas_ioc_init: Sends IOC Init command to FW * input: Adapter soft state * * Issues the IOC Init cmd to FW to initialize the ROC/controller. 
*/ int mrsas_ioc_init(struct mrsas_softc *sc) { struct mrsas_init_frame *init_frame; pMpi2IOCInitRequest_t IOCInitMsg; MRSAS_REQUEST_DESCRIPTOR_UNION req_desc; u_int8_t max_wait = MRSAS_IOC_INIT_WAIT_TIME; bus_addr_t phys_addr; int i, retcode = 0; /* Allocate memory for the IOC INIT command */ if (mrsas_alloc_ioc_cmd(sc)) { device_printf(sc->mrsas_dev, "Cannot allocate IOC command.\n"); return(1); } IOCInitMsg = (pMpi2IOCInitRequest_t)(((char *)sc->ioc_init_mem) +1024); IOCInitMsg->Function = MPI2_FUNCTION_IOC_INIT; IOCInitMsg->WhoInit = MPI2_WHOINIT_HOST_DRIVER; IOCInitMsg->MsgVersion = MPI2_VERSION; IOCInitMsg->HeaderVersion = MPI2_HEADER_VERSION; IOCInitMsg->SystemRequestFrameSize = MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4; IOCInitMsg->ReplyDescriptorPostQueueDepth = sc->reply_q_depth; IOCInitMsg->ReplyDescriptorPostQueueAddress = sc->reply_desc_phys_addr; IOCInitMsg->SystemRequestFrameBaseAddress = sc->io_request_phys_addr; init_frame = (struct mrsas_init_frame *)sc->ioc_init_mem; init_frame->cmd = MFI_CMD_INIT; init_frame->cmd_status = 0xFF; init_frame->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; if (sc->verbuf_mem) { snprintf((char *)sc->verbuf_mem, strlen(MRSAS_VERSION)+2,"%s\n", MRSAS_VERSION); init_frame->driver_ver_lo = (bus_addr_t)sc->verbuf_phys_addr; init_frame->driver_ver_hi = 0; } phys_addr = (bus_addr_t)sc->ioc_init_phys_mem + 1024; init_frame->queue_info_new_phys_addr_lo = phys_addr; init_frame->data_xfer_len = sizeof(Mpi2IOCInitRequest_t); req_desc.addr.Words = (bus_addr_t)sc->ioc_init_phys_mem; req_desc.MFAIo.RequestFlags = (MRSAS_REQ_DESCRIPT_FLAGS_MFA << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); mrsas_disable_intr(sc); mrsas_dprint(sc, MRSAS_OCR, "Issuing IOC INIT command to FW.\n"); //device_printf(sc->mrsas_dev, "Issuing IOC INIT command to FW.\n");del? mrsas_fire_cmd(sc, req_desc.addr.u.low, req_desc.addr.u.high); /* * Poll response timer to wait for Firmware response. 
While this * timer with the DELAY call could block CPU, the time interval for * this is only 1 millisecond. */ if (init_frame->cmd_status == 0xFF) { for (i=0; i < (max_wait * 1000); i++){ if (init_frame->cmd_status == 0xFF) DELAY(1000); else break; } } if (init_frame->cmd_status == 0) mrsas_dprint(sc, MRSAS_OCR, "IOC INIT response received from FW.\n"); //device_printf(sc->mrsas_dev, "IOC INIT response received from FW.\n");del? else { if (init_frame->cmd_status == 0xFF) device_printf(sc->mrsas_dev, "IOC Init timed out after %d seconds.\n", max_wait); else device_printf(sc->mrsas_dev, "IOC Init failed, status = 0x%x\n", init_frame->cmd_status); retcode = 1; } mrsas_free_ioc_cmd(sc); return (retcode); } /** * mrsas_alloc_mpt_cmds: Allocates the command packets * input: Adapter instance soft state * * This function allocates the internal commands for IOs. Each command that is * issued to FW is wrapped in a local data structure called mrsas_mpt_cmd. * An array is allocated with mrsas_mpt_cmd context. The free commands are * maintained in a linked list (cmd pool). SMID value range is from 1 to * max_fw_cmds. */ int mrsas_alloc_mpt_cmds(struct mrsas_softc *sc) { int i, j; u_int32_t max_cmd; struct mrsas_mpt_cmd *cmd; pMpi2ReplyDescriptorsUnion_t reply_desc; u_int32_t offset, chain_offset, sense_offset; bus_addr_t io_req_base_phys, chain_frame_base_phys, sense_base_phys; u_int8_t *io_req_base, *chain_frame_base, *sense_base; max_cmd = sc->max_fw_cmds; sc->req_desc = malloc(sc->request_alloc_sz, M_MRSAS, M_NOWAIT); if (!sc->req_desc) { device_printf(sc->mrsas_dev, "Out of memory, cannot alloc req desc\n"); return(ENOMEM); } memset(sc->req_desc, 0, sc->request_alloc_sz); /* * sc->mpt_cmd_list is an array of struct mrsas_mpt_cmd pointers. Allocate the * dynamic array first and then allocate individual commands. 
*/ sc->mpt_cmd_list = malloc(sizeof(struct mrsas_mpt_cmd*)*max_cmd, M_MRSAS, M_NOWAIT); if (!sc->mpt_cmd_list) { device_printf(sc->mrsas_dev, "Cannot alloc memory for mpt_cmd_list.\n"); return(ENOMEM); } memset(sc->mpt_cmd_list, 0, sizeof(struct mrsas_mpt_cmd *)*max_cmd); for (i = 0; i < max_cmd; i++) { sc->mpt_cmd_list[i] = malloc(sizeof(struct mrsas_mpt_cmd), M_MRSAS, M_NOWAIT); if (!sc->mpt_cmd_list[i]) { for (j = 0; j < i; j++) free(sc->mpt_cmd_list[j],M_MRSAS); free(sc->mpt_cmd_list, M_MRSAS); sc->mpt_cmd_list = NULL; return(ENOMEM); } } io_req_base = (u_int8_t*)sc->io_request_mem + MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; io_req_base_phys = (bus_addr_t)sc->io_request_phys_addr + MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; chain_frame_base = (u_int8_t*)sc->chain_frame_mem; chain_frame_base_phys = (bus_addr_t)sc->chain_frame_phys_addr; sense_base = (u_int8_t*)sc->sense_mem; sense_base_phys = (bus_addr_t)sc->sense_phys_addr; for (i = 0; i < max_cmd; i++) { cmd = sc->mpt_cmd_list[i]; offset = MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE * i; chain_offset = 1024 * i; sense_offset = MRSAS_SENSE_LEN * i; memset(cmd, 0, sizeof(struct mrsas_mpt_cmd)); cmd->index = i + 1; cmd->ccb_ptr = NULL; callout_init(&cmd->cm_callout, 0); cmd->sync_cmd_idx = (u_int32_t)MRSAS_ULONG_MAX; cmd->sc = sc; cmd->io_request = (MRSAS_RAID_SCSI_IO_REQUEST *) (io_req_base + offset); memset(cmd->io_request, 0, sizeof(MRSAS_RAID_SCSI_IO_REQUEST)); cmd->io_request_phys_addr = io_req_base_phys + offset; cmd->chain_frame = (MPI2_SGE_IO_UNION *) (chain_frame_base + chain_offset); cmd->chain_frame_phys_addr = chain_frame_base_phys + chain_offset; cmd->sense = sense_base + sense_offset; cmd->sense_phys_addr = sense_base_phys + sense_offset; if (bus_dmamap_create(sc->data_tag, 0, &cmd->data_dmamap)) { return(FAIL); } TAILQ_INSERT_TAIL(&(sc->mrsas_mpt_cmd_list_head), cmd, next); } /* Initialize reply descriptor array to 0xFFFFFFFF */ reply_desc = sc->reply_desc_mem; for (i = 0; i < sc->reply_q_depth; i++, 
reply_desc++) { reply_desc->Words = MRSAS_ULONG_MAX; } return(0); } /** * mrsas_fire_cmd: Sends command to FW * input: Adapter soft state * request descriptor address low * request descriptor address high * * This functions fires the command to Firmware by writing to the * inbound_low_queue_port and inbound_high_queue_port. */ void mrsas_fire_cmd(struct mrsas_softc *sc, u_int32_t req_desc_lo, u_int32_t req_desc_hi) { mtx_lock(&sc->pci_lock); mrsas_write_reg(sc, offsetof(mrsas_reg_set, inbound_low_queue_port), req_desc_lo); mrsas_write_reg(sc, offsetof(mrsas_reg_set, inbound_high_queue_port), req_desc_hi); mtx_unlock(&sc->pci_lock); } /** * mrsas_transition_to_ready: Move FW to Ready state * input: Adapter instance soft state * * During the initialization, FW passes can potentially be in any one of * several possible states. If the FW in operational, waiting-for-handshake * states, driver must take steps to bring it to ready state. Otherwise, it * has to wait for the ready state. */ int mrsas_transition_to_ready(struct mrsas_softc *sc, int ocr) { int i; u_int8_t max_wait; u_int32_t val, fw_state; u_int32_t cur_state; u_int32_t abs_state, curr_abs_state; val = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)); fw_state = val & MFI_STATE_MASK; max_wait = MRSAS_RESET_WAIT_TIME; if (fw_state != MFI_STATE_READY) device_printf(sc->mrsas_dev, "Waiting for FW to come to ready state\n"); while (fw_state != MFI_STATE_READY) { abs_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)); switch (fw_state) { case MFI_STATE_FAULT: device_printf(sc->mrsas_dev, "FW is in FAULT state!!\n"); if (ocr) { cur_state = MFI_STATE_FAULT; break; } else return -ENODEV; case MFI_STATE_WAIT_HANDSHAKE: /* Set the CLR bit in inbound doorbell */ mrsas_write_reg(sc, offsetof(mrsas_reg_set, doorbell), MFI_INIT_CLEAR_HANDSHAKE|MFI_INIT_HOTPLUG); cur_state = MFI_STATE_WAIT_HANDSHAKE; break; case MFI_STATE_BOOT_MESSAGE_PENDING: mrsas_write_reg(sc, 
offsetof(mrsas_reg_set, doorbell), MFI_INIT_HOTPLUG); cur_state = MFI_STATE_BOOT_MESSAGE_PENDING; break; case MFI_STATE_OPERATIONAL: /* Bring it to READY state; assuming max wait 10 secs */ mrsas_disable_intr(sc); mrsas_write_reg(sc, offsetof(mrsas_reg_set, doorbell), MFI_RESET_FLAGS); for (i=0; i < max_wait * 1000; i++) { if (mrsas_read_reg(sc, offsetof(mrsas_reg_set, doorbell)) & 1) DELAY(1000); else break; } cur_state = MFI_STATE_OPERATIONAL; break; case MFI_STATE_UNDEFINED: /* This state should not last for more than 2 seconds */ cur_state = MFI_STATE_UNDEFINED; break; case MFI_STATE_BB_INIT: cur_state = MFI_STATE_BB_INIT; break; case MFI_STATE_FW_INIT: cur_state = MFI_STATE_FW_INIT; break; case MFI_STATE_FW_INIT_2: cur_state = MFI_STATE_FW_INIT_2; break; case MFI_STATE_DEVICE_SCAN: cur_state = MFI_STATE_DEVICE_SCAN; break; case MFI_STATE_FLUSH_CACHE: cur_state = MFI_STATE_FLUSH_CACHE; break; default: device_printf(sc->mrsas_dev, "Unknown state 0x%x\n", fw_state); return -ENODEV; } /* * The cur_state should not last for more than max_wait secs */ for (i = 0; i < (max_wait * 1000); i++) { fw_state = (mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad))& MFI_STATE_MASK); curr_abs_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)); if (abs_state == curr_abs_state) DELAY(1000); else break; } /* * Return error if fw_state hasn't changed after max_wait */ if (curr_abs_state == abs_state) { device_printf(sc->mrsas_dev, "FW state [%d] hasn't changed " "in %d secs\n", fw_state, max_wait); return -ENODEV; } } mrsas_dprint(sc, MRSAS_OCR, "FW now in Ready state\n"); //device_printf(sc->mrsas_dev, "FW now in Ready state\n");del? return 0; } /** * mrsas_get_mfi_cmd: Get a cmd from free command pool * input: Adapter soft state * * This function removes an MFI command from the command list. 
*/ struct mrsas_mfi_cmd* mrsas_get_mfi_cmd(struct mrsas_softc *sc) { struct mrsas_mfi_cmd *cmd = NULL; mtx_lock(&sc->mfi_cmd_pool_lock); if (!TAILQ_EMPTY(&sc->mrsas_mfi_cmd_list_head)){ cmd = TAILQ_FIRST(&sc->mrsas_mfi_cmd_list_head); TAILQ_REMOVE(&sc->mrsas_mfi_cmd_list_head, cmd, next); } mtx_unlock(&sc->mfi_cmd_pool_lock); return cmd; } /** * mrsas_ocr_thread Thread to handle OCR/Kill Adapter. * input: Adapter Context. * * This function will check FW status register and flag * do_timeout_reset flag. It will do OCR/Kill adapter if * FW is in fault state or IO timed out has trigger reset. */ static void mrsas_ocr_thread(void *arg) { struct mrsas_softc *sc; u_int32_t fw_status, fw_state; sc = (struct mrsas_softc *)arg; mrsas_dprint(sc, MRSAS_TRACE, "%s\n", __func__); sc->ocr_thread_active = 1; mtx_lock(&sc->sim_lock); for (;;) { /* Sleep for 1 second and check the queue status*/ msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "mrsas_ocr", sc->mrsas_fw_fault_check_delay * hz); if (sc->remove_in_progress) { mrsas_dprint(sc, MRSAS_OCR, "Exit due to shutdown from %s\n", __func__); break; } fw_status = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)); fw_state = fw_status & MFI_STATE_MASK; if (fw_state == MFI_STATE_FAULT || sc->do_timedout_reset) { device_printf(sc->mrsas_dev, "OCR started due to %s!\n", sc->do_timedout_reset?"IO Timeout": "FW fault detected"); mtx_lock_spin(&sc->ioctl_lock); sc->reset_in_progress = 1; sc->reset_count++; mtx_unlock_spin(&sc->ioctl_lock); mrsas_xpt_freeze(sc); mrsas_reset_ctrl(sc); mrsas_xpt_release(sc); sc->reset_in_progress = 0; sc->do_timedout_reset = 0; } } mtx_unlock(&sc->sim_lock); sc->ocr_thread_active = 0; mrsas_kproc_exit(0); } /** * mrsas_reset_reply_desc Reset Reply descriptor as part of OCR. * input: Adapter Context. * * This function will clear reply descriptor so that post OCR * driver and FW will lost old history. 
*/ void mrsas_reset_reply_desc(struct mrsas_softc *sc) { int i; pMpi2ReplyDescriptorsUnion_t reply_desc; sc->last_reply_idx = 0; reply_desc = sc->reply_desc_mem; for (i = 0; i < sc->reply_q_depth; i++, reply_desc++) { reply_desc->Words = MRSAS_ULONG_MAX; } } /** * mrsas_reset_ctrl Core function to OCR/Kill adapter. * input: Adapter Context. * * This function will run from thread context so that it can sleep. * 1. Do not handle OCR if FW is in HW critical error. * 2. Wait for outstanding command to complete for 180 seconds. * 3. If #2 does not find any outstanding command Controller is in working * state, so skip OCR. * Otherwise, do OCR/kill Adapter based on flag disableOnlineCtrlReset. * 4. Start of the OCR, return all SCSI command back to CAM layer which has * ccb_ptr. * 5. Post OCR, Re-fire Managment command and move Controller to Operation * state. */ int mrsas_reset_ctrl(struct mrsas_softc *sc) { int retval = SUCCESS, i, j, retry = 0; u_int32_t host_diag, abs_state, status_reg, reset_adapter; union ccb *ccb; struct mrsas_mfi_cmd *mfi_cmd; struct mrsas_mpt_cmd *mpt_cmd; MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc; if (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR) { device_printf(sc->mrsas_dev, "mrsas: Hardware critical error, returning FAIL.\n"); return FAIL; } set_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags); sc->adprecovery = MRSAS_ADPRESET_SM_INFAULT; mrsas_disable_intr(sc); DELAY(1000 * 1000); /* First try waiting for commands to complete */ if (mrsas_wait_for_outstanding(sc)) { mrsas_dprint(sc, MRSAS_OCR, "resetting adapter from %s.\n", __func__); /* Now return commands back to the CAM layer */ for (i = 0 ; i < sc->max_fw_cmds; i++) { mpt_cmd = sc->mpt_cmd_list[i]; if (mpt_cmd->ccb_ptr) { ccb = (union ccb *)(mpt_cmd->ccb_ptr); ccb->ccb_h.status = CAM_SCSI_BUS_RESET; mrsas_cmd_done(sc, mpt_cmd); atomic_dec(&sc->fw_outstanding); } } status_reg = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)); abs_state = status_reg & MFI_STATE_MASK; 
reset_adapter = status_reg & MFI_RESET_ADAPTER; if (sc->disableOnlineCtrlReset || (abs_state == MFI_STATE_FAULT && !reset_adapter)) { /* Reset not supported, kill adapter */ mrsas_dprint(sc, MRSAS_OCR,"Reset not supported, killing adapter.\n"); mrsas_kill_hba(sc); sc->adprecovery = MRSAS_HW_CRITICAL_ERROR; retval = FAIL; goto out; } /* Now try to reset the chip */ for (i = 0; i < MRSAS_FUSION_MAX_RESET_TRIES; i++) { mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset), MPI2_WRSEQ_FLUSH_KEY_VALUE); mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset), MPI2_WRSEQ_1ST_KEY_VALUE); mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset), MPI2_WRSEQ_2ND_KEY_VALUE); mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset), MPI2_WRSEQ_3RD_KEY_VALUE); mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset), MPI2_WRSEQ_4TH_KEY_VALUE); mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset), MPI2_WRSEQ_5TH_KEY_VALUE); mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_seq_offset), MPI2_WRSEQ_6TH_KEY_VALUE); /* Check that the diag write enable (DRWE) bit is on */ host_diag = mrsas_read_reg(sc, offsetof(mrsas_reg_set, fusion_host_diag)); retry = 0; while (!(host_diag & HOST_DIAG_WRITE_ENABLE)) { DELAY(100 * 1000); host_diag = mrsas_read_reg(sc, offsetof(mrsas_reg_set, fusion_host_diag)); if (retry++ == 100) { mrsas_dprint(sc, MRSAS_OCR, "Host diag unlock failed!\n"); break; } } if (!(host_diag & HOST_DIAG_WRITE_ENABLE)) continue; /* Send chip reset command */ mrsas_write_reg(sc, offsetof(mrsas_reg_set, fusion_host_diag), host_diag | HOST_DIAG_RESET_ADAPTER); DELAY(3000 * 1000); /* Make sure reset adapter bit is cleared */ host_diag = mrsas_read_reg(sc, offsetof(mrsas_reg_set, fusion_host_diag)); retry = 0; while (host_diag & HOST_DIAG_RESET_ADAPTER) { DELAY(100 * 1000); host_diag = mrsas_read_reg(sc, offsetof(mrsas_reg_set, fusion_host_diag)); if (retry++ == 1000) { mrsas_dprint(sc, MRSAS_OCR, "Diag reset adapter never 
cleared!\n"); break; } } if (host_diag & HOST_DIAG_RESET_ADAPTER) continue; abs_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)) & MFI_STATE_MASK; retry = 0; while ((abs_state <= MFI_STATE_FW_INIT) && (retry++ < 1000)) { DELAY(100 * 1000); abs_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)) & MFI_STATE_MASK; } if (abs_state <= MFI_STATE_FW_INIT) { mrsas_dprint(sc, MRSAS_OCR, "firmware state < MFI_STATE_FW_INIT," " state = 0x%x\n", abs_state); continue; } /* Wait for FW to become ready */ if (mrsas_transition_to_ready(sc, 1)) { mrsas_dprint(sc, MRSAS_OCR, "mrsas: Failed to transition controller to ready.\n"); continue; } mrsas_reset_reply_desc(sc); if (mrsas_ioc_init(sc)) { mrsas_dprint(sc, MRSAS_OCR, "mrsas_ioc_init() failed!\n"); continue; } clear_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags); mrsas_enable_intr(sc); sc->adprecovery = MRSAS_HBA_OPERATIONAL; /* Re-fire management commands */ for (j = 0 ; j < sc->max_fw_cmds; j++) { mpt_cmd = sc->mpt_cmd_list[j]; if (mpt_cmd->sync_cmd_idx != (u_int32_t)MRSAS_ULONG_MAX) { mfi_cmd = sc->mfi_cmd_list[mpt_cmd->sync_cmd_idx]; if (mfi_cmd->frame->dcmd.opcode == MR_DCMD_LD_MAP_GET_INFO) { mrsas_release_mfi_cmd(mfi_cmd); mrsas_release_mpt_cmd(mpt_cmd); } else { req_desc = mrsas_get_request_desc(sc, mfi_cmd->cmd_id.context.smid - 1); mrsas_dprint(sc, MRSAS_OCR, "Re-fire command DCMD opcode 0x%x index %d\n ", mfi_cmd->frame->dcmd.opcode, j); if (!req_desc) device_printf(sc->mrsas_dev, "Cannot build MPT cmd.\n"); else mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high); } } } /* Reset load balance info */ memset(sc->load_balance_info, 0, sizeof(LD_LOAD_BALANCE_INFO) * MAX_LOGICAL_DRIVES); if (!mrsas_get_map_info(sc)) mrsas_sync_map_info(sc); /* Adapter reset completed successfully */ device_printf(sc->mrsas_dev, "Reset successful\n"); retval = SUCCESS; goto out; } /* Reset failed, kill the adapter */ device_printf(sc->mrsas_dev, "Reset failed, killing adapter.\n"); 
mrsas_kill_hba(sc); retval = FAIL; } else { clear_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags); mrsas_enable_intr(sc); sc->adprecovery = MRSAS_HBA_OPERATIONAL; } out: clear_bit(MRSAS_FUSION_IN_RESET, &sc->reset_flags); mrsas_dprint(sc, MRSAS_OCR, "Reset Exit with %d.\n", retval); return retval; } /** * mrsas_kill_hba Kill HBA when OCR is not supported. * input: Adapter Context. * * This function will kill HBA when OCR is not supported. */ void mrsas_kill_hba (struct mrsas_softc *sc) { mrsas_dprint(sc, MRSAS_OCR, "%s\n", __func__); mrsas_write_reg(sc, offsetof(mrsas_reg_set, doorbell), MFI_STOP_ADP); /* Flush */ mrsas_read_reg(sc, offsetof(mrsas_reg_set, doorbell)); } /** * mrsas_wait_for_outstanding Wait for outstanding commands * input: Adapter Context. * * This function will wait for 180 seconds for outstanding * commands to be completed. */ int mrsas_wait_for_outstanding(struct mrsas_softc *sc) { int i, outstanding, retval = 0; u_int32_t fw_state; for (i = 0; i < MRSAS_RESET_WAIT_TIME; i++) { if (sc->remove_in_progress) { mrsas_dprint(sc, MRSAS_OCR, "Driver remove or shutdown called.\n"); retval = 1; goto out; } /* Check if firmware is in fault state */ fw_state = mrsas_read_reg(sc, offsetof(mrsas_reg_set, outbound_scratch_pad)) & MFI_STATE_MASK; if (fw_state == MFI_STATE_FAULT) { mrsas_dprint(sc, MRSAS_OCR, "Found FW in FAULT state, will reset adapter.\n"); retval = 1; goto out; } outstanding = atomic_read(&sc->fw_outstanding); if (!outstanding) goto out; if (!(i % MRSAS_RESET_NOTICE_INTERVAL)) { mrsas_dprint(sc, MRSAS_OCR, "[%2d]waiting for %d " "commands to complete\n",i,outstanding); mrsas_complete_cmd(sc); } DELAY(1000 * 1000); } if (atomic_read(&sc->fw_outstanding)) { mrsas_dprint(sc, MRSAS_OCR, " pending commands remain after waiting," " will reset adapter.\n"); retval = 1; } out: return retval; } /** * mrsas_release_mfi_cmd: Return a cmd to free command pool * input: Command packet for return to free cmd pool * * This function returns the MFI command to 
the command list. */ void mrsas_release_mfi_cmd(struct mrsas_mfi_cmd *cmd) { struct mrsas_softc *sc = cmd->sc; mtx_lock(&sc->mfi_cmd_pool_lock); cmd->ccb_ptr = NULL; cmd->cmd_id.frame_count = 0; TAILQ_INSERT_TAIL(&(sc->mrsas_mfi_cmd_list_head), cmd, next); mtx_unlock(&sc->mfi_cmd_pool_lock); return; } /** * mrsas_get_controller_info - Returns FW's controller structure * input: Adapter soft state * Controller information structure * * Issues an internal command (DCMD) to get the FW's controller structure. * This information is mainly used to find out the maximum IO transfer per * command supported by the FW. */ static int mrsas_get_ctrl_info(struct mrsas_softc *sc, struct mrsas_ctrl_info *ctrl_info) { int retcode = 0; struct mrsas_mfi_cmd *cmd; struct mrsas_dcmd_frame *dcmd; cmd = mrsas_get_mfi_cmd(sc); if (!cmd) { device_printf(sc->mrsas_dev, "Failed to get a free cmd\n"); return -ENOMEM; } dcmd = &cmd->frame->dcmd; if (mrsas_alloc_ctlr_info_cmd(sc) != SUCCESS) { device_printf(sc->mrsas_dev, "Cannot allocate get ctlr info cmd\n"); mrsas_release_mfi_cmd(cmd); return -ENOMEM; } memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0xFF; dcmd->sge_count = 1; dcmd->flags = MFI_FRAME_DIR_READ; dcmd->timeout = 0; dcmd->pad_0 = 0; dcmd->data_xfer_len = sizeof(struct mrsas_ctrl_info); dcmd->opcode = MR_DCMD_CTRL_GET_INFO; dcmd->sgl.sge32[0].phys_addr = sc->ctlr_info_phys_addr; dcmd->sgl.sge32[0].length = sizeof(struct mrsas_ctrl_info); if (!mrsas_issue_polled(sc, cmd)) memcpy(ctrl_info, sc->ctlr_info_mem, sizeof(struct mrsas_ctrl_info)); else retcode = 1; mrsas_free_ctlr_info_cmd(sc); mrsas_release_mfi_cmd(cmd); return(retcode); } /** * mrsas_alloc_ctlr_info_cmd: Allocates memory for controller info command * input: Adapter soft state * * Allocates DMAable memory for the controller info internal command. 
*/ int mrsas_alloc_ctlr_info_cmd(struct mrsas_softc *sc) { int ctlr_info_size; /* Allocate get controller info command */ ctlr_info_size = sizeof(struct mrsas_ctrl_info); if (bus_dma_tag_create( sc->mrsas_parent_tag, // parent 1, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg ctlr_info_size, // maxsize 1, // msegments ctlr_info_size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &sc->ctlr_info_tag)) { device_printf(sc->mrsas_dev, "Cannot allocate ctlr info tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->ctlr_info_tag, (void **)&sc->ctlr_info_mem, BUS_DMA_NOWAIT, &sc->ctlr_info_dmamap)) { device_printf(sc->mrsas_dev, "Cannot allocate ctlr info cmd mem\n"); return (ENOMEM); } if (bus_dmamap_load(sc->ctlr_info_tag, sc->ctlr_info_dmamap, sc->ctlr_info_mem, ctlr_info_size, mrsas_addr_cb, &sc->ctlr_info_phys_addr, BUS_DMA_NOWAIT)) { device_printf(sc->mrsas_dev, "Cannot load ctlr info cmd mem\n"); return (ENOMEM); } memset(sc->ctlr_info_mem, 0, ctlr_info_size); return (0); } /** * mrsas_free_ctlr_info_cmd: Free memory for controller info command * input: Adapter soft state * * Deallocates memory of the get controller info cmd. */ void mrsas_free_ctlr_info_cmd(struct mrsas_softc *sc) { if (sc->ctlr_info_phys_addr) bus_dmamap_unload(sc->ctlr_info_tag, sc->ctlr_info_dmamap); if (sc->ctlr_info_mem != NULL) bus_dmamem_free(sc->ctlr_info_tag, sc->ctlr_info_mem, sc->ctlr_info_dmamap); if (sc->ctlr_info_tag != NULL) bus_dma_tag_destroy(sc->ctlr_info_tag); } /** * mrsas_issue_polled: Issues a polling command * inputs: Adapter soft state * Command packet to be issued * * This function is for posting of internal commands to Firmware. MFI * requires the cmd_status to be set to 0xFF before posting. The maximun * wait time of the poll response timer is 180 seconds. 
*/ int mrsas_issue_polled(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd) { struct mrsas_header *frame_hdr = &cmd->frame->hdr; u_int8_t max_wait = MRSAS_INTERNAL_CMD_WAIT_TIME; int i, retcode = 0; frame_hdr->cmd_status = 0xFF; frame_hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; /* Issue the frame using inbound queue port */ if (mrsas_issue_dcmd(sc, cmd)) { device_printf(sc->mrsas_dev, "Cannot issue DCMD internal command.\n"); return(1); } /* * Poll response timer to wait for Firmware response. While this * timer with the DELAY call could block CPU, the time interval for * this is only 1 millisecond. */ if (frame_hdr->cmd_status == 0xFF) { for (i=0; i < (max_wait * 1000); i++){ if (frame_hdr->cmd_status == 0xFF) DELAY(1000); else break; } } if (frame_hdr->cmd_status != 0) { if (frame_hdr->cmd_status == 0xFF) device_printf(sc->mrsas_dev, "DCMD timed out after %d seconds.\n", max_wait); else device_printf(sc->mrsas_dev, "DCMD failed, status = 0x%x\n", frame_hdr->cmd_status); retcode = 1; } return(retcode); } /** * mrsas_issue_dcmd - Issues a MFI Pass thru cmd * input: Adapter soft state * mfi cmd pointer * * This function is called by mrsas_issued_blocked_cmd() and * mrsas_issued_polled(), to build the MPT command and then fire the * command to Firmware. */ int mrsas_issue_dcmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd) { MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc; req_desc = mrsas_build_mpt_cmd(sc, cmd); if (!req_desc) { device_printf(sc->mrsas_dev, "Cannot build MPT cmd.\n"); return(1); } mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high); return(0); } /** * mrsas_build_mpt_cmd - Calls helper function to build Passthru cmd * input: Adapter soft state * mfi cmd to build * * This function is called by mrsas_issue_cmd() to build the MPT-MFI * passthru command and prepares the MPT command to send to Firmware. 
*/ MRSAS_REQUEST_DESCRIPTOR_UNION * mrsas_build_mpt_cmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd) { MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc; u_int16_t index; if (mrsas_build_mptmfi_passthru(sc, cmd)) { device_printf(sc->mrsas_dev, "Cannot build MPT-MFI passthru cmd.\n"); return NULL; } index = cmd->cmd_id.context.smid; req_desc = mrsas_get_request_desc(sc, index-1); if(!req_desc) return NULL; req_desc->addr.Words = 0; req_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); req_desc->SCSIIO.SMID = index; return(req_desc); } /** * mrsas_build_mptmfi_passthru - Builds a MPT MFI Passthru command * input: Adapter soft state * mfi cmd pointer * * The MPT command and the io_request are setup as a passthru command. * The SGE chain address is set to frame_phys_addr of the MFI command. */ u_int8_t mrsas_build_mptmfi_passthru(struct mrsas_softc *sc, struct mrsas_mfi_cmd *mfi_cmd) { MPI25_IEEE_SGE_CHAIN64 *mpi25_ieee_chain; PTR_MRSAS_RAID_SCSI_IO_REQUEST io_req; struct mrsas_mpt_cmd *mpt_cmd; struct mrsas_header *frame_hdr = &mfi_cmd->frame->hdr; mpt_cmd = mrsas_get_mpt_cmd(sc); if (!mpt_cmd) return(1); /* Save the smid. To be used for returning the cmd */ mfi_cmd->cmd_id.context.smid = mpt_cmd->index; mpt_cmd->sync_cmd_idx = mfi_cmd->index; /* * For cmds where the flag is set, store the flag and check * on completion. For cmds with this flag, don't call * mrsas_complete_cmd. 
*/ if (frame_hdr->flags & MFI_FRAME_DONT_POST_IN_REPLY_QUEUE) mpt_cmd->flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; io_req = mpt_cmd->io_request; if ((sc->device_id == MRSAS_INVADER) || (sc->device_id == MRSAS_FURY)) { pMpi25IeeeSgeChain64_t sgl_ptr_end = (pMpi25IeeeSgeChain64_t) &io_req->SGL; sgl_ptr_end += sc->max_sge_in_main_msg - 1; sgl_ptr_end->Flags = 0; } mpi25_ieee_chain = (MPI25_IEEE_SGE_CHAIN64 *)&io_req->SGL.IeeeChain; io_req->Function = MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST; io_req->SGLOffset0 = offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL) / 4; io_req->ChainOffset = sc->chain_offset_mfi_pthru; mpi25_ieee_chain->Address = mfi_cmd->frame_phys_addr; mpi25_ieee_chain->Flags= IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR; mpi25_ieee_chain->Length = MRSAS_MAX_SZ_CHAIN_FRAME; return(0); } /** * mrsas_issue_blocked_cmd - Synchronous wrapper around regular FW cmds * input: Adapter soft state * Command to be issued * * This function waits on an event for the command to be returned * from the ISR. Max wait time is MRSAS_INTERNAL_CMD_WAIT_TIME secs. * Used for issuing internal and ioctl commands. */ int mrsas_issue_blocked_cmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd) { u_int8_t max_wait = MRSAS_INTERNAL_CMD_WAIT_TIME; unsigned long total_time = 0; int retcode = 0; /* Initialize cmd_status */ cmd->cmd_status = ECONNREFUSED; /* Build MPT-MFI command for issue to FW */ if (mrsas_issue_dcmd(sc, cmd)){ device_printf(sc->mrsas_dev, "Cannot issue DCMD internal command.\n"); return(1); } sc->chan = (void*)&cmd; /* The following is for debug only... 
*/ //device_printf(sc->mrsas_dev,"DCMD issued to FW, about to sleep-wait...\n"); //device_printf(sc->mrsas_dev,"sc->chan = %p\n", sc->chan); while (1) { if (cmd->cmd_status == ECONNREFUSED){ tsleep((void *)&sc->chan, 0, "mrsas_sleep", hz); } else break; total_time++; if (total_time >= max_wait) { device_printf(sc->mrsas_dev, "Internal command timed out after %d seconds.\n", max_wait); retcode = 1; break; } } return(retcode); } /** * mrsas_complete_mptmfi_passthru - Completes a command * input: sc: Adapter soft state * cmd: Command to be completed * status: cmd completion status * * This function is called from mrsas_complete_cmd() after an interrupt * is received from Firmware, and io_request->Function is * MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST. */ void mrsas_complete_mptmfi_passthru(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd, u_int8_t status) { struct mrsas_header *hdr = &cmd->frame->hdr; u_int8_t cmd_status = cmd->frame->hdr.cmd_status; /* Reset the retry counter for future re-tries */ cmd->retry_for_fw_reset = 0; if (cmd->ccb_ptr) cmd->ccb_ptr = NULL; switch (hdr->cmd) { case MFI_CMD_INVALID: device_printf(sc->mrsas_dev, "MFI_CMD_INVALID command.\n"); break; case MFI_CMD_PD_SCSI_IO: case MFI_CMD_LD_SCSI_IO: /* * MFI_CMD_PD_SCSI_IO and MFI_CMD_LD_SCSI_IO could have been * issued either through an IO path or an IOCTL path. If it * was via IOCTL, we will send it to internal completion. 
*/ if (cmd->sync_cmd) { cmd->sync_cmd = 0; mrsas_wakeup(sc, cmd); break; } case MFI_CMD_SMP: case MFI_CMD_STP: case MFI_CMD_DCMD: /* Check for LD map update */ if ((cmd->frame->dcmd.opcode == MR_DCMD_LD_MAP_GET_INFO) && (cmd->frame->dcmd.mbox.b[1] == 1)) { sc->fast_path_io = 0; mtx_lock(&sc->raidmap_lock); if (cmd_status != 0) { if (cmd_status != MFI_STAT_NOT_FOUND) device_printf(sc->mrsas_dev, "map sync failed, status=%x\n",cmd_status); else { mrsas_release_mfi_cmd(cmd); mtx_unlock(&sc->raidmap_lock); break; } } else sc->map_id++; mrsas_release_mfi_cmd(cmd); if (MR_ValidateMapInfo(sc)) sc->fast_path_io = 0; else sc->fast_path_io = 1; mrsas_sync_map_info(sc); mtx_unlock(&sc->raidmap_lock); break; } #if 0 //currently not supporting event handling, so commenting out if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_GET_INFO || cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_GET) { mrsas_poll_wait_aen = 0; } #endif /* See if got an event notification */ if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_WAIT) mrsas_complete_aen(sc, cmd); else mrsas_wakeup(sc, cmd); break; case MFI_CMD_ABORT: /* Command issued to abort another cmd return */ mrsas_complete_abort(sc, cmd); break; default: device_printf(sc->mrsas_dev,"Unknown command completed! [0x%X]\n", hdr->cmd); break; } } /** * mrsas_wakeup - Completes an internal command * input: Adapter soft state * Command to be completed * * In mrsas_issue_blocked_cmd(), after a command is issued to Firmware, * a wait timer is started. This function is called from * mrsas_complete_mptmfi_passthru() as it completes the command, * to wake up from the command wait. */ void mrsas_wakeup(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd) { cmd->cmd_status = cmd->frame->io.cmd_status; if (cmd->cmd_status == ECONNREFUSED) cmd->cmd_status = 0; /* For debug only ... 
*/ //device_printf(sc->mrsas_dev,"DCMD rec'd for wakeup, sc->chan=%p\n", sc->chan); sc->chan = (void*)&cmd; wakeup_one((void *)&sc->chan); return; } /** * mrsas_shutdown_ctlr: Instructs FW to shutdown the controller * input: Adapter soft state * Shutdown/Hibernate * * This function issues a DCMD internal command to Firmware to initiate * shutdown of the controller. */ static void mrsas_shutdown_ctlr(struct mrsas_softc *sc, u_int32_t opcode) { struct mrsas_mfi_cmd *cmd; struct mrsas_dcmd_frame *dcmd; if (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR) return; cmd = mrsas_get_mfi_cmd(sc); if (!cmd) { device_printf(sc->mrsas_dev,"Cannot allocate for shutdown cmd.\n"); return; } if (sc->aen_cmd) mrsas_issue_blocked_abort_cmd(sc, sc->aen_cmd); if (sc->map_update_cmd) mrsas_issue_blocked_abort_cmd(sc, sc->map_update_cmd); dcmd = &cmd->frame->dcmd; memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0x0; dcmd->sge_count = 0; dcmd->flags = MFI_FRAME_DIR_NONE; dcmd->timeout = 0; dcmd->pad_0 = 0; dcmd->data_xfer_len = 0; dcmd->opcode = opcode; device_printf(sc->mrsas_dev,"Preparing to shut down controller.\n"); mrsas_issue_blocked_cmd(sc, cmd); mrsas_release_mfi_cmd(cmd); return; } /** * mrsas_flush_cache: Requests FW to flush all its caches * input: Adapter soft state * * This function is issues a DCMD internal command to Firmware to initiate * flushing of all caches. 
*/ static void mrsas_flush_cache(struct mrsas_softc *sc) { struct mrsas_mfi_cmd *cmd; struct mrsas_dcmd_frame *dcmd; if (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR) return; cmd = mrsas_get_mfi_cmd(sc); if (!cmd) { device_printf(sc->mrsas_dev,"Cannot allocate for flush cache cmd.\n"); return; } dcmd = &cmd->frame->dcmd; memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0x0; dcmd->sge_count = 0; dcmd->flags = MFI_FRAME_DIR_NONE; dcmd->timeout = 0; dcmd->pad_0 = 0; dcmd->data_xfer_len = 0; dcmd->opcode = MR_DCMD_CTRL_CACHE_FLUSH; dcmd->mbox.b[0] = MR_FLUSH_CTRL_CACHE | MR_FLUSH_DISK_CACHE; mrsas_issue_blocked_cmd(sc, cmd); mrsas_release_mfi_cmd(cmd); return; } /** * mrsas_get_map_info: Load and validate RAID map * input: Adapter instance soft state * * This function calls mrsas_get_ld_map_info() and MR_ValidateMapInfo() * to load and validate RAID map. It returns 0 if successful, 1 other- * wise. */ static int mrsas_get_map_info(struct mrsas_softc *sc) { uint8_t retcode = 0; sc->fast_path_io = 0; if (!mrsas_get_ld_map_info(sc)) { retcode = MR_ValidateMapInfo(sc); if (retcode == 0) { sc->fast_path_io = 1; return 0; } } return 1; } /** * mrsas_get_ld_map_info: Get FW's ld_map structure * input: Adapter instance soft state * * Issues an internal command (DCMD) to get the FW's controller PD * list structure. 
*/
static int mrsas_get_ld_map_info(struct mrsas_softc *sc)
{
    struct mrsas_mfi_cmd *mfi;
    struct mrsas_dcmd_frame *frame;
    MR_FW_RAID_MAP_ALL *raid_map;
    bus_addr_t raid_map_paddr = 0;
    int status;

    mfi = mrsas_get_mfi_cmd(sc);
    if (mfi == NULL) {
        device_printf(sc->mrsas_dev, "Cannot alloc for ld map info cmd.\n");
        return 1;
    }
    frame = &mfi->frame->dcmd;

    /* Two map buffers exist; the low bit of map_id selects the one to fill. */
    raid_map = sc->raidmap_mem[(sc->map_id & 1)];
    raid_map_paddr = sc->raidmap_phys_addr[(sc->map_id & 1)];
    if (raid_map == NULL) {
        device_printf(sc->mrsas_dev, "Failed to alloc mem for ld map info.\n");
        mrsas_release_mfi_cmd(mfi);
        return (ENOMEM);
    }
    memset(raid_map, 0, sizeof(*raid_map));

    /* Read DCMD with one SGE of map_sz bytes pointing at the map buffer. */
    memset(frame->mbox.b, 0, MFI_MBOX_SIZE);
    frame->cmd = MFI_CMD_DCMD;
    frame->cmd_status = 0xFF;
    frame->sge_count = 1;
    frame->flags = MFI_FRAME_DIR_READ;
    frame->timeout = 0;
    frame->pad_0 = 0;
    frame->data_xfer_len = sc->map_sz;
    frame->opcode = MR_DCMD_LD_MAP_GET_INFO;
    frame->sgl.sge32[0].phys_addr = raid_map_paddr;
    frame->sgl.sge32[0].length = sc->map_sz;

    if (mrsas_issue_polled(sc, mfi) == 0) {
        status = 0;
    } else {
        device_printf(sc->mrsas_dev, "Fail to send get LD map info cmd.\n");
        status = 1;
    }

    mrsas_release_mfi_cmd(mfi);
    return(status);
}

/**
 * mrsas_sync_map_info:        Get FW's ld_map structure
 * input:                      Adapter instance soft state
 *
 * Issues an internal command (DCMD) to get the FW's controller PD
 * list structure.
*/
static int mrsas_sync_map_info(struct mrsas_softc *sc)
{
    uint32_t i, num_lds;
    struct mrsas_mfi_cmd *cmd;
    struct mrsas_dcmd_frame *dcmd;
    MR_LD_TARGET_SYNC *target_map = NULL;
    MR_FW_RAID_MAP_ALL *map;
    MR_LD_RAID  *raid;
    MR_LD_TARGET_SYNC *ld_sync;
    bus_addr_t map_phys_addr = 0;

    cmd = mrsas_get_mfi_cmd(sc);
    if (!cmd) {
        device_printf(sc->mrsas_dev, "Cannot alloc for sync map info cmd\n");
        return 1;
    }

    /* The current map supplies the LD list ... */
    map = sc->raidmap_mem[sc->map_id & 1];
    num_lds = map->raidMap.ldCount;

    dcmd = &cmd->frame->dcmd;
    memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);

    /* ... and the other map buffer is reused to carry the sync entries. */
    target_map = (MR_LD_TARGET_SYNC *)sc->raidmap_mem[(sc->map_id - 1) & 1];
    memset(target_map, 0, sizeof(MR_FW_RAID_MAP_ALL));

    map_phys_addr = sc->raidmap_phys_addr[(sc->map_id - 1) & 1];

    /* One entry per LD: its target id and its current sequence number. */
    ld_sync = target_map;
    for (i = 0; i < num_lds; i++, ld_sync++) {
        raid = MR_LdRaidGet(i, map);
        ld_sync->targetId = MR_GetLDTgtId(i, map);
        ld_sync->seqNum = raid->seqNum;
    }

    dcmd->cmd = MFI_CMD_DCMD;
    dcmd->cmd_status = 0xFF;
    dcmd->sge_count = 1;
    dcmd->flags = MFI_FRAME_DIR_WRITE;
    dcmd->timeout = 0;
    dcmd->pad_0 = 0;
    dcmd->data_xfer_len = sc->map_sz;
    dcmd->mbox.b[0] = num_lds;
    dcmd->mbox.b[1] = MRSAS_DCMD_MBOX_PEND_FLAG;
    dcmd->opcode = MR_DCMD_LD_MAP_GET_INFO;
    dcmd->sgl.sge32[0].phys_addr = map_phys_addr;
    dcmd->sgl.sge32[0].length = sc->map_sz;

    sc->map_update_cmd = cmd;
    if (mrsas_issue_dcmd(sc, cmd)) {
        device_printf(sc->mrsas_dev, "Fail to send sync map info command.\n");
        /*
         * The command never reached Firmware, so no completion will release
         * it: forget it as the pending map update and return it to the pool.
         */
        sc->map_update_cmd = NULL;
        mrsas_release_mfi_cmd(cmd);
        return(1);
    }
    return(0);
}

/**
 * mrsas_get_pd_list:           Returns FW's PD list structure
 * input:                       Adapter soft state
 *
 * Issues an internal command (DCMD) to get the FW's controller PD
 * list structure.  This information is mainly used to find out about
 * system supported by Firmware.
*/ static int mrsas_get_pd_list(struct mrsas_softc *sc) { int retcode = 0, pd_index = 0, pd_count=0, pd_list_size; struct mrsas_mfi_cmd *cmd; struct mrsas_dcmd_frame *dcmd; struct MR_PD_LIST *pd_list_mem; struct MR_PD_ADDRESS *pd_addr; bus_addr_t pd_list_phys_addr = 0; struct mrsas_tmp_dcmd *tcmd; cmd = mrsas_get_mfi_cmd(sc); if (!cmd) { device_printf(sc->mrsas_dev, "Cannot alloc for get PD list cmd\n"); return 1; } dcmd = &cmd->frame->dcmd; tcmd = malloc(sizeof(struct mrsas_tmp_dcmd), M_MRSAS, M_NOWAIT); pd_list_size = MRSAS_MAX_PD * sizeof(struct MR_PD_LIST); if (mrsas_alloc_tmp_dcmd(sc, tcmd, pd_list_size) != SUCCESS) { device_printf(sc->mrsas_dev, "Cannot alloc dmamap for get PD list cmd\n"); mrsas_release_mfi_cmd(cmd); return(ENOMEM); } else { pd_list_mem = tcmd->tmp_dcmd_mem; pd_list_phys_addr = tcmd->tmp_dcmd_phys_addr; } memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); dcmd->mbox.b[0] = MR_PD_QUERY_TYPE_EXPOSED_TO_HOST; dcmd->mbox.b[1] = 0; dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0xFF; dcmd->sge_count = 1; dcmd->flags = MFI_FRAME_DIR_READ; dcmd->timeout = 0; dcmd->pad_0 = 0; dcmd->data_xfer_len = MRSAS_MAX_PD * sizeof(struct MR_PD_LIST); dcmd->opcode = MR_DCMD_PD_LIST_QUERY; dcmd->sgl.sge32[0].phys_addr = pd_list_phys_addr; dcmd->sgl.sge32[0].length = MRSAS_MAX_PD * sizeof(struct MR_PD_LIST); if (!mrsas_issue_polled(sc, cmd)) retcode = 0; else retcode = 1; /* Get the instance PD list */ pd_count = MRSAS_MAX_PD; pd_addr = pd_list_mem->addr; if (retcode == 0 && pd_list_mem->count < pd_count) { memset(sc->local_pd_list, 0, MRSAS_MAX_PD * sizeof(struct mrsas_pd_list)); for (pd_index = 0; pd_index < pd_list_mem->count; pd_index++) { sc->local_pd_list[pd_addr->deviceId].tid = pd_addr->deviceId; sc->local_pd_list[pd_addr->deviceId].driveType = pd_addr->scsiDevType; sc->local_pd_list[pd_addr->deviceId].driveState = MR_PD_STATE_SYSTEM; pd_addr++; } } /* Use mutext/spinlock if pd_list component size increase more than 32 bit. 
*/ memcpy(sc->pd_list, sc->local_pd_list, sizeof(sc->local_pd_list)); mrsas_free_tmp_dcmd(tcmd); mrsas_release_mfi_cmd(cmd); free(tcmd, M_MRSAS); return(retcode); } /** * mrsas_get_ld_list: Returns FW's LD list structure * input: Adapter soft state * * Issues an internal command (DCMD) to get the FW's controller PD * list structure. This information is mainly used to find out about * supported by the FW. */ static int mrsas_get_ld_list(struct mrsas_softc *sc) { int ld_list_size, retcode = 0, ld_index = 0, ids = 0; struct mrsas_mfi_cmd *cmd; struct mrsas_dcmd_frame *dcmd; struct MR_LD_LIST *ld_list_mem; bus_addr_t ld_list_phys_addr = 0; struct mrsas_tmp_dcmd *tcmd; cmd = mrsas_get_mfi_cmd(sc); if (!cmd) { device_printf(sc->mrsas_dev, "Cannot alloc for get LD list cmd\n"); return 1; } dcmd = &cmd->frame->dcmd; tcmd = malloc(sizeof(struct mrsas_tmp_dcmd), M_MRSAS, M_NOWAIT); ld_list_size = sizeof(struct MR_LD_LIST); if (mrsas_alloc_tmp_dcmd(sc, tcmd, ld_list_size) != SUCCESS) { device_printf(sc->mrsas_dev, "Cannot alloc dmamap for get LD list cmd\n"); mrsas_release_mfi_cmd(cmd); return(ENOMEM); } else { ld_list_mem = tcmd->tmp_dcmd_mem; ld_list_phys_addr = tcmd->tmp_dcmd_phys_addr; } memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0xFF; dcmd->sge_count = 1; dcmd->flags = MFI_FRAME_DIR_READ; dcmd->timeout = 0; dcmd->data_xfer_len = sizeof(struct MR_LD_LIST); dcmd->opcode = MR_DCMD_LD_GET_LIST; dcmd->sgl.sge32[0].phys_addr = ld_list_phys_addr; dcmd->sgl.sge32[0].length = sizeof(struct MR_LD_LIST); dcmd->pad_0 = 0; if (!mrsas_issue_polled(sc, cmd)) retcode = 0; else retcode = 1; /* Get the instance LD list */ if ((retcode == 0) && (ld_list_mem->ldCount <= (MAX_LOGICAL_DRIVES))){ sc->CurLdCount = ld_list_mem->ldCount; memset(sc->ld_ids, 0xff, MRSAS_MAX_LD); for (ld_index = 0; ld_index < ld_list_mem->ldCount; ld_index++) { if (ld_list_mem->ldList[ld_index].state != 0) { ids = ld_list_mem->ldList[ld_index].ref.ld_context.targetId; 
sc->ld_ids[ids] = ld_list_mem->ldList[ld_index].ref.ld_context.targetId; } } } mrsas_free_tmp_dcmd(tcmd); mrsas_release_mfi_cmd(cmd); free(tcmd, M_MRSAS); return(retcode); } /** * mrsas_alloc_tmp_dcmd: Allocates memory for temporary command * input: Adapter soft state * Temp command * Size of alloction * * Allocates DMAable memory for a temporary internal command. The allocated * memory is initialized to all zeros upon successful loading of the dma * mapped memory. */ int mrsas_alloc_tmp_dcmd(struct mrsas_softc *sc, struct mrsas_tmp_dcmd *tcmd, int size) { if (bus_dma_tag_create( sc->mrsas_parent_tag, // parent 1, 0, // algnmnt, boundary BUS_SPACE_MAXADDR_32BIT,// lowaddr BUS_SPACE_MAXADDR, // highaddr NULL, NULL, // filter, filterarg size, // maxsize 1, // msegments size, // maxsegsize BUS_DMA_ALLOCNOW, // flags NULL, NULL, // lockfunc, lockarg &tcmd->tmp_dcmd_tag)) { device_printf(sc->mrsas_dev, "Cannot allocate tmp dcmd tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(tcmd->tmp_dcmd_tag, (void **)&tcmd->tmp_dcmd_mem, BUS_DMA_NOWAIT, &tcmd->tmp_dcmd_dmamap)) { device_printf(sc->mrsas_dev, "Cannot allocate tmp dcmd mem\n"); return (ENOMEM); } if (bus_dmamap_load(tcmd->tmp_dcmd_tag, tcmd->tmp_dcmd_dmamap, tcmd->tmp_dcmd_mem, size, mrsas_addr_cb, &tcmd->tmp_dcmd_phys_addr, BUS_DMA_NOWAIT)) { device_printf(sc->mrsas_dev, "Cannot load tmp dcmd mem\n"); return (ENOMEM); } memset(tcmd->tmp_dcmd_mem, 0, size); return (0); } /** * mrsas_free_tmp_dcmd: Free memory for temporary command * input: temporary dcmd pointer * * Deallocates memory of the temporary command for use in the construction * of the internal DCMD. 
*/ void mrsas_free_tmp_dcmd(struct mrsas_tmp_dcmd *tmp) { if (tmp->tmp_dcmd_phys_addr) bus_dmamap_unload(tmp->tmp_dcmd_tag, tmp->tmp_dcmd_dmamap); if (tmp->tmp_dcmd_mem != NULL) bus_dmamem_free(tmp->tmp_dcmd_tag, tmp->tmp_dcmd_mem, tmp->tmp_dcmd_dmamap); if (tmp->tmp_dcmd_tag != NULL) bus_dma_tag_destroy(tmp->tmp_dcmd_tag); } /** * mrsas_issue_blocked_abort_cmd: Aborts previously issued cmd * input: Adapter soft state * Previously issued cmd to be aborted * * This function is used to abort previously issued commands, such as AEN and * RAID map sync map commands. The abort command is sent as a DCMD internal * command and subsequently the driver will wait for a return status. The * max wait time is MRSAS_INTERNAL_CMD_WAIT_TIME seconds. */ static int mrsas_issue_blocked_abort_cmd(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd_to_abort) { struct mrsas_mfi_cmd *cmd; struct mrsas_abort_frame *abort_fr; u_int8_t retcode = 0; unsigned long total_time = 0; u_int8_t max_wait = MRSAS_INTERNAL_CMD_WAIT_TIME; cmd = mrsas_get_mfi_cmd(sc); if (!cmd) { device_printf(sc->mrsas_dev, "Cannot alloc for abort cmd\n"); return(1); } abort_fr = &cmd->frame->abort; /* Prepare and issue the abort frame */ abort_fr->cmd = MFI_CMD_ABORT; abort_fr->cmd_status = 0xFF; abort_fr->flags = 0; abort_fr->abort_context = cmd_to_abort->index; abort_fr->abort_mfi_phys_addr_lo = cmd_to_abort->frame_phys_addr; abort_fr->abort_mfi_phys_addr_hi = 0; cmd->sync_cmd = 1; cmd->cmd_status = 0xFF; if (mrsas_issue_dcmd(sc, cmd)) { device_printf(sc->mrsas_dev, "Fail to send abort command.\n"); return(1); } /* Wait for this cmd to complete */ sc->chan = (void*)&cmd; while (1) { if (cmd->cmd_status == 0xFF){ tsleep((void *)&sc->chan, 0, "mrsas_sleep", hz); } else break; total_time++; if (total_time >= max_wait) { device_printf(sc->mrsas_dev, "Abort cmd timed out after %d sec.\n", max_wait); retcode = 1; break; } } cmd->sync_cmd = 0; mrsas_release_mfi_cmd(cmd); return(retcode); } /** * mrsas_complete_abort: 
Completes aborting a command * input: Adapter soft state * Cmd that was issued to abort another cmd * * The mrsas_issue_blocked_abort_cmd() function waits for the command status * to change after sending the command. This function is called from * mrsas_complete_mptmfi_passthru() to wake up the sleep thread associated. */ void mrsas_complete_abort(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd) { if (cmd->sync_cmd) { cmd->sync_cmd = 0; cmd->cmd_status = 0; sc->chan = (void*)&cmd; wakeup_one((void *)&sc->chan); } return; } /** * mrsas_aen_handler: Callback function for AEN processing from thread context. * input: Adapter soft state * */ void mrsas_aen_handler(struct mrsas_softc *sc) { union mrsas_evt_class_locale class_locale; int doscan = 0; u_int32_t seq_num; int error; if (!sc) { device_printf(sc->mrsas_dev, "invalid instance!\n"); return; } if (sc->evt_detail_mem) { switch (sc->evt_detail_mem->code) { case MR_EVT_PD_INSERTED: mrsas_get_pd_list(sc); mrsas_bus_scan_sim(sc, sc->sim_1); doscan = 0; break; case MR_EVT_PD_REMOVED: mrsas_get_pd_list(sc); mrsas_bus_scan_sim(sc, sc->sim_1); doscan = 0; break; case MR_EVT_LD_OFFLINE: case MR_EVT_CFG_CLEARED: case MR_EVT_LD_DELETED: mrsas_bus_scan_sim(sc, sc->sim_0); doscan = 0; break; case MR_EVT_LD_CREATED: mrsas_get_ld_list(sc); mrsas_bus_scan_sim(sc, sc->sim_0); doscan = 0; break; case MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED: case MR_EVT_FOREIGN_CFG_IMPORTED: case MR_EVT_LD_STATE_CHANGE: doscan = 1; break; default: doscan = 0; break; } } else { device_printf(sc->mrsas_dev, "invalid evt_detail\n"); return; } if (doscan) { mrsas_get_pd_list(sc); mrsas_dprint(sc, MRSAS_AEN, "scanning ...sim 1\n"); mrsas_bus_scan_sim(sc, sc->sim_1); mrsas_get_ld_list(sc); mrsas_dprint(sc, MRSAS_AEN, "scanning ...sim 0\n"); mrsas_bus_scan_sim(sc, sc->sim_0); } seq_num = sc->evt_detail_mem->seq_num + 1; // Register AEN with FW for latest sequence number plus 1 class_locale.members.reserved = 0; class_locale.members.locale = MR_EVT_LOCALE_ALL; 
class_locale.members.class = MR_EVT_CLASS_DEBUG; if (sc->aen_cmd != NULL ) return ; mtx_lock(&sc->aen_lock); error = mrsas_register_aen(sc, seq_num, class_locale.word); mtx_unlock(&sc->aen_lock); if (error) device_printf(sc->mrsas_dev, "register aen failed error %x\n", error); } /** * mrsas_complete_aen: Completes AEN command * input: Adapter soft state * Cmd that was issued to abort another cmd * * This function will be called from ISR and will continue * event processing from thread context by enqueuing task * in ev_tq (callback function "mrsas_aen_handler"). */ void mrsas_complete_aen(struct mrsas_softc *sc, struct mrsas_mfi_cmd *cmd) { /* * Don't signal app if it is just an aborted previously registered aen */ if ((!cmd->abort_aen) && (sc->remove_in_progress == 0)) { /* TO DO (?) */ } else cmd->abort_aen = 0; sc->aen_cmd = NULL; mrsas_release_mfi_cmd(cmd); if (!sc->remove_in_progress) taskqueue_enqueue(sc->ev_tq, &sc->ev_task); return; } static device_method_t mrsas_methods[] = { DEVMETHOD(device_probe, mrsas_probe), DEVMETHOD(device_attach, mrsas_attach), DEVMETHOD(device_detach, mrsas_detach), DEVMETHOD(device_suspend, mrsas_suspend), DEVMETHOD(device_resume, mrsas_resume), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; static driver_t mrsas_driver = { "mrsas", mrsas_methods, sizeof(struct mrsas_softc) }; static devclass_t mrsas_devclass; DRIVER_MODULE(mrsas, pci, mrsas_driver, mrsas_devclass, 0, 0); MODULE_DEPEND(mrsas, cam, 1,1,1); Index: stable/9/sys/dev/mrsas/mrsas.h =================================================================== --- stable/9/sys/dev/mrsas/mrsas.h (revision 273911) +++ stable/9/sys/dev/mrsas/mrsas.h (revision 273912) @@ -1,2464 +1,2465 @@ /* * Copyright (c) 2014, LSI Corp. * All rights reserved. 
* Authors: Marian Choy * Support: freebsdraid@lsi.com * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * 3. Neither the name of the nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation * are those of the authors and should not be interpreted as representing * official policies,either expressed or implied, of the FreeBSD Project. 
* * Send feedback to: * Mail to: LSI Corporation, 1621 Barber Lane, Milpitas, CA 95035 * ATTN: MegaRaid FreeBSD * */ #include __FBSDID("$FreeBSD$"); #ifndef MRSAS_H #define MRSAS_H #include /* defines used in kernel.h */ #include #include #include #include #include /* types used in module initialization */ #include /* cdevsw struct */ #include /* uio struct */ #include #include /* structs, prototypes for pci bus stuff */ #include #include #include #include #include /* For pci_get macros! */ #include #include #include #include #include #include #include /* * Device IDs and PCI */ #define MRSAS_TBOLT 0x005b #define MRSAS_INVADER 0x005d #define MRSAS_FURY 0x005f #define MRSAS_PCI_BAR0 0x10 #define MRSAS_PCI_BAR1 0x14 #define MRSAS_PCI_BAR2 0x1C /* * Firmware State Defines */ #define MRSAS_FWSTATE_MAXCMD_MASK 0x0000FFFF #define MRSAS_FWSTATE_SGE_MASK 0x00FF0000 #define MRSAS_FW_STATE_CHNG_INTERRUPT 1 /* * Message Frame Defines */ #define MRSAS_SENSE_LEN 96 #define MRSAS_FUSION_MAX_RESET_TRIES 3 /* * Miscellaneous Defines */ #define BYTE_ALIGNMENT 1 #define MRSAS_MAX_NAME_LENGTH 32 #define MRSAS_VERSION "06.704.01.00-fbsd" #define MRSAS_ULONG_MAX 0xFFFFFFFFFFFFFFFF #define MRSAS_DEFAULT_TIMEOUT 0x14 //temp #define DONE 0 #define MRSAS_PAGE_SIZE 4096 #define MRSAS_RESET_NOTICE_INTERVAL 5 #define MRSAS_IO_TIMEOUT 180000 /* 180 second timeout */ #define MRSAS_LDIO_QUEUE_DEPTH 70 /* 70 percent as default */ #define THRESHOLD_REPLY_COUNT 50 /* Boolean types */ #if (__FreeBSD_version < 901000) typedef enum _boolean { false, true } boolean; #endif enum err { SUCCESS, FAIL }; MALLOC_DECLARE(M_MRSAS); SYSCTL_DECL(_hw_mrsas); #define MRSAS_INFO (1 << 0) #define MRSAS_TRACE (1 << 1) #define MRSAS_FAULT (1 << 2) #define MRSAS_OCR (1 << 3) #define MRSAS_TOUT MRSAS_OCR #define MRSAS_AEN (1 << 4) #define MRSAS_PRL11 (1 << 5) #define mrsas_dprint(sc, level, msg, args...) 
\ do { \ if (sc->mrsas_debug & level) \ device_printf(sc->mrsas_dev, msg, ##args); \ } while (0) /**************************************************************************** * Raid Context structure which describes MegaRAID specific IO Paramenters * This resides at offset 0x60 where the SGL normally starts in MPT IO Frames ****************************************************************************/ typedef struct _RAID_CONTEXT { u_int8_t Type:4; // 0x00 u_int8_t nseg:4; // 0x00 u_int8_t resvd0; // 0x01 u_int16_t timeoutValue; // 0x02 -0x03 u_int8_t regLockFlags; // 0x04 u_int8_t resvd1; // 0x05 u_int16_t VirtualDiskTgtId; // 0x06 -0x07 u_int64_t regLockRowLBA; // 0x08 - 0x0F u_int32_t regLockLength; // 0x10 - 0x13 u_int16_t nextLMId; // 0x14 - 0x15 u_int8_t exStatus; // 0x16 u_int8_t status; // 0x17 status u_int8_t RAIDFlags; // 0x18 resvd[7:6],ioSubType[5:4],resvd[3:1],preferredCpu[0] u_int8_t numSGE; // 0x19 numSge; not including chain entries u_int16_t configSeqNum; // 0x1A -0x1B u_int8_t spanArm; // 0x1C span[7:5], arm[4:0] u_int8_t resvd2[3]; // 0x1D-0x1f } RAID_CONTEXT; /************************************************************************* * MPI2 Defines ************************************************************************/ #define MPI2_FUNCTION_IOC_INIT (0x02) /* IOC Init */ #define MPI2_WHOINIT_HOST_DRIVER (0x04) #define MPI2_VERSION_MAJOR (0x02) #define MPI2_VERSION_MINOR (0x00) #define MPI2_VERSION_MAJOR_MASK (0xFF00) #define MPI2_VERSION_MAJOR_SHIFT (8) #define MPI2_VERSION_MINOR_MASK (0x00FF) #define MPI2_VERSION_MINOR_SHIFT (0) #define MPI2_VERSION ((MPI2_VERSION_MAJOR << MPI2_VERSION_MAJOR_SHIFT) | \ MPI2_VERSION_MINOR) #define MPI2_HEADER_VERSION_UNIT (0x10) #define MPI2_HEADER_VERSION_DEV (0x00) #define MPI2_HEADER_VERSION_UNIT_MASK (0xFF00) #define MPI2_HEADER_VERSION_UNIT_SHIFT (8) #define MPI2_HEADER_VERSION_DEV_MASK (0x00FF) #define MPI2_HEADER_VERSION_DEV_SHIFT (0) #define MPI2_HEADER_VERSION ((MPI2_HEADER_VERSION_UNIT << 8) | 
MPI2_HEADER_VERSION_DEV) #define MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR (0x03) #define MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG (0x8000) #define MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG (0x0400) #define MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP (0x0003) #define MPI2_SCSIIO_EEDPFLAGS_CHECK_APPTAG (0x0200) #define MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD (0x0100) #define MPI2_SCSIIO_EEDPFLAGS_INSERT_OP (0x0004) #define MPI2_FUNCTION_SCSI_IO_REQUEST (0x00) /* SCSI IO */ #define MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY (0x06) #define MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO (0x00) #define MPI2_SGE_FLAGS_64_BIT_ADDRESSING (0x02) #define MPI2_SCSIIO_CONTROL_WRITE (0x01000000) #define MPI2_SCSIIO_CONTROL_READ (0x02000000) #define MPI2_REQ_DESCRIPT_FLAGS_TYPE_MASK (0x0E) #define MPI2_RPY_DESCRIPT_FLAGS_UNUSED (0x0F) #define MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS (0x00) #define MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK (0x0F) #define MPI2_WRSEQ_FLUSH_KEY_VALUE (0x0) #define MPI2_WRITE_SEQUENCE_OFFSET (0x00000004) #define MPI2_WRSEQ_1ST_KEY_VALUE (0xF) #define MPI2_WRSEQ_2ND_KEY_VALUE (0x4) #define MPI2_WRSEQ_3RD_KEY_VALUE (0xB) #define MPI2_WRSEQ_4TH_KEY_VALUE (0x2) #define MPI2_WRSEQ_5TH_KEY_VALUE (0x7) #define MPI2_WRSEQ_6TH_KEY_VALUE (0xD) #ifndef MPI2_POINTER #define MPI2_POINTER * #endif /*************************************** * MPI2 Structures ***************************************/ typedef struct _MPI25_IEEE_SGE_CHAIN64 { u_int64_t Address; u_int32_t Length; u_int16_t Reserved1; u_int8_t NextChainOffset; u_int8_t Flags; } MPI25_IEEE_SGE_CHAIN64, MPI2_POINTER PTR_MPI25_IEEE_SGE_CHAIN64, Mpi25IeeeSgeChain64_t, MPI2_POINTER pMpi25IeeeSgeChain64_t; typedef struct _MPI2_SGE_SIMPLE_UNION { u_int32_t FlagsLength; union { u_int32_t Address32; u_int64_t Address64; } u; } MPI2_SGE_SIMPLE_UNION, MPI2_POINTER PTR_MPI2_SGE_SIMPLE_UNION, Mpi2SGESimpleUnion_t, MPI2_POINTER pMpi2SGESimpleUnion_t; typedef struct { u_int8_t CDB[20]; /* 0x00 */ u_int32_t PrimaryReferenceTag; /* 0x14 */ u_int16_t PrimaryApplicationTag; /* 0x18 
*/ u_int16_t PrimaryApplicationTagMask; /* 0x1A */ u_int32_t TransferLength; /* 0x1C */ } MPI2_SCSI_IO_CDB_EEDP32, MPI2_POINTER PTR_MPI2_SCSI_IO_CDB_EEDP32, Mpi2ScsiIoCdbEedp32_t, MPI2_POINTER pMpi2ScsiIoCdbEedp32_t; typedef struct _MPI2_SGE_CHAIN_UNION { u_int16_t Length; u_int8_t NextChainOffset; u_int8_t Flags; union { u_int32_t Address32; u_int64_t Address64; } u; } MPI2_SGE_CHAIN_UNION, MPI2_POINTER PTR_MPI2_SGE_CHAIN_UNION, Mpi2SGEChainUnion_t, MPI2_POINTER pMpi2SGEChainUnion_t; typedef struct _MPI2_IEEE_SGE_SIMPLE32 { u_int32_t Address; u_int32_t FlagsLength; } MPI2_IEEE_SGE_SIMPLE32, MPI2_POINTER PTR_MPI2_IEEE_SGE_SIMPLE32, Mpi2IeeeSgeSimple32_t, MPI2_POINTER pMpi2IeeeSgeSimple32_t; typedef struct _MPI2_IEEE_SGE_SIMPLE64 { u_int64_t Address; u_int32_t Length; u_int16_t Reserved1; u_int8_t Reserved2; u_int8_t Flags; } MPI2_IEEE_SGE_SIMPLE64, MPI2_POINTER PTR_MPI2_IEEE_SGE_SIMPLE64, Mpi2IeeeSgeSimple64_t, MPI2_POINTER pMpi2IeeeSgeSimple64_t; typedef union _MPI2_IEEE_SGE_SIMPLE_UNION { MPI2_IEEE_SGE_SIMPLE32 Simple32; MPI2_IEEE_SGE_SIMPLE64 Simple64; } MPI2_IEEE_SGE_SIMPLE_UNION, MPI2_POINTER PTR_MPI2_IEEE_SGE_SIMPLE_UNION, Mpi2IeeeSgeSimpleUnion_t, MPI2_POINTER pMpi2IeeeSgeSimpleUnion_t; typedef MPI2_IEEE_SGE_SIMPLE32 MPI2_IEEE_SGE_CHAIN32; typedef MPI2_IEEE_SGE_SIMPLE64 MPI2_IEEE_SGE_CHAIN64; typedef union _MPI2_IEEE_SGE_CHAIN_UNION { MPI2_IEEE_SGE_CHAIN32 Chain32; MPI2_IEEE_SGE_CHAIN64 Chain64; } MPI2_IEEE_SGE_CHAIN_UNION, MPI2_POINTER PTR_MPI2_IEEE_SGE_CHAIN_UNION, Mpi2IeeeSgeChainUnion_t, MPI2_POINTER pMpi2IeeeSgeChainUnion_t; typedef union _MPI2_SGE_IO_UNION { MPI2_SGE_SIMPLE_UNION MpiSimple; MPI2_SGE_CHAIN_UNION MpiChain; MPI2_IEEE_SGE_SIMPLE_UNION IeeeSimple; MPI2_IEEE_SGE_CHAIN_UNION IeeeChain; } MPI2_SGE_IO_UNION, MPI2_POINTER PTR_MPI2_SGE_IO_UNION, Mpi2SGEIOUnion_t, MPI2_POINTER pMpi2SGEIOUnion_t; typedef union { u_int8_t CDB32[32]; MPI2_SCSI_IO_CDB_EEDP32 EEDP32; MPI2_SGE_SIMPLE_UNION SGE; } MPI2_SCSI_IO_CDB_UNION, MPI2_POINTER 
PTR_MPI2_SCSI_IO_CDB_UNION, Mpi2ScsiIoCdb_t, MPI2_POINTER pMpi2ScsiIoCdb_t; /* * RAID SCSI IO Request Message * Total SGE count will be one less than _MPI2_SCSI_IO_REQUEST */ typedef struct _MPI2_RAID_SCSI_IO_REQUEST { u_int16_t DevHandle; /* 0x00 */ u_int8_t ChainOffset; /* 0x02 */ u_int8_t Function; /* 0x03 */ u_int16_t Reserved1; /* 0x04 */ u_int8_t Reserved2; /* 0x06 */ u_int8_t MsgFlags; /* 0x07 */ u_int8_t VP_ID; /* 0x08 */ u_int8_t VF_ID; /* 0x09 */ u_int16_t Reserved3; /* 0x0A */ u_int32_t SenseBufferLowAddress; /* 0x0C */ u_int16_t SGLFlags; /* 0x10 */ u_int8_t SenseBufferLength; /* 0x12 */ u_int8_t Reserved4; /* 0x13 */ u_int8_t SGLOffset0; /* 0x14 */ u_int8_t SGLOffset1; /* 0x15 */ u_int8_t SGLOffset2; /* 0x16 */ u_int8_t SGLOffset3; /* 0x17 */ u_int32_t SkipCount; /* 0x18 */ u_int32_t DataLength; /* 0x1C */ u_int32_t BidirectionalDataLength; /* 0x20 */ u_int16_t IoFlags; /* 0x24 */ u_int16_t EEDPFlags; /* 0x26 */ u_int32_t EEDPBlockSize; /* 0x28 */ u_int32_t SecondaryReferenceTag; /* 0x2C */ u_int16_t SecondaryApplicationTag; /* 0x30 */ u_int16_t ApplicationTagTranslationMask; /* 0x32 */ u_int8_t LUN[8]; /* 0x34 */ u_int32_t Control; /* 0x3C */ MPI2_SCSI_IO_CDB_UNION CDB; /* 0x40 */ RAID_CONTEXT RaidContext; /* 0x60 */ MPI2_SGE_IO_UNION SGL; /* 0x80 */ } MRSAS_RAID_SCSI_IO_REQUEST, MPI2_POINTER PTR_MRSAS_RAID_SCSI_IO_REQUEST, MRSASRaidSCSIIORequest_t, MPI2_POINTER pMRSASRaidSCSIIORequest_t; /* * MPT RAID MFA IO Descriptor. 
*/ typedef struct _MRSAS_RAID_MFA_IO_DESCRIPTOR { u_int32_t RequestFlags : 8; u_int32_t MessageAddress1 : 24; /* bits 31:8*/ u_int32_t MessageAddress2; /* bits 61:32 */ } MRSAS_RAID_MFA_IO_REQUEST_DESCRIPTOR,*PMRSAS_RAID_MFA_IO_REQUEST_DESCRIPTOR; /* Default Request Descriptor */ typedef struct _MPI2_DEFAULT_REQUEST_DESCRIPTOR { u_int8_t RequestFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int16_t LMID; /* 0x04 */ u_int16_t DescriptorTypeDependent; /* 0x06 */ } MPI2_DEFAULT_REQUEST_DESCRIPTOR, MPI2_POINTER PTR_MPI2_DEFAULT_REQUEST_DESCRIPTOR, Mpi2DefaultRequestDescriptor_t, MPI2_POINTER pMpi2DefaultRequestDescriptor_t; /* High Priority Request Descriptor */ typedef struct _MPI2_HIGH_PRIORITY_REQUEST_DESCRIPTOR { u_int8_t RequestFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int16_t LMID; /* 0x04 */ u_int16_t Reserved1; /* 0x06 */ } MPI2_HIGH_PRIORITY_REQUEST_DESCRIPTOR, MPI2_POINTER PTR_MPI2_HIGH_PRIORITY_REQUEST_DESCRIPTOR, Mpi2HighPriorityRequestDescriptor_t, MPI2_POINTER pMpi2HighPriorityRequestDescriptor_t; /* SCSI IO Request Descriptor */ typedef struct _MPI2_SCSI_IO_REQUEST_DESCRIPTOR { u_int8_t RequestFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int16_t LMID; /* 0x04 */ u_int16_t DevHandle; /* 0x06 */ } MPI2_SCSI_IO_REQUEST_DESCRIPTOR, MPI2_POINTER PTR_MPI2_SCSI_IO_REQUEST_DESCRIPTOR, Mpi2SCSIIORequestDescriptor_t, MPI2_POINTER pMpi2SCSIIORequestDescriptor_t; /* SCSI Target Request Descriptor */ typedef struct _MPI2_SCSI_TARGET_REQUEST_DESCRIPTOR { u_int8_t RequestFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int16_t LMID; /* 0x04 */ u_int16_t IoIndex; /* 0x06 */ } MPI2_SCSI_TARGET_REQUEST_DESCRIPTOR, MPI2_POINTER PTR_MPI2_SCSI_TARGET_REQUEST_DESCRIPTOR, Mpi2SCSITargetRequestDescriptor_t, MPI2_POINTER pMpi2SCSITargetRequestDescriptor_t; /* RAID Accelerator Request Descriptor */ typedef struct _MPI2_RAID_ACCEL_REQUEST_DESCRIPTOR { 
u_int8_t RequestFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int16_t LMID; /* 0x04 */ u_int16_t Reserved; /* 0x06 */ } MPI2_RAID_ACCEL_REQUEST_DESCRIPTOR, MPI2_POINTER PTR_MPI2_RAID_ACCEL_REQUEST_DESCRIPTOR, Mpi2RAIDAcceleratorRequestDescriptor_t, MPI2_POINTER pMpi2RAIDAcceleratorRequestDescriptor_t; /* union of Request Descriptors */ typedef union _MRSAS_REQUEST_DESCRIPTOR_UNION { MPI2_DEFAULT_REQUEST_DESCRIPTOR Default; MPI2_HIGH_PRIORITY_REQUEST_DESCRIPTOR HighPriority; MPI2_SCSI_IO_REQUEST_DESCRIPTOR SCSIIO; MPI2_SCSI_TARGET_REQUEST_DESCRIPTOR SCSITarget; MPI2_RAID_ACCEL_REQUEST_DESCRIPTOR RAIDAccelerator; MRSAS_RAID_MFA_IO_REQUEST_DESCRIPTOR MFAIo; union { struct { u_int32_t low; u_int32_t high; } u; u_int64_t Words; } addr; } MRSAS_REQUEST_DESCRIPTOR_UNION; /* Default Reply Descriptor */ typedef struct _MPI2_DEFAULT_REPLY_DESCRIPTOR { u_int8_t ReplyFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t DescriptorTypeDependent1; /* 0x02 */ u_int32_t DescriptorTypeDependent2; /* 0x04 */ } MPI2_DEFAULT_REPLY_DESCRIPTOR, MPI2_POINTER PTR_MPI2_DEFAULT_REPLY_DESCRIPTOR, Mpi2DefaultReplyDescriptor_t, MPI2_POINTER pMpi2DefaultReplyDescriptor_t; /* Address Reply Descriptor */ typedef struct _MPI2_ADDRESS_REPLY_DESCRIPTOR { u_int8_t ReplyFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int32_t ReplyFrameAddress; /* 0x04 */ } MPI2_ADDRESS_REPLY_DESCRIPTOR, MPI2_POINTER PTR_MPI2_ADDRESS_REPLY_DESCRIPTOR, Mpi2AddressReplyDescriptor_t, MPI2_POINTER pMpi2AddressReplyDescriptor_t; /* SCSI IO Success Reply Descriptor */ typedef struct _MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR { u_int8_t ReplyFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int16_t TaskTag; /* 0x04 */ u_int16_t Reserved1; /* 0x06 */ } MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR, MPI2_POINTER PTR_MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR, Mpi2SCSIIOSuccessReplyDescriptor_t, MPI2_POINTER pMpi2SCSIIOSuccessReplyDescriptor_t; /* 
TargetAssist Success Reply Descriptor */ typedef struct _MPI2_TARGETASSIST_SUCCESS_REPLY_DESCRIPTOR { u_int8_t ReplyFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int8_t SequenceNumber; /* 0x04 */ u_int8_t Reserved1; /* 0x05 */ u_int16_t IoIndex; /* 0x06 */ } MPI2_TARGETASSIST_SUCCESS_REPLY_DESCRIPTOR, MPI2_POINTER PTR_MPI2_TARGETASSIST_SUCCESS_REPLY_DESCRIPTOR, Mpi2TargetAssistSuccessReplyDescriptor_t, MPI2_POINTER pMpi2TargetAssistSuccessReplyDescriptor_t; /* Target Command Buffer Reply Descriptor */ typedef struct _MPI2_TARGET_COMMAND_BUFFER_REPLY_DESCRIPTOR { u_int8_t ReplyFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int8_t VP_ID; /* 0x02 */ u_int8_t Flags; /* 0x03 */ u_int16_t InitiatorDevHandle; /* 0x04 */ u_int16_t IoIndex; /* 0x06 */ } MPI2_TARGET_COMMAND_BUFFER_REPLY_DESCRIPTOR, MPI2_POINTER PTR_MPI2_TARGET_COMMAND_BUFFER_REPLY_DESCRIPTOR, Mpi2TargetCommandBufferReplyDescriptor_t, MPI2_POINTER pMpi2TargetCommandBufferReplyDescriptor_t; /* RAID Accelerator Success Reply Descriptor */ typedef struct _MPI2_RAID_ACCELERATOR_SUCCESS_REPLY_DESCRIPTOR { u_int8_t ReplyFlags; /* 0x00 */ u_int8_t MSIxIndex; /* 0x01 */ u_int16_t SMID; /* 0x02 */ u_int32_t Reserved; /* 0x04 */ } MPI2_RAID_ACCELERATOR_SUCCESS_REPLY_DESCRIPTOR, MPI2_POINTER PTR_MPI2_RAID_ACCELERATOR_SUCCESS_REPLY_DESCRIPTOR, Mpi2RAIDAcceleratorSuccessReplyDescriptor_t, MPI2_POINTER pMpi2RAIDAcceleratorSuccessReplyDescriptor_t; /* union of Reply Descriptors */ typedef union _MPI2_REPLY_DESCRIPTORS_UNION { MPI2_DEFAULT_REPLY_DESCRIPTOR Default; MPI2_ADDRESS_REPLY_DESCRIPTOR AddressReply; MPI2_SCSI_IO_SUCCESS_REPLY_DESCRIPTOR SCSIIOSuccess; MPI2_TARGETASSIST_SUCCESS_REPLY_DESCRIPTOR TargetAssistSuccess; MPI2_TARGET_COMMAND_BUFFER_REPLY_DESCRIPTOR TargetCommandBuffer; MPI2_RAID_ACCELERATOR_SUCCESS_REPLY_DESCRIPTOR RAIDAcceleratorSuccess; u_int64_t Words; } MPI2_REPLY_DESCRIPTORS_UNION, MPI2_POINTER PTR_MPI2_REPLY_DESCRIPTORS_UNION, Mpi2ReplyDescriptorsUnion_t, 
MPI2_POINTER pMpi2ReplyDescriptorsUnion_t; -typedef struct { +typedef union { volatile unsigned int val; + unsigned int val_rdonly; } atomic_t; #define atomic_read(v) atomic_load_acq_int(&(v)->val) #define atomic_set(v,i) atomic_store_rel_int(&(v)->val, i) #define atomic_dec(v) atomic_fetchadd_int(&(v)->val, -1) #define atomic_inc(v) atomic_fetchadd_int(&(v)->val, 1) /* IOCInit Request message */ typedef struct _MPI2_IOC_INIT_REQUEST { u_int8_t WhoInit; /* 0x00 */ u_int8_t Reserved1; /* 0x01 */ u_int8_t ChainOffset; /* 0x02 */ u_int8_t Function; /* 0x03 */ u_int16_t Reserved2; /* 0x04 */ u_int8_t Reserved3; /* 0x06 */ u_int8_t MsgFlags; /* 0x07 */ u_int8_t VP_ID; /* 0x08 */ u_int8_t VF_ID; /* 0x09 */ u_int16_t Reserved4; /* 0x0A */ u_int16_t MsgVersion; /* 0x0C */ u_int16_t HeaderVersion; /* 0x0E */ u_int32_t Reserved5; /* 0x10 */ u_int16_t Reserved6; /* 0x14 */ u_int8_t Reserved7; /* 0x16 */ u_int8_t HostMSIxVectors; /* 0x17 */ u_int16_t Reserved8; /* 0x18 */ u_int16_t SystemRequestFrameSize; /* 0x1A */ u_int16_t ReplyDescriptorPostQueueDepth; /* 0x1C */ u_int16_t ReplyFreeQueueDepth; /* 0x1E */ u_int32_t SenseBufferAddressHigh; /* 0x20 */ u_int32_t SystemReplyAddressHigh; /* 0x24 */ u_int64_t SystemRequestFrameBaseAddress; /* 0x28 */ u_int64_t ReplyDescriptorPostQueueAddress;/* 0x30 */ u_int64_t ReplyFreeQueueAddress; /* 0x38 */ u_int64_t TimeStamp; /* 0x40 */ } MPI2_IOC_INIT_REQUEST, MPI2_POINTER PTR_MPI2_IOC_INIT_REQUEST, Mpi2IOCInitRequest_t, MPI2_POINTER pMpi2IOCInitRequest_t; /* * MR private defines */ #define MR_PD_INVALID 0xFFFF #define MAX_SPAN_DEPTH 8 #define MAX_QUAD_DEPTH MAX_SPAN_DEPTH #define MAX_RAIDMAP_SPAN_DEPTH (MAX_SPAN_DEPTH) #define MAX_ROW_SIZE 32 #define MAX_RAIDMAP_ROW_SIZE (MAX_ROW_SIZE) #define MAX_LOGICAL_DRIVES 64 #define MAX_RAIDMAP_LOGICAL_DRIVES (MAX_LOGICAL_DRIVES) #define MAX_RAIDMAP_VIEWS (MAX_LOGICAL_DRIVES) #define MAX_ARRAYS 128 #define MAX_RAIDMAP_ARRAYS (MAX_ARRAYS) #define MAX_PHYSICAL_DEVICES 256 #define 
MAX_RAIDMAP_PHYSICAL_DEVICES (MAX_PHYSICAL_DEVICES) #define MR_DCMD_LD_MAP_GET_INFO 0x0300e101 // get the mapping information of this LD /******************************************************************* * RAID map related structures ********************************************************************/ typedef struct _MR_DEV_HANDLE_INFO { u_int16_t curDevHdl; // the device handle currently used by fw to issue the command. u_int8_t validHandles; // bitmap of valid device handles. u_int8_t reserved; u_int16_t devHandle[2]; // 0x04 dev handles for all the paths. } MR_DEV_HANDLE_INFO; typedef struct _MR_ARRAY_INFO { u_int16_t pd[MAX_RAIDMAP_ROW_SIZE]; } MR_ARRAY_INFO; // 0x40, Total Size typedef struct _MR_QUAD_ELEMENT { u_int64_t logStart; // 0x00 u_int64_t logEnd; // 0x08 u_int64_t offsetInSpan; // 0x10 u_int32_t diff; // 0x18 u_int32_t reserved1; // 0x1C } MR_QUAD_ELEMENT; // 0x20, Total size typedef struct _MR_SPAN_INFO { u_int32_t noElements; // 0x00 u_int32_t reserved1; // 0x04 MR_QUAD_ELEMENT quad[MAX_RAIDMAP_SPAN_DEPTH]; // 0x08 } MR_SPAN_INFO; // 0x108, Total size typedef struct _MR_LD_SPAN_ { // SPAN structure u_int64_t startBlk; // 0x00, starting block number in array u_int64_t numBlks; // 0x08, number of blocks u_int16_t arrayRef; // 0x10, array reference u_int8_t spanRowSize; // 0x11, span row size u_int8_t spanRowDataSize; // 0x12, span row data size u_int8_t reserved[4]; // 0x13, reserved } MR_LD_SPAN; // 0x18, Total Size typedef struct _MR_SPAN_BLOCK_INFO { u_int64_t num_rows; // number of rows/span MR_LD_SPAN span; // 0x08 MR_SPAN_INFO block_span_info; // 0x20 } MR_SPAN_BLOCK_INFO; typedef struct _MR_LD_RAID { struct { u_int32_t fpCapable :1; u_int32_t reserved5 :3; u_int32_t ldPiMode :4; u_int32_t pdPiMode :4; // Every Pd has to be same. 
u_int32_t encryptionType :8; // FDE or ctlr encryption (MR_LD_ENCRYPTION_TYPE) u_int32_t fpWriteCapable :1; u_int32_t fpReadCapable :1; u_int32_t fpWriteAcrossStripe :1; u_int32_t fpReadAcrossStripe :1; u_int32_t fpNonRWCapable :1; // TRUE if supporting Non RW IO u_int32_t reserved4 :7; } capability; // 0x00 u_int32_t reserved6; u_int64_t size; // 0x08, LD size in blocks u_int8_t spanDepth; // 0x10, Total Number of Spans u_int8_t level; // 0x11, RAID level u_int8_t stripeShift; // 0x12, shift-count to get stripe size (0=512, 1=1K, 7=64K, etc.) u_int8_t rowSize; // 0x13, number of disks in a row u_int8_t rowDataSize; // 0x14, number of data disks in a row u_int8_t writeMode; // 0x15, WRITE_THROUGH or WRITE_BACK u_int8_t PRL; // 0x16, To differentiate between RAID1 and RAID1E u_int8_t SRL; // 0x17 u_int16_t targetId; // 0x18, ld Target Id. u_int8_t ldState; // 0x1a, state of ld, state corresponds to MR_LD_STATE u_int8_t regTypeReqOnWrite;// 0x1b, Pre calculate region type requests based on MFC etc.. 
u_int8_t modFactor; // 0x1c, same as rowSize, u_int8_t regTypeReqOnRead; // 0x1d, region lock type used for read, valid only if regTypeOnReadIsValid=1 u_int16_t seqNum; // 0x1e, LD sequence number struct { u_int32_t ldSyncRequired:1; // This LD requires sync command before completing u_int32_t regTypeReqOnReadLsValid:1; // Qualifier for regTypeOnRead u_int32_t reserved:30; } flags; // 0x20 u_int8_t LUN[8]; // 0x24, 8 byte LUN field used for SCSI u_int8_t fpIoTimeoutForLd; // 0x2C, timeout value for FP IOs u_int8_t reserved2[3]; // 0x2D u_int32_t logicalBlockLength; // 0x30 Logical block size for the LD struct { u_int32_t LdPiExp:4; // 0x34, P_I_EXPONENT for ReadCap 16 u_int32_t LdLogicalBlockExp:4; // 0x34, LOGICAL BLOCKS PER PHYS BLOCK u_int32_t reserved1:24; // 0x34 } exponent; u_int8_t reserved3[0x80-0x38]; // 0x38 } MR_LD_RAID; // 0x80, Total Size typedef struct _MR_LD_SPAN_MAP { MR_LD_RAID ldRaid; // 0x00 u_int8_t dataArmMap[MAX_RAIDMAP_ROW_SIZE]; // 0x80, needed for GET_ARM() - R0/1/5 only. MR_SPAN_BLOCK_INFO spanBlock[MAX_RAIDMAP_SPAN_DEPTH]; // 0xA0 } MR_LD_SPAN_MAP; // 0x9E0 typedef struct _MR_FW_RAID_MAP { u_int32_t totalSize; // total size of this structure, including this field. union { struct { // Simple method of version checking variables u_int32_t maxLd; u_int32_t maxSpanDepth; u_int32_t maxRowSize; u_int32_t maxPdCount; u_int32_t maxArrays; } validationInfo; u_int32_t version[5]; u_int32_t reserved1[5]; } raid_desc; u_int32_t ldCount; // count of lds. u_int32_t Reserved1; u_int8_t ldTgtIdToLd[MAX_RAIDMAP_LOGICAL_DRIVES+MAX_RAIDMAP_VIEWS]; // 0x20 // This doesn't correspond to // FW Ld Tgt Id to LD, but will purge. For example: if tgt Id is 4 // and FW LD is 2, and there is only one LD, FW will populate the // array like this. [0xFF, 0xFF, 0xFF, 0xFF, 0x0,.....]. This is to // help reduce the entire strcture size if there are few LDs or // driver is looking info for 1 LD only. 
u_int8_t fpPdIoTimeoutSec; // timeout value used by driver in FP IOs u_int8_t reserved2[7]; MR_ARRAY_INFO arMapInfo[MAX_RAIDMAP_ARRAYS]; // 0x00a8 MR_DEV_HANDLE_INFO devHndlInfo[MAX_RAIDMAP_PHYSICAL_DEVICES]; // 0x20a8 MR_LD_SPAN_MAP ldSpanMap[1]; // 0x28a8-[0-MAX_RAIDMAP_LOGICAL_DRIVES+MAX_RAIDMAP_VIEWS+1]; } MR_FW_RAID_MAP; // 0x3288, Total Size typedef struct _LD_LOAD_BALANCE_INFO { u_int8_t loadBalanceFlag; u_int8_t reserved1; u_int16_t raid1DevHandle[2]; atomic_t scsi_pending_cmds[2]; u_int64_t last_accessed_block[2]; } LD_LOAD_BALANCE_INFO, *PLD_LOAD_BALANCE_INFO; /* SPAN_SET is info caclulated from span info from Raid map per ld */ typedef struct _LD_SPAN_SET { u_int64_t log_start_lba; u_int64_t log_end_lba; u_int64_t span_row_start; u_int64_t span_row_end; u_int64_t data_strip_start; u_int64_t data_strip_end; u_int64_t data_row_start; u_int64_t data_row_end; u_int8_t strip_offset[MAX_SPAN_DEPTH]; u_int32_t span_row_data_width; u_int32_t diff; u_int32_t reserved[2]; }LD_SPAN_SET, *PLD_SPAN_SET; typedef struct LOG_BLOCK_SPAN_INFO { LD_SPAN_SET span_set[MAX_SPAN_DEPTH]; }LD_SPAN_INFO, *PLD_SPAN_INFO; #pragma pack(1) typedef struct _MR_FW_RAID_MAP_ALL { MR_FW_RAID_MAP raidMap; MR_LD_SPAN_MAP ldSpanMap[MAX_LOGICAL_DRIVES - 1]; } MR_FW_RAID_MAP_ALL; #pragma pack() struct IO_REQUEST_INFO { u_int64_t ldStartBlock; u_int32_t numBlocks; u_int16_t ldTgtId; u_int8_t isRead; u_int16_t devHandle; u_int64_t pdBlock; u_int8_t fpOkForIo; u_int8_t IoforUnevenSpan; u_int8_t start_span; u_int8_t reserved; u_int64_t start_row; }; typedef struct _MR_LD_TARGET_SYNC { u_int8_t targetId; u_int8_t reserved; u_int16_t seqNum; } MR_LD_TARGET_SYNC; #define IEEE_SGE_FLAGS_ADDR_MASK (0x03) #define IEEE_SGE_FLAGS_SYSTEM_ADDR (0x00) #define IEEE_SGE_FLAGS_IOCDDR_ADDR (0x01) #define IEEE_SGE_FLAGS_IOCPLB_ADDR (0x02) #define IEEE_SGE_FLAGS_IOCPLBNTA_ADDR (0x03) #define IEEE_SGE_FLAGS_CHAIN_ELEMENT (0x80) #define IEEE_SGE_FLAGS_END_OF_LIST (0x40) union desc_value { u_int64_t word; struct { 
u_int32_t low; u_int32_t high; } u; }; /******************************************************************* * Temporary command ********************************************************************/ struct mrsas_tmp_dcmd { bus_dma_tag_t tmp_dcmd_tag; // tag for tmp DMCD cmd bus_dmamap_t tmp_dcmd_dmamap; // dmamap for tmp DCMD cmd void *tmp_dcmd_mem; // virtual addr of tmp DCMD cmd bus_addr_t tmp_dcmd_phys_addr; //physical addr of tmp DCMD }; /******************************************************************* * Register set, included legacy controllers 1068 and 1078, * structure extended for 1078 registers ********************************************************************/ #pragma pack(1) typedef struct _mrsas_register_set { u_int32_t doorbell; /*0000h*/ u_int32_t fusion_seq_offset; /*0004h*/ u_int32_t fusion_host_diag; /*0008h*/ u_int32_t reserved_01; /*000Ch*/ u_int32_t inbound_msg_0; /*0010h*/ u_int32_t inbound_msg_1; /*0014h*/ u_int32_t outbound_msg_0; /*0018h*/ u_int32_t outbound_msg_1; /*001Ch*/ u_int32_t inbound_doorbell; /*0020h*/ u_int32_t inbound_intr_status; /*0024h*/ u_int32_t inbound_intr_mask; /*0028h*/ u_int32_t outbound_doorbell; /*002Ch*/ u_int32_t outbound_intr_status; /*0030h*/ u_int32_t outbound_intr_mask; /*0034h*/ u_int32_t reserved_1[2]; /*0038h*/ u_int32_t inbound_queue_port; /*0040h*/ u_int32_t outbound_queue_port; /*0044h*/ u_int32_t reserved_2[9]; /*0048h*/ u_int32_t reply_post_host_index; /*006Ch*/ u_int32_t reserved_2_2[12]; /*0070h*/ u_int32_t outbound_doorbell_clear; /*00A0h*/ u_int32_t reserved_3[3]; /*00A4h*/ u_int32_t outbound_scratch_pad ; /*00B0h*/ u_int32_t outbound_scratch_pad_2; /*00B4h*/ u_int32_t reserved_4[2]; /*00B8h*/ u_int32_t inbound_low_queue_port ; /*00C0h*/ u_int32_t inbound_high_queue_port ; /*00C4h*/ u_int32_t reserved_5; /*00C8h*/ u_int32_t res_6[11]; /*CCh*/ u_int32_t host_diag; u_int32_t seq_offset; u_int32_t index_registers[807]; /*00CCh*/ } mrsas_reg_set; #pragma pack() 
/*******************************************************************
 * Firmware Interface Defines
 *******************************************************************
 * MFI stands for MegaRAID SAS FW Interface.  This is just a moniker
 * for protocol between the software and firmware.  Commands are
 * issued using "message frames".
 ******************************************************************/

/*
 * FW posts its state in upper 4 bits of outbound_msg_0 register.
 * MFI_STATE_MASK selects those bits; the MFI_STATE_* values below are
 * the states as they appear after masking.
 */
#define MFI_STATE_MASK				0xF0000000
#define MFI_STATE_UNDEFINED			0x00000000
#define MFI_STATE_BB_INIT			0x10000000
#define MFI_STATE_FW_INIT			0x40000000
#define MFI_STATE_WAIT_HANDSHAKE		0x60000000
#define MFI_STATE_FW_INIT_2			0x70000000
#define MFI_STATE_DEVICE_SCAN			0x80000000
#define MFI_STATE_BOOT_MESSAGE_PENDING		0x90000000
#define MFI_STATE_FLUSH_CACHE			0xA0000000
#define MFI_STATE_READY				0xB0000000
#define MFI_STATE_OPERATIONAL			0xC0000000
#define MFI_STATE_FAULT				0xF0000000
#define MFI_RESET_REQUIRED			0x00000001
#define MFI_RESET_ADAPTER			0x00000002

#define MEGAMFI_FRAME_SIZE			64
#define MRSAS_MFI_FRAME_SIZE			1024
#define MRSAS_MFI_SENSE_SIZE			128

/*
 * During FW init, clear pending cmds & reset state using inbound_msg_0
 *
 * ABORT        : Abort all pending cmds
 * READY        : Move from OPERATIONAL to READY state; discard queue info
 * MFIMODE      : Discard (possible) low MFA posted in 64-bit mode (??)
 * CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver
 * HOTPLUG      : Resume from Hotplug
 * MFI_STOP_ADP : Send signal to FW to stop processing
 */
#define WRITE_SEQUENCE_OFFSET			(0x000000FC)	/* I20 */
#define HOST_DIAGNOSTIC_OFFSET			(0x000000F8)	/* I20 */
#define DIAG_WRITE_ENABLE			(0x00000080)
#define DIAG_RESET_ADAPTER			(0x00000004)

#define MFI_ADP_RESET				0x00000040
#define MFI_INIT_ABORT				0x00000001
#define MFI_INIT_READY				0x00000002
#define MFI_INIT_MFIMODE			0x00000004
#define MFI_INIT_CLEAR_HANDSHAKE		0x00000008
#define MFI_INIT_HOTPLUG			0x00000010
#define MFI_STOP_ADP				0x00000020

/*
 * Combination of the READY, MFIMODE and ABORT init bits.
 * The expansion is parenthesized so that it behaves as a single operand
 * when it is used inside a larger expression (e.g. with '&' or '==',
 * which both bind tighter than '|').
 */
#define MFI_RESET_FLAGS				(MFI_INIT_READY |	\
						 MFI_INIT_MFIMODE |	\
						 MFI_INIT_ABORT)

/*
 * MFI frame flags
 */
#define MFI_FRAME_POST_IN_REPLY_QUEUE		0x0000
#define MFI_FRAME_DONT_POST_IN_REPLY_QUEUE	0x0001
#define MFI_FRAME_SGL32				0x0000
#define MFI_FRAME_SGL64				0x0002
#define MFI_FRAME_SENSE32			0x0000
#define MFI_FRAME_SENSE64			0x0004
#define MFI_FRAME_DIR_NONE			0x0000
#define MFI_FRAME_DIR_WRITE			0x0008
#define MFI_FRAME_DIR_READ			0x0010
#define MFI_FRAME_DIR_BOTH			0x0018	/* DIR_WRITE | DIR_READ */
#define MFI_FRAME_IEEE				0x0020

/*
 * Definition for cmd_status
 */
#define MFI_CMD_STATUS_POLL_MODE		0xFF

/*
 * MFI command opcodes
 */
#define MFI_CMD_INIT				0x00
#define MFI_CMD_LD_READ				0x01
#define MFI_CMD_LD_WRITE			0x02
#define MFI_CMD_LD_SCSI_IO			0x03
#define MFI_CMD_PD_SCSI_IO			0x04
#define MFI_CMD_DCMD				0x05
#define MFI_CMD_ABORT				0x06
#define MFI_CMD_SMP				0x07
#define MFI_CMD_STP				0x08
#define MFI_CMD_INVALID				0xff

/*
 * DCMD opcodes and their associated argument values
 */
#define MR_DCMD_CTRL_GET_INFO			0x01010000
#define MR_DCMD_LD_GET_LIST			0x03010000
#define MR_DCMD_CTRL_CACHE_FLUSH		0x01101000
#define MR_FLUSH_CTRL_CACHE			0x01
#define MR_FLUSH_DISK_CACHE			0x02

#define MR_DCMD_CTRL_SHUTDOWN			0x01050000
#define MR_DCMD_HIBERNATE_SHUTDOWN		0x01060000
#define MR_ENABLE_DRIVE_SPINDOWN		0x01

#define MR_DCMD_CTRL_EVENT_GET_INFO		0x01040100
#define MR_DCMD_CTRL_EVENT_GET			0x01040300
#define MR_DCMD_CTRL_EVENT_WAIT			0x01040500
#define MR_DCMD_LD_GET_PROPERTIES		0x03030000

#define MR_DCMD_CLUSTER				0x08000000
#define MR_DCMD_CLUSTER_RESET_ALL		0x08010100
#define MR_DCMD_CLUSTER_RESET_LD		0x08010200
#define MR_DCMD_PD_LIST_QUERY			0x02010100

#define MR_DCMD_CTRL_MISC_CPX			0x0100e200
#define MR_DCMD_CTRL_MISC_CPX_INIT_DATA_GET	0x0100e201
#define MR_DCMD_CTRL_MISC_CPX_QUEUE_DATA	0x0100e202
#define MR_DCMD_CTRL_MISC_CPX_UNREGISTER	0x0100e203
#define MAX_MR_ROW_SIZE				32
#define MR_CPX_DIR_WRITE			1
#define MR_CPX_DIR_READ				0
#define MR_CPX_VERSION				1

#define MR_DCMD_CTRL_IO_METRICS_GET		0x01170200	/* get IO metrics */

/*
 * Event codes
 */
#define MR_EVT_CFG_CLEARED			0x0004
#define MR_EVT_LD_STATE_CHANGE			0x0051
#define MR_EVT_PD_INSERTED			0x005b
#define MR_EVT_PD_REMOVED			0x0070
#define MR_EVT_LD_CREATED			0x008a
#define MR_EVT_LD_DELETED			0x008b
#define MR_EVT_FOREIGN_CFG_IMPORTED		0x00db
#define MR_EVT_LD_OFFLINE			0x00fc
#define MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED	0x0152
#define MR_EVT_CTRL_PERF_COLLECTION		0x017e

/*
 * MFI command completion codes
 *
 * Values are contiguous from 0x00 through 0x38; 0x67 and 0xFF are
 * assigned explicitly.  Identifier spellings (including
 * MFI_STAT_HOST_CODE_NOT_FOUNT) are kept unchanged for source
 * compatibility.
 */
enum MFI_STAT {
	MFI_STAT_OK				= 0x00,
	MFI_STAT_INVALID_CMD			= 0x01,
	MFI_STAT_INVALID_DCMD			= 0x02,
	MFI_STAT_INVALID_PARAMETER		= 0x03,
	MFI_STAT_INVALID_SEQUENCE_NUMBER	= 0x04,
	MFI_STAT_ABORT_NOT_POSSIBLE		= 0x05,
	MFI_STAT_APP_HOST_CODE_NOT_FOUND	= 0x06,
	MFI_STAT_APP_IN_USE			= 0x07,
	MFI_STAT_APP_NOT_INITIALIZED		= 0x08,
	MFI_STAT_ARRAY_INDEX_INVALID		= 0x09,
	MFI_STAT_ARRAY_ROW_NOT_EMPTY		= 0x0a,
	MFI_STAT_CONFIG_RESOURCE_CONFLICT	= 0x0b,
	MFI_STAT_DEVICE_NOT_FOUND		= 0x0c,
	MFI_STAT_DRIVE_TOO_SMALL		= 0x0d,
	MFI_STAT_FLASH_ALLOC_FAIL		= 0x0e,
	MFI_STAT_FLASH_BUSY			= 0x0f,
	MFI_STAT_FLASH_ERROR			= 0x10,
	MFI_STAT_FLASH_IMAGE_BAD		= 0x11,
	MFI_STAT_FLASH_IMAGE_INCOMPLETE		= 0x12,
	MFI_STAT_FLASH_NOT_OPEN			= 0x13,
	MFI_STAT_FLASH_NOT_STARTED		= 0x14,
	MFI_STAT_FLUSH_FAILED			= 0x15,
	MFI_STAT_HOST_CODE_NOT_FOUNT		= 0x16,
	MFI_STAT_LD_CC_IN_PROGRESS		= 0x17,
	MFI_STAT_LD_INIT_IN_PROGRESS		= 0x18,
	MFI_STAT_LD_LBA_OUT_OF_RANGE		= 0x19,
	MFI_STAT_LD_MAX_CONFIGURED		= 0x1a,
	MFI_STAT_LD_NOT_OPTIMAL			= 0x1b,
	MFI_STAT_LD_RBLD_IN_PROGRESS		= 0x1c,
	MFI_STAT_LD_RECON_IN_PROGRESS		= 0x1d,
	MFI_STAT_LD_WRONG_RAID_LEVEL		= 0x1e,
	MFI_STAT_MAX_SPARES_EXCEEDED		= 0x1f,
	MFI_STAT_MEMORY_NOT_AVAILABLE		= 0x20,
	MFI_STAT_MFC_HW_ERROR			= 0x21,
	MFI_STAT_NO_HW_PRESENT			= 0x22,
	MFI_STAT_NOT_FOUND			= 0x23,
	MFI_STAT_NOT_IN_ENCL			= 0x24,
	MFI_STAT_PD_CLEAR_IN_PROGRESS		= 0x25,
	MFI_STAT_PD_TYPE_WRONG			= 0x26,
	MFI_STAT_PR_DISABLED			= 0x27,
	MFI_STAT_ROW_INDEX_INVALID		= 0x28,
	MFI_STAT_SAS_CONFIG_INVALID_ACTION	= 0x29,
	MFI_STAT_SAS_CONFIG_INVALID_DATA	= 0x2a,
	MFI_STAT_SAS_CONFIG_INVALID_PAGE	= 0x2b,
	MFI_STAT_SAS_CONFIG_INVALID_TYPE	= 0x2c,
	MFI_STAT_SCSI_DONE_WITH_ERROR		= 0x2d,
	MFI_STAT_SCSI_IO_FAILED			= 0x2e,
	MFI_STAT_SCSI_RESERVATION_CONFLICT	= 0x2f,
	MFI_STAT_SHUTDOWN_FAILED		= 0x30,
	MFI_STAT_TIME_NOT_SET			= 0x31,
	MFI_STAT_WRONG_STATE			= 0x32,
	MFI_STAT_LD_OFFLINE			= 0x33,
	MFI_STAT_PEER_NOTIFICATION_REJECTED	= 0x34,
	MFI_STAT_PEER_NOTIFICATION_FAILED	= 0x35,
	MFI_STAT_RESERVATION_IN_PROGRESS	= 0x36,
	MFI_STAT_I2C_ERRORS_DETECTED		= 0x37,
	MFI_STAT_PCI_ERRORS_DETECTED		= 0x38,
	MFI_STAT_CONFIG_SEQ_MISMATCH		= 0x67,

	MFI_STAT_INVALID_STATUS			= 0xFF
};

/*
 * Number of mailbox bytes in DCMD message frame
 */
#define MFI_MBOX_SIZE				12

/* Event classes; note that the first two values are negative. */
enum MR_EVT_CLASS {
	MR_EVT_CLASS_DEBUG	= -2,
	MR_EVT_CLASS_PROGRESS	= -1,
	MR_EVT_CLASS_INFO	= 0,
	MR_EVT_CLASS_WARNING	= 1,
	MR_EVT_CLASS_CRITICAL	= 2,
	MR_EVT_CLASS_FATAL	= 3,
	MR_EVT_CLASS_DEAD	= 4,
};

/* Event locales; one bit per locale, MR_EVT_LOCALE_ALL sets all 16 bits. */
enum MR_EVT_LOCALE {
	MR_EVT_LOCALE_LD	= 0x0001,
	MR_EVT_LOCALE_PD	= 0x0002,
	MR_EVT_LOCALE_ENCL	= 0x0004,
	MR_EVT_LOCALE_BBU	= 0x0008,
	MR_EVT_LOCALE_SAS	= 0x0010,
	MR_EVT_LOCALE_CTRL	= 0x0020,
	MR_EVT_LOCALE_CONFIG	= 0x0040,
	MR_EVT_LOCALE_CLUSTER	= 0x0080,
	MR_EVT_LOCALE_ALL	= 0xffff,
};

/*
 * Event argument types.  No explicit values are given, so the
 * enumerators number consecutively from 0 in declaration order; do not
 * reorder them.
 */
enum MR_EVT_ARGS {
	MR_EVT_ARGS_NONE,
	MR_EVT_ARGS_CDB_SENSE,
	MR_EVT_ARGS_LD,
	MR_EVT_ARGS_LD_COUNT,
	MR_EVT_ARGS_LD_LBA,
	MR_EVT_ARGS_LD_OWNER,
	MR_EVT_ARGS_LD_LBA_PD_LBA,
	MR_EVT_ARGS_LD_PROG,
	MR_EVT_ARGS_LD_STATE,
	MR_EVT_ARGS_LD_STRIP,
	MR_EVT_ARGS_PD,
	MR_EVT_ARGS_PD_ERR,
	MR_EVT_ARGS_PD_LBA,
	MR_EVT_ARGS_PD_LBA_LD,
	MR_EVT_ARGS_PD_PROG,
	MR_EVT_ARGS_PD_STATE,
	MR_EVT_ARGS_PCI,
	MR_EVT_ARGS_RATE,
	MR_EVT_ARGS_STR,
	MR_EVT_ARGS_TIME,
	MR_EVT_ARGS_ECC,
	MR_EVT_ARGS_LD_PROP,
	MR_EVT_ARGS_PD_SPARE,
	MR_EVT_ARGS_PD_INDEX,
	MR_EVT_ARGS_DIAG_PASS,
	MR_EVT_ARGS_DIAG_FAIL,
	MR_EVT_ARGS_PD_LBA_LBA,
	MR_EVT_ARGS_PORT_PHY,
	MR_EVT_ARGS_PD_MISSING,
	MR_EVT_ARGS_PD_ADDRESS,
	MR_EVT_ARGS_BITMAP,
	MR_EVT_ARGS_CONNECTOR,
	MR_EVT_ARGS_PD_PD,
	MR_EVT_ARGS_PD_FRU,
	MR_EVT_ARGS_PD_PATHINFO,
	MR_EVT_ARGS_PD_POWER_STATE,
	MR_EVT_ARGS_GENERIC,
};

/*
 * Thunderbolt (and later) Defines
 */
#define MRSAS_MAX_SZ_CHAIN_FRAME			1024
#define MFI_FUSION_ENABLE_INTERRUPT_MASK		(0x00000009)
#define MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE		256
#define MRSAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST		0xF0
#define MRSAS_MPI2_FUNCTION_LD_IO_REQUEST		0xF1
#define MRSAS_LOAD_BALANCE_FLAG				0x1
#define MRSAS_DCMD_MBOX_PEND_FLAG			0x1
#define HOST_DIAG_WRITE_ENABLE				0x80
#define HOST_DIAG_RESET_ADAPTER				0x4
#define MRSAS_TBOLT_MAX_RESET_TRIES			3
#define MRSAS_MAX_MFI_CMDS				32

/*
 * Invader Defines
 */
#define MPI2_TYPE_CUDA					0x2
#define MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH	0x4000
#define MR_RL_FLAGS_GRANT_DESTINATION_CPU0		0x00
#define MR_RL_FLAGS_GRANT_DESTINATION_CPU1		0x10
#define MR_RL_FLAGS_GRANT_DESTINATION_CUDA		0x80
#define MR_RL_FLAGS_SEQ_NUM_ENABLE			0x8

/*
 * T10 PI defines
 */
#define MR_PROT_INFO_TYPE_CONTROLLER			0x8
#define MRSAS_SCSI_VARIABLE_LENGTH_CMD			0x7f
#define MRSAS_SCSI_SERVICE_ACTION_READ32		0x9
#define MRSAS_SCSI_SERVICE_ACTION_WRITE32		0xB
#define MRSAS_SCSI_ADDL_CDB_LEN				0x18
#define MRSAS_RD_WR_PROTECT_CHECK_ALL			0x20
#define MRSAS_RD_WR_PROTECT_CHECK_NONE			0x60
#define MRSAS_SCSIBLOCKSIZE				512

/*
 * Raid context flags
 */
#define MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT	0x4
#define MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_MASK		0x30
typedef enum MR_RAID_FLAGS_IO_SUB_TYPE {
	MR_RAID_FLAGS_IO_SUB_TYPE_NONE		= 0,
	MR_RAID_FLAGS_IO_SUB_TYPE_SYSTEM_PD	= 1,
} MR_RAID_FLAGS_IO_SUB_TYPE;

/*
 * Request descriptor types
 */
#define MRSAS_REQ_DESCRIPT_FLAGS_LD_IO			0x7
#define MRSAS_REQ_DESCRIPT_FLAGS_MFA			0x1
#define MRSAS_REQ_DESCRIPT_FLAGS_NO_LOCK		0x2
#define MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT		1
#define MRSAS_FP_CMD_LEN 16 #define MRSAS_FUSION_IN_RESET 0 #define RAID_CTX_SPANARM_ARM_SHIFT (0) #define RAID_CTX_SPANARM_ARM_MASK (0x1f) #define RAID_CTX_SPANARM_SPAN_SHIFT (5) #define RAID_CTX_SPANARM_SPAN_MASK (0xE0) /* * Define region lock types */ typedef enum _REGION_TYPE { REGION_TYPE_UNUSED = 0, // lock is currently not active REGION_TYPE_SHARED_READ = 1, // shared lock (for reads) REGION_TYPE_SHARED_WRITE = 2, REGION_TYPE_EXCLUSIVE = 3, // exclusive lock (for writes) } REGION_TYPE; /* * MR private defines */ #define MR_PD_INVALID 0xFFFF #define MAX_SPAN_DEPTH 8 #define MAX_RAIDMAP_SPAN_DEPTH (MAX_SPAN_DEPTH) #define MAX_ROW_SIZE 32 #define MAX_RAIDMAP_ROW_SIZE (MAX_ROW_SIZE) #define MAX_LOGICAL_DRIVES 64 #define MAX_RAIDMAP_LOGICAL_DRIVES (MAX_LOGICAL_DRIVES) #define MAX_RAIDMAP_VIEWS (MAX_LOGICAL_DRIVES) #define MAX_ARRAYS 128 #define MAX_RAIDMAP_ARRAYS (MAX_ARRAYS) #define MAX_PHYSICAL_DEVICES 256 #define MAX_RAIDMAP_PHYSICAL_DEVICES (MAX_PHYSICAL_DEVICES) #define MR_DCMD_LD_MAP_GET_INFO 0x0300e101 /* * SCSI-CAM Related Defines */ #define MRSAS_SCSI_MAX_LUNS 0 //zero for now #define MRSAS_SCSI_INITIATOR_ID 255 #define MRSAS_SCSI_MAX_CMDS 8 #define MRSAS_SCSI_MAX_CDB_LEN 16 #define MRSAS_SCSI_SENSE_BUFFERSIZE 96 #define MRSAS_MAX_SGL 70 #define MRSAS_MAX_IO_SIZE (256 * 1024) #define MRSAS_INTERNAL_CMDS 32 /* Request types */ #define MRSAS_REQ_TYPE_INTERNAL_CMD 0x0 #define MRSAS_REQ_TYPE_AEN_FETCH 0x1 #define MRSAS_REQ_TYPE_PASSTHRU 0x2 #define MRSAS_REQ_TYPE_GETSET_PARAM 0x3 #define MRSAS_REQ_TYPE_SCSI_IO 0x4 /* Request states */ #define MRSAS_REQ_STATE_FREE 0 #define MRSAS_REQ_STATE_BUSY 1 #define MRSAS_REQ_STATE_TRAN 2 #define MRSAS_REQ_STATE_COMPLETE 3 enum mrsas_req_flags { MRSAS_DIR_UNKNOWN = 0x1, MRSAS_DIR_IN = 0x2, MRSAS_DIR_OUT = 0x4, MRSAS_DIR_NONE = 0x8, }; /* * Adapter Reset States */ enum { MRSAS_HBA_OPERATIONAL = 0, MRSAS_ADPRESET_SM_INFAULT = 1, MRSAS_ADPRESET_SM_FW_RESET_SUCCESS = 2, MRSAS_ADPRESET_SM_OPERATIONAL = 3, 
MRSAS_HW_CRITICAL_ERROR = 4, MRSAS_ADPRESET_INPROG_SIGN = 0xDEADDEAD, }; /* * MPT Command Structure */ struct mrsas_mpt_cmd { MRSAS_RAID_SCSI_IO_REQUEST *io_request; bus_addr_t io_request_phys_addr; MPI2_SGE_IO_UNION *chain_frame; bus_addr_t chain_frame_phys_addr; u_int32_t sge_count; u_int8_t *sense; bus_addr_t sense_phys_addr; u_int8_t retry_for_fw_reset; MRSAS_REQUEST_DESCRIPTOR_UNION *request_desc; u_int32_t sync_cmd_idx; //For getting MFI cmd from list when complete u_int32_t index; u_int8_t flags; u_int8_t load_balance; bus_size_t length; // request length u_int32_t error_code; // error during request dmamap load bus_dmamap_t data_dmamap; void *data; union ccb *ccb_ptr; // pointer to ccb struct callout cm_callout; struct mrsas_softc *sc; TAILQ_ENTRY(mrsas_mpt_cmd) next; }; /* * MFI Command Structure */ struct mrsas_mfi_cmd { union mrsas_frame *frame; bus_dmamap_t frame_dmamap; // mfi frame dmamap void *frame_mem; // mfi frame virtual addr bus_addr_t frame_phys_addr; // mfi frame physical addr u_int8_t *sense; bus_dmamap_t sense_dmamap; // mfi sense dmamap void *sense_mem; // mfi sense virtual addr bus_addr_t sense_phys_addr; u_int32_t index; u_int8_t sync_cmd; u_int8_t cmd_status; u_int8_t abort_aen; u_int8_t retry_for_fw_reset; struct mrsas_softc *sc; union ccb *ccb_ptr; union { struct { u_int16_t smid; u_int16_t resvd; } context; u_int32_t frame_count; } cmd_id; TAILQ_ENTRY(mrsas_mfi_cmd) next; }; /* * define constants for device list query options */ enum MR_PD_QUERY_TYPE { MR_PD_QUERY_TYPE_ALL = 0, MR_PD_QUERY_TYPE_STATE = 1, MR_PD_QUERY_TYPE_POWER_STATE = 2, MR_PD_QUERY_TYPE_MEDIA_TYPE = 3, MR_PD_QUERY_TYPE_SPEED = 4, MR_PD_QUERY_TYPE_EXPOSED_TO_HOST = 5, }; #define MR_EVT_CFG_CLEARED 0x0004 #define MR_EVT_LD_STATE_CHANGE 0x0051 #define MR_EVT_PD_INSERTED 0x005b #define MR_EVT_PD_REMOVED 0x0070 #define MR_EVT_LD_CREATED 0x008a #define MR_EVT_LD_DELETED 0x008b #define MR_EVT_FOREIGN_CFG_IMPORTED 0x00db #define MR_EVT_LD_OFFLINE 0x00fc #define 
MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED 0x0152 enum MR_PD_STATE { MR_PD_STATE_UNCONFIGURED_GOOD = 0x00, MR_PD_STATE_UNCONFIGURED_BAD = 0x01, MR_PD_STATE_HOT_SPARE = 0x02, MR_PD_STATE_OFFLINE = 0x10, MR_PD_STATE_FAILED = 0x11, MR_PD_STATE_REBUILD = 0x14, MR_PD_STATE_ONLINE = 0x18, MR_PD_STATE_COPYBACK = 0x20, MR_PD_STATE_SYSTEM = 0x40, }; /* * defines the physical drive address structure */ #pragma pack(1) struct MR_PD_ADDRESS { u_int16_t deviceId; u_int16_t enclDeviceId; union { struct { u_int8_t enclIndex; u_int8_t slotNumber; } mrPdAddress; struct { u_int8_t enclPosition; u_int8_t enclConnectorIndex; } mrEnclAddress; } u1; u_int8_t scsiDevType; union { u_int8_t connectedPortBitmap; u_int8_t connectedPortNumbers; } u2; u_int64_t sasAddr[2]; }; #pragma pack() /* * defines the physical drive list structure */ #pragma pack(1) struct MR_PD_LIST { u_int32_t size; u_int32_t count; struct MR_PD_ADDRESS addr[1]; }; #pragma pack() #pragma pack(1) struct mrsas_pd_list { u_int16_t tid; u_int8_t driveType; u_int8_t driveState; }; #pragma pack() /* * defines the logical drive reference structure */ typedef union _MR_LD_REF { // LD reference structure struct { u_int8_t targetId; // LD target id (0 to MAX_TARGET_ID) u_int8_t reserved; // reserved to make in line with MR_PD_REF u_int16_t seqNum; // Sequence Number } ld_context; u_int32_t ref; // shorthand reference to full 32-bits } MR_LD_REF; // 4 bytes /* * defines the logical drive list structure */ #pragma pack(1) struct MR_LD_LIST { u_int32_t ldCount; // number of LDs u_int32_t reserved; // pad to 8-byte boundary struct { MR_LD_REF ref; // LD reference u_int8_t state; // current LD state (MR_LD_STATE) u_int8_t reserved[3]; // pad to 8-byte boundary u_int64_t size; // LD size } ldList[MAX_LOGICAL_DRIVES]; }; #pragma pack() /* * SAS controller properties */ #pragma pack(1) struct mrsas_ctrl_prop { u_int16_t seq_num; u_int16_t pred_fail_poll_interval; u_int16_t intr_throttle_count; u_int16_t intr_throttle_timeouts; u_int8_t 
rebuild_rate; u_int8_t patrol_read_rate; u_int8_t bgi_rate; u_int8_t cc_rate; u_int8_t recon_rate; u_int8_t cache_flush_interval; u_int8_t spinup_drv_count; u_int8_t spinup_delay; u_int8_t cluster_enable; u_int8_t coercion_mode; u_int8_t alarm_enable; u_int8_t disable_auto_rebuild; u_int8_t disable_battery_warn; u_int8_t ecc_bucket_size; u_int16_t ecc_bucket_leak_rate; u_int8_t restore_hotspare_on_insertion; u_int8_t expose_encl_devices; u_int8_t maintainPdFailHistory; u_int8_t disallowHostRequestReordering; u_int8_t abortCCOnError; // set TRUE to abort CC on detecting an inconsistency u_int8_t loadBalanceMode; // load balance mode (MR_LOAD_BALANCE_MODE) u_int8_t disableAutoDetectBackplane; // 0 - use auto detect logic of backplanes // like SGPIO, i2c SEP using h/w mechanism // like GPIO pins. // 1 - disable auto detect SGPIO, // 2 - disable i2c SEP auto detect // 3 - disable both auto detect u_int8_t snapVDSpace; // % of source LD to be reserved for a VDs snapshot in // snapshot repository, for metadata and user data. // 1=5%, 2=10%, 3=15% and so on. /* * Add properties that can be controlled by a bit in the following structure. */ struct { u_int32_t copyBackDisabled : 1; // set TRUE to disable copyBack // (0=copyback enabled) u_int32_t SMARTerEnabled : 1; u_int32_t prCorrectUnconfiguredAreas : 1; u_int32_t useFdeOnly : 1; u_int32_t disableNCQ : 1; u_int32_t SSDSMARTerEnabled : 1; u_int32_t SSDPatrolReadEnabled : 1; u_int32_t enableSpinDownUnconfigured : 1; u_int32_t autoEnhancedImport : 1; u_int32_t enableSecretKeyControl : 1; u_int32_t disableOnlineCtrlReset : 1; u_int32_t allowBootWithPinnedCache : 1; u_int32_t disableSpinDownHS : 1; u_int32_t enableJBOD : 1; u_int32_t reserved :18; } OnOffProperties; u_int8_t autoSnapVDSpace; // % of source LD to be reserved for auto // snapshot in snapshot repository, for // metadata and user data. // 1=5%, 2=10%, 3=15% and so on. u_int8_t viewSpace; // snapshot writeable VIEWs capacity as a % // of source LD capacity.
0=READ only. // 1=5%, 2=10%, 3=15% and so on u_int16_t spinDownTime; // # of idle minutes before device is spun // down (0=use FW defaults). u_int8_t reserved[24]; }; #pragma pack() /* * SAS controller information */ //#pragma pack(1) struct mrsas_ctrl_info { /* * PCI device information */ struct { u_int16_t vendor_id; u_int16_t device_id; u_int16_t sub_vendor_id; u_int16_t sub_device_id; u_int8_t reserved[24]; } __packed pci; /* * Host interface information */ struct { u_int8_t PCIX:1; u_int8_t PCIE:1; u_int8_t iSCSI:1; u_int8_t SAS_3G:1; u_int8_t reserved_0:4; u_int8_t reserved_1[6]; u_int8_t port_count; u_int64_t port_addr[8]; } __packed host_interface; /* * Device (backend) interface information */ struct { u_int8_t SPI:1; u_int8_t SAS_3G:1; u_int8_t SATA_1_5G:1; u_int8_t SATA_3G:1; u_int8_t reserved_0:4; u_int8_t reserved_1[6]; u_int8_t port_count; u_int64_t port_addr[8]; } __packed device_interface; /* * List of components residing in flash. All strings are null terminated */ u_int32_t image_check_word; u_int32_t image_component_count; struct { char name[8]; char version[32]; char build_date[16]; char built_time[16]; } __packed image_component[8]; /* * List of flash components that have been flashed on the card, but * are not in use, pending reset of the adapter. This list will be * empty if a flash operation has not occurred. All strings are null * terminated */ u_int32_t pending_image_component_count; struct { char name[8]; char version[32]; char build_date[16]; char build_time[16]; } __packed pending_image_component[8]; u_int8_t max_arms; u_int8_t max_spans; u_int8_t max_arrays; u_int8_t max_lds; char product_name[80]; char serial_no[32]; /* * Other physical/controller/operation information.
Indicates the * presence of the hardware */ struct { u_int32_t bbu:1; u_int32_t alarm:1; u_int32_t nvram:1; u_int32_t uart:1; u_int32_t reserved:28; } __packed hw_present; u_int32_t current_fw_time; /* * Maximum data transfer sizes */ u_int16_t max_concurrent_cmds; u_int16_t max_sge_count; u_int32_t max_request_size; /* * Logical and physical device counts */ u_int16_t ld_present_count; u_int16_t ld_degraded_count; u_int16_t ld_offline_count; u_int16_t pd_present_count; u_int16_t pd_disk_present_count; u_int16_t pd_disk_pred_failure_count; u_int16_t pd_disk_failed_count; /* * Memory size information */ u_int16_t nvram_size; u_int16_t memory_size; u_int16_t flash_size; /* * Error counters */ u_int16_t mem_correctable_error_count; u_int16_t mem_uncorrectable_error_count; /* * Cluster information */ u_int8_t cluster_permitted; u_int8_t cluster_active; /* * Additional max data transfer sizes */ u_int16_t max_strips_per_io; /* * Controller capabilities structures */ struct { u_int32_t raid_level_0:1; u_int32_t raid_level_1:1; u_int32_t raid_level_5:1; u_int32_t raid_level_1E:1; u_int32_t raid_level_6:1; u_int32_t reserved:27; } __packed raid_levels; struct { u_int32_t rbld_rate:1; u_int32_t cc_rate:1; u_int32_t bgi_rate:1; u_int32_t recon_rate:1; u_int32_t patrol_rate:1; u_int32_t alarm_control:1; u_int32_t cluster_supported:1; u_int32_t bbu:1; u_int32_t spanning_allowed:1; u_int32_t dedicated_hotspares:1; u_int32_t revertible_hotspares:1; u_int32_t foreign_config_import:1; u_int32_t self_diagnostic:1; u_int32_t mixed_redundancy_arr:1; u_int32_t global_hot_spares:1; u_int32_t reserved:17; } __packed adapter_operations; struct { u_int32_t read_policy:1; u_int32_t write_policy:1; u_int32_t io_policy:1; u_int32_t access_policy:1; u_int32_t disk_cache_policy:1; u_int32_t reserved:27; } __packed ld_operations; struct { u_int8_t min; u_int8_t max; u_int8_t reserved[2]; } __packed stripe_sz_ops; struct { u_int32_t force_online:1; u_int32_t force_offline:1; u_int32_t 
force_rebuild:1; u_int32_t reserved:29; } __packed pd_operations; struct { u_int32_t ctrl_supports_sas:1; u_int32_t ctrl_supports_sata:1; u_int32_t allow_mix_in_encl:1; u_int32_t allow_mix_in_ld:1; u_int32_t allow_sata_in_cluster:1; u_int32_t reserved:27; } __packed pd_mix_support; /* * Define ECC single-bit-error bucket information */ u_int8_t ecc_bucket_count; u_int8_t reserved_2[11]; /* * Include the controller properties (changeable items) */ struct mrsas_ctrl_prop properties; /* * Define FW pkg version (set in envt v'bles on OEM basis) */ char package_version[0x60]; /* * If adapterOperations.supportMoreThan8Phys is set, and deviceInterface.portCount is greater than 8, * SAS Addrs for first 8 ports shall be populated in deviceInterface.portAddr, and the rest shall be * populated in deviceInterfacePortAddr2. */ u_int64_t deviceInterfacePortAddr2[8]; //0x6a0 u_int8_t reserved3[128]; //0x6e0 struct { //0x760 u_int16_t minPdRaidLevel_0 : 4; u_int16_t maxPdRaidLevel_0 : 12; u_int16_t minPdRaidLevel_1 : 4; u_int16_t maxPdRaidLevel_1 : 12; u_int16_t minPdRaidLevel_5 : 4; u_int16_t maxPdRaidLevel_5 : 12; u_int16_t minPdRaidLevel_1E : 4; u_int16_t maxPdRaidLevel_1E : 12; u_int16_t minPdRaidLevel_6 : 4; u_int16_t maxPdRaidLevel_6 : 12; u_int16_t minPdRaidLevel_10 : 4; u_int16_t maxPdRaidLevel_10 : 12; u_int16_t minPdRaidLevel_50 : 4; u_int16_t maxPdRaidLevel_50 : 12; u_int16_t minPdRaidLevel_60 : 4; u_int16_t maxPdRaidLevel_60 : 12; u_int16_t minPdRaidLevel_1E_RLQ0 : 4; u_int16_t maxPdRaidLevel_1E_RLQ0 : 12; u_int16_t minPdRaidLevel_1E0_RLQ0 : 4; u_int16_t maxPdRaidLevel_1E0_RLQ0 : 12; u_int16_t reserved[6]; } pdsForRaidLevels; u_int16_t maxPds; //0x780 u_int16_t maxDedHSPs; //0x782 u_int16_t maxGlobalHSPs; //0x784 u_int16_t ddfSize; //0x786 u_int8_t maxLdsPerArray; //0x788 u_int8_t partitionsInDDF; //0x789 u_int8_t lockKeyBinding; //0x78a u_int8_t maxPITsPerLd; //0x78b u_int8_t maxViewsPerLd; //0x78c u_int8_t maxTargetId; //0x78d u_int16_t maxBvlVdSize; //0x78e 
u_int16_t maxConfigurableSSCSize; //0x790 u_int16_t currentSSCsize; //0x792 char expanderFwVersion[12]; //0x794 u_int16_t PFKTrialTimeRemaining; //0x7A0 u_int16_t cacheMemorySize; //0x7A2 struct { //0x7A4 u_int32_t supportPIcontroller :1; u_int32_t supportLdPIType1 :1; u_int32_t supportLdPIType2 :1; u_int32_t supportLdPIType3 :1; u_int32_t supportLdBBMInfo :1; u_int32_t supportShieldState :1; u_int32_t blockSSDWriteCacheChange :1; u_int32_t supportSuspendResumeBGops :1; u_int32_t supportEmergencySpares :1; u_int32_t supportSetLinkSpeed :1; u_int32_t supportBootTimePFKChange :1; u_int32_t supportJBOD :1; u_int32_t disableOnlinePFKChange :1; u_int32_t supportPerfTuning :1; u_int32_t supportSSDPatrolRead :1; u_int32_t realTimeScheduler :1; u_int32_t supportResetNow :1; u_int32_t supportEmulatedDrives :1; u_int32_t headlessMode :1; u_int32_t dedicatedHotSparesLimited :1; u_int32_t supportUnevenSpans :1; u_int32_t reserved :11; } adapterOperations2; u_int8_t driverVersion[32]; //0x7A8 u_int8_t maxDAPdCountSpinup60; //0x7C8 u_int8_t temperatureROC; //0x7C9 u_int8_t temperatureCtrl; //0x7CA u_int8_t reserved4; //0x7CB u_int16_t maxConfigurablePds; //0x7CC u_int8_t reserved5[2]; //0x7CE reserved for future use /* * HA cluster information */ struct { //0x7D0 u_int32_t peerIsPresent :1; u_int32_t peerIsIncompatible :1; u_int32_t hwIncompatible :1; u_int32_t fwVersionMismatch :1; u_int32_t ctrlPropIncompatible :1; u_int32_t premiumFeatureMismatch :1; u_int32_t reserved :26; } cluster; char clusterId[16]; //0x7D4 u_int8_t pad[0x800-0x7E4]; //0x7E4 } __packed; /* * Ld and PD Max Support Defines */ #define MRSAS_MAX_PD 256 #define MRSAS_MAX_LD 64 /* * When SCSI mid-layer calls driver's reset routine, driver waits for * MRSAS_RESET_WAIT_TIME seconds for all outstanding IO to complete. Note * that the driver cannot _actually_ abort or reset pending commands.
While * it is waiting for the commands to complete, it prints a diagnostic message * every MRSAS_RESET_NOTICE_INTERVAL seconds */ #define MRSAS_RESET_WAIT_TIME 180 #define MRSAS_INTERNAL_CMD_WAIT_TIME 180 #define MRSAS_IOC_INIT_WAIT_TIME 60 #define MRSAS_RESET_NOTICE_INTERVAL 5 #define MRSAS_IOCTL_CMD 0 #define MRSAS_DEFAULT_CMD_TIMEOUT 90 #define MRSAS_THROTTLE_QUEUE_DEPTH 16 /* * FW reports the maximum of number of commands that it can accept (maximum * commands that can be outstanding) at any time. The driver must report a * lower number to the mid layer because it can issue a few internal commands * itself (E.g, AEN, abort cmd, IOCTLs etc). The number of commands it needs * is shown below */ #define MRSAS_INT_CMDS 32 #define MRSAS_SKINNY_INT_CMDS 5 #define MRSAS_MAX_MSIX_QUEUES 16 /* * FW can accept both 32 and 64 bit SGLs. We want to allocate 32/64 bit * SGLs based on the size of bus_addr_t */ #define IS_DMA64 (sizeof(bus_addr_t) == 8) #define MFI_XSCALE_OMR0_CHANGE_INTERRUPT 0x00000001 // MFI state change interrupt #define MFI_INTR_FLAG_REPLY_MESSAGE 0x00000001 #define MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE 0x00000002 #define MFI_G2_OUTBOUND_DOORBELL_CHANGE_INTERRUPT 0x00000004 //MFI state change interrupt #define MFI_OB_INTR_STATUS_MASK 0x00000002 #define MFI_POLL_TIMEOUT_SECS 60 #define MFI_REPLY_1078_MESSAGE_INTERRUPT 0x80000000 #define MFI_REPLY_GEN2_MESSAGE_INTERRUPT 0x00000001 #define MFI_GEN2_ENABLE_INTERRUPT_MASK 0x00000001 #define MFI_REPLY_SKINNY_MESSAGE_INTERRUPT 0x40000000 #define MFI_SKINNY_ENABLE_INTERRUPT_MASK (0x00000001) #define MFI_1068_PCSR_OFFSET 0x84 #define MFI_1068_FW_HANDSHAKE_OFFSET 0x64 #define MFI_1068_FW_READY 0xDDDD0000 #pragma pack(1) struct mrsas_sge32 { u_int32_t phys_addr; u_int32_t length; }; #pragma pack() #pragma pack(1) struct mrsas_sge64 { u_int64_t phys_addr; u_int32_t length; }; #pragma pack() #pragma pack() union mrsas_sgl { struct mrsas_sge32 sge32[1]; struct mrsas_sge64 sge64[1]; }; #pragma pack() #pragma pack(1) struct 
mrsas_header { u_int8_t cmd; /*00h */ u_int8_t sense_len; /*01h */ u_int8_t cmd_status; /*02h */ u_int8_t scsi_status; /*03h */ u_int8_t target_id; /*04h */ u_int8_t lun; /*05h */ u_int8_t cdb_len; /*06h */ u_int8_t sge_count; /*07h */ u_int32_t context; /*08h */ u_int32_t pad_0; /*0Ch */ u_int16_t flags; /*10h */ u_int16_t timeout; /*12h */ u_int32_t data_xferlen; /*14h */ }; #pragma pack() #pragma pack(1) struct mrsas_init_frame { u_int8_t cmd; /*00h */ u_int8_t reserved_0; /*01h */ u_int8_t cmd_status; /*02h */ u_int8_t reserved_1; /*03h */ u_int32_t reserved_2; /*04h */ u_int32_t context; /*08h */ u_int32_t pad_0; /*0Ch */ u_int16_t flags; /*10h */ u_int16_t reserved_3; /*12h */ u_int32_t data_xfer_len; /*14h */ u_int32_t queue_info_new_phys_addr_lo; /*18h */ u_int32_t queue_info_new_phys_addr_hi; /*1Ch */ u_int32_t queue_info_old_phys_addr_lo; /*20h */ u_int32_t queue_info_old_phys_addr_hi; /*24h */ u_int32_t driver_ver_lo; /*28h */ u_int32_t driver_ver_hi; /*2Ch */ u_int32_t reserved_4[4]; /*30h */ }; #pragma pack() #pragma pack(1) struct mrsas_io_frame { u_int8_t cmd; /*00h */ u_int8_t sense_len; /*01h */ u_int8_t cmd_status; /*02h */ u_int8_t scsi_status; /*03h */ u_int8_t target_id; /*04h */ u_int8_t access_byte; /*05h */ u_int8_t reserved_0; /*06h */ u_int8_t sge_count; /*07h */ u_int32_t context; /*08h */ u_int32_t pad_0; /*0Ch */ u_int16_t flags; /*10h */ u_int16_t timeout; /*12h */ u_int32_t lba_count; /*14h */ u_int32_t sense_buf_phys_addr_lo; /*18h */ u_int32_t sense_buf_phys_addr_hi; /*1Ch */ u_int32_t start_lba_lo; /*20h */ u_int32_t start_lba_hi; /*24h */ union mrsas_sgl sgl; /*28h */ }; #pragma pack() #pragma pack(1) struct mrsas_pthru_frame { u_int8_t cmd; /*00h */ u_int8_t sense_len; /*01h */ u_int8_t cmd_status; /*02h */ u_int8_t scsi_status; /*03h */ u_int8_t target_id; /*04h */ u_int8_t lun; /*05h */ u_int8_t cdb_len; /*06h */ u_int8_t sge_count; /*07h */ u_int32_t context; /*08h */ u_int32_t pad_0; /*0Ch */ u_int16_t flags; /*10h */
u_int16_t timeout; /*12h */ u_int32_t data_xfer_len; /*14h */ u_int32_t sense_buf_phys_addr_lo; /*18h */ u_int32_t sense_buf_phys_addr_hi; /*1Ch */ u_int8_t cdb[16]; /*20h */ union mrsas_sgl sgl; /*30h */ }; #pragma pack() #pragma pack(1) struct mrsas_dcmd_frame { u_int8_t cmd; /*00h */ u_int8_t reserved_0; /*01h */ u_int8_t cmd_status; /*02h */ u_int8_t reserved_1[4]; /*03h */ u_int8_t sge_count; /*07h */ u_int32_t context; /*08h */ u_int32_t pad_0; /*0Ch */ u_int16_t flags; /*10h */ u_int16_t timeout; /*12h */ u_int32_t data_xfer_len; /*14h */ u_int32_t opcode; /*18h */ union { /*1Ch */ u_int8_t b[12]; u_int16_t s[6]; u_int32_t w[3]; } mbox; union mrsas_sgl sgl; /*28h */ }; #pragma pack() #pragma pack(1) struct mrsas_abort_frame { u_int8_t cmd; /*00h */ u_int8_t reserved_0; /*01h */ u_int8_t cmd_status; /*02h */ u_int8_t reserved_1; /*03h */ u_int32_t reserved_2; /*04h */ u_int32_t context; /*08h */ u_int32_t pad_0; /*0Ch */ u_int16_t flags; /*10h */ u_int16_t reserved_3; /*12h */ u_int32_t reserved_4; /*14h */ u_int32_t abort_context; /*18h */ u_int32_t pad_1; /*1Ch */ u_int32_t abort_mfi_phys_addr_lo; /*20h */ u_int32_t abort_mfi_phys_addr_hi; /*24h */ u_int32_t reserved_5[6]; /*28h */ }; #pragma pack() #pragma pack(1) struct mrsas_smp_frame { u_int8_t cmd; /*00h */ u_int8_t reserved_1; /*01h */ u_int8_t cmd_status; /*02h */ u_int8_t connection_status; /*03h */ u_int8_t reserved_2[3]; /*04h */ u_int8_t sge_count; /*07h */ u_int32_t context; /*08h */ u_int32_t pad_0; /*0Ch */ u_int16_t flags; /*10h */ u_int16_t timeout; /*12h */ u_int32_t data_xfer_len; /*14h */ u_int64_t sas_addr; /*18h */ union { struct mrsas_sge32 sge32[2]; /* [0]: resp [1]: req */ struct mrsas_sge64 sge64[2]; /* [0]: resp [1]: req */ } sgl; }; #pragma pack() #pragma pack(1) struct mrsas_stp_frame { u_int8_t cmd; /*00h */ u_int8_t reserved_1; /*01h */ u_int8_t cmd_status; /*02h */ u_int8_t reserved_2; /*03h */ u_int8_t target_id; /*04h */ u_int8_t reserved_3[2]; /*05h */ u_int8_t sge_count; 
/*07h */ u_int32_t context; /*08h */ u_int32_t pad_0; /*0Ch */ u_int16_t flags; /*10h */ u_int16_t timeout; /*12h */ u_int32_t data_xfer_len; /*14h */ u_int16_t fis[10]; /*18h */ u_int32_t stp_flags; union { struct mrsas_sge32 sge32[2]; /* [0]: resp [1]: data */ struct mrsas_sge64 sge64[2]; /* [0]: resp [1]: data */ } sgl; }; #pragma pack() union mrsas_frame { struct mrsas_header hdr; struct mrsas_init_frame init; struct mrsas_io_frame io; struct mrsas_pthru_frame pthru; struct mrsas_dcmd_frame dcmd; struct mrsas_abort_frame abort; struct mrsas_smp_frame smp; struct mrsas_stp_frame stp; u_int8_t raw_bytes[64]; }; #pragma pack(1) union mrsas_evt_class_locale { struct { u_int16_t locale; u_int8_t reserved; int8_t class; } __packed members; u_int32_t word; } __packed; #pragma pack() #pragma pack(1) struct mrsas_evt_log_info { u_int32_t newest_seq_num; u_int32_t oldest_seq_num; u_int32_t clear_seq_num; u_int32_t shutdown_seq_num; u_int32_t boot_seq_num; } __packed; #pragma pack() struct mrsas_progress { u_int16_t progress; u_int16_t elapsed_seconds; } __packed; struct mrsas_evtarg_ld { u_int16_t target_id; u_int8_t ld_index; u_int8_t reserved; } __packed; struct mrsas_evtarg_pd { u_int16_t device_id; u_int8_t encl_index; u_int8_t slot_number; } __packed; struct mrsas_evt_detail { u_int32_t seq_num; u_int32_t time_stamp; u_int32_t code; union mrsas_evt_class_locale cl; u_int8_t arg_type; u_int8_t reserved1[15]; union { struct { struct mrsas_evtarg_pd pd; u_int8_t cdb_length; u_int8_t sense_length; u_int8_t reserved[2]; u_int8_t cdb[16]; u_int8_t sense[64]; } __packed cdbSense; struct mrsas_evtarg_ld ld; struct { struct mrsas_evtarg_ld ld; u_int64_t count; } __packed ld_count; struct { u_int64_t lba; struct mrsas_evtarg_ld ld; } __packed ld_lba; struct { struct mrsas_evtarg_ld ld; u_int32_t prevOwner; u_int32_t newOwner; } __packed ld_owner; struct { u_int64_t ld_lba; u_int64_t pd_lba; struct mrsas_evtarg_ld ld; struct mrsas_evtarg_pd pd; } __packed ld_lba_pd_lba; struct 
{ struct mrsas_evtarg_ld ld; struct mrsas_progress prog; } __packed ld_prog; struct { struct mrsas_evtarg_ld ld; u_int32_t prev_state; u_int32_t new_state; } __packed ld_state; struct { u_int64_t strip; struct mrsas_evtarg_ld ld; } __packed ld_strip; struct mrsas_evtarg_pd pd; struct { struct mrsas_evtarg_pd pd; u_int32_t err; } __packed pd_err; struct { u_int64_t lba; struct mrsas_evtarg_pd pd; } __packed pd_lba; struct { u_int64_t lba; struct mrsas_evtarg_pd pd; struct mrsas_evtarg_ld ld; } __packed pd_lba_ld; struct { struct mrsas_evtarg_pd pd; struct mrsas_progress prog; } __packed pd_prog; struct { struct mrsas_evtarg_pd pd; u_int32_t prevState; u_int32_t newState; } __packed pd_state; struct { u_int16_t vendorId; u_int16_t deviceId; u_int16_t subVendorId; u_int16_t subDeviceId; } __packed pci; u_int32_t rate; char str[96]; struct { u_int32_t rtc; u_int32_t elapsedSeconds; } __packed time; struct { u_int32_t ecar; u_int32_t elog; char str[64]; } __packed ecc; u_int8_t b[96]; u_int16_t s[48]; u_int32_t w[24]; u_int64_t d[12]; } args; char description[128]; } __packed; /******************************************************************* * per-instance data ********************************************************************/ struct mrsas_softc { device_t mrsas_dev; // bus device struct cdev *mrsas_cdev; // controller device uint16_t device_id; // pci device struct resource *reg_res; // register interface window int reg_res_id; // register resource id bus_space_tag_t bus_tag; // bus space tag bus_space_handle_t bus_handle; // bus space handle bus_dma_tag_t mrsas_parent_tag; // bus dma parent tag bus_dma_tag_t verbuf_tag; // verbuf tag bus_dmamap_t verbuf_dmamap; // verbuf dmamap void *verbuf_mem; // verbuf mem bus_addr_t verbuf_phys_addr; // verbuf physical addr bus_dma_tag_t sense_tag; // bus dma verbuf tag bus_dmamap_t sense_dmamap; // bus dma verbuf dmamap void *sense_mem; // pointer to sense buf bus_addr_t sense_phys_addr; // bus dma verbuf mem bus_dma_tag_t 
io_request_tag; // bus dma io request tag bus_dmamap_t io_request_dmamap; // bus dma io request dmamap void *io_request_mem; // bus dma io request mem bus_addr_t io_request_phys_addr; // io request physical address bus_dma_tag_t chain_frame_tag; // bus dma chain frame tag bus_dmamap_t chain_frame_dmamap; // bus dma chain frame dmamap void *chain_frame_mem; // bus dma chain frame mem bus_addr_t chain_frame_phys_addr; // chain frame phys address bus_dma_tag_t reply_desc_tag; // bus dma io request tag bus_dmamap_t reply_desc_dmamap; // bus dma io request dmamap void *reply_desc_mem; // bus dma io request mem bus_addr_t reply_desc_phys_addr; // bus dma io request mem bus_dma_tag_t ioc_init_tag; // bus dma io request tag bus_dmamap_t ioc_init_dmamap; // bus dma io request dmamap void *ioc_init_mem; // bus dma io request mem bus_addr_t ioc_init_phys_mem; // io request physical address bus_dma_tag_t data_tag; // bus dma data from OS tag struct cam_sim *sim_0; // SIM pointer struct cam_sim *sim_1; // SIM pointer struct cam_path *path_0; // ldio path pointer to CAM struct cam_path *path_1; // syspd path pointer to CAM struct mtx sim_lock; // sim lock struct mtx pci_lock; // serialize pci access struct mtx io_lock; // IO lock struct mtx ioctl_lock; // IOCTL lock struct mtx mpt_cmd_pool_lock; // lock for cmd pool linked list struct mtx mfi_cmd_pool_lock; // lock for cmd pool linked list struct mtx raidmap_lock; // lock for raid map access/update struct mtx aen_lock; // aen lock uint32_t max_fw_cmds; // Max commands from FW uint32_t max_num_sge; // Max number of SGEs struct resource *mrsas_irq; // interrupt interface window void *intr_handle; // handle int irq_id; // intr resource id struct mrsas_mpt_cmd **mpt_cmd_list; struct mrsas_mfi_cmd **mfi_cmd_list; TAILQ_HEAD(, mrsas_mpt_cmd) mrsas_mpt_cmd_list_head; TAILQ_HEAD(, mrsas_mfi_cmd) mrsas_mfi_cmd_list_head; bus_addr_t req_frames_desc_phys; u_int8_t *req_frames_desc; u_int8_t *req_desc; bus_addr_t io_request_frames_phys; 
u_int8_t *io_request_frames; bus_addr_t reply_frames_desc_phys; u_int16_t last_reply_idx; u_int32_t reply_q_depth; u_int32_t request_alloc_sz; u_int32_t reply_alloc_sz; u_int32_t io_frames_alloc_sz; u_int32_t chain_frames_alloc_sz; u_int16_t max_sge_in_main_msg; u_int16_t max_sge_in_chain; u_int8_t chain_offset_io_request; u_int8_t chain_offset_mfi_pthru; u_int32_t map_sz; u_int64_t map_id; struct mrsas_mfi_cmd *map_update_cmd; struct mrsas_mfi_cmd *aen_cmd; u_int8_t fast_path_io; void* chan; void* ocr_chan; u_int8_t adprecovery; u_int8_t remove_in_progress; u_int8_t ocr_thread_active; u_int8_t do_timedout_reset; u_int32_t reset_in_progress; u_int32_t reset_count; bus_dma_tag_t raidmap_tag[2]; // bus dma tag for RAID map bus_dmamap_t raidmap_dmamap[2]; // bus dma dmamap RAID map void *raidmap_mem[2]; // bus dma mem RAID map bus_addr_t raidmap_phys_addr[2]; // RAID map physical address bus_dma_tag_t mficmd_frame_tag; // tag for mfi frame bus_dma_tag_t mficmd_sense_tag; // tag for mfi sense bus_dma_tag_t evt_detail_tag; // event detail tag bus_dmamap_t evt_detail_dmamap; // event detail dmamap struct mrsas_evt_detail *evt_detail_mem; // event detail mem bus_addr_t evt_detail_phys_addr; // event detail physical addr bus_dma_tag_t ctlr_info_tag; // tag for get ctlr info cmd bus_dmamap_t ctlr_info_dmamap; // get ctlr info cmd dmamap void *ctlr_info_mem; // get ctlr info cmd virtual addr bus_addr_t ctlr_info_phys_addr; //get ctlr info cmd physical addr u_int32_t max_sectors_per_req; - u_int8_t disableOnlineCtrlReset; + u_int32_t disableOnlineCtrlReset; atomic_t fw_outstanding; u_int32_t mrsas_debug; u_int32_t mrsas_io_timeout; u_int32_t mrsas_fw_fault_check_delay; u_int32_t io_cmds_highwater; u_int8_t UnevenSpanSupport; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; struct proc *ocr_thread; u_int32_t last_seq_num; bus_dma_tag_t el_info_tag; // tag for get event log info cmd bus_dmamap_t el_info_dmamap; // get event log info cmd dmamap void 
*el_info_mem; // get event log info cmd virtual addr bus_addr_t el_info_phys_addr; //get event log info cmd physical addr struct mrsas_pd_list pd_list[MRSAS_MAX_PD]; struct mrsas_pd_list local_pd_list[MRSAS_MAX_PD]; u_int8_t ld_ids[MRSAS_MAX_LD]; struct taskqueue *ev_tq; //taskqueue for events struct task ev_task; u_int32_t CurLdCount; u_int64_t reset_flags; LD_LOAD_BALANCE_INFO load_balance_info[MAX_LOGICAL_DRIVES]; LD_SPAN_INFO log_to_span[MAX_LOGICAL_DRIVES]; }; /* Compatibility shims for different OS versions */ #if __FreeBSD_version >= 800001 #define mrsas_kproc_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg) \ kproc_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg) #define mrsas_kproc_exit(arg) kproc_exit(arg) #else #define mrsas_kproc_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg) \ kthread_create(func, farg, proc_ptr, flags, stackpgs, fmtstr, arg) #define mrsas_kproc_exit(arg) kthread_exit(arg) #endif static __inline void clear_bit(int b, volatile void *p) { atomic_clear_int(((volatile int *)p) + (b >> 5), 1 << (b & 0x1f)); } static __inline void set_bit(int b, volatile void *p) { atomic_set_int(((volatile int *)p) + (b >> 5), 1 << (b & 0x1f)); } static __inline int test_bit(int b, volatile void *p) { return ((volatile int *)p)[b >> 5] & (1 << (b & 0x1f)); } #endif /* MRSAS_H */ Index: stable/9/sys/dev/mxge/if_mxge.c =================================================================== --- stable/9/sys/dev/mxge/if_mxge.c (revision 273911) +++ stable/9/sys/dev/mxge/if_mxge.c (revision 273912) @@ -1,5031 +1,5031 @@ /****************************************************************************** Copyright (c) 2006-2013, Myricom Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Neither the name of the Myricom Inc, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX for pci_cfg_restore */ #include /* for pmap_mapdev() */ #include #if defined(__i386) || defined(__amd64) #include #endif #include #include /*#define MXGE_FAKE_IFP*/ #include #ifdef IFNET_BUF_RING #include #endif #include "opt_inet.h" #include "opt_inet6.h" /* tunable params */ static int mxge_nvidia_ecrc_enable = 1; static int mxge_force_firmware = 0; static int mxge_intr_coal_delay = 30; static int mxge_deassert_wait = 1; static int mxge_flow_control = 1; static int mxge_verbose = 0; static int mxge_ticks; static int mxge_max_slices = 1; static int mxge_rss_hash_type 
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; static int mxge_always_promisc = 0; static int mxge_initial_mtu = ETHERMTU_JUMBO; static int mxge_throttle = 0; static char *mxge_fw_unaligned = "mxge_ethp_z8e"; static char *mxge_fw_aligned = "mxge_eth_z8e"; static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; static int mxge_probe(device_t dev); static int mxge_attach(device_t dev); static int mxge_detach(device_t dev); static int mxge_shutdown(device_t dev); static void mxge_intr(void *arg); static device_method_t mxge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mxge_probe), DEVMETHOD(device_attach, mxge_attach), DEVMETHOD(device_detach, mxge_detach), DEVMETHOD(device_shutdown, mxge_shutdown), {0, 0} }; static driver_t mxge_driver = { "mxge", mxge_methods, sizeof(mxge_softc_t), }; static devclass_t mxge_devclass; /* Declare ourselves to be a child of the PCI bus.*/ DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); MODULE_DEPEND(mxge, firmware, 1, 1, 1); MODULE_DEPEND(mxge, zlib, 1, 1, 1); static int mxge_load_firmware(mxge_softc_t *sc, int adopt); static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); static int mxge_close(mxge_softc_t *sc, int down); static int mxge_open(mxge_softc_t *sc); static void mxge_tick(void *arg); static int mxge_probe(device_t dev) { int rev; if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { rev = pci_get_revid(dev); switch (rev) { case MXGE_PCI_REV_Z8E: device_set_desc(dev, "Myri10G-PCIE-8A"); break; case MXGE_PCI_REV_Z8ES: device_set_desc(dev, "Myri10G-PCIE-8B"); break; default: device_set_desc(dev, "Myri10G-PCIE-8??"); device_printf(dev, "Unrecognized rev %d NIC\n", rev); break; } return 0; } return ENXIO; } static void mxge_enable_wc(mxge_softc_t *sc) { #if defined(__i386) || defined(__amd64) vm_offset_t len; int err; sc->wc = 1; len 
= rman_get_size(sc->mem_res); err = pmap_change_attr((vm_offset_t) sc->sram, len, PAT_WRITE_COMBINING); if (err != 0) { device_printf(sc->dev, "pmap_change_attr failed, %d\n", err); sc->wc = 0; } #endif } /* callback to get our DMA address */ static void mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error == 0) { *(bus_addr_t *) arg = segs->ds_addr; } } static int mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, bus_size_t alignment) { int err; device_t dev = sc->dev; bus_size_t boundary, maxsegsize; if (bytes > 4096 && alignment == 4096) { boundary = 0; maxsegsize = bytes; } else { boundary = 4096; maxsegsize = 4096; } /* allocate DMAable memory tags */ err = bus_dma_tag_create(sc->parent_dmat, /* parent */ alignment, /* alignment */ boundary, /* boundary */ BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ bytes, /* maxsize */ 1, /* num segs */ maxsegsize, /* maxsegsize */ BUS_DMA_COHERENT, /* flags */ NULL, NULL, /* lock */ &dma->dmat); /* tag */ if (err != 0) { device_printf(dev, "couldn't alloc tag (err = %d)\n", err); return err; } /* allocate DMAable memory & map */ err = bus_dmamem_alloc(dma->dmat, &dma->addr, (BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO), &dma->map); if (err != 0) { device_printf(dev, "couldn't alloc mem (err = %d)\n", err); goto abort_with_dmat; } /* load the memory */ err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, mxge_dmamap_callback, (void *)&dma->bus_addr, 0); if (err != 0) { device_printf(dev, "couldn't load map (err = %d)\n", err); goto abort_with_mem; } return 0; abort_with_mem: bus_dmamem_free(dma->dmat, dma->addr, dma->map); abort_with_dmat: (void)bus_dma_tag_destroy(dma->dmat); return err; } static void mxge_dma_free(mxge_dma_t *dma) { bus_dmamap_unload(dma->dmat, dma->map); bus_dmamem_free(dma->dmat, dma->addr, dma->map); (void)bus_dma_tag_destroy(dma->dmat); } /* * The eeprom strings on the lanaiX have the format * SN=x\0 
* MAC=x:x:x:x:x:x\0 * PC=text\0 */ static int mxge_parse_strings(mxge_softc_t *sc) { char *ptr; int i, found_mac, found_sn2; char *endptr; ptr = sc->eeprom_strings; found_mac = 0; found_sn2 = 0; while (*ptr != '\0') { if (strncmp(ptr, "MAC=", 4) == 0) { ptr += 4; for (i = 0;;) { sc->mac_addr[i] = strtoul(ptr, &endptr, 16); if (endptr - ptr != 2) goto abort; ptr = endptr; if (++i == 6) break; if (*ptr++ != ':') goto abort; } found_mac = 1; } else if (strncmp(ptr, "PC=", 3) == 0) { ptr += 3; strlcpy(sc->product_code_string, ptr, sizeof(sc->product_code_string)); } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { ptr += 3; strlcpy(sc->serial_number_string, ptr, sizeof(sc->serial_number_string)); } else if (strncmp(ptr, "SN2=", 4) == 0) { /* SN2 takes precedence over SN */ ptr += 4; found_sn2 = 1; strlcpy(sc->serial_number_string, ptr, sizeof(sc->serial_number_string)); } while (*ptr++ != '\0') {} } if (found_mac) return 0; abort: device_printf(sc->dev, "failed to parse eeprom_strings\n"); return ENXIO; } #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ static void mxge_enable_nvidia_ecrc(mxge_softc_t *sc) { uint32_t val; unsigned long base, off; char *va, *cfgptr; device_t pdev, mcp55; uint16_t vendor_id, device_id, word; uintptr_t bus, slot, func, ivend, idev; uint32_t *ptr32; if (!mxge_nvidia_ecrc_enable) return; pdev = device_get_parent(device_get_parent(sc->dev)); if (pdev == NULL) { device_printf(sc->dev, "could not find parent?\n"); return; } vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); device_id = pci_read_config(pdev, PCIR_DEVICE, 2); if (vendor_id != 0x10de) return; base = 0; if (device_id == 0x005d) { /* ck804, base address is magic */ base = 0xe0000000UL; } else if (device_id >= 0x0374 && device_id <= 0x378) { /* mcp55, base address stored in chipset */ mcp55 = pci_find_bsf(0, 0, 0); if (mcp55 && 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { word = 
pci_read_config(mcp55, 0x90, 2); base = ((unsigned long)word & 0x7ffeU) << 25; } } if (!base) return; /* XXXX Test below is commented because it is believed that doing config read/write beyond 0xff will access the config space for the next larger function. Uncomment this and remove the hacky pmap_mapdev() way of accessing config space when FreeBSD grows support for extended pcie config space access */ #if 0 /* See if we can, by some miracle, access the extended config space */ val = pci_read_config(pdev, 0x178, 4); if (val != 0xffffffff) { val |= 0x40; pci_write_config(pdev, 0x178, val, 4); return; } #endif /* Rather than using normal pci config space writes, we must * map the Nvidia config space ourselves. This is because on * opteron/nvidia class machine the 0xe000000 mapping is * handled by the nvidia chipset, that means the internal PCI * device (the on-chip northbridge), or the amd-8131 bridge * and things behind them are not visible by this method. */ BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_BUS, &bus); BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_SLOT, &slot); BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_FUNCTION, &func); BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_VENDOR, &ivend); BUS_READ_IVAR(device_get_parent(pdev), pdev, PCI_IVAR_DEVICE, &idev); off = base + 0x00100000UL * (unsigned long)bus + 0x00001000UL * (unsigned long)(func + 8 * slot); /* map it into the kernel */ va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); if (va == NULL) { device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); return; } /* get a pointer to the config space mapped into the kernel */ cfgptr = va + (off & PAGE_MASK); /* make sure that we can really access it */ vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); if (! 
(vendor_id == ivend && device_id == idev)) { device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", vendor_id, device_id); pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); return; } ptr32 = (uint32_t*)(cfgptr + 0x178); val = *ptr32; if (val == 0xffffffff) { device_printf(sc->dev, "extended mapping failed\n"); pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); return; } *ptr32 = val | 0x40; pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); if (mxge_verbose) device_printf(sc->dev, "Enabled ECRC on upstream Nvidia bridge " "at %d:%d:%d\n", (int)bus, (int)slot, (int)func); return; } #else static void mxge_enable_nvidia_ecrc(mxge_softc_t *sc) { device_printf(sc->dev, "Nforce 4 chipset on non-x86/amd64!?!?!\n"); return; } #endif static int mxge_dma_test(mxge_softc_t *sc, int test_type) { mxge_cmd_t cmd; bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; int status; uint32_t len; char *test = " "; /* Run a small DMA test. * The magic multipliers to the length tell the firmware * to do DMA read, write, or read+write tests. The * results are returned in cmd.data0. The upper 16 * bits of the return is the number of transfers completed. * The lower 16 bits is the time in 0.5us ticks that the * transfers took to complete. 
*/ len = sc->tx_boundary; cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); cmd.data2 = len * 0x10000; status = mxge_send_cmd(sc, test_type, &cmd); if (status != 0) { test = "read"; goto abort; } sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); cmd.data2 = len * 0x1; status = mxge_send_cmd(sc, test_type, &cmd); if (status != 0) { test = "write"; goto abort; } sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); cmd.data2 = len * 0x10001; status = mxge_send_cmd(sc, test_type, &cmd); if (status != 0) { test = "read/write"; goto abort; } sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / (cmd.data0 & 0xffff); abort: if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) device_printf(sc->dev, "DMA %s benchmark failed: %d\n", test, status); return status; } /* * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput * when the PCI-E Completion packets are aligned on an 8-byte * boundary. Some PCI-E chip sets always align Completion packets; on * the ones that do not, the alignment can be enforced by enabling * ECRC generation (if supported). * * When PCI-E Completion packets are not aligned, it is actually more * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. * * If the driver can neither enable ECRC nor verify that it has * already been enabled, then it must use a firmware image which works * around unaligned completion packets (ethp_z8e.dat), and it should * also ensure that it never gives the device a Read-DMA which is * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is * enabled, then the driver should use the aligned (eth_z8e.dat) * firmware image, and set tx_boundary to 4KB. 
*/ static int mxge_firmware_probe(mxge_softc_t *sc) { device_t dev = sc->dev; int reg, status; uint16_t pectl; sc->tx_boundary = 4096; /* * Verify the max read request size was set to 4KB * before trying the test with 4KB. */ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { pectl = pci_read_config(dev, reg + 0x8, 2); if ((pectl & (5 << 12)) != (5 << 12)) { device_printf(dev, "Max Read Req. size != 4k (0x%x\n", pectl); sc->tx_boundary = 2048; } } /* * load the optimized firmware (which assumes aligned PCIe * completions) in order to see if it works on this host. */ sc->fw_name = mxge_fw_aligned; status = mxge_load_firmware(sc, 1); if (status != 0) { return status; } /* * Enable ECRC if possible */ mxge_enable_nvidia_ecrc(sc); /* * Run a DMA test which watches for unaligned completions and * aborts on the first one seen. Not required on Z8ES or newer. */ if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) return 0; status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); if (status == 0) return 0; /* keep the aligned firmware */ if (status != E2BIG) device_printf(dev, "DMA test failed: %d\n", status); if (status == ENOSYS) device_printf(dev, "Falling back to ethp! " "Please install up to date fw\n"); return status; } static int mxge_select_firmware(mxge_softc_t *sc) { int aligned = 0; int force_firmware = mxge_force_firmware; if (sc->throttle) force_firmware = sc->throttle; if (force_firmware != 0) { if (force_firmware == 1) aligned = 1; else aligned = 0; if (mxge_verbose) device_printf(sc->dev, "Assuming %s completions (forced)\n", aligned ? 
"aligned" : "unaligned"); goto abort; } /* if the PCIe link width is 4 or less, we can use the aligned firmware and skip any checks */ if (sc->link_width != 0 && sc->link_width <= 4) { device_printf(sc->dev, "PCIe x%d Link, expect reduced performance\n", sc->link_width); aligned = 1; goto abort; } if (0 == mxge_firmware_probe(sc)) return 0; abort: if (aligned) { sc->fw_name = mxge_fw_aligned; sc->tx_boundary = 4096; } else { sc->fw_name = mxge_fw_unaligned; sc->tx_boundary = 2048; } return (mxge_load_firmware(sc, 0)); } static int mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) { if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { device_printf(sc->dev, "Bad firmware type: 0x%x\n", be32toh(hdr->mcp_type)); return EIO; } /* save firmware version for sysctl */ strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); if (mxge_verbose) device_printf(sc->dev, "firmware id: %s\n", hdr->version); sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, &sc->fw_ver_minor, &sc->fw_ver_tiny); if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { device_printf(sc->dev, "Found firmware version %s\n", sc->fw_version); device_printf(sc->dev, "Driver needs %d.%d\n", MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); return EINVAL; } return 0; } static void * z_alloc(void *nil, u_int items, u_int size) { void *ptr; ptr = malloc(items * size, M_TEMP, M_NOWAIT); return ptr; } static void z_free(void *nil, void *ptr) { free(ptr, M_TEMP); } static int mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) { z_stream zs; char *inflate_buffer; const struct firmware *fw; const mcp_gen_header_t *hdr; unsigned hdr_offset; int status; unsigned int i; char dummy; size_t fw_len; fw = firmware_get(sc->fw_name); if (fw == NULL) { device_printf(sc->dev, "Could not find firmware image %s\n", sc->fw_name); return ENOENT; } /* setup zlib and decompress f/w */ bzero(&zs, sizeof (zs)); zs.zalloc = z_alloc; zs.zfree = z_free; status = 
inflateInit(&zs); if (status != Z_OK) { status = EIO; goto abort_with_fw; } /* the uncompressed size is stored as the firmware version, which would otherwise go unused */ fw_len = (size_t) fw->version; inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); if (inflate_buffer == NULL) goto abort_with_zs; zs.avail_in = fw->datasize; zs.next_in = __DECONST(char *, fw->data); zs.avail_out = fw_len; zs.next_out = inflate_buffer; status = inflate(&zs, Z_FINISH); if (status != Z_STREAM_END) { device_printf(sc->dev, "zlib %d\n", status); status = EIO; goto abort_with_buffer; } /* check id */ hdr_offset = htobe32(*(const uint32_t *) (inflate_buffer + MCP_HEADER_PTR_OFFSET)); if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { device_printf(sc->dev, "Bad firmware file"); status = EIO; goto abort_with_buffer; } hdr = (const void*)(inflate_buffer + hdr_offset); status = mxge_validate_firmware(sc, hdr); if (status != 0) goto abort_with_buffer; /* Copy the inflated firmware to NIC SRAM. */ for (i = 0; i < fw_len; i += 256) { mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, min(256U, (unsigned)(fw_len - i))); wmb(); dummy = *sc->sram; wmb(); } *limit = fw_len; status = 0; abort_with_buffer: free(inflate_buffer, M_TEMP); abort_with_zs: inflateEnd(&zs); abort_with_fw: firmware_put(fw, FIRMWARE_UNLOAD); return status; } /* * Enable or disable periodic RDMAs from the host to make certain * chipsets resend dropped PCIe messages */ static void mxge_dummy_rdma(mxge_softc_t *sc, int enable) { char buf_bytes[72]; volatile uint32_t *confirm; volatile char *submit; uint32_t *buf, dma_low, dma_high; int i; buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); /* clear confirmation addr */ confirm = (volatile uint32_t *)sc->cmd; *confirm = 0; wmb(); /* send an rdma command to the PCIe engine, and wait for the response in the confirmation address. 
The firmware should write a -1 there to indicate it is alive and well */ dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); buf[0] = htobe32(dma_high); /* confirm addr MSW */ buf[1] = htobe32(dma_low); /* confirm addr LSW */ buf[2] = htobe32(0xffffffff); /* confirm data */ dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); buf[3] = htobe32(dma_high); /* dummy addr MSW */ buf[4] = htobe32(dma_low); /* dummy addr LSW */ buf[5] = htobe32(enable); /* enable? */ submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); mxge_pio_copy(submit, buf, 64); wmb(); DELAY(1000); wmb(); i = 0; while (*confirm != 0xffffffff && i < 20) { DELAY(1000); i++; } if (*confirm != 0xffffffff) { device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", (enable ? "enable" : "disable"), confirm, *confirm); } return; } static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) { mcp_cmd_t *buf; char buf_bytes[sizeof(*buf) + 8]; volatile mcp_cmd_response_t *response = sc->cmd; volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; uint32_t dma_low, dma_high; int err, sleep_total = 0; /* ensure buf is aligned to 8 bytes */ buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); buf->data0 = htobe32(data->data0); buf->data1 = htobe32(data->data1); buf->data2 = htobe32(data->data2); buf->cmd = htobe32(cmd); dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); buf->response_addr.low = htobe32(dma_low); buf->response_addr.high = htobe32(dma_high); mtx_lock(&sc->cmd_mtx); response->result = 0xffffffff; wmb(); mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); /* wait up to 20ms */ err = EAGAIN; for (sleep_total = 0; sleep_total < 20; sleep_total++) { bus_dmamap_sync(sc->cmd_dma.dmat, sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); wmb(); switch (be32toh(response->result)) { case 0: data->data0 = 
be32toh(response->data); err = 0; break; case 0xffffffff: DELAY(1000); break; case MXGEFW_CMD_UNKNOWN: err = ENOSYS; break; case MXGEFW_CMD_ERROR_UNALIGNED: err = E2BIG; break; case MXGEFW_CMD_ERROR_BUSY: err = EBUSY; break; case MXGEFW_CMD_ERROR_I2C_ABSENT: err = ENXIO; break; default: device_printf(sc->dev, "mxge: command %d " "failed, result = %d\n", cmd, be32toh(response->result)); err = ENXIO; break; } if (err != EAGAIN) break; } if (err == EAGAIN) device_printf(sc->dev, "mxge: command %d timed out" "result = %d\n", cmd, be32toh(response->result)); mtx_unlock(&sc->cmd_mtx); return err; } static int mxge_adopt_running_firmware(mxge_softc_t *sc) { struct mcp_gen_header *hdr; const size_t bytes = sizeof (struct mcp_gen_header); size_t hdr_offset; int status; /* find running firmware header */ hdr_offset = htobe32(*(volatile uint32_t *) (sc->sram + MCP_HEADER_PTR_OFFSET)); if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { device_printf(sc->dev, "Running firmware has bad header offset (%d)\n", (int)hdr_offset); return EIO; } /* copy header of running firmware from SRAM to host memory to * validate firmware */ hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); if (hdr == NULL) { device_printf(sc->dev, "could not malloc firmware hdr\n"); return ENOMEM; } bus_space_read_region_1(rman_get_bustag(sc->mem_res), rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); status = mxge_validate_firmware(sc, hdr); free(hdr, M_DEVBUF); /* * check to see if adopted firmware has bug where adopting * it will cause broadcasts to be filtered unless the NIC * is kept in ALLMULTI mode */ if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { sc->adopted_rx_filter_bug = 1; device_printf(sc->dev, "Adopting fw %d.%d.%d: " "working around rx filter bug\n", sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); } return status; } static int mxge_load_firmware(mxge_softc_t *sc, int adopt) { volatile uint32_t *confirm; volatile 
char *submit; char buf_bytes[72]; uint32_t *buf, size, dma_low, dma_high; int status, i; buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); size = sc->sram_size; status = mxge_load_firmware_helper(sc, &size); if (status) { if (!adopt) return status; /* Try to use the currently running firmware, if it is new enough */ status = mxge_adopt_running_firmware(sc); if (status) { device_printf(sc->dev, "failed to adopt running firmware\n"); return status; } device_printf(sc->dev, "Successfully adopted running firmware\n"); if (sc->tx_boundary == 4096) { device_printf(sc->dev, "Using firmware currently running on NIC" ". For optimal\n"); device_printf(sc->dev, "performance consider loading optimized " "firmware\n"); } sc->fw_name = mxge_fw_unaligned; sc->tx_boundary = 2048; return 0; } /* clear confirmation addr */ confirm = (volatile uint32_t *)sc->cmd; *confirm = 0; wmb(); /* send a reload command to the bootstrap MCP, and wait for the response in the confirmation address. The firmware should write a -1 there to indicate it is alive and well */ dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); buf[0] = htobe32(dma_high); /* confirm addr MSW */ buf[1] = htobe32(dma_low); /* confirm addr LSW */ buf[2] = htobe32(0xffffffff); /* confirm data */ /* FIX: All newest firmware should un-protect the bottom of the sram before handoff. However, the very first interfaces do not. 
Therefore the handoff copy must skip the first 8 bytes */ /* where the code starts*/ buf[3] = htobe32(MXGE_FW_OFFSET + 8); buf[4] = htobe32(size - 8); /* length of code */ buf[5] = htobe32(8); /* where to copy to */ buf[6] = htobe32(0); /* where to jump to */ submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); mxge_pio_copy(submit, buf, 64); wmb(); DELAY(1000); wmb(); i = 0; while (*confirm != 0xffffffff && i < 20) { DELAY(1000*10); i++; bus_dmamap_sync(sc->cmd_dma.dmat, sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); } if (*confirm != 0xffffffff) { device_printf(sc->dev,"handoff failed (%p = 0x%x)", confirm, *confirm); return ENXIO; } return 0; } static int mxge_update_mac_address(mxge_softc_t *sc) { mxge_cmd_t cmd; uint8_t *addr = sc->mac_addr; int status; cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) | addr[3]); cmd.data1 = ((addr[4] << 8) | (addr[5])); status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); return status; } static int mxge_change_pause(mxge_softc_t *sc, int pause) { mxge_cmd_t cmd; int status; if (pause) status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); else status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); if (status) { device_printf(sc->dev, "Failed to set flow control mode\n"); return ENXIO; } sc->pause = pause; return 0; } static void mxge_change_promisc(mxge_softc_t *sc, int promisc) { mxge_cmd_t cmd; int status; if (mxge_always_promisc) promisc = 1; if (promisc) status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); else status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); if (status) { device_printf(sc->dev, "Failed to set promisc mode\n"); } } static void mxge_set_multicast_list(mxge_softc_t *sc) { mxge_cmd_t cmd; struct ifmultiaddr *ifma; struct ifnet *ifp = sc->ifp; int err; /* This firmware is known to not support multicast */ if (!sc->fw_multicast_support) return; /* Disable multicast filtering while we play with the lists*/ err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); if 
(err != 0) { device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," " error status: %d\n", err); return; } if (sc->adopted_rx_filter_bug) return; if (ifp->if_flags & IFF_ALLMULTI) /* request to disable multicast filtering, so quit here */ return; /* Flush all the filters */ err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); if (err != 0) { device_printf(sc->dev, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" ", error status: %d\n", err); return; } /* Walk the multicast list, and add each address */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &cmd.data0, 4); bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, &cmd.data1, 2); cmd.data0 = htonl(cmd.data0); cmd.data1 = htonl(cmd.data1); err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); if (err != 0) { device_printf(sc->dev, "Failed " "MXGEFW_JOIN_MULTICAST_GROUP, error status:" "%d\t", err); /* abort, leaving multicast filtering off */ if_maddr_runlock(ifp); return; } } if_maddr_runlock(ifp); /* Enable multicast filtering */ err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); if (err != 0) { device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" ", error status: %d\n", err); } } static int mxge_max_mtu(mxge_softc_t *sc) { mxge_cmd_t cmd; int status; if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) return MXGEFW_MAX_MTU - MXGEFW_PAD; /* try to set nbufs to see if it we can use virtually contiguous jumbos */ cmd.data0 = 0; status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); if (status == 0) return MXGEFW_MAX_MTU - MXGEFW_PAD; /* otherwise, we're limited to MJUMPAGESIZE */ return MJUMPAGESIZE - MXGEFW_PAD; } static int mxge_reset(mxge_softc_t *sc, int interrupts_setup) { struct mxge_slice_state *ss; mxge_rx_done_t *rx_done; volatile uint32_t *irq_claim; mxge_cmd_t cmd; int slice, status; /* try to send a reset command to the card to see 
if it is alive */ memset(&cmd, 0, sizeof (cmd)); status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); if (status != 0) { device_printf(sc->dev, "failed reset\n"); return ENXIO; } mxge_dummy_rdma(sc, 1); /* set the intrq size */ cmd.data0 = sc->rx_ring_size; status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); /* * Even though we already know how many slices are supported * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES * has magic side effects, and must be called after a reset. * It must be called prior to calling any RSS related cmds, * including assigning an interrupt queue for anything but * slice 0. It must also be called *after* * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by * the firmware to compute offsets. */ if (sc->num_slices > 1) { /* ask the maximum number of slices it supports */ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); if (status != 0) { device_printf(sc->dev, "failed to get number of slices\n"); return status; } /* * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior * to setting up the interrupt queue DMA */ cmd.data0 = sc->num_slices; cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; #ifdef IFNET_BUF_RING cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; #endif status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd); if (status != 0) { device_printf(sc->dev, "failed to set number of slices\n"); return status; } } if (interrupts_setup) { /* Now exchange information about interrupts */ for (slice = 0; slice < sc->num_slices; slice++) { rx_done = &sc->ss[slice].rx_done; memset(rx_done->entry, 0, sc->rx_ring_size); cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); cmd.data2 = slice; status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd); } } status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); status |= mxge_send_cmd(sc, 
MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd); sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); if (status != 0) { device_printf(sc->dev, "failed set interrupt parameters\n"); return status; } *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); /* run a DMA benchmark */ (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; ss->irq_claim = irq_claim + (2 * slice); /* reset mcp/driver shared state back to 0 */ ss->rx_done.idx = 0; ss->rx_done.cnt = 0; ss->tx.req = 0; ss->tx.done = 0; ss->tx.pkt_done = 0; ss->tx.queue_active = 0; ss->tx.activate = 0; ss->tx.deactivate = 0; ss->tx.wake = 0; ss->tx.defrag = 0; ss->tx.stall = 0; ss->rx_big.cnt = 0; ss->rx_small.cnt = 0; ss->lc.lro_bad_csum = 0; ss->lc.lro_queued = 0; ss->lc.lro_flushed = 0; if (ss->fw_stats != NULL) { bzero(ss->fw_stats, sizeof *ss->fw_stats); } } sc->rdma_tags_available = 15; status = mxge_update_mac_address(sc); mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); mxge_change_pause(sc, sc->pause); mxge_set_multicast_list(sc); if (sc->throttle) { cmd.data0 = sc->throttle; if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd)) { device_printf(sc->dev, "can't enable throttle\n"); } } return status; } static int mxge_change_throttle(SYSCTL_HANDLER_ARGS) { mxge_cmd_t cmd; mxge_softc_t *sc; int err; unsigned int throttle; sc = arg1; throttle = sc->throttle; err = sysctl_handle_int(oidp, &throttle, arg2, req); if (err != 0) { return err; } if (throttle == sc->throttle) return 0; if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) return EINVAL; mtx_lock(&sc->driver_mtx); cmd.data0 = throttle; err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); if (err == 0) sc->throttle = throttle; mtx_unlock(&sc->driver_mtx); return err; } static int mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 
{ mxge_softc_t *sc; unsigned int intr_coal_delay; int err; sc = arg1; intr_coal_delay = sc->intr_coal_delay; err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); if (err != 0) { return err; } if (intr_coal_delay == sc->intr_coal_delay) return 0; if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) return EINVAL; mtx_lock(&sc->driver_mtx); *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); sc->intr_coal_delay = intr_coal_delay; mtx_unlock(&sc->driver_mtx); return err; } static int mxge_change_flow_control(SYSCTL_HANDLER_ARGS) { mxge_softc_t *sc; unsigned int enabled; int err; sc = arg1; enabled = sc->pause; err = sysctl_handle_int(oidp, &enabled, arg2, req); if (err != 0) { return err; } if (enabled == sc->pause) return 0; mtx_lock(&sc->driver_mtx); err = mxge_change_pause(sc, enabled); mtx_unlock(&sc->driver_mtx); return err; } static int mxge_handle_be32(SYSCTL_HANDLER_ARGS) { int err; if (arg1 == NULL) return EFAULT; arg2 = be32toh(*(int *)arg1); arg1 = NULL; err = sysctl_handle_int(oidp, arg1, arg2, req); return err; } static void mxge_rem_sysctls(mxge_softc_t *sc) { struct mxge_slice_state *ss; int slice; if (sc->slice_sysctl_tree == NULL) return; for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; if (ss == NULL || ss->sysctl_tree == NULL) continue; sysctl_ctx_free(&ss->sysctl_ctx); ss->sysctl_tree = NULL; } sysctl_ctx_free(&sc->slice_sysctl_ctx); sc->slice_sysctl_tree = NULL; } static void mxge_add_sysctls(mxge_softc_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; mcp_irq_data_t *fw; struct mxge_slice_state *ss; int slice; char slice_num[8]; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); fw = sc->ss[0].fw_stats; /* random information */ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", - CTLFLAG_RD, &sc->fw_version, + CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", - 
CTLFLAG_RD, &sc->serial_number_string, + CTLFLAG_RD, sc->serial_number_string, 0, "serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", - CTLFLAG_RD, &sc->product_code_string, + CTLFLAG_RD, sc->product_code_string, 0, "product_code"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", CTLFLAG_RD, &sc->link_width, 0, "tx_boundary"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", CTLFLAG_RD, &sc->tx_boundary, 0, "tx_boundary"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", CTLFLAG_RD, &sc->wc, 0, "write combining PIO?"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", CTLFLAG_RD, &sc->read_write_dma, 0, "DMA concurrent Read/Write speed in MB/s"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", CTLFLAG_RD, &sc->watchdog_resets, 0, "Number of times NIC was reset"); /* performance related tunables */ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", "interrupt coalescing delay in usecs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I", "transmit throttling"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled", CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I", "interrupt coalescing delay in usecs"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", CTLFLAG_RW, &mxge_deassert_wait, 0, "Wait for IRQ line to go low in ihandler"); /* stats block from firmware is in network byte order. 
Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		/* each slice gets its own context and a node named after
		 * its index below the "slice" node */
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		/* NOTE(review): description of rx_big_cnt reads
		 * "rx_small_cnt" -- looks like a copy/paste slip */
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		/* NOTE(review): the two adjacent literals below concatenate
		 * without a space ("...lro mergequeues") */
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge"
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		/* NOTE(review): description of tx_pkt_done reads "tx_done" */
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			    mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	/* the loop stops at cnt == 1, so src[0] is never written here */
	while (cnt > 1) {
		cnt--;
		/* masking the index wraps it around the ring */
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		  int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/* remember the first request's flags and clear them in the
	 * source; they are restored and written last, further down */
	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* copy the requests two at a time */
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

/*
 * Parse the headers of an outgoing frame and record the results in *pi:
 * ip_off (offset of the IP header, accounting for a VLAN header),
 * ip/ip6 (pointer to the IP header), ip_hlen and, for TSO frames, tcp.
 * Headers that are not contained in the first mbuf are copied into
 * ss->scratch and the pointers refer to that copy.
 *
 * Returns 0 on success, or EINVAL for an unsupported ethertype, an IPv6
 * frame whose last header is neither TCP nor UDP, or an IPv6 TSO frame
 * whose headers exceed max_tso6_hlen.
 */
static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
    struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
			    ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		/* the TCP header is only located for TSO frames */
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
			    ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		/* ip6_lasthdr() yields an offset from the start of the
		 * frame; make it relative to the IPv6 header */
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
			    m->m_pkthdr.len - cksum_offset,
			    IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
			    pi->ip->ip_dst.s_addr,
			    htons(IPPROTO_TCP + (m->m_pkthdr.len -
				    cksum_offset)));
#endif
		}
		/* store the pseudo header sum in the packet's th_sum field */
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
		    cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;


	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header.
*/
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			/* retroactively update the rdma_count field of the
			 * request that is rdma_count entries back */
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				/* chop and next_is_first are 0/1 values used
				 * as multipliers and masks instead of
				 * branches */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				/* cum_len is negative here: only the
				 * remaining header bytes go in this request */
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			/* for IPv4, reduce the checksum offset by the bytes
			 * already consumed by this request */
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	/* walk backwards setting TSO_LAST, stopping after the first
	 * request that carries TSO_CHOP or FIRST */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	/* flag the ring slot of the last request of this packet */
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	/* too many descriptors: unload the DMA map of the current ring
	 * slot, free the mbuf and count an output error; the diagnostic
	 * is printed only once */
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion. We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	/* make room for the VLAN encapsulation in front of the frame */
	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
*/
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

/*
 * Map an outgoing mbuf chain for DMA, convert the DMA segments into a
 * list of send requests and submit them with mxge_submit_req().
 * Frames with CSUM_TSO set are handed to mxge_encap_tso() after the
 * mapping step.  On failure the mbuf is freed (when one is still held)
 * and ss->oerrors is incremented.
 */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	struct mxge_pkt_info pi = {0,0,0,0};
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;


	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		/* a NULL return leaves no mbuf to free here */
		if (__predict_false(m == NULL))
			goto drop_without_m;
	}
#endif
	/* header offsets are only needed for TSO / checksum offload */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		if (mxge_parse_tx(ss, m, &pi))
			goto drop;
	}

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	/* the mbuf is recorded in the slot of the first request */
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, &pi);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		/* NOTE(review): the comment above looks stale -- nothing is
		 * copied here; the offsets come from the mxge_parse_tx()
		 * call earlier in this function */
		cksum_offset = pi.ip_off + pi.ip_hlen;
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* reduce the offset by the bytes this segment covers */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		/* clear the flags of the next request before it is
		 * or-ed into on the following iteration */
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		/* append one request that points at the zeropad DMA area */
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	/* flag the ring slot of the last request of this packet */
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	m_freem(m);
drop_without_m:
	ss->oerrors++;
	return;
}

#ifdef IFNET_BUF_RING
/*
 * Free every mbuf queued on each slice's buf_ring (holding that slice's
 * tx mutex), then call if_qflush() for the interface.
 */
static void
mxge_qflush(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++) {
		tx = &sc->ss[slice].tx;
		mtx_lock(&tx->mtx);
		while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
			m_freem(m);
		mtx_unlock(&tx->mtx);
	}
	if_qflush(ifp);
}

/*
 * Dequeue frames from the slice's buf_ring and hand them to the NIC for
 * as long as more than max_desc ring slots are free.  When the loop
 * ends with frames still queued, the slice is marked OACTIVE and the
 * stall counter is incremented.
 * NOTE(review): callers in this file hold tx->mtx around this call.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	/* tx->req - tx->done is the number of slots in use */
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		m = drbr_dequeue(ifp, tx->br);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
	    && (!drbr_empty(ifp, tx->br))) {
		ss->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}

/*
 * Transmit or queue one frame on a slice.  The frame is sent directly
 * when the slice is running and not OACTIVE, drbr_needs_enqueue() is
 * false and more than max_desc ring slots are free; otherwise it is
 * placed on the buf_ring.  Returns the drbr_enqueue() error, or 0.
 */
static int
mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	int err;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	/* not running, or stalled: just queue the frame */
	if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING) {
		err = drbr_enqueue(ifp, tx->br, m);
		return (err);
	}

	if (!drbr_needs_enqueue(ifp, tx->br) &&
	    ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
		/* let BPF see it */
		BPF_MTAP(ifp, m);
		/* give it to the nic */
		mxge_encap(ss, m);
	} else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
		return (err);
	}
	/* drain whatever is queued behind this frame */
	if (!drbr_empty(ifp, tx->br))
		mxge_start_locked(ss);
	return (0);
}

static int
mxge_transmit(struct ifnet *ifp, struct mbuf *m)
{
mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;
	mxge_tx_ring_t *tx;
	int err = 0;
	int slice;

	/* choose the slice from the mbuf's flowid */
	slice = m->m_pkthdr.flowid;
	slice &= (sc->num_slices - 1);  /* num_slices always power of 2 */

	ss = &sc->ss[slice];
	tx = &ss->tx;

	/* if the tx mutex is contended, queue the frame on the buf_ring
	 * instead of waiting for the lock */
	if (mtx_trylock(&tx->mtx)) {
		err = mxge_transmit_locked(ss, m);
		mtx_unlock(&tx->mtx);
	} else {
		err = drbr_enqueue(ifp, tx->br, m);
	}

	return (err);
}

#else

/*
 * Variant built when IFNET_BUF_RING is not defined: dequeue frames from
 * ifp->if_snd and hand them to the NIC for as long as more than
 * max_desc ring slots are free.  When the loop ends because the ring
 * filled up, the interface is marked OACTIVE and the stall counter is
 * incremented.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;
	/* tx->req - tx->done is the number of slots in use */
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}
#endif
/*
 * Start transmission on slice 0 with that slice's tx mutex held.
 */
static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;

	/* only use the first slice for now */
	ss = &sc->ss[0];
	mtx_lock(&ss->tx.mtx);
	mxge_start_locked(ss);
	mtx_unlock(&ss->tx.mtx);
}

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.
We re-write the first segment's low * DMA address to mark it valid only after we write the entire chunk * in a burst */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* save the real low address of the first entry and substitute
	 * 0xffffffff while the eight entries are copied */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	/* restore the source and write the real address last */
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

/*
 * Allocate a header mbuf for slot idx of the small receive ring, load
 * it into the given DMA map and record its bus address in the shadow
 * ring.  Returns 0, ENOBUFS when no mbuf is available, or the
 * bus_dmamap_load_mbuf_sg() error.
 * The "done" label is reached on the error paths as well, so for every
 * eighth slot ((idx & 7) == 7) the group of eight shadow entries is
 * submitted to the NIC regardless of the outcome.
 */
static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

/*
 * Big-buffer counterpart of mxge_get_buf_small(): allocate a cluster
 * mbuf of rx->cl_size for slot idx of the big receive ring, load it
 * into the given DMA map and record the bus address(es) in the shadow
 * ring.  Returns 0, ENOBUFS when no mbuf is available, or the
 * bus_dmamap_load_mbuf_sg() error.
 * The "done" loop steps over the rx->nbufs slots starting at idx and
 * submits a group of eight shadow entries whenever a group boundary
 * ((idx & 7) == 7) is reached; it runs on the error paths as well.
 */
static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->mlen;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	/* additional DMA segments go into the following shadow slots */
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
	}
#endif

done:
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}

#ifdef INET6

/*
 * Add up the 16-bit words starting at raw and fold the 32-bit sum into
 * 16 bits.  The loop consumes two bytes per step until len reaches zero
 * or below, so an odd len still reads one full trailing word.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t csum;


	csum = 0;
	while (len > 0) {
		csum += *raw;
		raw++;
		len -= 2;
	}
	/* fold the carries back in, twice */
	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}

/*
 * Check a received IPv6 frame against the partial checksum csum.
 * p points at the IPv6 header.  Returns 1 when the last header is
 * neither TCP nor UDP; otherwise returns the pseudo-header checksum
 * xor 0xffff.
 * NOTE(review): the caller in mxge_rx_csum() hands this value back to
 * code that treats 0 as "checksum good".
 */
static inline uint16_t
mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
{
	uint32_t partial;
	int nxt, cksum_offset;
	struct ip6_hdr *ip6 = p;
	uint16_t c;

	nxt = ip6->ip6_nxt;
	cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
	/* only walk the header chain when the first next-header value is
	 * not already TCP or UDP */
	if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
		cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
					   IPPROTO_IPV6, &nxt);
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return (1);
	}

	/*
	 * IPv6 headers do not contain a checksum, and hence
	 * do not checksum to zero, so they don't "fall out"
	 * of the partial checksum calculation like IPv4
	 * headers do.  We need to fix the partial checksum by
	 * subtracting the checksum of the IPv6 header.
	 */

	partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
				    ETHER_HDR_LEN);
	/* adding ~partial with the carry wrapped around, then folding */
	csum += ~partial;
	csum += (csum < ~partial);
	csum = (csum >> 16) + (csum & 0xFFFF);
	csum = (csum >> 16) + (csum & 0xFFFF);
	c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
			     csum);
	c ^= 0xffff;
	return (c);
}
#endif /* INET6 */
/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.
It is best to simply to check the checksum and * tell the stack about it only if the checksum is good */
/*
 * Returns 1 for frames that are not checked: unknown ethertype, the
 * matching IFCAP_RXCSUM* capability disabled, or an IPv4 protocol other
 * than TCP/UDP.  Otherwise returns the pseudo-header checksum xor
 * 0xffff; the receive paths in this file treat a return of 0 as a good
 * checksum.
 */

static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#if defined(INET) || defined(INET6)
	int cap = m->m_pkthdr.rcvif->if_capenable;
#endif
	uint16_t c, etype;


	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		if ((cap & IFCAP_RXCSUM) == 0)
			return (1);
		/* the IP header directly follows the ethernet header */
		ip = (struct ip *)(eh + 1);
		if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
			return (1);
		c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			      htonl(ntohs(csum) + ntohs(ip->ip_len) -
				    (ip->ip_hl << 2) + ip->ip_p));
		c ^= 0xffff;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		if ((cap & IFCAP_RXCSUM_IPV6) == 0)
			return (1);
		c = mxge_rx_csum6((eh + 1), m, csum);
		break;
#endif
	default:
		c = 1;
	}
	return (c);
}

static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
*/

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	/* the four bytes that follow the ethernet header */
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	/* adding ~partial with the carry wrapped around, then folding */
	(*csum) += ~partial;
	(*csum) += ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		/* NOTE(review): on allocation failure this returns with the
		 * 802.1Q header still in the frame and M_VLANTAG not set,
		 * although *csum has already been adjusted above */
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}

/*
 * Handle one received frame that used the big receive ring: replace the
 * ring buffer, unmap the received mbuf, strip a VLAN header if present,
 * verify the checksum when receive checksum offload is enabled, and
 * pass the frame to LRO or up the stack.  When no replacement buffer
 * can be obtained the frame is dropped and if_ierrors is incremented.
 */
static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
		 uint32_t csum, int lro)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_big;
	idx = rx->cnt & rx->mask;
	/* a big frame advances the ring by rx->nbufs slots */
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */

	if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
	    (0 == mxge_rx_csum(m, csum))) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
			CSUM_DATA_VALID;

#if defined(INET) || defined (INET6)
		/* NOTE(review): 0 is passed as the csum argument here while
		 * mxge_rx_done_small() passes csum -- confirm which one is
		 * intended */
		if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
			return;
#endif
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		/* the flowid is the index of this slice */
		m->m_pkthdr.flowid = (ss - sc->ss);
		m->m_flags |= M_FLOWID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Small-ring counterpart of mxge_rx_done_big(): the ring advances by a
 * single slot and the replacement buffer comes from
 * mxge_get_buf_small().
 */
static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
		   uint32_t csum, int lro)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
	    (0 == mxge_rx_csum(m, csum))) {
		/* Tell the stack that the checksum is good */
m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; #if defined(INET) || defined (INET6) if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) return; #endif } /* flowid only valid if RSS hashing is enabled */ if (sc->num_slices > 1) { m->m_pkthdr.flowid = (ss - sc->ss); m->m_flags |= M_FLOWID; } /* pass the frame up the stack */ (*ifp->if_input)(ifp, m); } static inline void mxge_clean_rx_done(struct mxge_slice_state *ss) { mxge_rx_done_t *rx_done = &ss->rx_done; int limit = 0; uint16_t length; uint16_t checksum; int lro; lro = ss->sc->ifp->if_capenable & IFCAP_LRO; while (rx_done->entry[rx_done->idx].length != 0) { length = ntohs(rx_done->entry[rx_done->idx].length); rx_done->entry[rx_done->idx].length = 0; checksum = rx_done->entry[rx_done->idx].checksum; if (length <= (MHLEN - MXGEFW_PAD)) mxge_rx_done_small(ss, length, checksum, lro); else mxge_rx_done_big(ss, length, checksum, lro); rx_done->cnt++; rx_done->idx = rx_done->cnt & rx_done->mask; /* limit potential for livelock */ if (__predict_false(++limit > rx_done->mask / 2)) break; } #if defined(INET) || defined (INET6) while (!SLIST_EMPTY(&ss->lc.lro_active)) { struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active); SLIST_REMOVE_HEAD(&ss->lc.lro_active, next); tcp_lro_flush(&ss->lc, lro); } #endif } static inline void mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) { struct ifnet *ifp; mxge_tx_ring_t *tx; struct mbuf *m; bus_dmamap_t map; int idx; int *flags; tx = &ss->tx; ifp = ss->sc->ifp; while (tx->pkt_done != mcp_idx) { idx = tx->done & tx->mask; tx->done++; m = tx->info[idx].m; /* mbuf and DMA map only attached to the first segment per-mbuf */ if (m != NULL) { ss->obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) ss->omcasts++; ss->opackets++; tx->info[idx].m = NULL; map = tx->info[idx].map; bus_dmamap_unload(tx->dmat, map); m_freem(m); } if (tx->info[idx].flag) { tx->info[idx].flag = 0; tx->pkt_done++; } } /* If we have space, clear IFF_OACTIVE to 
tell the stack that its OK to send packets */
#ifdef IFNET_BUF_RING
	flags = &ss->if_drv_flags;
#else
	flags = &ifp->if_drv_flags;
#endif
	mtx_lock(&ss->tx.mtx);
	/* restart transmission once fewer than a quarter of the slots are in use */
	if ((*flags) & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		*(flags) &= ~IFF_DRV_OACTIVE;
		ss->tx.wake++;
		mxge_start_locked(ss);
	}
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
		/* let the NIC stop polling this queue, since there
		 * are no more transmits pending */
		if (tx->req == tx->done) {
			*tx->send_stop = 1;
			tx->queue_active = 0;
			tx->deactivate++;
			wmb();
		}
	}
#endif
	mtx_unlock(&ss->tx.mtx);

}

/*
 * Media tables used by mxge_media_probe().  Each entry holds, in order,
 * an ifmedia type (0 where no type is assigned), the value compared
 * against the byte read from the module, and a printable name.  Entry 0
 * is compared for equality; the remaining entries are tested as bit masks.
 */
static struct mxge_media_type mxge_xfp_media_types[] =
{
	{IFM_10G_CX4,	0x7f, 		"10GBASE-CX4 (module)"},
	{IFM_10G_SR, 	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR, 	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};
static struct mxge_media_type mxge_sfp_media_types[] =
{
	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
	{0,		(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR, 	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
};

/*
 * Add media_type (combined with IFM_ETHER | IFM_FDX) to the interface's
 * media list, select it, and record it in sc->current_media.
 */
static void
mxge_media_set(mxge_softc_t *sc, int media_type)
{


	ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
		    0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
	sc->current_media = media_type;
	sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
}

/*
 * Reset the media list to IFM_AUTO and derive sc->connector from the
 * cached product code string.  Returns early, after printing a message,
 * when the string is missing or contains fewer than three dashes.
 */
static void
mxge_media_init(mxge_softc_t *sc)
{
	char *ptr;
	int i;

	ifmedia_removeall(&sc->media);
	mxge_media_set(sc, IFM_AUTO);

	/*
	 * parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
	ptr = sc->product_code_string;
	if (ptr == NULL) {
		device_printf(sc->dev, "Missing product code\n");
		return;
	}

	/* each pass leaves ptr on the character just after the next dash */
	for (i = 0; i < 3; i++, ptr++) {
		ptr = index(ptr, '-');
		if (ptr == NULL) {
			device_printf(sc->dev,
				      "only %d dashes in PC?!?\n", i);
			return;
		}
	}
	if (*ptr == 'C' || *(ptr +1) == 'C') {
		/* -C is CX4 */
		sc->connector = MXGE_CX4;
		mxge_media_set(sc, IFM_10G_CX4);
	} else if (*ptr == 'Q') {
		/* -Q is Quad Ribbon Fiber */
		sc->connector = MXGE_QRF;
		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
		/* FreeBSD has no media type for Quad ribbon fiber */
	} else if (*ptr == 'R') {
		/* -R is XFP */
		sc->connector = MXGE_XFP;
	} else if (*ptr == 'S' || *(ptr +1) == 'S') {
		/* -S or -2S is SFP+ */
		sc->connector = MXGE_SFP;
	} else {
		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
	}
}

/*
 * Determine the media type for a NIC.  Some XFPs will identify
 * themselves only when their link is up, so this is initiated via a
 * link up interrupt.  However, this can potentially take up to
 * several milliseconds, so it is run via the watchdog routine, rather
 * than in the interrupt handler itself.
*/
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *cage_type;

	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries;
	uint32_t byte;

	sc->need_media_probe = 0;

	/* pick the table, the byte to read and the printable cage name */
	if (sc->connector == MXGE_XFP) {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries =
			sizeof (mxge_xfp_media_types) /
			sizeof (mxge_xfp_media_types[0]);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	} else 	if (sc->connector == MXGE_SFP) {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries =
			sizeof (mxge_sfp_media_types) /
			sizeof (mxge_sfp_media_types[0]);
		cage_type = "SFP+";
		byte = 3;
	} else {
		/* nothing to do; media type cannot change */
		return;
	}

	/*
	 * At this point we know the NIC has an XFP cage, so now we
	 * try to determine what is in the cage by using the
	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
	 * register.  We read just one byte, which may take over
	 * a millisecond
	 */

	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
	cmd.data1 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
		device_printf(sc->dev, "failed to read XFP\n");
	}
	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
	}
	if (err != MXGEFW_CMD_OK) {
		return;
	}

	/* now we wait for the data to be cached */
	cmd.data0 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	/* retry while the command reports EBUSY, at most 50 times, 1ms apart */
	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
		DELAY(1000);
		cmd.data0 = byte;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	}
	if (err != MXGEFW_CMD_OK) {
		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
			      cage_type, err, ms);
		return;
	}

	/* entry 0 of the table is matched by equality, not as a bit mask */
	if (cmd.data0 == mxge_media_types[0].bitmask) {
		if (mxge_verbose)
			device_printf(sc->dev, "%s:%s\n", cage_type,
				      mxge_media_types[0].name);
		if (sc->current_media != mxge_media_types[0].flag) {
			mxge_media_init(sc);
			mxge_media_set(sc, mxge_media_types[0].flag);
		}
		return;
	}
	/* remaining entries: the first one whose bit is set wins */
	for (i = 1; i < mxge_media_type_entries; i++) {
		if (cmd.data0 & mxge_media_types[i].bitmask) {
			if (mxge_verbose)
				device_printf(sc->dev, "%s:%s\n",
					      cage_type,
					      mxge_media_types[i].name);

			if (sc->current_media != mxge_media_types[i].flag) {
				mxge_media_init(sc);
				mxge_media_set(sc, mxge_media_types[i].flag);
			}
			return;
		}
	}
	if (mxge_verbose)
		device_printf(sc->dev, "%s media 0x%x unknown\n",
			      cage_type, cmd.data0);

	return;
}

/*
 * Interrupt handler for one slice; arg is the struct mxge_slice_state.
 * Processes transmit completions and received frames for the slice and,
 * on the first slice, the link and error state reported in the firmware
 * stats block.
 */
static void
mxge_intr(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_done;
	uint32_t send_done_count;
	uint8_t valid;


#ifndef IFNET_BUF_RING
	/* an interrupt on a non-zero slice is implicitly valid
	   since MSI-X irqs are not shared */
	if (ss != sc->ss) {
		mxge_clean_rx_done(ss);
		*ss->irq_claim = be32toh(3);
		return;
	}
#endif

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	/* remember the flag byte; bit 0 is examined at the end of the handler */
	valid = stats->valid;

	if (sc->legacy_irq) {
		/* lower legacy IRQ  */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf.
that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done)
				mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			/* re-read: the count may have moved while we worked */
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *) &stats->valid));

	/* fw link & error stats meaningful only on the first slice */
	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				sc->ifp->if_baudrate = IF_Gbps(10UL);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				sc->ifp->if_baudrate = 0;
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			/* any link change schedules a media re-probe */
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *ss->irq_claim = be32toh(3);
	/* the second claim word is written unconditionally */
	*(ss->irq_claim + 1) = be32toh(3);
}

/*
 * Bring the interface up if it is not already running.  arg is the
 * softc.  Runs under sc->driver_mtx; the result of mxge_open() is
 * discarded.
 */
static void
mxge_init(void *arg)
{
	mxge_softc_t *sc = arg;
	struct ifnet *ifp = sc->ifp;


	mtx_lock(&sc->driver_mtx);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		(void) mxge_open(sc);
	mtx_unlock(&sc->driver_mtx);
}



/*
 * Release every mbuf still attached to the rings of slice ss: the LRO
 * state (when INET or INET6 is configured), then the big and small
 * receive rings, then the transmit ring if the slice has one.  Each
 * slot's DMA map is unloaded before its mbuf is freed, and the slot
 * pointer is cleared.
 */
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
	int i;

#if defined(INET) || defined(INET6)
	tcp_lro_free(&ss->lc);
#endif
	for (i = 0; i <= ss->rx_big.mask; i++) {
		if (ss->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_big.dmat,
				  ss->rx_big.info[i].map);
		m_freem(ss->rx_big.info[i].m);
		ss->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= ss->rx_small.mask; i++) {
		if (ss->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_small.dmat,
				  ss->rx_small.info[i].map);
		m_freem(ss->rx_small.info[i].m);
		ss->rx_small.info[i].m = NULL;
	}

	/* transmit ring used only on the first slice */
	if (ss->tx.info == NULL)
		return;

	for (i = 0; i <= ss->tx.mask; i++) {
		/* the completion flag is cleared even for slots without an mbuf */
		ss->tx.info[i].flag = 0;
		if (ss->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->tx.dmat,
				  ss->tx.info[i].map);
		m_freem(ss->tx.info[i].m);
		ss->tx.info[i].m = NULL;
	}
}

/* Release the ring mbufs of every slice of the device. */
static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_mbufs(&sc->ss[slice]);
}

/*
 * Release the ring memory and busdma resources of slice ss.  Every
 * pointer is tested before it is released and set to NULL afterwards.
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;


	if (ss->rx_done.entry != NULL)
		mxge_dma_free(&ss->rx_done.dma);
	ss->rx_done.entry = NULL;

	if (ss->tx.req_bytes != NULL)
		free(ss->tx.req_bytes, M_DEVBUF);
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	/* for each ring: destroy the per-slot maps, then the tag, then the array */
	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}

/* Release the ring memory and busdma resources of every slice. */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

/*
 * Allocate the host-side rings and busdma resources of slice ss.
 *
 * rx_ring_entries - number of slots in each receive ring; the ring masks
 *                   are set to this value minus one
 * tx_ring_entries - number of slots in the transmit ring
 *
 * Returns 0 on success, or the error returned by the failing busdma
 * call.  Nothing is released here on failure; mxge_alloc_rings() calls
 * mxge_free_rings() on its abort path.
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	/* the completion ring has twice as many entries as one receive ring */
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	/* one map per ring slot, plus one spare map per ring */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resources */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);


	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;

}

/*
 * Query the firmware for the send ring size, size the interface send
 * queue to match, and allocate the rings of every slice.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int err, slice;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		goto abort;
	}

	/* ring sizes are in bytes; convert them to entry counts */
	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	for (slice = 0; slice < sc->num_slices; slice++) {
		err =
mxge_alloc_slice_rings(&sc->ss[slice], rx_ring_entries, tx_ring_entries); if (err != 0) goto abort; } return 0; abort: mxge_free_rings(sc); return err; } static void mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) { int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; if (bufsize < MCLBYTES) { /* easy, everything fits in a single buffer */ *big_buf_size = MCLBYTES; *cl_size = MCLBYTES; *nbufs = 1; return; } if (bufsize < MJUMPAGESIZE) { /* still easy, everything still fits in a single buffer */ *big_buf_size = MJUMPAGESIZE; *cl_size = MJUMPAGESIZE; *nbufs = 1; return; } #if MXGE_VIRT_JUMBOS /* now we need to use virtually contiguous buffers */ *cl_size = MJUM9BYTES; *big_buf_size = 4096; *nbufs = mtu / 4096 + 1; /* needs to be a power of two, so round up */ if (*nbufs == 3) *nbufs = 4; #else *cl_size = MJUM9BYTES; *big_buf_size = MJUM9BYTES; *nbufs = 1; #endif } static int mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) { mxge_softc_t *sc; mxge_cmd_t cmd; bus_dmamap_t map; int err, i, slice; sc = ss->sc; slice = ss - sc->ss; #if defined(INET) || defined(INET6) (void)tcp_lro_init(&ss->lc); #endif ss->lc.ifp = sc->ifp; /* get the lanai pointers to the send and receive rings */ err = 0; #ifndef IFNET_BUF_RING /* We currently only send from the first slice */ if (slice == 0) { #endif cmd.data0 = slice; err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); ss->tx.send_go = (volatile uint32_t *) (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); ss->tx.send_stop = (volatile uint32_t *) (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); #ifndef IFNET_BUF_RING } #endif cmd.data0 = slice; err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); ss->rx_small.lanai = (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); cmd.data0 = slice; err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); ss->rx_big.lanai = (volatile 
mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/* preset every big shadow entry to all-ones before stocking the ring */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	/* big buffers are stocked in strides of nbufs slots */
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

/*
 * Bring the interface up: reset the NIC, program RSS when more than one
 * slice is in use, tell the firmware the MTU and buffer sizes, hand it
 * the stats block(s), stock the rings of every slice and finally start
 * the firmware.
 *
 * Returns 0 on success.  Failures before the buffer parameters are set
 * return directly; later failures go through the abort label, which
 * frees the ring mbufs.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}


	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	for (slice = 0;
#ifdef IFNET_BUF_RING
	     slice < sc->num_slices;
#else
	     slice < 1;
#endif
	     slice++) {
		ss = &sc->ss[slice];
		cmd.data0 =
			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data1 =
			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data2 = sizeof(struct mcp_irq_data);
		/* the slice index goes in the upper 16 bits of data2 */
		cmd.data2 |= (slice << 16);
		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	}

	/* fall back to the obsolete stats command if the V2 command failed */
	if (err != 0) {
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP,
&cmd); if (err) { device_printf(sc->dev, "Couldn't bring up link\n"); goto abort; } #ifdef IFNET_BUF_RING for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; ss->if_drv_flags |= IFF_DRV_RUNNING; ss->if_drv_flags &= ~IFF_DRV_OACTIVE; } #endif sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return 0; abort: mxge_free_mbufs(sc); return err; } static int mxge_close(mxge_softc_t *sc, int down) { mxge_cmd_t cmd; int err, old_down_cnt; #ifdef IFNET_BUF_RING struct mxge_slice_state *ss; int slice; #endif #ifdef IFNET_BUF_RING for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; ss->if_drv_flags &= ~IFF_DRV_RUNNING; } #endif sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if (!down) { old_down_cnt = sc->down_cnt; wmb(); err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); if (err) { device_printf(sc->dev, "Couldn't bring down link\n"); } if (old_down_cnt == sc->down_cnt) { /* wait for down irq */ DELAY(10 * sc->intr_coal_delay); } wmb(); if (old_down_cnt == sc->down_cnt) { device_printf(sc->dev, "never got down irq\n"); } } mxge_free_mbufs(sc); return 0; } static void mxge_setup_cfg_space(mxge_softc_t *sc) { device_t dev = sc->dev; int reg; uint16_t lnk, pectl; /* find the PCIe link width and set max read request to 4KB*/ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { lnk = pci_read_config(dev, reg + 0x12, 2); sc->link_width = (lnk >> 4) & 0x3f; if (sc->pectl == 0) { pectl = pci_read_config(dev, reg + 0x8, 2); pectl = (pectl & ~0x7000) | (5 << 12); pci_write_config(dev, reg + 0x8, pectl, 2); sc->pectl = pectl; } else { /* restore saved pectl after watchdog reset */ pci_write_config(dev, reg + 0x8, sc->pectl, 2); } } /* Enable DMA and Memory space access */ pci_enable_busmaster(dev); } static uint32_t mxge_read_reboot(mxge_softc_t *sc) { device_t dev = sc->dev; uint32_t vs; /* find the vendor specific offset */ if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { device_printf(sc->dev, "could not find 
vendor specific offset\n"); return (uint32_t)-1; } /* enable read32 mode */ pci_write_config(dev, vs + 0x10, 0x3, 1); /* tell NIC which register to read */ pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); return (pci_read_config(dev, vs + 0x14, 4)); } static void mxge_watchdog_reset(mxge_softc_t *sc) { struct pci_devinfo *dinfo; struct mxge_slice_state *ss; int err, running, s, num_tx_slices = 1; uint32_t reboot; uint16_t cmd; err = ENXIO; device_printf(sc->dev, "Watchdog reset!\n"); /* * check to see if the NIC rebooted. If it did, then all of * PCI config space has been reset, and things like the * busmaster bit will be zero. If this is the case, then we * must restore PCI config space before the NIC can be used * again */ cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); if (cmd == 0xffff) { /* * maybe the watchdog caught the NIC rebooting; wait * up to 100ms for it to finish. If it does not come * back, then give up */ DELAY(1000*100); cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); if (cmd == 0xffff) { device_printf(sc->dev, "NIC disappeared!\n"); } } if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { /* print the reboot status */ reboot = mxge_read_reboot(sc); device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", reboot); running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; if (running) { /* * quiesce NIC so that TX routines will not try to * xmit after restoration of BAR */ /* Mark the link as down */ if (sc->link_state) { sc->link_state = 0; if_link_state_change(sc->ifp, LINK_STATE_DOWN); } #ifdef IFNET_BUF_RING num_tx_slices = sc->num_slices; #endif /* grab all TX locks to ensure no tx */ for (s = 0; s < num_tx_slices; s++) { ss = &sc->ss[s]; mtx_lock(&ss->tx.mtx); } mxge_close(sc, 1); } /* restore PCI configuration space */ dinfo = device_get_ivars(sc->dev); pci_cfg_restore(sc->dev, dinfo); /* and redo any changes we made to our config space */ mxge_setup_cfg_space(sc); /* reload f/w */ err = mxge_load_firmware(sc, 0); if (err) { device_printf(sc->dev, "Unable to 
re-load f/w\n"); } if (running) { if (!err) err = mxge_open(sc); /* release all TX locks */ for (s = 0; s < num_tx_slices; s++) { ss = &sc->ss[s]; #ifdef IFNET_BUF_RING mxge_start_locked(ss); #endif mtx_unlock(&ss->tx.mtx); } } sc->watchdog_resets++; } else { device_printf(sc->dev, "NIC did not reboot, not resetting\n"); err = 0; } if (err) { device_printf(sc->dev, "watchdog reset failed\n"); } else { if (sc->dying == 2) sc->dying = 0; callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); } } static void mxge_watchdog_task(void *arg, int pending) { mxge_softc_t *sc = arg; mtx_lock(&sc->driver_mtx); mxge_watchdog_reset(sc); mtx_unlock(&sc->driver_mtx); } static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) { tx = &sc->ss[slice].tx; device_printf(sc->dev, "slice %d struck? ring state:\n", slice); device_printf(sc->dev, "tx.req=%d tx.done=%d, tx.queue_active=%d\n", tx->req, tx->done, tx->queue_active); device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", tx->activate, tx->deactivate); device_printf(sc->dev, "pkt_done=%d fw=%d\n", tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count)); } static int mxge_watchdog(mxge_softc_t *sc) { mxge_tx_ring_t *tx; uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); int i, err = 0; /* see if we have outstanding transmits, which have been pending for more than mxge_ticks */ for (i = 0; #ifdef IFNET_BUF_RING (i < sc->num_slices) && (err == 0); #else (i < 1) && (err == 0); #endif i++) { tx = &sc->ss[i].tx; if (tx->req != tx->done && tx->watchdog_req != tx->watchdog_done && tx->done == tx->watchdog_done) { /* check for pause blocking before resetting */ if (tx->watchdog_rx_pause == rx_pause) { mxge_warn_stuck(sc, tx, i); taskqueue_enqueue(sc->tq, &sc->watchdog_task); return (ENXIO); } else device_printf(sc->dev, "Flow control blocking " "xmits, check link partner\n"); } tx->watchdog_req = tx->req; tx->watchdog_done = tx->done; tx->watchdog_rx_pause = rx_pause; } if (sc->need_media_probe) 
mxge_media_probe(sc);
	return (err);
}

/*
 * Sum the per-slice packet counters into the ifnet statistics.
 * Returns the number of packets (received plus sent) added since the
 * counters were last stored in the ifnet.
 */
static u_long
mxge_update_stats(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	u_long pkts = 0;
	u_long ipackets = 0;
	u_long opackets = 0;
#ifdef IFNET_BUF_RING
	u_long obytes = 0;
	u_long omcasts = 0;
	u_long odrops = 0;
#endif
	u_long oerrors = 0;
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ipackets += ss->ipackets;
		opackets += ss->opackets;
#ifdef IFNET_BUF_RING
		obytes += ss->obytes;
		omcasts += ss->omcasts;
		odrops += ss->tx.br->br_drops;
#endif
		oerrors += ss->oerrors;
	}
	/* delta against the totals stored by the previous call */
	pkts = (ipackets - sc->ifp->if_ipackets);
	pkts += (opackets - sc->ifp->if_opackets);
	sc->ifp->if_ipackets = ipackets;
	sc->ifp->if_opackets = opackets;
#ifdef IFNET_BUF_RING
	sc->ifp->if_obytes = obytes;
	sc->ifp->if_omcasts = omcasts;
	sc->ifp->if_snd.ifq_drops = odrops;
#endif
	sc->ifp->if_oerrors = oerrors;
	return pkts;
}

/*
 * Periodic callout; arg is the softc.  While the interface is running it
 * refreshes the statistics and, on every fourth call, runs the transmit
 * watchdog.  When no packets moved since the previous call it checks the
 * PCI command register for a lost bus-master bit and, if the bit is
 * clear, queues the watchdog reset task.  The callout re-arms itself
 * unless an error was detected.
 */
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;
	u_long pkts = 0;
	int err = 0;
	int running, ticks;
	uint16_t cmd;

	ticks = mxge_ticks;
	running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
	if (running) {
		/* aggregate stats from different slices */
		pkts = mxge_update_stats(sc);
		if (!sc->watchdog_countdown) {
			err = mxge_watchdog(sc);
			sc->watchdog_countdown = 4;
		}
		sc->watchdog_countdown--;
	}
	if (pkts == 0) {
		/* ensure NIC did not suffer h/w fault while idle */
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
			sc->dying = 2;
			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
			err = ENXIO;
		}
		/* look less often if NIC is idle */
		ticks *= 4;
	}

	if (err == 0)
		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);

}

/* Media change callback: always fails with EINVAL. */
static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

/*
 * Change the interface MTU.  Returns EINVAL when the resulting frame
 * size (mtu plus Ethernet and VLAN headers) exceeds sc->max_mtu or is
 * below 60; otherwise 0 or the error returned by mxge_open().
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mxge_close(sc, 0); err = mxge_open(sc); if (err != 0) { ifp->if_mtu = old_mtu; mxge_close(sc, 0); (void) mxge_open(sc); } } mtx_unlock(&sc->driver_mtx); return err; } static void mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { mxge_softc_t *sc = ifp->if_softc; if (sc == NULL) return; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER | IFM_FDX; ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; ifmr->ifm_active |= sc->current_media; } static int mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { mxge_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int err, mask; err = 0; switch (command) { case SIOCSIFADDR: case SIOCGIFADDR: err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: err = mxge_change_mtu(sc, ifr->ifr_mtu); break; case SIOCSIFFLAGS: mtx_lock(&sc->driver_mtx); if (sc->dying) { mtx_unlock(&sc->driver_mtx); return EINVAL; } if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { err = mxge_open(sc); } else { /* take care of promis can allmulti flag chages */ mxge_change_promisc(sc, ifp->if_flags & IFF_PROMISC); mxge_set_multicast_list(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mxge_close(sc, 0); } } mtx_unlock(&sc->driver_mtx); break; case SIOCADDMULTI: case SIOCDELMULTI: mtx_lock(&sc->driver_mtx); mxge_set_multicast_list(sc); mtx_unlock(&sc->driver_mtx); break; case SIOCSIFCAP: mtx_lock(&sc->driver_mtx); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); } } else if (mask & IFCAP_RXCSUM) { if (IFCAP_RXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_RXCSUM; } else { ifp->if_capenable |= IFCAP_RXCSUM; } } if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { 
ifp->if_capenable &= ~IFCAP_TSO4; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } else { printf("mxge requires tx checksum offload" " be enabled to use TSO\n"); err = EINVAL; } } #if IFCAP_TSO6 if (mask & IFCAP_TXCSUM_IPV6) { if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP); } else { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } } else if (mask & IFCAP_RXCSUM_IPV6) { if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; } else { ifp->if_capenable |= IFCAP_RXCSUM_IPV6; } } if (mask & IFCAP_TSO6) { if (IFCAP_TSO6 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO6; } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO6; ifp->if_hwassist |= CSUM_TSO; } else { printf("mxge requires tx checksum offload" " be enabled to use TSO\n"); err = EINVAL; } } #endif /*IFCAP_TSO6 */ if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; mtx_unlock(&sc->driver_mtx); VLAN_CAPABILITIES(ifp); break; case SIOCGIFMEDIA: mtx_lock(&sc->driver_mtx); mxge_media_probe(sc); mtx_unlock(&sc->driver_mtx); err = ifmedia_ioctl(ifp, (struct ifreq *)data, &sc->media, command); break; default: err = ENOTTY; } return err; } static void mxge_fetch_tunables(mxge_softc_t *sc) { TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", &mxge_flow_control); TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay); TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable); 
TUNABLE_INT_FETCH("hw.mxge.force_firmware", &mxge_force_firmware); TUNABLE_INT_FETCH("hw.mxge.deassert_wait", &mxge_deassert_wait); TUNABLE_INT_FETCH("hw.mxge.verbose", &mxge_verbose); TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); if (bootverbose) mxge_verbose = 1; if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) mxge_intr_coal_delay = 30; if (mxge_ticks == 0) mxge_ticks = hz / 2; sc->pause = mxge_flow_control; if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; } if (mxge_initial_mtu > ETHERMTU_JUMBO || mxge_initial_mtu < ETHER_MIN_LEN) mxge_initial_mtu = ETHERMTU_JUMBO; if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) mxge_throttle = MXGE_MAX_THROTTLE; if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) mxge_throttle = MXGE_MIN_THROTTLE; sc->throttle = mxge_throttle; } static void mxge_free_slices(mxge_softc_t *sc) { struct mxge_slice_state *ss; int i; if (sc->ss == NULL) return; for (i = 0; i < sc->num_slices; i++) { ss = &sc->ss[i]; if (ss->fw_stats != NULL) { mxge_dma_free(&ss->fw_stats_dma); ss->fw_stats = NULL; #ifdef IFNET_BUF_RING if (ss->tx.br != NULL) { drbr_free(ss->tx.br, M_DEVBUF); ss->tx.br = NULL; } #endif mtx_destroy(&ss->tx.mtx); } if (ss->rx_done.entry != NULL) { mxge_dma_free(&ss->rx_done.dma); ss->rx_done.entry = NULL; } } free(sc->ss, M_DEVBUF); sc->ss = NULL; } static int mxge_alloc_slices(mxge_softc_t *sc) { mxge_cmd_t cmd; struct mxge_slice_state *ss; size_t bytes; int err, i, max_intr_slots; err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); if (err != 0) { device_printf(sc->dev, "Cannot 
determine rx ring size\n"); return err; } sc->rx_ring_size = cmd.data0; max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); bytes = sizeof (*sc->ss) * sc->num_slices; sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->ss == NULL) return (ENOMEM); for (i = 0; i < sc->num_slices; i++) { ss = &sc->ss[i]; ss->sc = sc; /* allocate per-slice rx interrupt queues */ bytes = max_intr_slots * sizeof (*ss->rx_done.entry); err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); if (err != 0) goto abort; ss->rx_done.entry = ss->rx_done.dma.addr; bzero(ss->rx_done.entry, bytes); /* * allocate the per-slice firmware stats; stats * (including tx) are used used only on the first * slice for now */ #ifndef IFNET_BUF_RING if (i > 0) continue; #endif bytes = sizeof (*ss->fw_stats); err = mxge_dma_alloc(sc, &ss->fw_stats_dma, sizeof (*ss->fw_stats), 64); if (err != 0) goto abort; ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), "%s:tx(%d)", device_get_nameunit(sc->dev), i); mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); #ifdef IFNET_BUF_RING ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, &ss->tx.mtx); #endif } return (0); abort: mxge_free_slices(sc); return (ENOMEM); } static void mxge_slice_probe(mxge_softc_t *sc) { mxge_cmd_t cmd; char *old_fw; int msix_cnt, status, max_intr_slots; sc->num_slices = 1; /* * don't enable multiple slices if they are not enabled, * or if this is not an SMP system */ if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) return; /* see how many MSI-X interrupts are available */ msix_cnt = pci_msix_count(sc->dev); if (msix_cnt < 2) return; /* now load the slice aware firmware see what it supports */ old_fw = sc->fw_name; if (old_fw == mxge_fw_aligned) sc->fw_name = mxge_fw_rss_aligned; else sc->fw_name = mxge_fw_rss_unaligned; status = mxge_load_firmware(sc, 0); if (status != 0) { device_printf(sc->dev, "Falling back to a single slice\n"); return; } 
/* try to send a reset command to the card to see if it is alive */ memset(&cmd, 0, sizeof (cmd)); status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); if (status != 0) { device_printf(sc->dev, "failed reset\n"); goto abort_with_fw; } /* get rx ring size */ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); if (status != 0) { device_printf(sc->dev, "Cannot determine rx ring size\n"); goto abort_with_fw; } max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); /* tell it the size of the interrupt queues */ cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); if (status != 0) { device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); goto abort_with_fw; } /* ask the maximum number of slices it supports */ status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); if (status != 0) { device_printf(sc->dev, "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); goto abort_with_fw; } sc->num_slices = cmd.data0; if (sc->num_slices > msix_cnt) sc->num_slices = msix_cnt; if (mxge_max_slices == -1) { /* cap to number of CPUs in system */ if (sc->num_slices > mp_ncpus) sc->num_slices = mp_ncpus; } else { if (sc->num_slices > mxge_max_slices) sc->num_slices = mxge_max_slices; } /* make sure it is a power of two */ while (sc->num_slices & (sc->num_slices - 1)) sc->num_slices--; if (mxge_verbose) device_printf(sc->dev, "using %d slices\n", sc->num_slices); return; abort_with_fw: sc->fw_name = old_fw; (void) mxge_load_firmware(sc, 0); } static int mxge_add_msix_irqs(mxge_softc_t *sc) { size_t bytes; int count, err, i, rid; rid = PCIR_BAR(2); sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->msix_table_res == NULL) { device_printf(sc->dev, "couldn't alloc MSIX table res\n"); return ENXIO; } count = sc->num_slices; err = pci_alloc_msix(sc->dev, &count); if (err != 0) { device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" "err = %d \n", sc->num_slices, 
err); goto abort_with_msix_table; } if (count < sc->num_slices) { device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", count, sc->num_slices); device_printf(sc->dev, "Try setting hw.mxge.max_slices to %d\n", count); err = ENOSPC; goto abort_with_msix; } bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); if (sc->msix_irq_res == NULL) { err = ENOMEM; goto abort_with_msix; } for (i = 0; i < sc->num_slices; i++) { rid = i + 1; sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->msix_irq_res[i] == NULL) { device_printf(sc->dev, "couldn't allocate IRQ res" " for message %d\n", i); err = ENXIO; goto abort_with_res; } } bytes = sizeof (*sc->msix_ih) * sc->num_slices; sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); for (i = 0; i < sc->num_slices; i++) { err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], INTR_TYPE_NET | INTR_MPSAFE, #if __FreeBSD_version > 700030 NULL, #endif mxge_intr, &sc->ss[i], &sc->msix_ih[i]); if (err != 0) { device_printf(sc->dev, "couldn't setup intr for " "message %d\n", i); goto abort_with_intr; } bus_describe_intr(sc->dev, sc->msix_irq_res[i], sc->msix_ih[i], "s%d", i); } if (mxge_verbose) { device_printf(sc->dev, "using %d msix IRQs:", sc->num_slices); for (i = 0; i < sc->num_slices; i++) printf(" %ld", rman_get_start(sc->msix_irq_res[i])); printf("\n"); } return (0); abort_with_intr: for (i = 0; i < sc->num_slices; i++) { if (sc->msix_ih[i] != NULL) { bus_teardown_intr(sc->dev, sc->msix_irq_res[i], sc->msix_ih[i]); sc->msix_ih[i] = NULL; } } free(sc->msix_ih, M_DEVBUF); abort_with_res: for (i = 0; i < sc->num_slices; i++) { rid = i + 1; if (sc->msix_irq_res[i] != NULL) bus_release_resource(sc->dev, SYS_RES_IRQ, rid, sc->msix_irq_res[i]); sc->msix_irq_res[i] = NULL; } free(sc->msix_irq_res, M_DEVBUF); abort_with_msix: pci_release_msi(sc->dev); abort_with_msix_table: bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 
sc->msix_table_res); return err; } static int mxge_add_single_irq(mxge_softc_t *sc) { int count, err, rid; count = pci_msi_count(sc->dev); if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { rid = 1; } else { rid = 0; sc->legacy_irq = 1; } sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 1, RF_SHAREABLE | RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(sc->dev, "could not alloc interrupt\n"); return ENXIO; } if (mxge_verbose) device_printf(sc->dev, "using %s irq %ld\n", sc->legacy_irq ? "INTx" : "MSI", rman_get_start(sc->irq_res)); err = bus_setup_intr(sc->dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE, #if __FreeBSD_version > 700030 NULL, #endif mxge_intr, &sc->ss[0], &sc->ih); if (err != 0) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->legacy_irq ? 0 : 1, sc->irq_res); if (!sc->legacy_irq) pci_release_msi(sc->dev); } return err; } static void mxge_rem_msix_irqs(mxge_softc_t *sc) { int i, rid; for (i = 0; i < sc->num_slices; i++) { if (sc->msix_ih[i] != NULL) { bus_teardown_intr(sc->dev, sc->msix_irq_res[i], sc->msix_ih[i]); sc->msix_ih[i] = NULL; } } free(sc->msix_ih, M_DEVBUF); for (i = 0; i < sc->num_slices; i++) { rid = i + 1; if (sc->msix_irq_res[i] != NULL) bus_release_resource(sc->dev, SYS_RES_IRQ, rid, sc->msix_irq_res[i]); sc->msix_irq_res[i] = NULL; } free(sc->msix_irq_res, M_DEVBUF); bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), sc->msix_table_res); pci_release_msi(sc->dev); return; } static void mxge_rem_single_irq(mxge_softc_t *sc) { bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->legacy_irq ? 
0 : 1, sc->irq_res); if (!sc->legacy_irq) pci_release_msi(sc->dev); } static void mxge_rem_irq(mxge_softc_t *sc) { if (sc->num_slices > 1) mxge_rem_msix_irqs(sc); else mxge_rem_single_irq(sc); } static int mxge_add_irq(mxge_softc_t *sc) { int err; if (sc->num_slices > 1) err = mxge_add_msix_irqs(sc); else err = mxge_add_single_irq(sc); if (0 && err == 0 && sc->num_slices > 1) { mxge_rem_msix_irqs(sc); err = mxge_add_msix_irqs(sc); } return err; } static int mxge_attach(device_t dev) { mxge_cmd_t cmd; mxge_softc_t *sc = device_get_softc(dev); struct ifnet *ifp; int err, rid; sc->dev = dev; mxge_fetch_tunables(sc); TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->tq); if (sc->tq == NULL) { err = ENOMEM; goto abort_with_nothing; } err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, /* alignment */ 0, /* boundary */ BUS_SPACE_MAXADDR, /* low */ BUS_SPACE_MAXADDR, /* high */ NULL, NULL, /* filter */ 65536 + 256, /* maxsize */ MXGE_MAX_SEND_DESC, /* num segs */ 65536, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lock */ &sc->parent_dmat); /* tag */ if (err != 0) { device_printf(sc->dev, "Err %d allocating parent dmat\n", err); goto abort_with_tq; } ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); err = ENOSPC; goto abort_with_parent_dmat; } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", device_get_nameunit(dev)); mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), "%s:drv", device_get_nameunit(dev)); mtx_init(&sc->driver_mtx, sc->driver_mtx_name, MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); mxge_setup_cfg_space(sc); /* Map the board into the kernel */ rid = PCIR_BARS; sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, ~0, 1, 
RF_ACTIVE); if (sc->mem_res == NULL) { device_printf(dev, "could not map memory\n"); err = ENXIO; goto abort_with_lock; } sc->sram = rman_get_virtual(sc->mem_res); sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; if (sc->sram_size > rman_get_size(sc->mem_res)) { device_printf(dev, "impossible memory region size %ld\n", rman_get_size(sc->mem_res)); err = ENXIO; goto abort_with_mem_res; } /* make NULL terminated copy of the EEPROM strings section of lanai SRAM */ bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); bus_space_read_region_1(rman_get_bustag(sc->mem_res), rman_get_bushandle(sc->mem_res), sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2); err = mxge_parse_strings(sc); if (err != 0) goto abort_with_mem_res; /* Enable write combining for efficient use of PCIe bus */ mxge_enable_wc(sc); /* Allocate the out of band dma memory */ err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof (mxge_cmd_t), 64); if (err != 0) goto abort_with_mem_res; sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); if (err != 0) goto abort_with_cmd_dma; err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); if (err != 0) goto abort_with_zeropad_dma; /* select & load the firmware */ err = mxge_select_firmware(sc); if (err != 0) goto abort_with_dmabench; sc->intr_coal_delay = mxge_intr_coal_delay; mxge_slice_probe(sc); err = mxge_alloc_slices(sc); if (err != 0) goto abort_with_dmabench; err = mxge_reset(sc, 0); if (err != 0) goto abort_with_slices; err = mxge_alloc_rings(sc); if (err != 0) { device_printf(sc->dev, "failed to allocate rings\n"); goto abort_with_slices; } err = mxge_add_irq(sc); if (err != 0) { device_printf(sc->dev, "failed to add irq\n"); goto abort_with_rings; } ifp->if_baudrate = IF_Gbps(10UL); ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6; #if defined(INET) || defined(INET6) 
ifp->if_capabilities |= IFCAP_LRO; #endif #ifdef MXGE_NEW_VLAN_API ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; /* Only FW 1.4.32 and newer can do TSO over vlans */ if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && sc->fw_ver_tiny >= 32) ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif sc->max_mtu = mxge_max_mtu(sc); if (sc->max_mtu >= 9000) ifp->if_capabilities |= IFCAP_JUMBO_MTU; else device_printf(dev, "MTU limited to %d. Install " "latest firmware for 9000 byte jumbo support\n", sc->max_mtu - ETHER_HDR_LEN); ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; /* check to see if f/w supports TSO for IPv6 */ if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { if (CSUM_TCP_IPV6) ifp->if_capabilities |= IFCAP_TSO6; sc->max_tso6_hlen = min(cmd.data0, sizeof (sc->ss[0].scratch)); } ifp->if_capenable = ifp->if_capabilities; if (sc->lro_cnt == 0) ifp->if_capenable &= ~IFCAP_LRO; ifp->if_init = mxge_init; ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = mxge_ioctl; ifp->if_start = mxge_start; /* Initialise the ifmedia structure */ ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status); mxge_media_init(sc); mxge_media_probe(sc); sc->dying = 0; ether_ifattach(ifp, sc->mac_addr); /* ether_ifattach sets mtu to ETHERMTU */ if (mxge_initial_mtu != ETHERMTU) mxge_change_mtu(sc, mxge_initial_mtu); mxge_add_sysctls(sc); #ifdef IFNET_BUF_RING ifp->if_transmit = mxge_transmit; ifp->if_qflush = mxge_qflush; #endif taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); return 0; abort_with_rings: mxge_free_rings(sc); abort_with_slices: mxge_free_slices(sc); abort_with_dmabench: mxge_dma_free(&sc->dmabench_dma); abort_with_zeropad_dma: mxge_dma_free(&sc->zeropad_dma); abort_with_cmd_dma: mxge_dma_free(&sc->cmd_dma); abort_with_mem_res: 
bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); abort_with_lock: pci_disable_busmaster(dev); mtx_destroy(&sc->cmd_mtx); mtx_destroy(&sc->driver_mtx); if_free(ifp); abort_with_parent_dmat: bus_dma_tag_destroy(sc->parent_dmat); abort_with_tq: if (sc->tq != NULL) { taskqueue_drain(sc->tq, &sc->watchdog_task); taskqueue_free(sc->tq); sc->tq = NULL; } abort_with_nothing: return err; } static int mxge_detach(device_t dev) { mxge_softc_t *sc = device_get_softc(dev); if (mxge_vlans_active(sc)) { device_printf(sc->dev, "Detach vlans before removing module\n"); return EBUSY; } mtx_lock(&sc->driver_mtx); sc->dying = 1; if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) mxge_close(sc, 0); mtx_unlock(&sc->driver_mtx); ether_ifdetach(sc->ifp); if (sc->tq != NULL) { taskqueue_drain(sc->tq, &sc->watchdog_task); taskqueue_free(sc->tq); sc->tq = NULL; } callout_drain(&sc->co_hdl); ifmedia_removeall(&sc->media); mxge_dummy_rdma(sc, 0); mxge_rem_sysctls(sc); mxge_rem_irq(sc); mxge_free_rings(sc); mxge_free_slices(sc); mxge_dma_free(&sc->dmabench_dma); mxge_dma_free(&sc->zeropad_dma); mxge_dma_free(&sc->cmd_dma); bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); pci_disable_busmaster(dev); mtx_destroy(&sc->cmd_mtx); mtx_destroy(&sc->driver_mtx); if_free(sc->ifp); bus_dma_tag_destroy(sc->parent_dmat); return 0; } static int mxge_shutdown(device_t dev) { return 0; } /* This file uses Myri10GE driver indentation. Local Variables: c-file-style:"linux" tab-width:8 End: */ Index: stable/9/sys/dev/oce/oce_sysctl.c =================================================================== --- stable/9/sys/dev/oce/oce_sysctl.c (revision 273911) +++ stable/9/sys/dev/oce/oce_sysctl.c (revision 273912) @@ -1,1512 +1,1512 @@ /*- * Copyright (C) 2013 Emulex * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Emulex Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * Contact Information: * freebsd-drivers@emulex.com * * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 */ /* $FreeBSD$ */ #include "oce_if.h" static void copy_stats_to_sc_xe201(POCE_SOFTC sc); static void copy_stats_to_sc_be3(POCE_SOFTC sc); static void copy_stats_to_sc_be2(POCE_SOFTC sc); static int oce_sysctl_loopback(SYSCTL_HANDLER_ARGS); static int oce_sys_aic_enable(SYSCTL_HANDLER_ARGS); static int oce_be3_fwupgrade(POCE_SOFTC sc, const struct firmware *fw); static int oce_skyhawk_fwupgrade(POCE_SOFTC sc, const struct firmware *fw); static int oce_sys_fwupgrade(SYSCTL_HANDLER_ARGS); static int oce_lancer_fwupgrade(POCE_SOFTC sc, const struct firmware *fw); static int oce_sysctl_sfp_vpd_dump(SYSCTL_HANDLER_ARGS); static boolean_t oce_phy_flashing_required(POCE_SOFTC sc); static boolean_t oce_img_flashing_required(POCE_SOFTC sc, const char *p, int img_optype, uint32_t img_offset, uint32_t img_size, uint32_t hdrs_size); static void oce_add_stats_sysctls_be3(POCE_SOFTC sc, struct sysctl_ctx_list *ctx, struct sysctl_oid *stats_node); static void oce_add_stats_sysctls_xe201(POCE_SOFTC sc, struct sysctl_ctx_list *ctx, struct sysctl_oid *stats_node); extern char component_revision[32]; uint32_t sfp_vpd_dump_buffer[TRANSCEIVER_DATA_NUM_ELE]; struct flash_img_attri { int img_offset; int img_size; int img_type; bool skip_image; int optype; }; void oce_add_sysctls(POCE_SOFTC sc) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct sysctl_oid *stats_node; SYSCTL_ADD_STRING(ctx, child, OID_AUTO, "component_revision", - CTLTYPE_INT | CTLFLAG_RD, - &component_revision, + CTLFLAG_RD, + component_revision, sizeof(component_revision), "EMULEX One-Connect device driver revision"); SYSCTL_ADD_STRING(ctx, child, OID_AUTO, "firmware_version", - CTLTYPE_INT | CTLFLAG_RD, - &sc->fw_version, + CTLFLAG_RD, + sc->fw_version, sizeof(sc->fw_version), 
"EMULEX One-Connect Firmware Version"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_rsp_handled", - CTLTYPE_INT | CTLFLAG_RW, + CTLFLAG_RW, &oce_max_rsp_handled, sizeof(oce_max_rsp_handled), "Maximum receive frames handled per interupt"); if ((sc->function_mode & FNM_FLEX10_MODE) || (sc->function_mode & FNM_UMC_MODE)) SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "speed", CTLFLAG_RD, &sc->qos_link_speed, 0,"QOS Speed"); else SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "speed", CTLFLAG_RD, &sc->speed, 0,"Link Speed"); if (sc->function_mode & FNM_UMC_MODE) SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "pvid", CTLFLAG_RD, &sc->pvid, 0,"PVID"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "loop_back", CTLTYPE_INT | CTLFLAG_RW, (void *)sc, 0, oce_sysctl_loopback, "I", "Loop Back Tests"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fw_upgrade", CTLTYPE_STRING | CTLFLAG_RW, (void *)sc, 0, oce_sys_fwupgrade, "A", "Firmware ufi file"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "aic_enable", CTLTYPE_INT | CTLFLAG_RW, (void *)sc, 1, oce_sys_aic_enable, "I", "aic flags"); /* * Dumps Transceiver data * "sysctl dev.oce.0.sfp_vpd_dump=0" * "sysctl -x dev.oce.0.sfp_vpd_dump_buffer" for hex dump * "sysctl -b dev.oce.0.sfp_vpd_dump_buffer > sfp.bin" for binary dump */ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sfp_vpd_dump", CTLTYPE_INT | CTLFLAG_RW, (void *)sc, 0, oce_sysctl_sfp_vpd_dump, "I", "Initiate a sfp_vpd_dump operation"); SYSCTL_ADD_OPAQUE(ctx, child, OID_AUTO, "sfp_vpd_dump_buffer", CTLFLAG_RD, sfp_vpd_dump_buffer, TRANSCEIVER_DATA_SIZE, "IU", "Access sfp_vpd_dump buffer"); stats_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, NULL, "Ethernet Statistics"); if (IS_BE(sc) || IS_SH(sc)) oce_add_stats_sysctls_be3(sc, ctx, stats_node); else oce_add_stats_sysctls_xe201(sc, ctx, stats_node); } static uint32_t oce_loopback_test(struct oce_softc *sc, uint8_t loopback_type) { uint32_t status = 0; oce_mbox_cmd_set_loopback(sc, sc->port_id, loopback_type, 1); status = oce_mbox_cmd_test_loopback(sc, 
sc->port_id, loopback_type, 1500, 2, 0xabc); oce_mbox_cmd_set_loopback(sc, sc->port_id, OCE_NO_LOOPBACK, 1); return status; } static int oce_sys_aic_enable(SYSCTL_HANDLER_ARGS) { int value = 0; uint32_t status, vector; POCE_SOFTC sc = (struct oce_softc *)arg1; struct oce_aic_obj *aic; status = sysctl_handle_int(oidp, &value, 0, req); if (status || !req->newptr) return status; for (vector = 0; vector < sc->intr_count; vector++) { aic = &sc->aic_obj[vector]; if (value == 0){ aic->max_eqd = aic->min_eqd = aic->et_eqd = 0; aic->enable = 0; } else { aic->max_eqd = OCE_MAX_EQD; aic->min_eqd = OCE_MIN_EQD; aic->et_eqd = OCE_MIN_EQD; aic->enable = TRUE; } } return 0; } static int oce_sysctl_loopback(SYSCTL_HANDLER_ARGS) { int value = 0; uint32_t status; struct oce_softc *sc = (struct oce_softc *)arg1; status = sysctl_handle_int(oidp, &value, 0, req); if (status || !req->newptr) return status; if (value != 1) { device_printf(sc->dev, "Not a Valid value. Set to loop_back=1 to run tests\n"); return 0; } if ((status = oce_loopback_test(sc, OCE_MAC_LOOPBACK))) { device_printf(sc->dev, "MAC Loopback Test = Failed (Error status = %d)\n", status); } else device_printf(sc->dev, "MAC Loopback Test = Success\n"); if ((status = oce_loopback_test(sc, OCE_PHY_LOOPBACK))) { device_printf(sc->dev, "PHY Loopback Test = Failed (Error status = %d)\n", status); } else device_printf(sc->dev, "PHY Loopback Test = Success\n"); if ((status = oce_loopback_test(sc, OCE_ONE_PORT_EXT_LOOPBACK))) { device_printf(sc->dev, "EXT Loopback Test = Failed (Error status = %d)\n", status); } else device_printf(sc->dev, "EXT Loopback Test = Success\n"); return 0; } static int oce_sys_fwupgrade(SYSCTL_HANDLER_ARGS) { char ufiname[256] = {0}; uint32_t status = 1; struct oce_softc *sc = (struct oce_softc *)arg1; const struct firmware *fw; status = sysctl_handle_string(oidp, ufiname, sizeof(ufiname), req); if (status || !req->newptr) return status; fw = firmware_get(ufiname); if (fw == NULL) { 
device_printf(sc->dev, "Unable to get Firmware. " "Make sure %s is copied to /boot/modules\n", ufiname); return ENOENT; } if (IS_BE(sc)) { if ((sc->flags & OCE_FLAGS_BE2)) { device_printf(sc->dev, "Flashing not supported for BE2 yet.\n"); status = 1; goto done; } status = oce_be3_fwupgrade(sc, fw); } else if (IS_SH(sc)) { status = oce_skyhawk_fwupgrade(sc,fw); } else status = oce_lancer_fwupgrade(sc, fw); done: if (status) { device_printf(sc->dev, "Firmware Upgrade failed\n"); } else { device_printf(sc->dev, "Firmware Flashed successfully\n"); } /* Release Firmware*/ firmware_put(fw, FIRMWARE_UNLOAD); return status; } static void oce_fill_flash_img_data(POCE_SOFTC sc, const struct flash_sec_info * fsec, struct flash_img_attri *pimg, int i, const struct firmware *fw, int bin_offset) { if (IS_SH(sc)) { pimg->img_offset = HOST_32(fsec->fsec_entry[i].offset); pimg->img_size = HOST_32(fsec->fsec_entry[i].pad_size); } pimg->img_type = HOST_32(fsec->fsec_entry[i].type); pimg->skip_image = FALSE; switch (pimg->img_type) { case IMG_ISCSI: pimg->optype = 0; if (IS_BE3(sc)) { pimg->img_offset = 2097152; pimg->img_size = 2097152; } break; case IMG_REDBOOT: pimg->optype = 1; if (IS_BE3(sc)) { pimg->img_offset = 262144; pimg->img_size = 1048576; } if (!oce_img_flashing_required(sc, fw->data, pimg->optype, pimg->img_offset, pimg->img_size, bin_offset)) pimg->skip_image = TRUE; break; case IMG_BIOS: pimg->optype = 2; if (IS_BE3(sc)) { pimg->img_offset = 12582912; pimg->img_size = 524288; } break; case IMG_PXEBIOS: pimg->optype = 3; if (IS_BE3(sc)) { pimg->img_offset = 13107200;; pimg->img_size = 524288; } break; case IMG_FCOEBIOS: pimg->optype = 8; if (IS_BE3(sc)) { pimg->img_offset = 13631488; pimg->img_size = 524288; } break; case IMG_ISCSI_BAK: pimg->optype = 9; if (IS_BE3(sc)) { pimg->img_offset = 4194304; pimg->img_size = 2097152; } break; case IMG_FCOE: pimg->optype = 10; if (IS_BE3(sc)) { pimg->img_offset = 6291456; pimg->img_size = 2097152; } break; case IMG_FCOE_BAK: 
pimg->optype = 11; if (IS_BE3(sc)) { pimg->img_offset = 8388608; pimg->img_size = 2097152; } break; case IMG_NCSI: pimg->optype = 13; if (IS_BE3(sc)) { pimg->img_offset = 15990784; pimg->img_size = 262144; } break; case IMG_PHY: pimg->optype = 99; if (IS_BE3(sc)) { pimg->img_offset = 1310720; pimg->img_size = 262144; } if (!oce_phy_flashing_required(sc)) pimg->skip_image = TRUE; break; default: pimg->skip_image = TRUE; break; } } static int oce_sh_be3_flashdata(POCE_SOFTC sc, const struct firmware *fw, int32_t num_imgs) { char cookie[2][16] = {"*** SE FLAS", "H DIRECTORY *** "}; const char *p = (const char *)fw->data; const struct flash_sec_info *fsec = NULL; struct mbx_common_read_write_flashrom *req; int rc = 0, i, bin_offset = 0, opcode, num_bytes; OCE_DMA_MEM dma_mem; struct flash_img_attri imgatt; /* Validate Cookie */ bin_offset = (sizeof(struct flash_file_hdr) + (num_imgs * sizeof(struct image_hdr))); p += bin_offset; while (p < ((const char *)fw->data + fw->datasize)) { fsec = (const struct flash_sec_info *)p; if (!memcmp(cookie, fsec->cookie, sizeof(cookie))) break; fsec = NULL; p += 32; } if (!fsec) { device_printf(sc->dev, "Invalid Cookie. 
Firmware image corrupted ?\n"); return EINVAL; } rc = oce_dma_alloc(sc, sizeof(struct mbx_common_read_write_flashrom), &dma_mem, 0); if (rc) { device_printf(sc->dev, "Memory allocation failure while flashing\n"); return ENOMEM; } req = OCE_DMAPTR(&dma_mem, struct mbx_common_read_write_flashrom); if (IS_SH(sc)) num_imgs = HOST_32(fsec->fsec_hdr.num_images); else if (IS_BE3(sc)) num_imgs = MAX_FLASH_COMP; for (i = 0; i < num_imgs; i++) { bzero(&imgatt, sizeof(struct flash_img_attri)); oce_fill_flash_img_data(sc, fsec, &imgatt, i, fw, bin_offset); if (imgatt.skip_image) continue; p = fw->data; p = p + bin_offset + imgatt.img_offset; if ((p + imgatt.img_size) > ((const char *)fw->data + fw->datasize)) { rc = 1; goto ret; } while (imgatt.img_size) { if (imgatt.img_size > 32*1024) num_bytes = 32*1024; else num_bytes = imgatt.img_size; imgatt.img_size -= num_bytes; if (!imgatt.img_size) opcode = FLASHROM_OPER_FLASH; else opcode = FLASHROM_OPER_SAVE; memcpy(req->data_buffer, p, num_bytes); p += num_bytes; rc = oce_mbox_write_flashrom(sc, imgatt.optype, opcode, &dma_mem, num_bytes); if (rc) { device_printf(sc->dev, "cmd to write to flash rom failed.\n"); rc = EIO; goto ret; } /* Leave the CPU for others for some time */ pause("yield", 10); } } ret: oce_dma_free(sc, &dma_mem); return rc; } #define UFI_TYPE2 2 #define UFI_TYPE3 3 #define UFI_TYPE3R 10 #define UFI_TYPE4 4 #define UFI_TYPE4R 11 static int oce_get_ufi_type(POCE_SOFTC sc, const struct flash_file_hdr *fhdr) { if (fhdr == NULL) goto be_get_ufi_exit; if (IS_SH(sc) && fhdr->build[0] == '4') { if (fhdr->asic_type_rev >= 0x10) return UFI_TYPE4R; else return UFI_TYPE4; } else if (IS_BE3(sc) && fhdr->build[0] == '3') { if (fhdr->asic_type_rev == 0x10) return UFI_TYPE3R; else return UFI_TYPE3; } else if (IS_BE2(sc) && fhdr->build[0] == '2') return UFI_TYPE2; be_get_ufi_exit: device_printf(sc->dev, "UFI and Interface are not compatible for flashing\n"); return -1; } static int oce_skyhawk_fwupgrade(POCE_SOFTC sc, const 
struct firmware *fw) { int rc = 0, num_imgs = 0, i = 0, ufi_type; const struct flash_file_hdr *fhdr; const struct image_hdr *img_ptr; fhdr = (const struct flash_file_hdr *)fw->data; ufi_type = oce_get_ufi_type(sc, fhdr); /* Display flash version */ device_printf(sc->dev, "Flashing Firmware %s\n", &fhdr->build[2]); num_imgs = fhdr->num_imgs; for (i = 0; i < num_imgs; i++) { img_ptr = (const struct image_hdr *)((const char *)fw->data + sizeof(struct flash_file_hdr) + (i * sizeof(struct image_hdr))); if (img_ptr->imageid != 1) continue; switch (ufi_type) { case UFI_TYPE4R: rc = oce_sh_be3_flashdata(sc, fw, num_imgs); break; case UFI_TYPE4: if (sc->asic_revision < 0x10) rc = oce_sh_be3_flashdata(sc, fw, num_imgs); else { rc = -1; device_printf(sc->dev, "Cant load SH A0 UFI on B0\n"); } break; default: rc = -1; break; } } return rc; } static int oce_be3_fwupgrade(POCE_SOFTC sc, const struct firmware *fw) { int rc = 0, num_imgs = 0, i = 0; const struct flash_file_hdr *fhdr; const struct image_hdr *img_ptr; fhdr = (const struct flash_file_hdr *)fw->data; if (fhdr->build[0] != '3') { device_printf(sc->dev, "Invalid BE3 firmware image\n"); return EINVAL; } /* Display flash version */ device_printf(sc->dev, "Flashing Firmware %s\n", &fhdr->build[2]); num_imgs = fhdr->num_imgs; for (i = 0; i < num_imgs; i++) { img_ptr = (const struct image_hdr *)((const char *)fw->data + sizeof(struct flash_file_hdr) + (i * sizeof(struct image_hdr))); if (img_ptr->imageid == 1) { rc = oce_sh_be3_flashdata(sc, fw, num_imgs); break; } } return rc; } static boolean_t oce_phy_flashing_required(POCE_SOFTC sc) { int status = 0; struct oce_phy_info phy_info; status = oce_mbox_get_phy_info(sc, &phy_info); if (status) return FALSE; if ((phy_info.phy_type == TN_8022) && (phy_info.interface_type == PHY_TYPE_BASET_10GB)) { return TRUE; } return FALSE; } static boolean_t oce_img_flashing_required(POCE_SOFTC sc, const char *p, int img_optype, uint32_t img_offset, uint32_t img_size, uint32_t hdrs_size) { 
uint32_t crc_offset; uint8_t flashed_crc[4]; int status; crc_offset = hdrs_size + img_offset + img_size - 4; p += crc_offset; status = oce_mbox_get_flashrom_crc(sc, flashed_crc, (img_size - 4), img_optype); if (status) return TRUE; /* Some thing worng. ReFlash */ /*update redboot only if crc does not match*/ if (bcmp(flashed_crc, p, 4)) return TRUE; else return FALSE; } static int oce_lancer_fwupgrade(POCE_SOFTC sc, const struct firmware *fw) { int rc = 0; OCE_DMA_MEM dma_mem; const uint8_t *data = NULL; uint8_t *dest_image_ptr = NULL; size_t size = 0; uint32_t data_written = 0, chunk_size = 0; uint32_t offset = 0, add_status = 0; if (!IS_ALIGNED(fw->datasize, sizeof(uint32_t))) { device_printf(sc->dev, "Lancer FW image is not 4 byte aligned."); return EINVAL; } rc = oce_dma_alloc(sc, 32*1024, &dma_mem, 0); if (rc) { device_printf(sc->dev, "Memory allocation failure while flashing Lancer\n"); return ENOMEM; } size = fw->datasize; data = fw->data; dest_image_ptr = OCE_DMAPTR(&dma_mem, uint8_t); while (size) { chunk_size = MIN(size, (32*1024)); bcopy(data, dest_image_ptr, chunk_size); rc = oce_mbox_lancer_write_flashrom(sc, chunk_size, offset, &dma_mem, &data_written, &add_status); if (rc) break; size -= data_written; data += data_written; offset += data_written; pause("yield", 10); } if (!rc) /* Commit the firmware*/ rc = oce_mbox_lancer_write_flashrom(sc, 0, offset, &dma_mem, &data_written, &add_status); if (rc) { device_printf(sc->dev, "Lancer firmware load error. 
" "Addstatus = 0x%x, status = %d \n", add_status, rc); rc = EIO; } oce_dma_free(sc, &dma_mem); return rc; } static void oce_add_stats_sysctls_be3(POCE_SOFTC sc, struct sysctl_ctx_list *ctx, struct sysctl_oid *stats_node) { struct sysctl_oid *rx_stats_node, *tx_stats_node; struct sysctl_oid_list *rx_stat_list, *tx_stat_list; struct sysctl_oid_list *queue_stats_list; struct sysctl_oid *queue_stats_node; struct oce_drv_stats *stats; char prefix[32]; int i; stats = &sc->oce_stats_info; rx_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(stats_node), OID_AUTO,"rx", CTLFLAG_RD, NULL, "RX Ethernet Statistics"); rx_stat_list = SYSCTL_CHILDREN(rx_stats_node); SYSCTL_ADD_QUAD(ctx, rx_stat_list, OID_AUTO, "total_pkts", CTLFLAG_RD, &stats->rx.t_rx_pkts, "Total Received Packets"); SYSCTL_ADD_QUAD(ctx, rx_stat_list, OID_AUTO, "total_bytes", CTLFLAG_RD, &stats->rx.t_rx_bytes, "Total Received Bytes"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "total_frags", CTLFLAG_RD, &stats->rx.t_rx_frags, 0, "Total Received Fragements"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "total_mcast_pkts", CTLFLAG_RD, &stats->rx.t_rx_mcast_pkts, 0, "Total Received Multicast Packets"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "total_ucast_pkts", CTLFLAG_RD, &stats->rx.t_rx_ucast_pkts, 0, "Total Received Unicast Packets"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "total_rxcp_errs", CTLFLAG_RD, &stats->rx.t_rxcp_errs, 0, "Total Receive completion errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "pause_frames", CTLFLAG_RD, &stats->u0.be.rx_pause_frames, 0, "Pause Frames"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "priority_pause_frames", CTLFLAG_RD, &stats->u0.be.rx_priority_pause_frames, 0, "Priority Pause Frames"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "control_frames", CTLFLAG_RD, &stats->u0.be.rx_control_frames, 0, "Control Frames"); for (i = 0; i < sc->nrqs; i++) { sprintf(prefix, "queue%d",i); queue_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(rx_stats_node), 
OID_AUTO, prefix, CTLFLAG_RD, NULL, "Queue name"); queue_stats_list = SYSCTL_CHILDREN(queue_stats_node); SYSCTL_ADD_QUAD(ctx, queue_stats_list, OID_AUTO, "rx_pkts", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_pkts, "Receive Packets"); SYSCTL_ADD_QUAD(ctx, queue_stats_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_bytes, "Recived Bytes"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "rx_frags", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_frags, 0, "Received Fragments"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "rx_mcast_pkts", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_mcast_pkts, 0, "Received Multicast Packets"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "rx_ucast_pkts", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_ucast_pkts, 0, "Received Unicast Packets"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "rxcp_err", CTLFLAG_RD, &sc->rq[i]->rx_stats.rxcp_err, 0, "Received Completion Errors"); } rx_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(rx_stats_node), OID_AUTO, "err", CTLFLAG_RD, NULL, "Receive Error Stats"); rx_stat_list = SYSCTL_CHILDREN(rx_stats_node); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->u0.be.rx_crc_errors, 0, "CRC Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "pbuf_errors", CTLFLAG_RD, &stats->u0.be.rx_drops_no_pbuf, 0, "Drops due to pbuf full"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "erx_errors", CTLFLAG_RD, &stats->u0.be.rx_drops_no_erx_descr, 0, "ERX Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "alignment_errors", CTLFLAG_RD, &stats->u0.be.rx_drops_too_many_frags, 0, "RX Alignmnet Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "in_range_errors", CTLFLAG_RD, &stats->u0.be.rx_in_range_errors, 0, "In Range Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "out_range_errors", CTLFLAG_RD, &stats->u0.be.rx_out_range_errors, 0, "Out Range Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "frame_too_long", CTLFLAG_RD, &stats->u0.be.rx_frame_too_long, 0, "Frame Too 
Long"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "address_match_errors", CTLFLAG_RD, &stats->u0.be.rx_address_match_errors, 0, "Address Match Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_too_small", CTLFLAG_RD, &stats->u0.be.rx_dropped_too_small, 0, "Dropped Too Small"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_too_short", CTLFLAG_RD, &stats->u0.be.rx_dropped_too_short, 0, "Dropped Too Short"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_header_too_small", CTLFLAG_RD, &stats->u0.be.rx_dropped_header_too_small, 0, "Dropped Header Too Small"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_tcp_length", CTLFLAG_RD, &stats->u0.be.rx_dropped_tcp_length, 0, "Dropped TCP Length"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_runt", CTLFLAG_RD, &stats->u0.be.rx_dropped_runt, 0, "Dropped runt"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "ip_checksum_errs", CTLFLAG_RD, &stats->u0.be.rx_ip_checksum_errs, 0, "IP Checksum Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "tcp_checksum_errs", CTLFLAG_RD, &stats->u0.be.rx_tcp_checksum_errs, 0, "TCP Checksum Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "udp_checksum_errs", CTLFLAG_RD, &stats->u0.be.rx_udp_checksum_errs, 0, "UDP Checksum Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "fifo_overflow_drop", CTLFLAG_RD, &stats->u0.be.rxpp_fifo_overflow_drop, 0, "FIFO Overflow Drop"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "input_fifo_overflow_drop", CTLFLAG_RD, &stats->u0.be.rx_input_fifo_overflow_drop, 0, "Input FIFO Overflow Drop"); tx_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(stats_node), OID_AUTO, "tx",CTLFLAG_RD, NULL, "TX Ethernet Statistics"); tx_stat_list = SYSCTL_CHILDREN(tx_stats_node); SYSCTL_ADD_QUAD(ctx, tx_stat_list, OID_AUTO, "total_tx_pkts", CTLFLAG_RD, &stats->tx.t_tx_pkts, "Total Transmit Packets"); SYSCTL_ADD_QUAD(ctx, tx_stat_list, OID_AUTO, "total_tx_bytes", CTLFLAG_RD, &stats->tx.t_tx_bytes, "Total Transmit 
Bytes"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_tx_reqs", CTLFLAG_RD, &stats->tx.t_tx_reqs, 0, "Total Transmit Requests"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_tx_stops", CTLFLAG_RD, &stats->tx.t_tx_stops, 0, "Total Transmit Stops"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_tx_wrbs", CTLFLAG_RD, &stats->tx.t_tx_wrbs, 0, "Total Transmit WRB's"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_tx_compl", CTLFLAG_RD, &stats->tx.t_tx_compl, 0, "Total Transmit Completions"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_ipv6_ext_hdr_tx_drop", CTLFLAG_RD, &stats->tx.t_ipv6_ext_hdr_tx_drop, 0, "Total Transmit IPV6 Drops"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "pauseframes", CTLFLAG_RD, &stats->u0.be.tx_pauseframes, 0, "Pause Frames"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "priority_pauseframes", CTLFLAG_RD, &stats->u0.be.tx_priority_pauseframes, 0, "Priority Pauseframes"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "controlframes", CTLFLAG_RD, &stats->u0.be.tx_controlframes, 0, "Tx Control Frames"); for (i = 0; i < sc->nwqs; i++) { sprintf(prefix, "queue%d",i); queue_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tx_stats_node), OID_AUTO, prefix, CTLFLAG_RD, NULL, "Queue name"); queue_stats_list = SYSCTL_CHILDREN(queue_stats_node); SYSCTL_ADD_QUAD(ctx, queue_stats_list, OID_AUTO, "tx_pkts", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_pkts, "Transmit Packets"); SYSCTL_ADD_QUAD(ctx, queue_stats_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_bytes, "Transmit Bytes"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "tx_reqs", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_reqs, 0, "Transmit Requests"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "tx_stops", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_stops, 0, "Transmit Stops"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "tx_wrbs", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_wrbs, 0, "Transmit WRB's"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "tx_compl", 
CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_compl, 0, "Transmit Completions"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "ipv6_ext_hdr_tx_drop",CTLFLAG_RD, &sc->wq[i]->tx_stats.ipv6_ext_hdr_tx_drop, 0, "Transmit IPV6 Ext Header Drop"); } return; } static void oce_add_stats_sysctls_xe201(POCE_SOFTC sc, struct sysctl_ctx_list *ctx, struct sysctl_oid *stats_node) { struct sysctl_oid *rx_stats_node, *tx_stats_node; struct sysctl_oid_list *rx_stat_list, *tx_stat_list; struct sysctl_oid_list *queue_stats_list; struct sysctl_oid *queue_stats_node; struct oce_drv_stats *stats; char prefix[32]; int i; stats = &sc->oce_stats_info; rx_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(stats_node), OID_AUTO, "rx", CTLFLAG_RD, NULL, "RX Ethernet Statistics"); rx_stat_list = SYSCTL_CHILDREN(rx_stats_node); SYSCTL_ADD_QUAD(ctx, rx_stat_list, OID_AUTO, "total_pkts", CTLFLAG_RD, &stats->rx.t_rx_pkts, "Total Received Packets"); SYSCTL_ADD_QUAD(ctx, rx_stat_list, OID_AUTO, "total_bytes", CTLFLAG_RD, &stats->rx.t_rx_bytes, "Total Received Bytes"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "total_frags", CTLFLAG_RD, &stats->rx.t_rx_frags, 0, "Total Received Fragements"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "total_mcast_pkts", CTLFLAG_RD, &stats->rx.t_rx_mcast_pkts, 0, "Total Received Multicast Packets"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "total_ucast_pkts", CTLFLAG_RD, &stats->rx.t_rx_ucast_pkts, 0, "Total Received Unicast Packets"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "total_rxcp_errs", CTLFLAG_RD, &stats->rx.t_rxcp_errs, 0, "Total Receive completion errors"); - SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "pause_frames", - CTLFLAG_RD, &stats->u0.xe201.rx_pause_frames, 0, + SYSCTL_ADD_UQUAD(ctx, rx_stat_list, OID_AUTO, "pause_frames", + CTLFLAG_RD, &stats->u0.xe201.rx_pause_frames, "Pause Frames"); - SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "control_frames", - CTLFLAG_RD, &stats->u0.xe201.rx_control_frames, 0, + SYSCTL_ADD_UQUAD(ctx, rx_stat_list, 
OID_AUTO, "control_frames", + CTLFLAG_RD, &stats->u0.xe201.rx_control_frames, "Control Frames"); for (i = 0; i < sc->nrqs; i++) { sprintf(prefix, "queue%d",i); queue_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(rx_stats_node), OID_AUTO, prefix, CTLFLAG_RD, NULL, "Queue name"); queue_stats_list = SYSCTL_CHILDREN(queue_stats_node); SYSCTL_ADD_QUAD(ctx, queue_stats_list, OID_AUTO, "rx_pkts", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_pkts, "Receive Packets"); SYSCTL_ADD_QUAD(ctx, queue_stats_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_bytes, "Recived Bytes"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "rx_frags", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_frags, 0, "Received Fragments"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "rx_mcast_pkts", CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_mcast_pkts, 0, "Received Multicast Packets"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "rx_ucast_pkts",CTLFLAG_RD, &sc->rq[i]->rx_stats.rx_ucast_pkts, 0, "Received Unicast Packets"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "rxcp_err", CTLFLAG_RD, &sc->rq[i]->rx_stats.rxcp_err, 0, "Received Completion Errors"); } rx_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(rx_stats_node), OID_AUTO, "err", CTLFLAG_RD, NULL, "Receive Error Stats"); rx_stat_list = SYSCTL_CHILDREN(rx_stats_node); - SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "crc_errs", - CTLFLAG_RD, &stats->u0.xe201.rx_crc_errors, 0, + SYSCTL_ADD_UQUAD(ctx, rx_stat_list, OID_AUTO, "crc_errs", + CTLFLAG_RD, &stats->u0.xe201.rx_crc_errors, "CRC Errors"); - SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "alignment_errors", - CTLFLAG_RD, &stats->u0.xe201.rx_alignment_errors, 0, + SYSCTL_ADD_UQUAD(ctx, rx_stat_list, OID_AUTO, "alignment_errors", + CTLFLAG_RD, &stats->u0.xe201.rx_alignment_errors, "RX Alignmnet Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "in_range_errors", CTLFLAG_RD, &stats->u0.xe201.rx_in_range_errors, 0, "In Range Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, 
"out_range_errors", CTLFLAG_RD, &stats->u0.xe201.rx_out_of_range_errors, 0, "Out Range Errors"); - SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "frame_too_long", - CTLFLAG_RD, &stats->u0.xe201.rx_frames_too_long, 0, + SYSCTL_ADD_UQUAD(ctx, rx_stat_list, OID_AUTO, "frame_too_long", + CTLFLAG_RD, &stats->u0.xe201.rx_frames_too_long, "Frame Too Long"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "address_match_errors", CTLFLAG_RD, &stats->u0.xe201.rx_address_match_errors, 0, "Address Match Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_too_small", CTLFLAG_RD, &stats->u0.xe201.rx_dropped_too_small, 0, "Dropped Too Small"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_too_short", CTLFLAG_RD, &stats->u0.xe201.rx_dropped_too_short, 0, "Dropped Too Short"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_header_too_small", CTLFLAG_RD, &stats->u0.xe201.rx_dropped_header_too_small, 0, "Dropped Header Too Small"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_tcp_length", CTLFLAG_RD, &stats->u0.xe201.rx_dropped_invalid_tcp_length, 0, "Dropped TCP Length"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "dropped_runt", CTLFLAG_RD, &stats->u0.xe201.rx_dropped_runt, 0, "Dropped runt"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "ip_checksum_errs", CTLFLAG_RD, &stats->u0.xe201.rx_ip_checksum_errors, 0, "IP Checksum Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "tcp_checksum_errs", CTLFLAG_RD, &stats->u0.xe201.rx_tcp_checksum_errors, 0, "TCP Checksum Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "udp_checksum_errs", CTLFLAG_RD, &stats->u0.xe201.rx_udp_checksum_errors, 0, "UDP Checksum Errors"); SYSCTL_ADD_UINT(ctx, rx_stat_list, OID_AUTO, "input_fifo_overflow_drop", CTLFLAG_RD, &stats->u0.xe201.rx_fifo_overflow, 0, "Input FIFO Overflow Drop"); tx_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(stats_node), OID_AUTO, "tx", CTLFLAG_RD, NULL, "TX Ethernet Statistics"); tx_stat_list = SYSCTL_CHILDREN(tx_stats_node); 
SYSCTL_ADD_QUAD(ctx, tx_stat_list, OID_AUTO, "total_tx_pkts", CTLFLAG_RD, &stats->tx.t_tx_pkts, "Total Transmit Packets"); SYSCTL_ADD_QUAD(ctx, tx_stat_list, OID_AUTO, "total_tx_bytes", CTLFLAG_RD, &stats->tx.t_tx_bytes, "Total Transmit Bytes"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_tx_reqs", CTLFLAG_RD, &stats->tx.t_tx_reqs, 0, "Total Transmit Requests"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_tx_stops", CTLFLAG_RD, &stats->tx.t_tx_stops, 0, "Total Transmit Stops"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_tx_wrbs", CTLFLAG_RD, &stats->tx.t_tx_wrbs, 0, "Total Transmit WRB's"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_tx_compl", CTLFLAG_RD, &stats->tx.t_tx_compl, 0, "Total Transmit Completions"); SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "total_ipv6_ext_hdr_tx_drop", CTLFLAG_RD, &stats->tx.t_ipv6_ext_hdr_tx_drop, 0, "Total Transmit IPV6 Drops"); - SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "pauseframes", - CTLFLAG_RD, &stats->u0.xe201.tx_pause_frames, 0, + SYSCTL_ADD_UQUAD(ctx, tx_stat_list, OID_AUTO, "pauseframes", + CTLFLAG_RD, &stats->u0.xe201.tx_pause_frames, "Pause Frames"); - SYSCTL_ADD_UINT(ctx, tx_stat_list, OID_AUTO, "controlframes", - CTLFLAG_RD, &stats->u0.xe201.tx_control_frames, 0, + SYSCTL_ADD_UQUAD(ctx, tx_stat_list, OID_AUTO, "controlframes", + CTLFLAG_RD, &stats->u0.xe201.tx_control_frames, "Tx Control Frames"); for (i = 0; i < sc->nwqs; i++) { sprintf(prefix, "queue%d",i); queue_stats_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tx_stats_node), OID_AUTO, prefix, CTLFLAG_RD, NULL, "Queue name"); queue_stats_list = SYSCTL_CHILDREN(queue_stats_node); SYSCTL_ADD_QUAD(ctx, queue_stats_list, OID_AUTO, "tx_pkts", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_pkts, "Transmit Packets"); SYSCTL_ADD_QUAD(ctx, queue_stats_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_bytes, "Transmit Bytes"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "tx_reqs", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_reqs, 0, 
"Transmit Requests"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "tx_stops", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_stops, 0, "Transmit Stops"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "tx_wrbs", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_wrbs, 0, "Transmit WRB's"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "tx_compl", CTLFLAG_RD, &sc->wq[i]->tx_stats.tx_compl, 0, "Transmit Completions"); SYSCTL_ADD_UINT(ctx, queue_stats_list, OID_AUTO, "ipv6_ext_hdr_tx_drop", CTLFLAG_RD, &sc->wq[i]->tx_stats.ipv6_ext_hdr_tx_drop, 0, "Transmit IPV6 Ext Header Drop"); } return; } void oce_refresh_queue_stats(POCE_SOFTC sc) { struct oce_drv_stats *adapter_stats; int i; adapter_stats = &sc->oce_stats_info; /* Caluculate total TX and TXstats from all queues */ bzero(&adapter_stats->rx, sizeof(struct oce_rx_stats)); for (i = 0; i < sc->nrqs; i++) { adapter_stats->rx.t_rx_pkts += sc->rq[i]->rx_stats.rx_pkts; adapter_stats->rx.t_rx_bytes += sc->rq[i]->rx_stats.rx_bytes; adapter_stats->rx.t_rx_frags += sc->rq[i]->rx_stats.rx_frags; adapter_stats->rx.t_rx_mcast_pkts += sc->rq[i]->rx_stats.rx_mcast_pkts; adapter_stats->rx.t_rx_ucast_pkts += sc->rq[i]->rx_stats.rx_ucast_pkts; adapter_stats->rx.t_rxcp_errs += sc-> rq[i]->rx_stats.rxcp_err; } bzero(&adapter_stats->tx, sizeof(struct oce_tx_stats)); for (i = 0; i < sc->nwqs; i++) { adapter_stats->tx.t_tx_reqs += sc->wq[i]->tx_stats.tx_reqs; adapter_stats->tx.t_tx_stops += sc->wq[i]->tx_stats.tx_stops; adapter_stats->tx.t_tx_wrbs += sc->wq[i]->tx_stats.tx_wrbs; adapter_stats->tx.t_tx_compl += sc->wq[i]->tx_stats.tx_compl; adapter_stats->tx.t_tx_bytes += sc->wq[i]->tx_stats.tx_bytes; adapter_stats->tx.t_tx_pkts += sc->wq[i]->tx_stats.tx_pkts; adapter_stats->tx.t_ipv6_ext_hdr_tx_drop += sc->wq[i]->tx_stats.ipv6_ext_hdr_tx_drop; } } static void copy_stats_to_sc_xe201(POCE_SOFTC sc) { struct oce_xe201_stats *adapter_stats; struct mbx_get_pport_stats *nic_mbx; struct pport_stats *port_stats; nic_mbx = OCE_DMAPTR(&sc->stats_mem, struct 
mbx_get_pport_stats); port_stats = &nic_mbx->params.rsp.pps; adapter_stats = &sc->oce_stats_info.u0.xe201; adapter_stats->tx_pkts = port_stats->tx_pkts; adapter_stats->tx_unicast_pkts = port_stats->tx_unicast_pkts; adapter_stats->tx_multicast_pkts = port_stats->tx_multicast_pkts; adapter_stats->tx_broadcast_pkts = port_stats->tx_broadcast_pkts; adapter_stats->tx_bytes = port_stats->tx_bytes; adapter_stats->tx_unicast_bytes = port_stats->tx_unicast_bytes; adapter_stats->tx_multicast_bytes = port_stats->tx_multicast_bytes; adapter_stats->tx_broadcast_bytes = port_stats->tx_broadcast_bytes; adapter_stats->tx_discards = port_stats->tx_discards; adapter_stats->tx_errors = port_stats->tx_errors; adapter_stats->tx_pause_frames = port_stats->tx_pause_frames; adapter_stats->tx_pause_on_frames = port_stats->tx_pause_on_frames; adapter_stats->tx_pause_off_frames = port_stats->tx_pause_off_frames; adapter_stats->tx_internal_mac_errors = port_stats->tx_internal_mac_errors; adapter_stats->tx_control_frames = port_stats->tx_control_frames; adapter_stats->tx_pkts_64_bytes = port_stats->tx_pkts_64_bytes; adapter_stats->tx_pkts_65_to_127_bytes = port_stats->tx_pkts_65_to_127_bytes; adapter_stats->tx_pkts_128_to_255_bytes = port_stats->tx_pkts_128_to_255_bytes; adapter_stats->tx_pkts_256_to_511_bytes = port_stats->tx_pkts_256_to_511_bytes; adapter_stats->tx_pkts_512_to_1023_bytes = port_stats->tx_pkts_512_to_1023_bytes; adapter_stats->tx_pkts_1024_to_1518_bytes = port_stats->tx_pkts_1024_to_1518_bytes; adapter_stats->tx_pkts_1519_to_2047_bytes = port_stats->tx_pkts_1519_to_2047_bytes; adapter_stats->tx_pkts_2048_to_4095_bytes = port_stats->tx_pkts_2048_to_4095_bytes; adapter_stats->tx_pkts_4096_to_8191_bytes = port_stats->tx_pkts_4096_to_8191_bytes; adapter_stats->tx_pkts_8192_to_9216_bytes = port_stats->tx_pkts_8192_to_9216_bytes; adapter_stats->tx_lso_pkts = port_stats->tx_lso_pkts; adapter_stats->rx_pkts = port_stats->rx_pkts; adapter_stats->rx_unicast_pkts = 
port_stats->rx_unicast_pkts; adapter_stats->rx_multicast_pkts = port_stats->rx_multicast_pkts; adapter_stats->rx_broadcast_pkts = port_stats->rx_broadcast_pkts; adapter_stats->rx_bytes = port_stats->rx_bytes; adapter_stats->rx_unicast_bytes = port_stats->rx_unicast_bytes; adapter_stats->rx_multicast_bytes = port_stats->rx_multicast_bytes; adapter_stats->rx_broadcast_bytes = port_stats->rx_broadcast_bytes; adapter_stats->rx_unknown_protos = port_stats->rx_unknown_protos; adapter_stats->rx_discards = port_stats->rx_discards; adapter_stats->rx_errors = port_stats->rx_errors; adapter_stats->rx_crc_errors = port_stats->rx_crc_errors; adapter_stats->rx_alignment_errors = port_stats->rx_alignment_errors; adapter_stats->rx_symbol_errors = port_stats->rx_symbol_errors; adapter_stats->rx_pause_frames = port_stats->rx_pause_frames; adapter_stats->rx_pause_on_frames = port_stats->rx_pause_on_frames; adapter_stats->rx_pause_off_frames = port_stats->rx_pause_off_frames; adapter_stats->rx_frames_too_long = port_stats->rx_frames_too_long; adapter_stats->rx_internal_mac_errors = port_stats->rx_internal_mac_errors; adapter_stats->rx_undersize_pkts = port_stats->rx_undersize_pkts; adapter_stats->rx_oversize_pkts = port_stats->rx_oversize_pkts; adapter_stats->rx_fragment_pkts = port_stats->rx_fragment_pkts; adapter_stats->rx_jabbers = port_stats->rx_jabbers; adapter_stats->rx_control_frames = port_stats->rx_control_frames; adapter_stats->rx_control_frames_unknown_opcode = port_stats->rx_control_frames_unknown_opcode; adapter_stats->rx_in_range_errors = port_stats->rx_in_range_errors; adapter_stats->rx_out_of_range_errors = port_stats->rx_out_of_range_errors; adapter_stats->rx_address_match_errors = port_stats->rx_address_match_errors; adapter_stats->rx_vlan_mismatch_errors = port_stats->rx_vlan_mismatch_errors; adapter_stats->rx_dropped_too_small = port_stats->rx_dropped_too_small; adapter_stats->rx_dropped_too_short = port_stats->rx_dropped_too_short; 
adapter_stats->rx_dropped_header_too_small = port_stats->rx_dropped_header_too_small; adapter_stats->rx_dropped_invalid_tcp_length = port_stats->rx_dropped_invalid_tcp_length; adapter_stats->rx_dropped_runt = port_stats->rx_dropped_runt; adapter_stats->rx_ip_checksum_errors = port_stats->rx_ip_checksum_errors; adapter_stats->rx_tcp_checksum_errors = port_stats->rx_tcp_checksum_errors; adapter_stats->rx_udp_checksum_errors = port_stats->rx_udp_checksum_errors; adapter_stats->rx_non_rss_pkts = port_stats->rx_non_rss_pkts; adapter_stats->rx_ipv4_pkts = port_stats->rx_ipv4_pkts; adapter_stats->rx_ipv6_pkts = port_stats->rx_ipv6_pkts; adapter_stats->rx_ipv4_bytes = port_stats->rx_ipv4_bytes; adapter_stats->rx_ipv6_bytes = port_stats->rx_ipv6_bytes; adapter_stats->rx_nic_pkts = port_stats->rx_nic_pkts; adapter_stats->rx_tcp_pkts = port_stats->rx_tcp_pkts; adapter_stats->rx_iscsi_pkts = port_stats->rx_iscsi_pkts; adapter_stats->rx_management_pkts = port_stats->rx_management_pkts; adapter_stats->rx_switched_unicast_pkts = port_stats->rx_switched_unicast_pkts; adapter_stats->rx_switched_multicast_pkts = port_stats->rx_switched_multicast_pkts; adapter_stats->rx_switched_broadcast_pkts = port_stats->rx_switched_broadcast_pkts; adapter_stats->num_forwards = port_stats->num_forwards; adapter_stats->rx_fifo_overflow = port_stats->rx_fifo_overflow; adapter_stats->rx_input_fifo_overflow = port_stats->rx_input_fifo_overflow; adapter_stats->rx_drops_too_many_frags = port_stats->rx_drops_too_many_frags; adapter_stats->rx_drops_invalid_queue = port_stats->rx_drops_invalid_queue; adapter_stats->rx_drops_mtu = port_stats->rx_drops_mtu; adapter_stats->rx_pkts_64_bytes = port_stats->rx_pkts_64_bytes; adapter_stats->rx_pkts_65_to_127_bytes = port_stats->rx_pkts_65_to_127_bytes; adapter_stats->rx_pkts_128_to_255_bytes = port_stats->rx_pkts_128_to_255_bytes; adapter_stats->rx_pkts_256_to_511_bytes = port_stats->rx_pkts_256_to_511_bytes; adapter_stats->rx_pkts_512_to_1023_bytes = 
port_stats->rx_pkts_512_to_1023_bytes; adapter_stats->rx_pkts_1024_to_1518_bytes = port_stats->rx_pkts_1024_to_1518_bytes; adapter_stats->rx_pkts_1519_to_2047_bytes = port_stats->rx_pkts_1519_to_2047_bytes; adapter_stats->rx_pkts_2048_to_4095_bytes = port_stats->rx_pkts_2048_to_4095_bytes; adapter_stats->rx_pkts_4096_to_8191_bytes = port_stats->rx_pkts_4096_to_8191_bytes; adapter_stats->rx_pkts_8192_to_9216_bytes = port_stats->rx_pkts_8192_to_9216_bytes; } static void copy_stats_to_sc_be2(POCE_SOFTC sc) { struct oce_be_stats *adapter_stats; struct oce_pmem_stats *pmem; struct oce_rxf_stats_v0 *rxf_stats; struct oce_port_rxf_stats_v0 *port_stats; struct mbx_get_nic_stats_v0 *nic_mbx; uint32_t port = sc->port_id; nic_mbx = OCE_DMAPTR(&sc->stats_mem, struct mbx_get_nic_stats_v0); pmem = &nic_mbx->params.rsp.stats.pmem; rxf_stats = &nic_mbx->params.rsp.stats.rxf; port_stats = &nic_mbx->params.rsp.stats.rxf.port[port]; adapter_stats = &sc->oce_stats_info.u0.be; /* Update stats */ adapter_stats->rx_pause_frames = port_stats->rx_pause_frames; adapter_stats->rx_crc_errors = port_stats->rx_crc_errors; adapter_stats->rx_control_frames = port_stats->rx_control_frames; adapter_stats->rx_in_range_errors = port_stats->rx_in_range_errors; adapter_stats->rx_frame_too_long = port_stats->rx_frame_too_long; adapter_stats->rx_dropped_runt = port_stats->rx_dropped_runt; adapter_stats->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs; adapter_stats->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs; adapter_stats->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs; adapter_stats->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop; adapter_stats->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length; adapter_stats->rx_dropped_too_small = port_stats->rx_dropped_too_small; adapter_stats->rx_dropped_too_short = port_stats->rx_dropped_too_short; adapter_stats->rx_out_range_errors = port_stats->rx_out_range_errors; adapter_stats->rx_dropped_header_too_small = 
port_stats->rx_dropped_header_too_small; adapter_stats->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow_drop; adapter_stats->rx_address_match_errors = port_stats->rx_address_match_errors; adapter_stats->rx_alignment_symbol_errors = port_stats->rx_alignment_symbol_errors; adapter_stats->tx_pauseframes = port_stats->tx_pauseframes; adapter_stats->tx_controlframes = port_stats->tx_controlframes; if (sc->if_id) adapter_stats->jabber_events = rxf_stats->port1_jabber_events; else adapter_stats->jabber_events = rxf_stats->port0_jabber_events; adapter_stats->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf; adapter_stats->rx_drops_no_txpb = rxf_stats->rx_drops_no_txpb; adapter_stats->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr; adapter_stats->rx_drops_invalid_ring = rxf_stats->rx_drops_invalid_ring; adapter_stats->forwarded_packets = rxf_stats->forwarded_packets; adapter_stats->rx_drops_mtu = rxf_stats->rx_drops_mtu; adapter_stats->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr; adapter_stats->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags; adapter_stats->eth_red_drops = pmem->eth_red_drops; } static void copy_stats_to_sc_be3(POCE_SOFTC sc) { struct oce_be_stats *adapter_stats; struct oce_pmem_stats *pmem; struct oce_rxf_stats_v1 *rxf_stats; struct oce_port_rxf_stats_v1 *port_stats; struct mbx_get_nic_stats *nic_mbx; uint32_t port = sc->port_id; nic_mbx = OCE_DMAPTR(&sc->stats_mem, struct mbx_get_nic_stats); pmem = &nic_mbx->params.rsp.stats.pmem; rxf_stats = &nic_mbx->params.rsp.stats.rxf; port_stats = &nic_mbx->params.rsp.stats.rxf.port[port]; adapter_stats = &sc->oce_stats_info.u0.be; /* Update stats */ adapter_stats->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop; adapter_stats->rx_priority_pause_frames = port_stats->rx_priority_pause_frames; adapter_stats->rx_pause_frames = port_stats->rx_pause_frames; adapter_stats->rx_crc_errors = port_stats->rx_crc_errors; adapter_stats->rx_control_frames = 
port_stats->rx_control_frames; adapter_stats->rx_in_range_errors = port_stats->rx_in_range_errors; adapter_stats->rx_frame_too_long = port_stats->rx_frame_too_long; adapter_stats->rx_dropped_runt = port_stats->rx_dropped_runt; adapter_stats->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs; adapter_stats->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs; adapter_stats->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs; adapter_stats->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length; adapter_stats->rx_dropped_too_small = port_stats->rx_dropped_too_small; adapter_stats->rx_dropped_too_short = port_stats->rx_dropped_too_short; adapter_stats->rx_out_range_errors = port_stats->rx_out_range_errors; adapter_stats->rx_dropped_header_too_small = port_stats->rx_dropped_header_too_small; adapter_stats->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow_drop; adapter_stats->rx_address_match_errors = port_stats->rx_address_match_errors; adapter_stats->rx_alignment_symbol_errors = port_stats->rx_alignment_symbol_errors; adapter_stats->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop; adapter_stats->tx_pauseframes = port_stats->tx_pauseframes; adapter_stats->tx_controlframes = port_stats->tx_controlframes; adapter_stats->jabber_events = port_stats->jabber_events; adapter_stats->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf; adapter_stats->rx_drops_no_txpb = rxf_stats->rx_drops_no_txpb; adapter_stats->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr; adapter_stats->rx_drops_invalid_ring = rxf_stats->rx_drops_invalid_ring; adapter_stats->forwarded_packets = rxf_stats->forwarded_packets; adapter_stats->rx_drops_mtu = rxf_stats->rx_drops_mtu; adapter_stats->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr; adapter_stats->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags; adapter_stats->eth_red_drops = pmem->eth_red_drops; } int oce_stats_init(POCE_SOFTC sc) { int rc = 0, sz; if (IS_BE(sc) || 
IS_SH(sc)) { if (sc->flags & OCE_FLAGS_BE2) sz = sizeof(struct mbx_get_nic_stats_v0); else sz = sizeof(struct mbx_get_nic_stats); } else sz = sizeof(struct mbx_get_pport_stats); rc = oce_dma_alloc(sc, sz, &sc->stats_mem, 0); return rc; } void oce_stats_free(POCE_SOFTC sc) { oce_dma_free(sc, &sc->stats_mem); } int oce_refresh_nic_stats(POCE_SOFTC sc) { int rc = 0, reset = 0; if (IS_BE(sc) || IS_SH(sc)) { if (sc->flags & OCE_FLAGS_BE2) { rc = oce_mbox_get_nic_stats_v0(sc, &sc->stats_mem); if (!rc) copy_stats_to_sc_be2(sc); } else { rc = oce_mbox_get_nic_stats(sc, &sc->stats_mem); if (!rc) copy_stats_to_sc_be3(sc); } } else { rc = oce_mbox_get_pport_stats(sc, &sc->stats_mem, reset); if (!rc) copy_stats_to_sc_xe201(sc); } return rc; } static int oce_sysctl_sfp_vpd_dump(SYSCTL_HANDLER_ARGS) { int result = 0, error; int rc = 0; POCE_SOFTC sc = (POCE_SOFTC) arg1; /* sysctl default handler */ error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if(result == -1) { return EINVAL; } bzero((char *)sfp_vpd_dump_buffer, TRANSCEIVER_DATA_SIZE); rc = oce_mbox_read_transrecv_data(sc, PAGE_NUM_A0); if(rc) return rc; rc = oce_mbox_read_transrecv_data(sc, PAGE_NUM_A2); if(rc) return rc; return rc; } Index: stable/9/sys/dev/puc =================================================================== --- stable/9/sys/dev/puc (revision 273911) +++ stable/9/sys/dev/puc (revision 273912) Property changes on: stable/9/sys/dev/puc ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/dev/puc:r263710,273377-273378,273423,273455 Index: stable/9/sys/dev/qlxgbe/ql_os.c =================================================================== --- stable/9/sys/dev/qlxgbe/ql_os.c (revision 273911) +++ stable/9/sys/dev/qlxgbe/ql_os.c (revision 273912) @@ -1,1705 +1,1705 @@ /* * Copyright (c) 2013-2014 Qlogic Corporation * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * and ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * File: ql_os.c * Author : David C Somayajulu, Qlogic Corporation, Aliso Viejo, CA 92656. 
*/ #include __FBSDID("$FreeBSD$"); #include "ql_os.h" #include "ql_hw.h" #include "ql_def.h" #include "ql_inline.h" #include "ql_ver.h" #include "ql_glbl.h" #include "ql_dbg.h" #include /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif #ifndef PCI_PRODUCT_QLOGIC_ISP8030 #define PCI_PRODUCT_QLOGIC_ISP8030 0x8030 #endif #define PCI_QLOGIC_ISP8030 \ ((PCI_PRODUCT_QLOGIC_ISP8030 << 16) | PCI_VENDOR_QLOGIC) /* * static functions */ static int qla_alloc_parent_dma_tag(qla_host_t *ha); static void qla_free_parent_dma_tag(qla_host_t *ha); static int qla_alloc_xmt_bufs(qla_host_t *ha); static void qla_free_xmt_bufs(qla_host_t *ha); static int qla_alloc_rcv_bufs(qla_host_t *ha); static void qla_free_rcv_bufs(qla_host_t *ha); static void qla_clear_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb); static void qla_init_ifnet(device_t dev, qla_host_t *ha); static int qla_sysctl_get_stats(SYSCTL_HANDLER_ARGS); static int qla_sysctl_get_link_status(SYSCTL_HANDLER_ARGS); static void qla_release(qla_host_t *ha); static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void qla_stop(qla_host_t *ha); static int qla_send(qla_host_t *ha, struct mbuf **m_headp); static void qla_tx_done(void *context, int pending); static void qla_get_peer(qla_host_t *ha); static void qla_error_recovery(void *context, int pending); /* * Hooks to the Operating Systems */ static int qla_pci_probe (device_t); static int qla_pci_attach (device_t); static int qla_pci_detach (device_t); static void qla_init(void *arg); static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qla_media_change(struct ifnet *ifp); static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static void qla_start(struct ifnet *ifp); static device_method_t qla_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qla_pci_probe), DEVMETHOD(device_attach, qla_pci_attach), 
DEVMETHOD(device_detach, qla_pci_detach), { 0, 0 } }; static driver_t qla_pci_driver = { "ql", qla_pci_methods, sizeof (qla_host_t), }; static devclass_t qla83xx_devclass; DRIVER_MODULE(qla83xx, pci, qla_pci_driver, qla83xx_devclass, 0, 0); MODULE_DEPEND(qla83xx, pci, 1, 1, 1); MODULE_DEPEND(qla83xx, ether, 1, 1, 1); MALLOC_DEFINE(M_QLA83XXBUF, "qla83xxbuf", "Buffers for qla83xx driver"); #define QL_STD_REPLENISH_THRES 0 #define QL_JUMBO_REPLENISH_THRES 32 static char dev_str[64]; /* * Name: qla_pci_probe * Function: Validate the PCI device to be a QLA80XX device */ static int qla_pci_probe(device_t dev) { switch ((pci_get_device(dev) << 16) | (pci_get_vendor(dev))) { case PCI_QLOGIC_ISP8030: snprintf(dev_str, sizeof(dev_str), "%s v%d.%d.%d", "Qlogic ISP 83xx PCI CNA Adapter-Ethernet Function", QLA_VERSION_MAJOR, QLA_VERSION_MINOR, QLA_VERSION_BUILD); device_set_desc(dev, dev_str); break; default: return (ENXIO); } if (bootverbose) printf("%s: %s\n ", __func__, dev_str); return (BUS_PROBE_DEFAULT); } static void qla_add_sysctls(qla_host_t *ha) { device_t dev = ha->pci_dev; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qla_sysctl_get_stats, "I", "Statistics"); SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fw_version", CTLFLAG_RD, - &ha->fw_ver_str, 0, "firmware version"); + ha->fw_ver_str, 0, "firmware version"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "link_status", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qla_sysctl_get_link_status, "I", "Link Status"); ha->dbg_level = 0; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &ha->dbg_level, ha->dbg_level, "Debug Level"); ha->std_replenish = QL_STD_REPLENISH_THRES; SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), 
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "std_replenish", CTLFLAG_RW, &ha->std_replenish, ha->std_replenish, "Threshold for Replenishing Standard Frames"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ipv4_lro", CTLFLAG_RD, &ha->ipv4_lro, "number of ipv4 lro completions"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ipv6_lro", CTLFLAG_RD, &ha->ipv6_lro, "number of ipv6 lro completions"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "tx_tso_frames", CTLFLAG_RD, &ha->tx_tso_frames, "number of Tx TSO Frames"); SYSCTL_ADD_QUAD(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "hw_vlan_tx_frames", CTLFLAG_RD, &ha->hw_vlan_tx_frames, "number of Tx VLAN Frames"); return; } static void qla_watchdog(void *arg) { qla_host_t *ha = arg; qla_hw_t *hw; struct ifnet *ifp; uint32_t i; qla_hw_tx_cntxt_t *hw_tx_cntxt; hw = &ha->hw; ifp = ha->ifp; if (ha->flags.qla_watchdog_exit) { ha->qla_watchdog_exited = 1; return; } ha->qla_watchdog_exited = 0; if (!ha->flags.qla_watchdog_pause) { if (ql_hw_check_health(ha) || ha->qla_initiate_recovery || (ha->msg_from_peer == QL_PEER_MSG_RESET)) { ha->qla_watchdog_paused = 1; ha->flags.qla_watchdog_pause = 1; ha->qla_initiate_recovery = 0; ha->err_inject = 0; taskqueue_enqueue(ha->err_tq, &ha->err_task); } else { for (i = 0; i < ha->hw.num_tx_rings; i++) { hw_tx_cntxt = &hw->tx_cntxt[i]; if (qla_le32_to_host(*(hw_tx_cntxt->tx_cons)) != hw_tx_cntxt->txr_comp) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); break; } } if ((ifp->if_snd.ifq_head != NULL) && QL_RUNNING(ifp)) { taskqueue_enqueue(ha->tx_tq, &ha->tx_task); } ha->qla_watchdog_paused = 0; } } else { ha->qla_watchdog_paused = 1; } ha->watchdog_ticks = ha->watchdog_ticks++ % 1000; callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); } /* * Name: 
qla_pci_attach * Function: attaches the device to the operating system */ static int qla_pci_attach(device_t dev) { qla_host_t *ha = NULL; uint32_t rsrc_len; int i; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qla_host_t)); if (pci_get_device(dev) != PCI_PRODUCT_QLOGIC_ISP8030) { device_printf(dev, "device is not ISP8030\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev); ha->pci_dev = dev; pci_enable_busmaster(dev); ha->reg_rid = PCIR_BAR(0); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map any ports\n"); goto qla_pci_attach_err; } rsrc_len = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->reg_rid); mtx_init(&ha->hw_lock, "qla83xx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); mtx_init(&ha->tx_lock, "qla83xx_tx_lock", MTX_NETWORK_LOCK, MTX_DEF); qla_add_sysctls(ha); ql_hw_add_sysctls(ha); ha->flags.lock_init = 1; ha->reg_rid1 = PCIR_BAR(2); ha->pci_reg1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid1, RF_ACTIVE); ha->msix_count = pci_msix_count(dev); if (ha->msix_count < (ha->hw.num_sds_rings + 1)) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto qla_pci_attach_err; } QL_DPRINT2(ha, (dev, "%s: ha %p pci_func 0x%x rsrc_count 0x%08x" " msix_count 0x%x pci_reg %p\n", __func__, ha, ha->pci_func, rsrc_len, ha->msix_count, ha->pci_reg)); ha->msix_count = ha->hw.num_sds_rings + 1; if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msi[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qla_pci_attach_err; } ha->mbx_irq_rid = 1; ha->mbx_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->mbx_irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->mbx_irq == NULL) { device_printf(dev, "could not allocate mbx interrupt\n"); goto qla_pci_attach_err; } if 
(bus_setup_intr(dev, ha->mbx_irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, ql_mbx_isr, ha, &ha->mbx_handle)) { device_printf(dev, "could not setup mbx interrupt\n"); goto qla_pci_attach_err; } for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->irq_vec[i].sds_idx = i; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq_rid = 2 + i; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); goto qla_pci_attach_err; } if (bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, ql_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle)) { device_printf(dev, "could not setup interrupt\n"); goto qla_pci_attach_err; } } printf("%s: mp__ncpus %d sds %d rds %d msi-x %d\n", __func__, mp_ncpus, ha->hw.num_sds_rings, ha->hw.num_rds_rings, ha->msix_count); /* initialize hardware */ if (ql_init_hw(ha)) { device_printf(dev, "%s: ql_init_hw failed\n", __func__); goto qla_pci_attach_err; } device_printf(dev, "%s: firmware[%d.%d.%d.%d]\n", __func__, ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); snprintf(ha->fw_ver_str, sizeof(ha->fw_ver_str), "%d.%d.%d.%d", ha->fw_ver_major, ha->fw_ver_minor, ha->fw_ver_sub, ha->fw_ver_build); ql_read_mac_addr(ha); /* allocate parent dma tag */ if (qla_alloc_parent_dma_tag(ha)) { device_printf(dev, "%s: qla_alloc_parent_dma_tag failed\n", __func__); goto qla_pci_attach_err; } /* alloc all dma buffers */ if (ql_alloc_dma(ha)) { device_printf(dev, "%s: ql_alloc_dma failed\n", __func__); goto qla_pci_attach_err; } qla_get_peer(ha); /* create the o.s ethernet interface */ qla_init_ifnet(dev, ha); ha->flags.qla_watchdog_active = 1; ha->flags.qla_watchdog_pause = 1; TASK_INIT(&ha->tx_task, 0, qla_tx_done, ha); ha->tx_tq = taskqueue_create_fast("qla_txq", M_NOWAIT, taskqueue_thread_enqueue, &ha->tx_tq); taskqueue_start_threads(&ha->tx_tq, 1, PI_NET, "%s txq", device_get_nameunit(ha->pci_dev)); 
callout_init(&ha->tx_callout, TRUE); ha->flags.qla_callout_init = 1; /* create ioctl device interface */ if (ql_make_cdev(ha)) { device_printf(dev, "%s: ql_make_cdev failed\n", __func__); goto qla_pci_attach_err; } callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); TASK_INIT(&ha->err_task, 0, qla_error_recovery, ha); ha->err_tq = taskqueue_create_fast("qla_errq", M_NOWAIT, taskqueue_thread_enqueue, &ha->err_tq); taskqueue_start_threads(&ha->err_tq, 1, PI_NET, "%s errq", device_get_nameunit(ha->pci_dev)); QL_DPRINT2(ha, (dev, "%s: exit 0\n", __func__)); return (0); qla_pci_attach_err: qla_release(ha); QL_DPRINT2(ha, (dev, "%s: exit ENXIO\n", __func__)); return (ENXIO); } /* * Name: qla_pci_detach * Function: Unhooks the device from the operating system */ static int qla_pci_detach(device_t dev) { qla_host_t *ha = NULL; struct ifnet *ifp; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } ifp = ha->ifp; (void)QLA_LOCK(ha, __func__, 0); qla_stop(ha); QLA_UNLOCK(ha, __func__); qla_release(ha); QL_DPRINT2(ha, (dev, "%s: exit\n", __func__)); return (0); } /* * SYSCTL Related Callbacks */ static int qla_sysctl_get_stats(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qla_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qla_host_t *)arg1; ql_get_stats(ha); } return (err); } static int qla_sysctl_get_link_status(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qla_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qla_host_t *)arg1; ql_hw_link_status(ha); } return (err); } /* * Name: qla_release * Function: Releases the resources allocated for the device */ static void qla_release(qla_host_t *ha) { device_t dev; int i; dev = ha->pci_dev; if (ha->err_tq) { taskqueue_drain(ha->err_tq, &ha->err_task); taskqueue_free(ha->err_tq); } 
if (ha->tx_tq) { taskqueue_drain(ha->tx_tq, &ha->tx_task); taskqueue_free(ha->tx_tq); } ql_del_cdev(ha); if (ha->flags.qla_watchdog_active) { ha->flags.qla_watchdog_exit = 1; while (ha->qla_watchdog_exited == 0) qla_mdelay(__func__, 1); } if (ha->flags.qla_callout_init) callout_stop(&ha->tx_callout); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); ql_free_dma(ha); qla_free_parent_dma_tag(ha); if (ha->mbx_handle) (void)bus_teardown_intr(dev, ha->mbx_irq, ha->mbx_handle); if (ha->mbx_irq) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->mbx_irq_rid, ha->mbx_irq); for (i = 0; i < ha->hw.num_sds_rings; i++) { if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); } if (ha->irq_vec[i].irq) { (void)bus_release_resource(dev, SYS_RES_IRQ, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } } if (ha->msix_count) pci_release_msi(dev); if (ha->flags.lock_init) { mtx_destroy(&ha->tx_lock); mtx_destroy(&ha->hw_lock); } if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); if (ha->pci_reg1) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid1, ha->pci_reg1); } /* * DMA Related Functions */ static void qla_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } *((bus_addr_t *)arg) = segs[0].ds_addr; return; } int ql_alloc_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; QL_DPRINT2(ha, (dev, "%s: enter\n", __func__)); ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { 
device_printf(dev, "%s: could not create dma tag\n", __func__); goto ql_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); device_printf(dev, "%s: bus_dmamem_alloc failed\n", __func__); goto ql_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qla_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto ql_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; ql_alloc_dmabuf_exit: QL_DPRINT2(ha, (dev, "%s: exit ret 0x%08x tag %p map %p b %p sz 0x%x\n", __func__, ret, (void *)dma_buf->dma_tag, (void *)dma_buf->dma_map, (void *)dma_buf->dma_b, dma_buf->size)); return ret; } void ql_free_dmabuf(qla_host_t *ha, qla_dma_t *dma_buf) { bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); } static int qla_alloc_parent_dma_tag(qla_host_t *ha) { int ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { device_printf(dev, "%s: could not create parent dma tag\n", __func__); return (-1); } ha->flags.parent_tag = 1; return (0); } static void qla_free_parent_dma_tag(qla_host_t *ha) { if (ha->flags.parent_tag) { bus_dma_tag_destroy(ha->parent_tag); ha->flags.parent_tag = 0; } } /* * Name: qla_init_ifnet * Function: Creates the Network Device Interface and Registers it with the O.S */ 
static void
qla_init_ifnet(device_t dev, qla_host_t *ha)
{
	struct ifnet *ifp;

	QL_DPRINT2(ha, (dev, "%s: enter\n", __func__));

	/* Allocation failure is treated as fatal. */
	ifp = ha->ifp = if_alloc(IFT_ETHER);

	if (ifp == NULL)
		panic("%s: cannot if_alloc()\n", device_get_nameunit(dev));

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

#if __FreeBSD_version >= 1000000
	if_initbaudrate(ifp, IF_Gbps(10));
	ifp->if_capabilities = IFCAP_LINKSTATE;
#else
	ifp->if_mtu = ETHERMTU;
	ifp->if_baudrate = (1 * 1000 * 1000 *1000);

#endif /* #if __FreeBSD_version >= 1000000 */

	/* Entry points the network stack uses to drive this interface. */
	ifp->if_init = qla_init;
	ifp->if_softc = ha;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = qla_ioctl;
	ifp->if_start = qla_start;

	IFQ_SET_MAXLEN(&ifp->if_snd, qla_get_ifq_snd_maxlen(ha));
	ifp->if_snd.ifq_drv_maxlen = qla_get_ifq_snd_maxlen(ha);
	IFQ_SET_READY(&ifp->if_snd);

	ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;

	ether_ifattach(ifp, qla_get_mac_addr(ha));

	/*
	 * OR the offload capabilities in rather than assigning them: a plain
	 * assignment here discarded the IFCAP_LINKSTATE bit set above when
	 * __FreeBSD_version >= 1000000.
	 */
	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU;

	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capabilities |= IFCAP_VLAN_HWTSO;

	/* Every advertised capability starts out enabled. */
	ifp->if_capenable = ifp->if_capabilities;

	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

	/* Media: the adapter's optics type at full duplex, plus autoselect. */
	ifmedia_init(&ha->media, IFM_IMASK, qla_media_change, qla_media_status);

	ifmedia_add(&ha->media, (IFM_ETHER | qla_get_optics(ha) | IFM_FDX), 0,
	    NULL);
	ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL);

	ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO));

	QL_DPRINT2(ha, (dev, "%s: exit\n", __func__));

	return;
}

static void
qla_init_locked(qla_host_t *ha)
{
	struct ifnet *ifp = ha->ifp;

	qla_stop(ha);

	if (qla_alloc_xmt_bufs(ha) != 0)
		return;

	if (qla_alloc_rcv_bufs(ha) != 0)
		return;

	bcopy(IF_LLADDR(ha->ifp), ha->hw.mac_addr, ETHER_ADDR_LEN);

	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;

	ha->flags.stop_rcv = 0;
	if (ql_init_hw_if(ha) == 0) {
		ifp = ha->ifp;
		ifp->if_drv_flags |= IFF_DRV_RUNNING;
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		ha->flags.qla_watchdog_pause = 0;
ha->hw_vlan_tx_frames = 0; ha->tx_tso_frames = 0; } return; } static void qla_init(void *arg) { qla_host_t *ha; ha = (qla_host_t *)arg; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); (void)QLA_LOCK(ha, __func__, 0); qla_init_locked(ha); QLA_UNLOCK(ha, __func__); QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); } static int qla_set_multi(qla_host_t *ha, uint32_t add_multi) { uint8_t mta[Q8_MAX_NUM_MULTICAST_ADDRS * Q8_MAC_ADDR_LEN]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; int ret = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == Q8_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * Q8_MAC_ADDR_LEN], Q8_MAC_ADDR_LEN); mcnt++; } if_maddr_runlock(ifp); if (QLA_LOCK(ha, __func__, 1) == 0) { ret = ql_hw_set_multi(ha, mta, mcnt, add_multi); QLA_UNLOCK(ha, __func__); } return (ret); } static int qla_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; switch (cmd) { case SIOCSIFADDR: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFADDR (0x%lx)\n", __func__, cmd)); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { (void)QLA_LOCK(ha, __func__, 0); qla_init_locked(ha); QLA_UNLOCK(ha, __func__); } QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", __func__, cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr))); arp_ifinit(ifp, ifa); } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFMTU (0x%lx)\n", __func__, cmd)); if (ifr->ifr_mtu > QLA_MAX_MTU) { ret = EINVAL; } else { (void) QLA_LOCK(ha, __func__, 0); ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if ((ifp->if_drv_flags & 
IFF_DRV_RUNNING)) { ret = ql_set_max_mtu(ha, ha->max_frame_size, ha->hw.rcv_cntxt_id); } if (ifp->if_mtu > ETHERMTU) ha->std_replenish = QL_JUMBO_REPLENISH_THRES; else ha->std_replenish = QL_STD_REPLENISH_THRES; QLA_UNLOCK(ha, __func__); if (ret) ret = EINVAL; } break; case SIOCSIFFLAGS: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFFLAGS (0x%lx)\n", __func__, cmd)); (void)QLA_LOCK(ha, __func__, 0); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { ret = ql_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { ret = ql_set_allmulti(ha); } } else { qla_init_locked(ha); ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; ret = ql_set_max_mtu(ha, ha->max_frame_size, ha->hw.rcv_cntxt_id); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) qla_stop(ha); ha->if_flags = ifp->if_flags; } QLA_UNLOCK(ha, __func__); break; case SIOCADDMULTI: QL_DPRINT4(ha, (ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCADDMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (qla_set_multi(ha, 1)) ret = EINVAL; } break; case SIOCDELMULTI: QL_DPRINT4(ha, (ha->pci_dev, "%s: %s (0x%lx)\n", __func__, "SIOCDELMULTI", cmd)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (qla_set_multi(ha, 0)) ret = EINVAL; } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", __func__, cmd)); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4(ha, (ha->pci_dev, "%s: SIOCSIFCAP (0x%lx)\n", __func__, cmd)); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) qla_init(ha); VLAN_CAPABILITIES(ifp); break; } 
default: QL_DPRINT4(ha, (ha->pci_dev, "%s: default (0x%lx)\n", __func__, cmd)); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qla_media_change(struct ifnet *ifp) { qla_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static void qla_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qla_host_t *ha; ha = (qla_host_t *)ifp->if_softc; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; ql_update_link_state(ha); if (ha->hw.link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qla_get_optics(ha)); } QL_DPRINT2(ha, (ha->pci_dev, "%s: exit (%s)\n", __func__,\ (ha->hw.link_up ? "link_up" : "link_down"))); return; } static void qla_start(struct ifnet *ifp) { struct mbuf *m_head; qla_host_t *ha = (qla_host_t *)ifp->if_softc; QL_DPRINT8(ha, (ha->pci_dev, "%s: enter\n", __func__)); if (!mtx_trylock(&ha->tx_lock)) { QL_DPRINT8(ha, (ha->pci_dev, "%s: mtx_trylock(&ha->tx_lock) failed\n", __func__)); return; } if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { QL_DPRINT8(ha, (ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); QLA_TX_UNLOCK(ha); return; } if (!ha->watchdog_ticks) ql_update_link_state(ha); if (!ha->hw.link_up) { QL_DPRINT8(ha, (ha->pci_dev, "%s: link down\n", __func__)); QLA_TX_UNLOCK(ha); return; } while (ifp->if_snd.ifq_head != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) { QL_DPRINT8(ha, (ha->pci_dev, "%s: m_head == NULL\n", __func__)); break; } if (qla_send(ha, &m_head)) { if (m_head == NULL) break; QL_DPRINT8(ha, (ha->pci_dev, "%s: PREPEND\n", __func__)); ifp->if_drv_flags |= IFF_DRV_OACTIVE; IF_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to 
the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } QLA_TX_UNLOCK(ha); QL_DPRINT8(ha, (ha->pci_dev, "%s: exit\n", __func__)); return; } static int qla_send(qla_host_t *ha, struct mbuf **m_headp) { bus_dma_segment_t segs[QLA_MAX_SEGMENTS]; bus_dmamap_t map; int nsegs; int ret = -1; uint32_t tx_idx; struct mbuf *m_head = *m_headp; uint32_t txr_idx = ha->txr_idx; QL_DPRINT8(ha, (ha->pci_dev, "%s: enter\n", __func__)); if (m_head->m_flags & M_FLOWID) txr_idx = m_head->m_pkthdr.flowid & (ha->hw.num_tx_rings - 1); tx_idx = ha->hw.tx_cntxt[txr_idx].txr_next; map = ha->tx_ring[txr_idx].tx_buf[tx_idx].map; ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ret == EFBIG) { struct mbuf *m; QL_DPRINT8(ha, (ha->pci_dev, "%s: EFBIG [%d]\n", __func__, m_head->m_pkthdr.len)); m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { ha->err_tx_defrag++; m_freem(m_head); *m_headp = NULL; device_printf(ha->pci_dev, "%s: m_defrag() = NULL [%d]\n", __func__, ret); return (ENOBUFS); } m_head = m; *m_headp = m_head; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed0[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } } else if (ret) { ha->err_tx_dmamap_load++; device_printf(ha->pci_dev, "%s: bus_dmamap_load_mbuf_sg failed1[%d, %d]\n", __func__, ret, m_head->m_pkthdr.len); if (ret != ENOMEM) { m_freem(m_head); *m_headp = NULL; } return (ret); } QL_ASSERT(ha, (nsegs != 0), ("qla_send: empty packet")); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); if (!(ret = ql_hw_send(ha, segs, nsegs, tx_idx, m_head, txr_idx))) { ha->tx_ring[txr_idx].count++; ha->tx_ring[txr_idx].tx_buf[tx_idx].m_head = m_head; } else { if (ret == EINVAL) { if (m_head) m_freem(m_head); *m_headp = NULL; } } QL_DPRINT8(ha, (ha->pci_dev, "%s: exit\n", __func__)); return (ret); } static 
void qla_stop(qla_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; dev = ha->pci_dev; ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); ha->flags.qla_watchdog_pause = 1; while (!ha->qla_watchdog_paused) qla_mdelay(__func__, 1); ha->flags.stop_rcv = 1; ql_hw_stop_rcv(ha); ql_del_hw_if(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); return; } /* * Buffer Management Functions for Transmit and Receive Rings */ static int qla_alloc_xmt_bufs(qla_host_t *ha) { int ret = 0; uint32_t i, j; qla_tx_buf_t *txb; if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ QLA_MAX_TSO_FRAME_SIZE, /* maxsize */ QLA_MAX_SEGMENTS, /* nsegments */ PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->tx_tag)) { device_printf(ha->pci_dev, "%s: tx_tag alloc failed\n", __func__); return (ENOMEM); } for (i = 0; i < ha->hw.num_tx_rings; i++) { bzero((void *)ha->tx_ring[i].tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); } for (j = 0; j < ha->hw.num_tx_rings; j++) { for (i = 0; i < NUM_TX_DESCRIPTORS; i++) { txb = &ha->tx_ring[j].tx_buf[i]; if ((ret = bus_dmamap_create(ha->tx_tag, BUS_DMA_NOWAIT, &txb->map))) { ha->err_tx_dmamap_create++; device_printf(ha->pci_dev, "%s: bus_dmamap_create failed[%d]\n", __func__, ret); qla_free_xmt_bufs(ha); return (ret); } } } return 0; } /* * Release mbuf after it sent on the wire */ static void qla_clear_tx_buf(qla_host_t *ha, qla_tx_buf_t *txb) { QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); if (txb->m_head && txb->map) { bus_dmamap_unload(ha->tx_tag, txb->map); m_freem(txb->m_head); txb->m_head = NULL; } if (txb->map) bus_dmamap_destroy(ha->tx_tag, txb->map); QL_DPRINT2(ha, (ha->pci_dev, "%s: exit\n", __func__)); } static void qla_free_xmt_bufs(qla_host_t *ha) { int i, j; for (j = 0; j < ha->hw.num_tx_rings; j++) { for (i = 0; i < NUM_TX_DESCRIPTORS; 
i++) qla_clear_tx_buf(ha, &ha->tx_ring[j].tx_buf[i]); } if (ha->tx_tag != NULL) { bus_dma_tag_destroy(ha->tx_tag); ha->tx_tag = NULL; } for (i = 0; i < ha->hw.num_tx_rings; i++) { bzero((void *)ha->tx_ring[i].tx_buf, (sizeof(qla_tx_buf_t) * NUM_TX_DESCRIPTORS)); } return; } static int qla_alloc_rcv_std(qla_host_t *ha) { int i, j, k, r, ret = 0; qla_rx_buf_t *rxb; qla_rx_ring_t *rx_ring; for (r = 0; r < ha->hw.num_rds_rings; r++) { rx_ring = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rx_ring->rx_buf[i]; ret = bus_dmamap_create(ha->rx_tag, BUS_DMA_NOWAIT, &rxb->map); if (ret) { device_printf(ha->pci_dev, "%s: dmamap[%d, %d] failed\n", __func__, r, i); for (k = 0; k < r; k++) { for (j = 0; j < NUM_RX_DESCRIPTORS; j++) { rxb = &ha->rx_ring[k].rx_buf[j]; bus_dmamap_destroy(ha->rx_tag, rxb->map); } } for (j = 0; j < i; j++) { bus_dmamap_destroy(ha->rx_tag, rx_ring->rx_buf[j].map); } goto qla_alloc_rcv_std_err; } } } qla_init_hw_rcv_descriptors(ha); for (r = 0; r < ha->hw.num_rds_rings; r++) { rx_ring = &ha->rx_ring[r]; for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &rx_ring->rx_buf[i]; rxb->handle = i; if (!(ret = ql_get_mbuf(ha, rxb, NULL))) { /* * set the physical address in the * corresponding descriptor entry in the * receive ring/queue for the hba */ qla_set_hw_rcv_desc(ha, r, i, rxb->handle, rxb->paddr, (rxb->m_head)->m_pkthdr.len); } else { device_printf(ha->pci_dev, "%s: ql_get_mbuf [%d, %d] failed\n", __func__, r, i); bus_dmamap_destroy(ha->rx_tag, rxb->map); goto qla_alloc_rcv_std_err; } } } return 0; qla_alloc_rcv_std_err: return (-1); } static void qla_free_rcv_std(qla_host_t *ha) { int i, r; qla_rx_buf_t *rxb; for (r = 0; r < ha->hw.num_rds_rings; r++) { for (i = 0; i < NUM_RX_DESCRIPTORS; i++) { rxb = &ha->rx_ring[r].rx_buf[i]; if (rxb->m_head != NULL) { bus_dmamap_unload(ha->rx_tag, rxb->map); bus_dmamap_destroy(ha->rx_tag, rxb->map); m_freem(rxb->m_head); rxb->m_head = NULL; } } } return; } static int 
qla_alloc_rcv_bufs(qla_host_t *ha) { int i, ret = 0; if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->rx_tag)) { device_printf(ha->pci_dev, "%s: rx_tag alloc failed\n", __func__); return (ENOMEM); } bzero((void *)ha->rx_ring, (sizeof(qla_rx_ring_t) * MAX_RDS_RINGS)); for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; } ret = qla_alloc_rcv_std(ha); return (ret); } static void qla_free_rcv_bufs(qla_host_t *ha) { int i; qla_free_rcv_std(ha); if (ha->rx_tag != NULL) { bus_dma_tag_destroy(ha->rx_tag); ha->rx_tag = NULL; } bzero((void *)ha->rx_ring, (sizeof(qla_rx_ring_t) * MAX_RDS_RINGS)); for (i = 0; i < ha->hw.num_sds_rings; i++) { ha->hw.sds[i].sdsr_next = 0; ha->hw.sds[i].rxb_free = NULL; ha->hw.sds[i].rx_free = 0; } return; } int ql_get_mbuf(qla_host_t *ha, qla_rx_buf_t *rxb, struct mbuf *nmp) { register struct mbuf *mp = nmp; struct ifnet *ifp; int ret = 0; uint32_t offset; bus_dma_segment_t segs[1]; int nsegs; QL_DPRINT2(ha, (ha->pci_dev, "%s: enter\n", __func__)); ifp = ha->ifp; if (mp == NULL) { mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mp == NULL) { ha->err_m_getcl++; ret = ENOBUFS; device_printf(ha->pci_dev, "%s: m_getcl failed\n", __func__); goto exit_ql_get_mbuf; } mp->m_len = mp->m_pkthdr.len = MCLBYTES; } else { mp->m_len = mp->m_pkthdr.len = MCLBYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } offset = (uint32_t)((unsigned long long)mp->m_data & 0x7ULL); if (offset) { offset = 8 - offset; m_adj(mp, offset); } /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. 
*/ ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, rxb->map, mp, segs, &nsegs, BUS_DMA_NOWAIT); rxb->paddr = segs[0].ds_addr; if (ret || !rxb->paddr || (nsegs != 1)) { m_free(mp); rxb->m_head = NULL; device_printf(ha->pci_dev, "%s: bus_dmamap_load failed[%d, 0x%016llx, %d]\n", __func__, ret, (long long unsigned int)rxb->paddr, nsegs); ret = -1; goto exit_ql_get_mbuf; } rxb->m_head = mp; bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_PREREAD); exit_ql_get_mbuf: QL_DPRINT2(ha, (ha->pci_dev, "%s: exit ret = 0x%08x\n", __func__, ret)); return (ret); } static void qla_tx_done(void *context, int pending) { qla_host_t *ha = context; struct ifnet *ifp; ifp = ha->ifp; if (!ifp) return; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { QL_DPRINT8(ha, (ha->pci_dev, "%s: !IFF_DRV_RUNNING\n", __func__)); return; } ql_hw_tx_done(ha); qla_start(ha->ifp); } static void qla_get_peer(qla_host_t *ha) { device_t *peers; int count, i, slot; int my_slot = pci_get_slot(ha->pci_dev); if (device_get_children(device_get_parent(ha->pci_dev), &peers, &count)) return; for (i = 0; i < count; i++) { slot = pci_get_slot(peers[i]); if ((slot >= 0) && (slot == my_slot) && (pci_get_device(peers[i]) == pci_get_device(ha->pci_dev))) { if (ha->pci_dev != peers[i]) ha->peer_dev = peers[i]; } } } static void qla_send_msg_to_peer(qla_host_t *ha, uint32_t msg_to_peer) { qla_host_t *ha_peer; if (ha->peer_dev) { if ((ha_peer = device_get_softc(ha->peer_dev)) != NULL) { ha_peer->msg_from_peer = msg_to_peer; } } } static void qla_error_recovery(void *context, int pending) { qla_host_t *ha = context; uint32_t msecs_100 = 100; struct ifnet *ifp = ha->ifp; (void)QLA_LOCK(ha, __func__, 0); ha->flags.stop_rcv = 1; ql_hw_stop_rcv(ha); ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); QLA_UNLOCK(ha, __func__); if ((ha->pci_func & 0x1) == 0) { if (!ha->msg_from_peer) { qla_send_msg_to_peer(ha, QL_PEER_MSG_RESET); while ((ha->msg_from_peer != QL_PEER_MSG_ACK) && msecs_100--) qla_mdelay(__func__, 100); } 
ha->msg_from_peer = 0; ql_minidump(ha); (void) ql_init_hw(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); qla_send_msg_to_peer(ha, QL_PEER_MSG_ACK); } else { if (ha->msg_from_peer == QL_PEER_MSG_RESET) { ha->msg_from_peer = 0; qla_send_msg_to_peer(ha, QL_PEER_MSG_ACK); } else { qla_send_msg_to_peer(ha, QL_PEER_MSG_RESET); } while ((ha->msg_from_peer != QL_PEER_MSG_ACK) && msecs_100--) qla_mdelay(__func__, 100); ha->msg_from_peer = 0; (void) ql_init_hw(ha); qla_free_xmt_bufs(ha); qla_free_rcv_bufs(ha); } (void)QLA_LOCK(ha, __func__, 0); if (qla_alloc_xmt_bufs(ha) != 0) { QLA_UNLOCK(ha, __func__); return; } if (qla_alloc_rcv_bufs(ha) != 0) { QLA_UNLOCK(ha, __func__); return; } ha->flags.stop_rcv = 0; if (ql_init_hw_if(ha) == 0) { ifp = ha->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ha->flags.qla_watchdog_pause = 0; } QLA_UNLOCK(ha, __func__); } Index: stable/9/sys/dev/rt/if_rt.c =================================================================== --- stable/9/sys/dev/rt/if_rt.c (revision 273911) +++ stable/9/sys/dev/rt/if_rt.c (revision 273912) @@ -1,2613 +1,2616 @@ /*- * Copyright (c) 2011, Aleksandr Rybalko * based on hard work * by Alexander Egorenkov * and by Damien Bergamini * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "if_rtvar.h" #include "if_rtreg.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IF_RT_PHY_SUPPORT #include "miibus_if.h" #endif /* * Defines and macros */ #define RT_MAX_AGG_SIZE 3840 #define RT_TX_DATA_SEG0_SIZE MJUMPAGESIZE #define RT_MS(_v, _f) (((_v) & _f) >> _f##_S) #define RT_SM(_v, _f) (((_v) << _f##_S) & _f) #define RT_TX_WATCHDOG_TIMEOUT 5 /* * Static function prototypes */ static int rt_probe(device_t dev); static int rt_attach(device_t dev); static int rt_detach(device_t dev); static int rt_shutdown(device_t dev); static int rt_suspend(device_t dev); static int rt_resume(device_t dev); static void rt_init_locked(void *priv); static void rt_init(void *priv); static void rt_stop_locked(void *priv); static void rt_stop(void *priv); static void rt_start(struct ifnet *ifp); static int rt_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static void rt_periodic(void *arg); static void rt_tx_watchdog(void *arg); static void rt_intr(void *arg); static void rt_tx_coherent_intr(struct rt_softc *sc); static void 
rt_rx_coherent_intr(struct rt_softc *sc); static void rt_rx_delay_intr(struct rt_softc *sc); static void rt_tx_delay_intr(struct rt_softc *sc); static void rt_rx_intr(struct rt_softc *sc); static void rt_tx_intr(struct rt_softc *sc, int qid); static void rt_rx_done_task(void *context, int pending); static void rt_tx_done_task(void *context, int pending); static void rt_periodic_task(void *context, int pending); static int rt_rx_eof(struct rt_softc *sc, int limit); static void rt_tx_eof(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_update_stats(struct rt_softc *sc); static void rt_watchdog(struct rt_softc *sc); static void rt_update_raw_counters(struct rt_softc *sc); static void rt_intr_enable(struct rt_softc *sc, uint32_t intr_mask); static void rt_intr_disable(struct rt_softc *sc, uint32_t intr_mask); static int rt_txrx_enable(struct rt_softc *sc); static int rt_alloc_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring); static void rt_reset_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring); static void rt_free_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring); static int rt_alloc_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring, int qid); static void rt_reset_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_free_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void rt_sysctl_attach(struct rt_softc *sc); #ifdef IF_RT_PHY_SUPPORT void rt_miibus_statchg(device_t); static int rt_miibus_readreg(device_t, int, int); static int rt_miibus_writereg(device_t, int, int, int); #endif static int rt_ifmedia_upd(struct ifnet *); static void rt_ifmedia_sts(struct ifnet *, struct ifmediareq *); static SYSCTL_NODE(_hw, OID_AUTO, rt, CTLFLAG_RD, 0, "RT driver parameters"); #ifdef IF_RT_DEBUG static int rt_debug = 0; SYSCTL_INT(_hw_rt, OID_AUTO, debug, CTLFLAG_RW, &rt_debug, 0, "RT debug 
level"); TUNABLE_INT("hw.rt.debug", &rt_debug); #endif static int rt_probe(device_t dev) { device_set_desc(dev, "Ralink RT305XF onChip Ethernet MAC"); return (0); } /* * macaddr_atoi - translate string MAC address to uint8_t array */ static int macaddr_atoi(const char *str, uint8_t *mac) { int count, i; unsigned int amac[ETHER_ADDR_LEN]; /* Aligned version */ count = sscanf(str, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", &amac[0], &amac[1], &amac[2], &amac[3], &amac[4], &amac[5]); if (count < ETHER_ADDR_LEN) { memset(mac, 0, ETHER_ADDR_LEN); return (1); } /* Copy aligned to result */ for (i = 0; i < ETHER_ADDR_LEN; i ++) mac[i] = (amac[i] & 0xff); return (0); } #ifdef USE_GENERATED_MAC_ADDRESS static char * kernenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } /* * generate_mac(uin8_t *mac) * This is MAC address generator for cases when real device MAC address * unknown or not yet accessible. * Use 'b','s','d' signature and 3 octets from CRC32 on kenv. * MAC = 'b', 's', 'd', CRC[3]^CRC[2], CRC[1], CRC[0] * * Output - MAC address, that do not change between reboots, if hints or * bootloader info unchange. */ static void generate_mac(uint8_t *mac) { unsigned char *cp; int i = 0; uint32_t crc = 0xffffffff; /* Generate CRC32 on kenv */ if (dynamic_kenv) { for (cp = kenvp[0]; cp != NULL; cp = kenvp[++i]) { crc = calculate_crc32c(crc, cp, strlen(cp) + 1); } } else { for (cp = kern_envp; cp != NULL; cp = kernenv_next(cp)) { crc = calculate_crc32c(crc, cp, strlen(cp) + 1); } } crc = ~crc; mac[0] = 'b'; mac[1] = 's'; mac[2] = 'd'; mac[3] = (crc >> 24) ^ ((crc >> 16) & 0xff); mac[4] = (crc >> 8) & 0xff; mac[5] = crc & 0xff; } #endif /* * ether_request_mac - try to find usable MAC address. 
*/ static int ether_request_mac(device_t dev, uint8_t *mac) { char *var; /* * "ethaddr" is passed via envp on RedBoot platforms * "kmac" is passed via argv on RouterBOOT platforms */ #if defined(__U_BOOT__) || defined(__REDBOOT__) || defined(__ROUTERBOOT__) if ((var = getenv("ethaddr")) != NULL || (var = getenv("kmac")) != NULL ) { if(!macaddr_atoi(var, mac)) { printf("%s: use %s macaddr from KENV\n", device_get_nameunit(dev), var); freeenv(var); return (0); } freeenv(var); } #endif /* * Try from hints * hint.[dev].[unit].macaddr */ if (!resource_string_value(device_get_name(dev), device_get_unit(dev), "macaddr", (const char **)&var)) { if(!macaddr_atoi(var, mac)) { printf("%s: use %s macaddr from hints\n", device_get_nameunit(dev), var); return (0); } } #ifdef USE_GENERATED_MAC_ADDRESS generate_mac(mac); device_printf(dev, "use generated %02x:%02x:%02x:%02x:%02x:%02x " "macaddr\n", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); #else /* Hardcoded */ mac[0] = 0x00; mac[1] = 0x18; mac[2] = 0xe7; mac[3] = 0xd5; mac[4] = 0x83; mac[5] = 0x90; device_printf(dev, "use hardcoded 00:18:e7:d5:83:90 macaddr\n"); #endif return (0); } static int rt_attach(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; int error, i; sc = device_get_softc(dev); sc->dev = dev; mtx_init(&sc->lock, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE); sc->mem_rid = 0; sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid, RF_ACTIVE); if (sc->mem == NULL) { device_printf(dev, "could not allocate memory resource\n"); error = ENXIO; goto fail; } sc->bst = rman_get_bustag(sc->mem); sc->bsh = rman_get_bushandle(sc->mem); sc->irq_rid = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irq_rid, RF_ACTIVE); if (sc->irq == NULL) { device_printf(dev, "could not allocate interrupt resource\n"); error = ENXIO; goto fail; } #ifdef IF_RT_DEBUG sc->debug = rt_debug; SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, 
"debug", CTLFLAG_RW, &sc->debug, 0, "rt debug level"); #endif device_printf(dev, "RT305XF Ethernet MAC (rev 0x%08x)\n", sc->mac_rev); /* Reset hardware */ RT_WRITE(sc, GE_PORT_BASE + FE_RST_GLO, PSE_RESET); RT_WRITE(sc, GDMA1_BASE + GDMA_FWD_CFG, ( GDM_ICS_EN | /* Enable IP Csum */ GDM_TCS_EN | /* Enable TCP Csum */ GDM_UCS_EN | /* Enable UDP Csum */ GDM_STRPCRC | /* Strip CRC from packet */ GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* Forward UCast to CPU */ GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* Forward BCast to CPU */ GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* Forward MCast to CPU */ GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT /* Forward Other to CPU */ )); /* allocate Tx and Rx rings */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { error = rt_alloc_tx_ring(sc, &sc->tx_ring[i], i); if (error != 0) { device_printf(dev, "could not allocate Tx ring #%d\n", i); goto fail; } } sc->tx_ring_mgtqid = 5; error = rt_alloc_rx_ring(sc, &sc->rx_ring); if (error != 0) { device_printf(dev, "could not allocate Rx ring\n"); goto fail; } callout_init(&sc->periodic_ch, 0); callout_init_mtx(&sc->tx_watchdog_ch, &sc->lock, 0); ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "could not if_alloc()\n"); error = ENOMEM; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = rt_init; ifp->if_ioctl = rt_ioctl; ifp->if_start = rt_start; ifp->if_mtu = ETHERMTU; #define RT_TX_QLEN 256 IFQ_SET_MAXLEN(&ifp->if_snd, RT_TX_QLEN); ifp->if_snd.ifq_drv_maxlen = RT_TX_QLEN; IFQ_SET_READY(&ifp->if_snd); #ifdef IF_RT_PHY_SUPPORT error = mii_attach(dev, &sc->rt_miibus, ifp, rt_ifmedia_upd, rt_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); error = ENXIO; goto fail; } #else ifmedia_init(&sc->rt_ifmedia, 0, rt_ifmedia_upd, rt_ifmedia_sts); ifmedia_add(&sc->rt_ifmedia, IFM_ETHER | 
IFM_100_TX | IFM_FDX, 0, NULL); ifmedia_set(&sc->rt_ifmedia, IFM_ETHER | IFM_100_TX | IFM_FDX); #endif /* IF_RT_PHY_SUPPORT */ ether_request_mac(dev, sc->mac_addr); ether_ifattach(ifp, sc->mac_addr); /* * Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_RXCSUM|IFCAP_TXCSUM; ifp->if_capenable |= IFCAP_RXCSUM|IFCAP_TXCSUM; /* init task queue */ TASK_INIT(&sc->rx_done_task, 0, rt_rx_done_task, sc); TASK_INIT(&sc->tx_done_task, 0, rt_tx_done_task, sc); TASK_INIT(&sc->periodic_task, 0, rt_periodic_task, sc); sc->rx_process_limit = 100; sc->taskqueue = taskqueue_create("rt_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->taskqueue); taskqueue_start_threads(&sc->taskqueue, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); rt_sysctl_attach(sc); /* set up interrupt */ error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, rt_intr, sc, &sc->irqh); if (error != 0) { printf("%s: could not set up interrupt\n", device_get_nameunit(dev)); goto fail; } #ifdef IF_RT_DEBUG device_printf(dev, "debug var at %#08x\n", (u_int)&(sc->debug)); #endif return (0); fail: /* free Tx and Rx rings */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_free_tx_ring(sc, &sc->tx_ring[i]); rt_free_rx_ring(sc, &sc->rx_ring); mtx_destroy(&sc->lock); if (sc->mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, sc->mem); if (sc->irq != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq); return (error); } /* * Set media options. 
*/ static int rt_ifmedia_upd(struct ifnet *ifp) { struct rt_softc *sc; #ifdef IF_RT_PHY_SUPPORT struct mii_data *mii; int error = 0; sc = ifp->if_softc; RT_SOFTC_LOCK(sc); mii = device_get_softc(sc->rt_miibus); if (mii->mii_instance) { struct mii_softc *miisc; for (miisc = LIST_FIRST(&mii->mii_phys); miisc != NULL; miisc = LIST_NEXT(miisc, mii_list)) mii_phy_reset(miisc); } if (mii) error = mii_mediachg(mii); RT_SOFTC_UNLOCK(sc); return (error); #else /* !IF_RT_PHY_SUPPORT */ struct ifmedia *ifm; struct ifmedia_entry *ife; sc = ifp->if_softc; ifm = &sc->rt_ifmedia; ife = ifm->ifm_cur; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (IFM_SUBTYPE(ife->ifm_media) == IFM_AUTO) { device_printf(sc->dev, "AUTO is not supported for multiphy MAC"); return (EINVAL); } /* * Ignore everything */ return (0); #endif /* IF_RT_PHY_SUPPORT */ } /* * Report current media status. */ static void rt_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { #ifdef IF_RT_PHY_SUPPORT struct rt_softc *sc; struct mii_data *mii; sc = ifp->if_softc; RT_SOFTC_LOCK(sc); mii = device_get_softc(sc->rt_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX; ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; RT_SOFTC_UNLOCK(sc); #else /* !IF_RT_PHY_SUPPORT */ ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX; #endif /* IF_RT_PHY_SUPPORT */ } static int rt_detach(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; int i; sc = device_get_softc(dev); ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "detaching\n"); RT_SOFTC_LOCK(sc); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->periodic_ch); callout_stop(&sc->tx_watchdog_ch); taskqueue_drain(sc->taskqueue, &sc->rx_done_task); taskqueue_drain(sc->taskqueue, &sc->tx_done_task); taskqueue_drain(sc->taskqueue, &sc->periodic_task); /* free Tx and Rx rings */ for (i = 
0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_free_tx_ring(sc, &sc->tx_ring[i]); rt_free_rx_ring(sc, &sc->rx_ring); RT_SOFTC_UNLOCK(sc); #ifdef IF_RT_PHY_SUPPORT if (sc->rt_miibus != NULL) device_delete_child(dev, sc->rt_miibus); #endif ether_ifdetach(ifp); if_free(ifp); taskqueue_free(sc->taskqueue); mtx_destroy(&sc->lock); bus_generic_detach(dev); bus_teardown_intr(dev, sc->irq, sc->irqh); bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq); bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, sc->mem); return (0); } static int rt_shutdown(device_t dev) { struct rt_softc *sc; sc = device_get_softc(dev); RT_DPRINTF(sc, RT_DEBUG_ANY, "shutting down\n"); rt_stop(sc); return (0); } static int rt_suspend(device_t dev) { struct rt_softc *sc; sc = device_get_softc(dev); RT_DPRINTF(sc, RT_DEBUG_ANY, "suspending\n"); rt_stop(sc); return (0); } static int rt_resume(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "resuming\n"); if (ifp->if_flags & IFF_UP) rt_init(sc); return (0); } /* * rt_init_locked - Run initialization process having locked mtx. 
*/
/*
 * priv is the driver softc.  The caller must hold the softc lock (asserted
 * below).  The sequence is: reset the frame engine, program forwarding,
 * stop and reset the DMA engine, reprogram the ring registers, enable DMA
 * and interrupts, and finally mark the interface running and arm the
 * periodic callout.  Any failure falls through to rt_stop_locked().
 */
static void
rt_init_locked(void *priv)
{
	struct rt_softc *sc;
	struct ifnet *ifp;
#ifdef IF_RT_PHY_SUPPORT
	struct mii_data *mii;
#endif
	int i, ntries;
	uint32_t tmp;

	sc = priv;
	ifp = sc->ifp;
#ifdef IF_RT_PHY_SUPPORT
	mii = device_get_softc(sc->rt_miibus);
#endif

	RT_DPRINTF(sc, RT_DEBUG_ANY, "initializing\n");

	RT_SOFTC_ASSERT_LOCKED(sc);

	/* hardware reset */
	RT_WRITE(sc, GE_PORT_BASE + FE_RST_GLO, PSE_RESET);
	rt305x_sysctl_set(SYSCTL_RSTCTRL, SYSCTL_RSTCTRL_FRENG);

	/* Fwd to CPU (uni|broad|multi)cast and Unknown */
	RT_WRITE(sc, GDMA1_BASE + GDMA_FWD_CFG,
	    (
	    GDM_ICS_EN | /* Enable IP Csum */
	    GDM_TCS_EN | /* Enable TCP Csum */
	    GDM_UCS_EN | /* Enable UDP Csum */
	    GDM_STRPCRC | /* Strip CRC from packet */
	    GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* Forward UCast to CPU */
	    GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* Forward BCast to CPU */
	    GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* Forward MCast to CPU */
	    GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT   /* Forward Other to CPU */
	    ));

	/* disable DMA engine */
	RT_WRITE(sc, PDMA_BASE + PDMA_GLO_CFG, 0);
	RT_WRITE(sc, PDMA_BASE + PDMA_RST_IDX, 0xffffffff);

	/* wait while DMA engine is busy (at most 100 polls, 1000us apart) */
	for (ntries = 0; ntries < 100; ntries++) {
		tmp = RT_READ(sc, PDMA_BASE + PDMA_GLO_CFG);
		if (!(tmp & (FE_TX_DMA_BUSY | FE_RX_DMA_BUSY)))
			break;
		DELAY(1000);
	}

	/* The loop ran to completion without seeing both busy bits clear. */
	if (ntries == 100) {
		device_printf(sc->dev, "timeout waiting for DMA engine\n");
		goto fail;
	}

	/* reset Rx and Tx rings */
	tmp = FE_RST_DRX_IDX0 |
		FE_RST_DTX_IDX3 |
		FE_RST_DTX_IDX2 |
		FE_RST_DTX_IDX1 |
		FE_RST_DTX_IDX0;

	RT_WRITE(sc, PDMA_BASE + PDMA_RST_IDX, tmp);

	/* XXX switch set mac address */
	for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++)
		rt_reset_tx_ring(sc, &sc->tx_ring[i]);

	for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) {
		/* update TX_BASE_PTRx */
		RT_WRITE(sc, PDMA_BASE + TX_BASE_PTR(i),
			sc->tx_ring[i].desc_phys_addr);
		RT_WRITE(sc, PDMA_BASE + TX_MAX_CNT(i),
			RT_SOFTC_TX_RING_DESC_COUNT);
		RT_WRITE(sc, PDMA_BASE + TX_CTX_IDX(i), 0);
	}

	/* init Rx ring */
	rt_reset_rx_ring(sc, &sc->rx_ring);

	/* update RX_BASE_PTR0 */
	RT_WRITE(sc, PDMA_BASE + RX_BASE_PTR0,
		sc->rx_ring.desc_phys_addr);
	RT_WRITE(sc, PDMA_BASE + RX_MAX_CNT0,
		RT_SOFTC_RX_RING_DATA_COUNT);
	RT_WRITE(sc, PDMA_BASE + RX_CALC_IDX0,
		RT_SOFTC_RX_RING_DATA_COUNT - 1);

	/* write back DDONE, 16byte burst enable RX/TX DMA */
	RT_WRITE(sc, PDMA_BASE + PDMA_GLO_CFG,
	    FE_TX_WB_DDONE | FE_DMA_BT_SIZE16 | FE_RX_DMA_EN | FE_TX_DMA_EN);

	/* disable interrupts mitigation */
	RT_WRITE(sc, PDMA_BASE + DELAY_INT_CFG, 0);

	/* clear pending interrupts */
	RT_WRITE(sc, GE_PORT_BASE + FE_INT_STATUS, 0xffffffff);

	/* enable interrupts */
	tmp =	CNT_PPE_AF |
		CNT_GDM_AF |
		PSE_P2_FC |
		GDM_CRC_DROP |
		PSE_BUF_DROP |
		GDM_OTHER_DROP |
		PSE_P1_FC |
		PSE_P0_FC |
		PSE_FQ_EMPTY |
		INT_TX_COHERENT |
		INT_RX_COHERENT |
		INT_TXQ3_DONE |
		INT_TXQ2_DONE |
		INT_TXQ1_DONE |
		INT_TXQ0_DONE |
		INT_RX_DONE;

	/* Remember the mask in the softc as well as writing it out. */
	sc->intr_enable_mask = tmp;

	RT_WRITE(sc, GE_PORT_BASE + FE_INT_ENABLE, tmp);

	if (rt_txrx_enable(sc) != 0)
		goto fail;

#ifdef IF_RT_PHY_SUPPORT
	if (mii) mii_mediachg(mii);
#endif /* IF_RT_PHY_SUPPORT */

	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	ifp->if_drv_flags |= IFF_DRV_RUNNING;

	sc->periodic_round = 0;

	/* First periodic tick after hz / 10 ticks. */
	callout_reset(&sc->periodic_ch, hz / 10, rt_periodic, sc);

	return;

fail:
	rt_stop_locked(sc);
}

/*
 * rt_init - lock and initialize device.
*/ static void rt_init(void *priv) { struct rt_softc *sc; sc = priv; RT_SOFTC_LOCK(sc); rt_init_locked(sc); RT_SOFTC_UNLOCK(sc); } /* * rt_stop_locked - stop TX/RX w/ lock */ static void rt_stop_locked(void *priv) { struct rt_softc *sc; struct ifnet *ifp; sc = priv; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "stopping\n"); RT_SOFTC_ASSERT_LOCKED(sc); sc->tx_timer = 0; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->periodic_ch); callout_stop(&sc->tx_watchdog_ch); RT_SOFTC_UNLOCK(sc); taskqueue_block(sc->taskqueue); /* * Sometime rt_stop_locked called from isr and we get panic * When found, I fix it */ #ifdef notyet taskqueue_drain(sc->taskqueue, &sc->rx_done_task); taskqueue_drain(sc->taskqueue, &sc->tx_done_task); taskqueue_drain(sc->taskqueue, &sc->periodic_task); #endif RT_SOFTC_LOCK(sc); /* disable interrupts */ RT_WRITE(sc, GE_PORT_BASE + FE_INT_ENABLE, 0); /* reset adapter */ RT_WRITE(sc, GE_PORT_BASE + FE_RST_GLO, PSE_RESET); RT_WRITE(sc, GDMA1_BASE + GDMA_FWD_CFG, ( GDM_ICS_EN | /* Enable IP Csum */ GDM_TCS_EN | /* Enable TCP Csum */ GDM_UCS_EN | /* Enable UDP Csum */ GDM_STRPCRC | /* Strip CRC from packet */ GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* Forward UCast to CPU */ GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* Forward BCast to CPU */ GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* Forward MCast to CPU */ GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT /* Forward Other to CPU */ )); } static void rt_stop(void *priv) { struct rt_softc *sc; sc = priv; RT_SOFTC_LOCK(sc); rt_stop_locked(sc); RT_SOFTC_UNLOCK(sc); } /* * rt_tx_data - transmit packet. 
*/
/*
 * sc  - driver softc.
 * m   - packet to send; consumed (freed) on every error path below.
 * qid - Tx ring index, asserted to be in [0, RT_SOFTC_TX_RING_COUNT).
 *
 * The ring lock for qid must be held (asserted).  Returns 0 once the packet
 * is queued and the hardware has been kicked, ENOMEM if the chain could not
 * be collapsed, the DMA load error if the retry fails, or EFBIG when the
 * ring does not have enough free descriptors.
 */
static int
rt_tx_data(struct rt_softc *sc, struct mbuf *m, int qid)
{
	struct ifnet *ifp;
	struct rt_softc_tx_ring *ring;
	struct rt_softc_tx_data *data;
	struct rt_txdesc *desc;
	struct mbuf *m_d;
	bus_dma_segment_t dma_seg[RT_SOFTC_MAX_SCATTER];
	int error, ndmasegs, ndescs, i;

	KASSERT(qid >= 0 && qid < RT_SOFTC_TX_RING_COUNT,
		("%s: Tx data: invalid qid=%d\n",
		 device_get_nameunit(sc->dev), qid));

	RT_SOFTC_TX_RING_ASSERT_LOCKED(&sc->tx_ring[qid]);

	ifp = sc->ifp;
	ring = &sc->tx_ring[qid];
	desc = &ring->desc[ring->desc_cur];
	data = &ring->data[ring->data_cur];

	error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, data->dma_map, m,
	    dma_seg, &ndmasegs, 0);
	if (error != 0)	{
		/* too many fragments, linearize */

		/*
		 * NOTE(review): every load error takes this path, and
		 * ndmasegs has not been written yet when it is printed here.
		 */
		RT_DPRINTF(sc, RT_DEBUG_TX,
			"could not load mbuf DMA map, trying to linearize "
			"mbuf: ndmasegs=%d, len=%d, error=%d\n",
			ndmasegs, m->m_pkthdr.len, error);

		m_d = m_collapse(m, M_NOWAIT, 16);
		if (m_d == NULL) {
			m_freem(m);
			m = NULL;
			return (ENOMEM);
		}
		m = m_d;

		sc->tx_defrag_packets++;

		error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag,
		    data->dma_map, m, dma_seg, &ndmasegs, 0);
		if (error != 0)	{
			device_printf(sc->dev, "could not load mbuf DMA map: "
			    "ndmasegs=%d, len=%d, error=%d\n",
			    ndmasegs, m->m_pkthdr.len, error);
			m_freem(m);
			return (error);
		}
	}

	if (m->m_pkthdr.len == 0)
		ndmasegs = 0;

	/* determine how many Tx descs are required */
	ndescs = 1 + ndmasegs / 2;
	if ((ring->desc_queued + ndescs) >
	    (RT_SOFTC_TX_RING_DESC_COUNT - 2)) {
		RT_DPRINTF(sc, RT_DEBUG_TX,
		    "there are not enough Tx descs\n");

		sc->no_tx_desc_avail++;

		bus_dmamap_unload(ring->data_dma_tag, data->dma_map);
		m_freem(m);
		return (EFBIG);
	}

	data->m = m;

	/* set up Tx descs: each descriptor carries up to two DMA segments */
	for (i = 0; i < ndmasegs; i += 2) {
		/* Set destination */
		desc->dst = (TXDSCR_DST_PORT_GDMA1);
		if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
			desc->dst |= (TXDSCR_IP_CSUM_GEN|TXDSCR_UDP_CSUM_GEN|
			    TXDSCR_TCP_CSUM_GEN);
		/* Set queue id */
		desc->qn = qid;
		/* No PPPoE */
		desc->pppoe = 0;
		/* No VLAN */
		desc->vid = 0;

		/* First segment; flagged as last when no segment follows. */
		desc->sdp0 = htole32(dma_seg[i].ds_addr);
		desc->sdl0 = htole16(dma_seg[i].ds_len |
		    ( ((i+1) == ndmasegs )?RT_TXDESC_SDL0_LASTSEG:0 ));

		/* Second segment, or zeroes when the count is odd. */
		if ((i+1) < ndmasegs) {
			desc->sdp1 = htole32(dma_seg[i+1].ds_addr);
			desc->sdl1 = htole16(dma_seg[i+1].ds_len |
			    ( ((i+2) == ndmasegs )?RT_TXDESC_SDL1_LASTSEG:0 ));
		} else {
			desc->sdp1 = 0;
			desc->sdl1 = 0;
		}

		/*
		 * Advance only when more segments remain; the final
		 * descriptor is accounted for after the loop.
		 */
		if ((i+2) < ndmasegs) {
			ring->desc_queued++;
			ring->desc_cur = (ring->desc_cur + 1) %
			    RT_SOFTC_TX_RING_DESC_COUNT;
		}
		desc = &ring->desc[ring->desc_cur];
	}

	/*
	 * NOTE(review): prints five segment lengths regardless of ndmasegs,
	 * so entries beyond ndmasegs are unwritten here.
	 */
	RT_DPRINTF(sc, RT_DEBUG_TX, "sending data: len=%d, ndmasegs=%d, "
	    "DMA ds_len=%d/%d/%d/%d/%d\n",
	    m->m_pkthdr.len, ndmasegs,
	    (int) dma_seg[0].ds_len,
	    (int) dma_seg[1].ds_len,
	    (int) dma_seg[2].ds_len,
	    (int) dma_seg[3].ds_len,
	    (int) dma_seg[4].ds_len);

	bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map,
		BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(ring->data_dma_tag, data->dma_map,
		BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map,
		BUS_DMASYNC_PREWRITE);

	ring->desc_queued++;
	ring->desc_cur = (ring->desc_cur + 1) % RT_SOFTC_TX_RING_DESC_COUNT;

	ring->data_queued++;
	ring->data_cur = (ring->data_cur + 1) % RT_SOFTC_TX_RING_DATA_COUNT;

	/* kick Tx */
	RT_WRITE(sc, PDMA_BASE + TX_CTX_IDX(qid), ring->desc_cur);

	return (0);
}

/*
 * rt_start - start Transmit/Receive
 *
 * Dequeues packets from the interface send queue and submits each one with
 * rt_tx_data() on queue 0.  Stops at the first packet that cannot be queued.
 */
static void
rt_start(struct ifnet *ifp)
{
	struct rt_softc *sc;
	struct mbuf *m;
	int qid = 0 /* XXX must check QoS priority */;

	sc = ifp->if_softc;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	for (;;) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		m->m_pkthdr.rcvif = NULL;

		RT_SOFTC_TX_RING_LOCK(&sc->tx_ring[qid]);

		/* Ring full: drop this packet and mark output active. */
		if (sc->tx_ring[qid].data_queued >=
		    RT_SOFTC_TX_RING_DATA_COUNT) {
			RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]);

			RT_DPRINTF(sc, RT_DEBUG_TX,
			    "if_start: Tx ring with qid=%d is full\n", qid);

			m_freem(m);

			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			ifp->if_oerrors++;

			sc->tx_data_queue_full[qid]++;

			break;
		}

		/* rt_tx_data() has already freed m when it fails. */
		if (rt_tx_data(sc, m, qid) != 0) {
			RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]);

			ifp->if_oerrors++;

			break;
		}

		RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]);
		/* (Re)arm the transmit watchdog for the packet just queued. */
		sc->tx_timer = RT_TX_WATCHDOG_TIMEOUT;
		callout_reset(&sc->tx_watchdog_ch, hz, rt_tx_watchdog, sc);
	}
}

/*
 * rt_update_promisc - set/clear promiscuous mode. Unused yet, because
 * filtering done by attached Ethernet switch.
 *
 * Currently this only logs the transition.
 */
static void
rt_update_promisc(struct ifnet *ifp)
{
	struct rt_softc *sc;

	sc = ifp->if_softc;
	printf("%s: %s promiscuous mode\n",
		device_get_nameunit(sc->dev),
		(ifp->if_flags & IFF_PROMISC) ? "entering" : "leaving");
}

/*
 * rt_ioctl - ioctl handler.
 *
 * Handles SIOCSIFFLAGS and the media ioctls itself; everything else is
 * passed to ether_ioctl().  Returns 0 or the error from the delegated call.
 */
static int
rt_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct rt_softc *sc;
	struct ifreq *ifr;
#ifdef IF_RT_PHY_SUPPORT
	struct mii_data *mii;
#endif /* IF_RT_PHY_SUPPORT */
	int error, startall;	/* startall is written but never read */

	sc = ifp->if_softc;
	ifr = (struct ifreq *) data;

	error = 0;

	switch (cmd) {
	case SIOCSIFFLAGS:
		startall = 0;
		RT_SOFTC_LOCK(sc);
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				/* Already running: react only to a PROMISC flip. */
				if ((ifp->if_flags ^ sc->if_flags) &
				    IFF_PROMISC)
					rt_update_promisc(ifp);
			} else {
				rt_init_locked(sc);
				startall = 1;
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rt_stop_locked(sc);
		}
		/* Remember the flags to detect changes on the next call. */
		sc->if_flags = ifp->if_flags;
		RT_SOFTC_UNLOCK(sc);
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
#ifdef IF_RT_PHY_SUPPORT
		mii = device_get_softc(sc->rt_miibus);
		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd);
#else
		error = ifmedia_ioctl(ifp, ifr, &sc->rt_ifmedia, cmd);
#endif /* IF_RT_PHY_SUPPORT */
		break;
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return (error);
}

/*
 * rt_periodic - Handler of PERIODIC interrupt
 *
 * Callout handler: defers the work by enqueueing periodic_task.
 */
static void
rt_periodic(void *arg)
{
	struct rt_softc *sc;

	sc = arg;
	RT_DPRINTF(sc, RT_DEBUG_PERIODIC, "periodic\n");
	taskqueue_enqueue(sc->taskqueue, &sc->periodic_task);
}

/*
 * rt_tx_watchdog - Handler of TX Watchdog
 *
 * Runs once per hz ticks while tx_timer is non-zero.  When the timer counts
 * down to zero the timeout is logged and counted; the actual reset is
 * compiled out.
 */
static void
rt_tx_watchdog(void *arg)
{
	struct rt_softc *sc;
	struct ifnet *ifp;

	sc = arg;
	ifp = sc->ifp;

	/* Timer idle: return without re-arming the callout. */
	if (sc->tx_timer == 0)
		return;

	if (--sc->tx_timer == 0) {
		device_printf(sc->dev, "Tx watchdog timeout: resetting\n");
#ifdef notyet
		/*
		 * XXX: Commented out, because reset break input.
		 */
		rt_stop_locked(sc);
		rt_init_locked(sc);
#endif
		ifp->if_oerrors++;
		sc->tx_watchdog_timeouts++;
	}
	callout_reset(&sc->tx_watchdog_ch, hz, rt_tx_watchdog, sc);
}

/*
 * rt_cnt_ppe_af - Handler of PPE Counter Table Almost Full interrupt
 * (debug message only)
 */
static void
rt_cnt_ppe_af(struct rt_softc *sc)
{

	RT_DPRINTF(sc, RT_DEBUG_INTR, "PPE Counter Table Almost Full\n");
}

/*
 * rt_cnt_gdm_af - Handler of GDMA 1 & 2 Counter Table Almost Full interrupt
 * (debug message only)
 */
static void
rt_cnt_gdm_af(struct rt_softc *sc)
{

	RT_DPRINTF(sc, RT_DEBUG_INTR,
	    "GDMA 1 & 2 Counter Table Almost Full\n");
}

/*
 * rt_pse_p2_fc - Handler of PSE port2 (GDMA 2) flow control interrupt
 * (debug message only)
 */
static void
rt_pse_p2_fc(struct rt_softc *sc)
{

	RT_DPRINTF(sc, RT_DEBUG_INTR,
	    "PSE port2 (GDMA 2) flow control asserted.\n");
}

/*
 * rt_gdm_crc_drop - Handler of GDMA 1/2 discard a packet due to CRC error
 * interrupt (debug message only)
 */
static void
rt_gdm_crc_drop(struct rt_softc *sc)
{

	RT_DPRINTF(sc, RT_DEBUG_INTR,
	    "GDMA 1 & 2 discard a packet due to CRC error\n");
}

/*
 * rt_pse_buf_drop - Handler of buffer sharing limitation interrupt
 * (debug message only)
 */
static void
rt_pse_buf_drop(struct rt_softc *sc)
{

	RT_DPRINTF(sc, RT_DEBUG_INTR,
	    "PSE discards a packet due to buffer sharing limitation\n");
}

/*
 * rt_gdm_other_drop - Handler of discard on other reason interrupt
 * (debug message only)
 */
static void
rt_gdm_other_drop(struct rt_softc *sc)
{

	RT_DPRINTF(sc, RT_DEBUG_INTR,
	    "GDMA 1 & 2 discard a packet due to other reason\n");
}

/*
 * rt_pse_p1_fc - Handler of PSE port1 (GDMA 1) flow control interrupt
 * (debug message only)
 */
static void
rt_pse_p1_fc(struct rt_softc *sc)
{

	RT_DPRINTF(sc, RT_DEBUG_INTR,
	    "PSE port1 (GDMA 1) flow control asserted.\n");
}

/*
 * rt_pse_p0_fc - Handler of PSE port0 (CDMA) flow control interrupt
 * (debug message only)
 */
static void
rt_pse_p0_fc(struct rt_softc *sc)
{

	RT_DPRINTF(sc, RT_DEBUG_INTR,
	    "PSE port0 (CDMA) flow control asserted.\n");
}

/*
 * rt_pse_fq_empty - Handler of PSE free Q empty threshold reached interrupt
*/ static void rt_pse_fq_empty(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE free Q empty threshold reached & forced drop " "condition occurred.\n"); } /* * rt_intr - main ISR */ static void rt_intr(void *arg) { struct rt_softc *sc; struct ifnet *ifp; uint32_t status; sc = arg; ifp = sc->ifp; /* acknowledge interrupts */ status = RT_READ(sc, GE_PORT_BASE + FE_INT_STATUS); RT_WRITE(sc, GE_PORT_BASE + FE_INT_STATUS, status); RT_DPRINTF(sc, RT_DEBUG_INTR, "interrupt: status=0x%08x\n", status); if (status == 0xffffffff || /* device likely went away */ status == 0) /* not for us */ return; sc->interrupts++; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; if (status & CNT_PPE_AF) rt_cnt_ppe_af(sc); if (status & CNT_GDM_AF) rt_cnt_gdm_af(sc); if (status & PSE_P2_FC) rt_pse_p2_fc(sc); if (status & GDM_CRC_DROP) rt_gdm_crc_drop(sc); if (status & PSE_BUF_DROP) rt_pse_buf_drop(sc); if (status & GDM_OTHER_DROP) rt_gdm_other_drop(sc); if (status & PSE_P1_FC) rt_pse_p1_fc(sc); if (status & PSE_P0_FC) rt_pse_p0_fc(sc); if (status & PSE_FQ_EMPTY) rt_pse_fq_empty(sc); if (status & INT_TX_COHERENT) rt_tx_coherent_intr(sc); if (status & INT_RX_COHERENT) rt_rx_coherent_intr(sc); if (status & RX_DLY_INT) rt_rx_delay_intr(sc); if (status & TX_DLY_INT) rt_tx_delay_intr(sc); if (status & INT_RX_DONE) rt_rx_intr(sc); if (status & INT_TXQ3_DONE) rt_tx_intr(sc, 3); if (status & INT_TXQ2_DONE) rt_tx_intr(sc, 2); if (status & INT_TXQ1_DONE) rt_tx_intr(sc, 1); if (status & INT_TXQ0_DONE) rt_tx_intr(sc, 0); } static void rt_tx_coherent_intr(struct rt_softc *sc) { uint32_t tmp; int i; RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx coherent interrupt\n"); sc->tx_coherent_interrupts++; /* restart DMA engine */ tmp = RT_READ(sc, PDMA_BASE + PDMA_GLO_CFG); tmp &= ~(FE_TX_WB_DDONE | FE_TX_DMA_EN); RT_WRITE(sc, PDMA_BASE + PDMA_GLO_CFG, tmp); for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_reset_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { RT_WRITE(sc, PDMA_BASE + 
TX_BASE_PTR(i), sc->tx_ring[i].desc_phys_addr); RT_WRITE(sc, PDMA_BASE + TX_MAX_CNT(i), RT_SOFTC_TX_RING_DESC_COUNT); RT_WRITE(sc, PDMA_BASE + TX_CTX_IDX(i), 0); } rt_txrx_enable(sc); } /* * rt_rx_coherent_intr */ static void rt_rx_coherent_intr(struct rt_softc *sc) { uint32_t tmp; RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx coherent interrupt\n"); sc->rx_coherent_interrupts++; /* restart DMA engine */ tmp = RT_READ(sc, PDMA_BASE + PDMA_GLO_CFG); tmp &= ~(FE_RX_DMA_EN); RT_WRITE(sc, PDMA_BASE + PDMA_GLO_CFG, tmp); /* init Rx ring */ rt_reset_rx_ring(sc, &sc->rx_ring); RT_WRITE(sc, PDMA_BASE + RX_BASE_PTR0, sc->rx_ring.desc_phys_addr); RT_WRITE(sc, PDMA_BASE + RX_MAX_CNT0, RT_SOFTC_RX_RING_DATA_COUNT); RT_WRITE(sc, PDMA_BASE + RX_CALC_IDX0, RT_SOFTC_RX_RING_DATA_COUNT - 1); rt_txrx_enable(sc); } /* * rt_rx_intr - a packet received */ static void rt_rx_intr(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx interrupt\n"); sc->rx_interrupts++; RT_SOFTC_LOCK(sc); if (!(sc->intr_disable_mask & INT_RX_DONE)) { rt_intr_disable(sc, INT_RX_DONE); taskqueue_enqueue(sc->taskqueue, &sc->rx_done_task); } sc->intr_pending_mask |= INT_RX_DONE; RT_SOFTC_UNLOCK(sc); } static void rt_rx_delay_intr(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx delay interrupt\n"); sc->rx_delay_interrupts++; } static void rt_tx_delay_intr(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx delay interrupt\n"); sc->tx_delay_interrupts++; } /* * rt_tx_intr - Transsmition of packet done */ static void rt_tx_intr(struct rt_softc *sc, int qid) { KASSERT(qid >= 0 && qid < RT_SOFTC_TX_RING_COUNT, ("%s: Tx interrupt: invalid qid=%d\n", device_get_nameunit(sc->dev), qid)); RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx interrupt: qid=%d\n", qid); sc->tx_interrupts[qid]++; RT_SOFTC_LOCK(sc); if (!(sc->intr_disable_mask & (INT_TXQ0_DONE << qid))) { rt_intr_disable(sc, (INT_TXQ0_DONE << qid)); taskqueue_enqueue(sc->taskqueue, &sc->tx_done_task); } sc->intr_pending_mask |= (INT_TXQ0_DONE << qid); 
RT_SOFTC_UNLOCK(sc); } /* * rt_rx_done_task - run RX task */ static void rt_rx_done_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; int again; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_RX, "Rx done task\n"); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; sc->intr_pending_mask &= ~INT_RX_DONE; again = rt_rx_eof(sc, sc->rx_process_limit); RT_SOFTC_LOCK(sc); if ((sc->intr_pending_mask & INT_RX_DONE) || again) { RT_DPRINTF(sc, RT_DEBUG_RX, "Rx done task: scheduling again\n"); taskqueue_enqueue(sc->taskqueue, &sc->rx_done_task); } else { rt_intr_enable(sc, INT_RX_DONE); } RT_SOFTC_UNLOCK(sc); } /* * rt_tx_done_task - check for pending TX task in all queues */ static void rt_tx_done_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; uint32_t intr_mask; int i; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_TX, "Tx done task\n"); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; for (i = RT_SOFTC_TX_RING_COUNT - 1; i >= 0; i--) { if (sc->intr_pending_mask & (INT_TXQ0_DONE << i)) { sc->intr_pending_mask &= ~(INT_TXQ0_DONE << i); rt_tx_eof(sc, &sc->tx_ring[i]); } } sc->tx_timer = 0; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; intr_mask = ( INT_TXQ3_DONE | INT_TXQ2_DONE | INT_TXQ1_DONE | INT_TXQ0_DONE); RT_SOFTC_LOCK(sc); rt_intr_enable(sc, ~sc->intr_pending_mask & (sc->intr_disable_mask & intr_mask)); if (sc->intr_pending_mask & intr_mask) { RT_DPRINTF(sc, RT_DEBUG_TX, "Tx done task: scheduling again\n"); taskqueue_enqueue(sc->taskqueue, &sc->tx_done_task); } RT_SOFTC_UNLOCK(sc); if (!IFQ_IS_EMPTY(&ifp->if_snd)) rt_start(ifp); } /* * rt_periodic_task - run periodic task */ static void rt_periodic_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_PERIODIC, "periodic task: round=%lu\n", sc->periodic_round); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; RT_SOFTC_LOCK(sc); sc->periodic_round++; rt_update_stats(sc); if 
((sc->periodic_round % 10) == 0) { rt_update_raw_counters(sc); rt_watchdog(sc); } RT_SOFTC_UNLOCK(sc); callout_reset(&sc->periodic_ch, hz / 10, rt_periodic, sc); } /* * rt_rx_eof - check for frames that done by DMA engine and pass it into * network subsystem. */ static int rt_rx_eof(struct rt_softc *sc, int limit) { struct ifnet *ifp; struct rt_softc_rx_ring *ring; struct rt_rxdesc *desc; struct rt_softc_rx_data *data; struct mbuf *m, *mnew; bus_dma_segment_t segs[1]; bus_dmamap_t dma_map; uint32_t index, desc_flags; int error, nsegs, len, nframes; ifp = sc->ifp; ring = &sc->rx_ring; nframes = 0; while (limit != 0) { index = RT_READ(sc, PDMA_BASE + RX_DRX_IDX0); if (ring->cur == index) break; desc = &ring->desc[ring->cur]; data = &ring->data[ring->cur]; bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef IF_RT_DEBUG if ( sc->debug & RT_DEBUG_RX ) { printf("\nRX Descriptor[%#08x] dump:\n", (u_int)desc); hexdump(desc, 16, 0, 0); printf("-----------------------------------\n"); } #endif /* XXX Sometime device don`t set DDONE bit */ #ifdef DDONE_FIXED if (!(desc->sdl0 & htole16(RT_RXDESC_SDL0_DDONE))) { RT_DPRINTF(sc, RT_DEBUG_RX, "DDONE=0, try next\n"); break; } #endif len = le16toh(desc->sdl0) & 0x3fff; RT_DPRINTF(sc, RT_DEBUG_RX, "new frame len=%d\n", len); nframes++; mnew = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (mnew == NULL) { sc->rx_mbuf_alloc_errors++; ifp->if_ierrors++; goto skip; } mnew->m_len = mnew->m_pkthdr.len = MJUMPAGESIZE; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, ring->spare_dma_map, mnew, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { RT_DPRINTF(sc, RT_DEBUG_RX, "could not load Rx mbuf DMA map: " "error=%d, nsegs=%d\n", error, nsegs); m_freem(mnew); sc->rx_mbuf_dmamap_errors++; ifp->if_ierrors++; goto skip; } KASSERT(nsegs == 1, ("%s: too many DMA segments", device_get_nameunit(sc->dev))); bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTREAD); 
bus_dmamap_unload(ring->data_dma_tag, data->dma_map); dma_map = data->dma_map; data->dma_map = ring->spare_dma_map; ring->spare_dma_map = dma_map; bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_PREREAD); m = data->m; desc_flags = desc->src; data->m = mnew; /* Add 2 for proper align of RX IP header */ desc->sdp0 = htole32(segs[0].ds_addr+2); desc->sdl0 = htole32(segs[0].ds_len-2); desc->src = 0; desc->ai = 0; desc->foe = 0; RT_DPRINTF(sc, RT_DEBUG_RX, "Rx frame: rxdesc flags=0x%08x\n", desc_flags); m->m_pkthdr.rcvif = ifp; /* Add 2 to fix data align, after sdp0 = addr + 2 */ m->m_data += 2; m->m_pkthdr.len = m->m_len = len; /* check for crc errors */ if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) { /*check for valid checksum*/ if (desc_flags & (RXDSXR_SRC_IP_CSUM_FAIL| RXDSXR_SRC_L4_CSUM_FAIL)) { RT_DPRINTF(sc, RT_DEBUG_RX, "rxdesc: crc error\n"); ifp->if_ierrors++; if (!(ifp->if_flags & IFF_PROMISC)) { m_freem(m); goto skip; } } if ((desc_flags & RXDSXR_SRC_IP_CSUM_FAIL) != 0) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; m->m_pkthdr.csum_flags |= CSUM_IP_VALID; m->m_pkthdr.csum_data = 0xffff; } m->m_flags &= ~M_HASFCS; } (*ifp->if_input)(ifp, m); skip: desc->sdl0 &= ~htole16(RT_RXDESC_SDL0_DDONE); bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ring->cur = (ring->cur + 1) % RT_SOFTC_RX_RING_DATA_COUNT; limit--; } if (ring->cur == 0) RT_WRITE(sc, PDMA_BASE + RX_CALC_IDX0, RT_SOFTC_RX_RING_DATA_COUNT - 1); else RT_WRITE(sc, PDMA_BASE + RX_CALC_IDX0, ring->cur - 1); RT_DPRINTF(sc, RT_DEBUG_RX, "Rx eof: nframes=%d\n", nframes); sc->rx_packets += nframes; return (limit == 0); } /* * rt_tx_eof - check for successful transmitted frames and mark their * descriptor as free. 
*/ static void rt_tx_eof(struct rt_softc *sc, struct rt_softc_tx_ring *ring) { struct ifnet *ifp; struct rt_txdesc *desc; struct rt_softc_tx_data *data; uint32_t index; int ndescs, nframes; ifp = sc->ifp; ndescs = 0; nframes = 0; for (;;) { index = RT_READ(sc, PDMA_BASE + TX_DTX_IDX(ring->qid)); if (ring->desc_next == index) break; ndescs++; desc = &ring->desc[ring->desc_next]; bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (desc->sdl0 & htole16(RT_TXDESC_SDL0_LASTSEG) || desc->sdl1 & htole16(RT_TXDESC_SDL1_LASTSEG)) { nframes++; data = &ring->data[ring->data_next]; bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); data->m = NULL; ifp->if_opackets++; RT_SOFTC_TX_RING_LOCK(ring); ring->data_queued--; ring->data_next = (ring->data_next + 1) % RT_SOFTC_TX_RING_DATA_COUNT; RT_SOFTC_TX_RING_UNLOCK(ring); } desc->sdl0 &= ~htole16(RT_TXDESC_SDL0_DDONE); bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); RT_SOFTC_TX_RING_LOCK(ring); ring->desc_queued--; ring->desc_next = (ring->desc_next + 1) % RT_SOFTC_TX_RING_DESC_COUNT; RT_SOFTC_TX_RING_UNLOCK(ring); } RT_DPRINTF(sc, RT_DEBUG_TX, "Tx eof: qid=%d, ndescs=%d, nframes=%d\n", ring->qid, ndescs, nframes); } /* * rt_update_stats - query statistics counters and update related variables. */ static void rt_update_stats(struct rt_softc *sc) { struct ifnet *ifp; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_STATS, "update statistic: \n"); /* XXX do update stats here */ } /* * rt_watchdog - reinit device on watchdog event. 
*/
static void
rt_watchdog(struct rt_softc *sc)
{
	uint32_t tmp;
#ifdef notyet
	int ntries;
#endif

	/*
	 * NOTE(review): the register read is CDMA_OQ_STA while the message
	 * below labels the value PSE_IQ_STA - confirm which is intended.
	 */
	tmp = RT_READ(sc, PSE_BASE + CDMA_OQ_STA);

	RT_DPRINTF(sc, RT_DEBUG_WATCHDOG, "watchdog: PSE_IQ_STA=0x%08x\n",
	    tmp);

	/* XXX: do not reset */
#ifdef notyet
	/* Poll up to 10 times for each port's input queue to drain. */
	if (((tmp >> P0_IQ_PCNT_SHIFT) & 0xff) != 0) {
		sc->tx_queue_not_empty[0]++;

		for (ntries = 0; ntries < 10; ntries++) {
			tmp = RT_READ(sc, PSE_BASE + PSE_IQ_STA);
			if (((tmp >> P0_IQ_PCNT_SHIFT) & 0xff) == 0)
				break;

			DELAY(1);
		}
	}

	if (((tmp >> P1_IQ_PCNT_SHIFT) & 0xff) != 0) {
		sc->tx_queue_not_empty[1]++;

		for (ntries = 0; ntries < 10; ntries++) {
			tmp = RT_READ(sc, PSE_BASE + PSE_IQ_STA);
			if (((tmp >> P1_IQ_PCNT_SHIFT) & 0xff) == 0)
				break;

			DELAY(1);
		}
	}
#endif
}

/*
 * rt_update_raw_counters - update counters.
 *
 * Adds the current value of each GDMA counter register to the matching
 * software counter in the softc.
 */
static void
rt_update_raw_counters(struct rt_softc *sc)
{

	sc->tx_bytes	+= RT_READ(sc, CNTR_BASE + GDMA_TX_GBCNT0);
	sc->tx_packets	+= RT_READ(sc, CNTR_BASE + GDMA_TX_GPCNT0);
	sc->tx_skip	+= RT_READ(sc, CNTR_BASE + GDMA_TX_SKIPCNT0);
	sc->tx_collision+= RT_READ(sc, CNTR_BASE + GDMA_TX_COLCNT0);

	sc->rx_bytes	+= RT_READ(sc, CNTR_BASE + GDMA_RX_GBCNT0);
	sc->rx_packets	+= RT_READ(sc, CNTR_BASE + GDMA_RX_GPCNT0);
	sc->rx_crc_err	+= RT_READ(sc, CNTR_BASE + GDMA_RX_CSUM_ERCNT0);
	sc->rx_short_err+= RT_READ(sc, CNTR_BASE + GDMA_RX_SHORT_ERCNT0);
	sc->rx_long_err	+= RT_READ(sc, CNTR_BASE + GDMA_RX_LONG_ERCNT0);
	sc->rx_phy_err	+= RT_READ(sc, CNTR_BASE + GDMA_RX_FERCNT0);
	sc->rx_fifo_overflows+= RT_READ(sc, CNTR_BASE + GDMA_RX_OERCNT0);
}

/*
 * rt_intr_enable - remove `intr_mask` from the software disable mask and
 * write the resulting enable set to the interrupt enable register.
 */
static void
rt_intr_enable(struct rt_softc *sc, uint32_t intr_mask)
{
	uint32_t tmp;

	sc->intr_disable_mask &= ~intr_mask;
	tmp = sc->intr_enable_mask & ~sc->intr_disable_mask;
	RT_WRITE(sc, GE_PORT_BASE + FE_INT_ENABLE, tmp);
}

/*
 * rt_intr_disable - add `intr_mask` to the software disable mask and
 * write the resulting enable set to the interrupt enable register.
 */
static void
rt_intr_disable(struct rt_softc *sc, uint32_t intr_mask)
{
	uint32_t tmp;

	sc->intr_disable_mask |= intr_mask;
	tmp = sc->intr_enable_mask & ~sc->intr_disable_mask;
	RT_WRITE(sc, GE_PORT_BASE + FE_INT_ENABLE, tmp);
}

/*
 * rt_txrx_enable - enable TX/RX DMA
 *
 * Waits (up to 200 polls, 1000us apart) for the DMA engine to report
 * neither Tx nor Rx busy, then sets the enable bits.  Returns 0 on
 * success or -1 on timeout.
 */
static int
rt_txrx_enable(struct rt_softc *sc)
{
	struct ifnet *ifp;
	uint32_t tmp;
	int ntries;

	ifp = sc->ifp;

	/* enable Tx/Rx DMA engine */
	for (ntries = 0; ntries < 200; ntries++) {
		tmp = RT_READ(sc, PDMA_BASE + PDMA_GLO_CFG);
		if (!(tmp & (FE_TX_DMA_BUSY | FE_RX_DMA_BUSY)))
			break;

		DELAY(1000);
	}

	if (ntries == 200) {
		device_printf(sc->dev, "timeout waiting for DMA engine\n");
		return (-1);
	}

	DELAY(50);

	tmp |= FE_TX_WB_DDONE |	FE_RX_DMA_EN | FE_TX_DMA_EN;
	RT_WRITE(sc, PDMA_BASE + PDMA_GLO_CFG, tmp);

	/* XXX set Rx filter */
	return (0);
}

/*
 * rt_alloc_rx_ring - allocate RX DMA ring buffer
 *
 * Creates the descriptor DMA tag/memory/map, the data DMA tag, and for
 * every ring slot a DMA map with a loaded jumbo-page mbuf, plus one
 * spare map.  On any failure everything is released through
 * rt_free_rx_ring() and the error code is returned; returns 0 on success.
 */
static int
rt_alloc_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring)
{
	struct rt_rxdesc *desc;
	struct rt_softc_rx_data *data;
	bus_dma_segment_t segs[1];
	int i, nsegs, error;

	/* Descriptor ring: one segment covering all descriptors. */
	error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0,
		BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
		RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc), 1,
		RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc),
		0, NULL, NULL, &ring->desc_dma_tag);
	if (error != 0)	{
		device_printf(sc->dev,
		    "could not create Rx desc DMA tag\n");
		goto fail;
	}

	error = bus_dmamem_alloc(ring->desc_dma_tag, (void **) &ring->desc,
	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_dma_map);
	if (error != 0) {
		device_printf(sc->dev,
		    "could not allocate Rx desc DMA memory\n");
		goto fail;
	}

	/* rt_dma_map_addr stores the bus address in desc_phys_addr. */
	error = bus_dmamap_load(ring->desc_dma_tag, ring->desc_dma_map,
		ring->desc,
		RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc),
		rt_dma_map_addr, &ring->desc_phys_addr, 0);
	if (error != 0) {
		device_printf(sc->dev, "could not load Rx desc DMA map\n");
		goto fail;
	}

	/* Data buffers: one MJUMPAGESIZE segment per mbuf. */
	error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
		MJUMPAGESIZE, 1, MJUMPAGESIZE, 0, NULL, NULL,
		&ring->data_dma_tag);
	if (error != 0)	{
		device_printf(sc->dev,
		    "could not create Rx data DMA tag\n");
		goto fail;
	}

	for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) {
		desc = &ring->desc[i];
		data = &ring->data[i];

		error = bus_dmamap_create(ring->data_dma_tag, 0,
		    &data->dma_map);
		if (error != 0)	{
			device_printf(sc->dev, "could not create Rx data DMA "
			    "map\n");
			goto fail;
		}

		data->m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
		    MJUMPAGESIZE);
		if (data->m == NULL) {
			device_printf(sc->dev, "could not allocate Rx mbuf\n");
			error = ENOMEM;
			goto fail;
		}

		data->m->m_len = data->m->m_pkthdr.len = MJUMPAGESIZE;

		error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag,
		    data->dma_map, data->m, segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)	{
			device_printf(sc->dev,
			    "could not load Rx mbuf DMA map\n");
			goto fail;
		}

		KASSERT(nsegs == 1, ("%s: too many DMA segments",
			device_get_nameunit(sc->dev)));

		/* Add 2 for proper align of RX IP header */
		desc->sdp0 = htole32(segs[0].ds_addr+2);
		desc->sdl0 = htole32(segs[0].ds_len-2);
	}

	/* Spare map used when a received buffer is replaced. */
	error = bus_dmamap_create(ring->data_dma_tag, 0,
	    &ring->spare_dma_map);
	if (error != 0) {
		device_printf(sc->dev,
		    "could not create Rx spare DMA map\n");
		goto fail;
	}

	bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map,
		BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	return (0);

fail:
	rt_free_rx_ring(sc, ring);
	return (error);
}

/*
 * rt_reset_rx_ring - reset RX ring buffer
 *
 * Clears the DDONE flag of every descriptor and rewinds the ring cursor
 * to slot 0.  The mbufs stay attached to their slots.
 */
static void
rt_reset_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring)
{
	struct rt_rxdesc *desc;
	int i;

	for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) {
		desc = &ring->desc[i];
		desc->sdl0 &= ~htole16(RT_RXDESC_SDL0_DDONE);
	}

	bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map,
		BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	ring->cur = 0;
}

/*
 * rt_free_rx_ring - free memory used by RX ring buffer
 *
 * Every resource is checked for NULL before it is released, so this also
 * serves as the failure path of rt_alloc_rx_ring().
 */
static void
rt_free_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring)
{
	struct rt_softc_rx_data *data;
	int i;

	if (ring->desc != NULL) {
		bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map,
			BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(ring->desc_dma_tag, ring->desc_dma_map);
		bus_dmamem_free(ring->desc_dma_tag, ring->desc,
			ring->desc_dma_map);
	}

	if (ring->desc_dma_tag != NULL)
		bus_dma_tag_destroy(ring->desc_dma_tag);

	for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) {
		data = &ring->data[i];

		if (data->m != NULL) {
			bus_dmamap_sync(ring->data_dma_tag, data->dma_map,
				BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(ring->data_dma_tag, data->dma_map);
			m_freem(data->m);
		}

		if (data->dma_map != NULL)
			bus_dmamap_destroy(ring->data_dma_tag, data->dma_map);
	}

	if (ring->spare_dma_map != NULL)
		bus_dmamap_destroy(ring->data_dma_tag, ring->spare_dma_map);

	if (ring->data_dma_tag != NULL)
		bus_dma_tag_destroy(ring->data_dma_tag);
}

/*
 * rt_alloc_tx_ring - allocate TX ring buffer
 *
 * Initializes the ring mutex, then creates the descriptor and seg0 DMA
 * tag/memory/map sets, the data DMA tag and one DMA map per data slot.
 * On any failure everything is released through rt_free_tx_ring() and
 * the error code is returned; returns 0 on success.
 */
static int
rt_alloc_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring, int qid)
{
	struct rt_softc_tx_data *data;
	int error, i;

	mtx_init(&ring->lock, device_get_nameunit(sc->dev), NULL, MTX_DEF);

	/* Descriptor ring: one segment covering all descriptors. */
	error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0,
		BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
		RT_SOFTC_TX_RING_DESC_COUNT * sizeof(struct rt_txdesc), 1,
		RT_SOFTC_TX_RING_DESC_COUNT * sizeof(struct rt_txdesc),
		0, NULL, NULL, &ring->desc_dma_tag);
	if (error != 0) {
		device_printf(sc->dev,
		    "could not create Tx desc DMA tag\n");
		goto fail;
	}

	error = bus_dmamem_alloc(ring->desc_dma_tag, (void **) &ring->desc,
	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_dma_map);
	if (error != 0)	{
		device_printf(sc->dev,
		    "could not allocate Tx desc DMA memory\n");
		goto fail;
	}

	/* rt_dma_map_addr stores the bus address in desc_phys_addr. */
	error = bus_dmamap_load(ring->desc_dma_tag, ring->desc_dma_map,
	    ring->desc,	(RT_SOFTC_TX_RING_DESC_COUNT *
	    sizeof(struct rt_txdesc)), rt_dma_map_addr,
	    &ring->desc_phys_addr, 0);
	if (error != 0) {
		device_printf(sc->dev, "could not load Tx desc DMA map\n");
		goto fail;
	}

	ring->desc_queued = 0;
	ring->desc_cur = 0;
	ring->desc_next = 0;

	/* seg0 area: RT_TX_DATA_SEG0_SIZE bytes per data slot. */
	error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE, 1,
	    RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE,
	    0, NULL, NULL, &ring->seg0_dma_tag);
	if (error != 0) {
		device_printf(sc->dev,
		    "could not create Tx seg0 DMA tag\n");
		goto fail;
	}

	error = bus_dmamem_alloc(ring->seg0_dma_tag, (void **) &ring->seg0,
	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->seg0_dma_map);
	if (error != 0) {
		device_printf(sc->dev,
		    "could not allocate Tx seg0 DMA memory\n");
		goto fail;
	}

	error = bus_dmamap_load(ring->seg0_dma_tag, ring->seg0_dma_map,
	    ring->seg0,
	    RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE,
	    rt_dma_map_addr, &ring->seg0_phys_addr, 0);
	if (error != 0) {
		device_printf(sc->dev, "could not load Tx seg0 DMA map\n");
		goto fail;
	}

	/* Data buffers: up to RT_SOFTC_MAX_SCATTER segments per packet. */
	error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    MJUMPAGESIZE, RT_SOFTC_MAX_SCATTER, MJUMPAGESIZE, 0, NULL, NULL,
	    &ring->data_dma_tag);
	if (error != 0) {
		device_printf(sc->dev,
		    "could not create Tx data DMA tag\n");
		goto fail;
	}

	for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) {
		data = &ring->data[i];

		error = bus_dmamap_create(ring->data_dma_tag, 0,
		    &data->dma_map);
		if (error != 0) {
			device_printf(sc->dev, "could not create Tx data DMA "
			    "map\n");
			goto fail;
		}
	}

	ring->data_queued = 0;
	ring->data_cur = 0;
	ring->data_next = 0;

	ring->qid = qid;
	return (0);

fail:
	rt_free_tx_ring(sc, ring);
	return (error);
}

/*
 * rt_reset_tx_ring - reset TX ring buffer to empty state
 *
 * Zeroes the segment length fields of every descriptor, frees any mbufs
 * still attached to data slots and rewinds all ring indices and counts.
 */
static void
rt_reset_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring)
{
	struct rt_softc_tx_data *data;
	struct rt_txdesc *desc;
	int i;

	for (i = 0; i < RT_SOFTC_TX_RING_DESC_COUNT; i++) {
		desc = &ring->desc[i];

		desc->sdl0 = 0;
		desc->sdl1 = 0;
	}

	ring->desc_queued = 0;
	ring->desc_cur = 0;
	ring->desc_next = 0;

	bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map,
		BUS_DMASYNC_PREWRITE);

	bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map,
		BUS_DMASYNC_PREWRITE);

	/* Drop packets that were queued but never completed. */
	for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) {
		data = &ring->data[i];

		if (data->m != NULL) {
			bus_dmamap_sync(ring->data_dma_tag, data->dma_map,
				BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(ring->data_dma_tag, data->dma_map);
			m_freem(data->m);
			data->m = NULL;
		}
	}

	ring->data_queued = 0;
	ring->data_cur = 0;
	ring->data_next = 0;
}

/*
 * rt_free_tx_ring - free TX ring buffer
 *
 * Every DMA resource is checked for NULL before it is released, so this
 * also serves as the failure path of rt_alloc_tx_ring().  The ring mutex
 * is destroyed last.
 */
static void
rt_free_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring)
{
	struct rt_softc_tx_data *data;
	int i;

	if (ring->desc != NULL) {
		bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map,
			BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(ring->desc_dma_tag, ring->desc_dma_map);
		bus_dmamem_free(ring->desc_dma_tag, ring->desc,
			ring->desc_dma_map);
	}

	if (ring->desc_dma_tag != NULL)
		bus_dma_tag_destroy(ring->desc_dma_tag);

	if (ring->seg0 != NULL) {
		bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map,
			BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(ring->seg0_dma_tag, ring->seg0_dma_map);
		bus_dmamem_free(ring->seg0_dma_tag, ring->seg0,
			ring->seg0_dma_map);
	}

	if (ring->seg0_dma_tag != NULL)
		bus_dma_tag_destroy(ring->seg0_dma_tag);

	for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) {
		data = &ring->data[i];

		if (data->m != NULL) {
			bus_dmamap_sync(ring->data_dma_tag, data->dma_map,
				BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(ring->data_dma_tag, data->dma_map);
			m_freem(data->m);
		}

		if (data->dma_map != NULL)
			bus_dmamap_destroy(ring->data_dma_tag, data->dma_map);
	}

	if (ring->data_dma_tag != NULL)
		bus_dma_tag_destroy(ring->data_dma_tag);

	mtx_destroy(&ring->lock);
}

/*
 * rt_dma_map_addr - get address of busdma segment
 *
 * bus_dmamap_load() callback: on success stores the address of the
 * single segment into the bus_addr_t that `arg` points to.  Does nothing
 * when `error` is non-zero.
 */
static void
rt_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error != 0)
		return;

	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));

	*(bus_addr_t *) arg = segs[0].ds_addr;
}

/*
 * rt_sysctl_attach - attach sysctl nodes for NIC counters.
*/ static void rt_sysctl_attach(struct rt_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid *stats; ctx = device_get_sysctl_ctx(sc->dev); tree = device_get_sysctl_tree(sc->dev); /* statistic counters */ stats = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "stats", CTLFLAG_RD, 0, "statistic"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "interrupts", CTLFLAG_RD, &sc->interrupts, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "interrupts", CTLFLAG_RD, &sc->interrupts, "all interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_coherent_interrupts", CTLFLAG_RD, &sc->tx_coherent_interrupts, - 0, "Tx coherent interrupts"); + "Tx coherent interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_coherent_interrupts", CTLFLAG_RD, &sc->rx_coherent_interrupts, - 0, "Rx coherent interrupts"); + "Rx coherent interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "rx_interrupts", CTLFLAG_RD, &sc->rx_interrupts, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "rx_interrupts", CTLFLAG_RD, &sc->rx_interrupts, "Rx interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "rx_delay_interrupts", CTLFLAG_RD, &sc->rx_delay_interrupts, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "rx_delay_interrupts", CTLFLAG_RD, &sc->rx_delay_interrupts, "Rx delay interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "TXQ3_interrupts", CTLFLAG_RD, &sc->tx_interrupts[3], 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "TXQ3_interrupts", CTLFLAG_RD, &sc->tx_interrupts[3], "Tx AC3 interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "TXQ2_interrupts", CTLFLAG_RD, &sc->tx_interrupts[2], 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "TXQ2_interrupts", 
CTLFLAG_RD, &sc->tx_interrupts[2], "Tx AC2 interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "TXQ1_interrupts", CTLFLAG_RD, &sc->tx_interrupts[1], 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "TXQ1_interrupts", CTLFLAG_RD, &sc->tx_interrupts[1], "Tx AC1 interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "TXQ0_interrupts", CTLFLAG_RD, &sc->tx_interrupts[0], 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "TXQ0_interrupts", CTLFLAG_RD, &sc->tx_interrupts[0], "Tx AC0 interrupts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_delay_interrupts", CTLFLAG_RD, &sc->tx_delay_interrupts, - 0, "Tx delay interrupts"); + "Tx delay interrupts"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_desc_queued", CTLFLAG_RD, &sc->tx_ring[3].desc_queued, 0, "Tx AC3 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_data_queued", CTLFLAG_RD, &sc->tx_ring[3].data_queued, 0, "Tx AC3 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_desc_queued", CTLFLAG_RD, &sc->tx_ring[2].desc_queued, 0, "Tx AC2 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_data_queued", CTLFLAG_RD, &sc->tx_ring[2].data_queued, 0, "Tx AC2 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_desc_queued", CTLFLAG_RD, &sc->tx_ring[1].desc_queued, 0, "Tx AC1 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_data_queued", CTLFLAG_RD, &sc->tx_ring[1].data_queued, 0, "Tx AC1 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_desc_queued", CTLFLAG_RD, &sc->tx_ring[0].desc_queued, 0, "Tx AC0 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_data_queued", CTLFLAG_RD, &sc->tx_ring[0].data_queued, 0, "Tx AC0 data queued"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), 
OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[3], - 0, "Tx AC3 data queue full"); + "Tx AC3 data queue full"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[2], - 0, "Tx AC2 data queue full"); + "Tx AC2 data queue full"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[1], - 0, "Tx AC1 data queue full"); + "Tx AC1 data queue full"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[0], - 0, "Tx AC0 data queue full"); + "Tx AC0 data queue full"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_watchdog_timeouts", CTLFLAG_RD, &sc->tx_watchdog_timeouts, - 0, "Tx watchdog timeouts"); + "Tx watchdog timeouts"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "tx_defrag_packets", CTLFLAG_RD, &sc->tx_defrag_packets, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "tx_defrag_packets", CTLFLAG_RD, &sc->tx_defrag_packets, "Tx defragmented packets"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "no_tx_desc_avail", CTLFLAG_RD, &sc->no_tx_desc_avail, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "no_tx_desc_avail", CTLFLAG_RD, &sc->no_tx_desc_avail, "no Tx descriptors available"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_mbuf_alloc_errors", CTLFLAG_RD, &sc->rx_mbuf_alloc_errors, - 0, "Rx mbuf allocation errors"); + "Rx mbuf allocation errors"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, 
SYSCTL_CHILDREN(stats), OID_AUTO, "rx_mbuf_dmamap_errors", CTLFLAG_RD, &sc->rx_mbuf_dmamap_errors, - 0, "Rx mbuf DMA mapping errors"); + "Rx mbuf DMA mapping errors"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_queue_0_not_empty", CTLFLAG_RD, &sc->tx_queue_not_empty[0], - 0, "Tx queue 0 not empty"); + "Tx queue 0 not empty"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_queue_1_not_empty", CTLFLAG_RD, &sc->tx_queue_not_empty[1], - 0, "Tx queue 1 not empty"); + "Tx queue 1 not empty"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "rx_packets", CTLFLAG_RD, &sc->rx_packets, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "rx_packets", CTLFLAG_RD, &sc->rx_packets, "Rx packets"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "rx_crc_errors", CTLFLAG_RD, &sc->rx_crc_err, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "rx_crc_errors", CTLFLAG_RD, &sc->rx_crc_err, "Rx CRC errors"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "rx_phy_errors", CTLFLAG_RD, &sc->rx_phy_err, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "rx_phy_errors", CTLFLAG_RD, &sc->rx_phy_err, "Rx PHY errors"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "rx_dup_packets", CTLFLAG_RD, &sc->rx_dup_packets, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "rx_dup_packets", CTLFLAG_RD, &sc->rx_dup_packets, "Rx duplicate packets"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "rx_fifo_overflows", CTLFLAG_RD, &sc->rx_fifo_overflows, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "rx_fifo_overflows", CTLFLAG_RD, &sc->rx_fifo_overflows, "Rx FIFO overflows"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, - "rx_bytes", CTLFLAG_RD, &sc->rx_bytes, 0, + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, + "rx_bytes", 
#ifdef IF_RT_PHY_SUPPORT
/*
 * rt_miibus_readreg - miibus read method: fetch one PHY register through
 * the MDIO_ACCESS register.
 *
 * dev: the rt device; phy: PHY address; reg: PHY register number.
 * Returns the MDIO_ACCESS contents masked with MDIO_PHY_DATA_MASK, or a
 * canned value for the pseudo PHY at address 31.
 */
static int
rt_miibus_readreg(device_t dev, int phy, int reg)
{
	struct rt_softc *sc = device_get_softc(dev);

	/*
	 * PSEUDO_PHYAD is a special value indicating that a switch is
	 * attached; no real PHY uses that address.
	 * NOTE(review): the original comment names 0x1e while the code
	 * compares against 31 (0x1f) - confirm which one is intended.
	 */
	if (phy == 31) {
		/* Fake PHY ID for bfeswitch attach */
		switch (reg) {
		case MII_BMSR:
			return (BMSR_EXTSTAT|BMSR_MEDIAMASK);
		case MII_PHYIDR1:
			return (0x40);		/* As result of faking */
		case MII_PHYIDR2:		/* PHY will detect as */
			return (0x6250);	/* bfeswitch */
		}
	}

	/* Wait for a previous command, if any, to finish. */
	while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO)
		;
	/*
	 * The command word is a bit field, so its parts must be combined
	 * with bitwise OR.  Logical OR would collapse the whole expression
	 * to 0 or 1 and drop the PHY and register addresses.
	 */
	RT_WRITE(sc, MDIO_ACCESS,
	    MDIO_CMD_ONGO |
	    ((phy << MDIO_PHY_ADDR_SHIFT) & MDIO_PHY_ADDR_MASK) |
	    ((reg << MDIO_PHYREG_ADDR_SHIFT) & MDIO_PHYREG_ADDR_MASK));
	/* Busy-wait until the controller clears the ongoing bit. */
	while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO)
		;

	return (RT_READ(sc, MDIO_ACCESS) & MDIO_PHY_DATA_MASK);
}

/*
 * rt_miibus_writereg - miibus write method: store val into one PHY
 * register through the MDIO_ACCESS register.
 *
 * Always returns 0; completion is detected by polling MDIO_CMD_ONGO.
 */
static int
rt_miibus_writereg(device_t dev, int phy, int reg, int val)
{
	struct rt_softc *sc = device_get_softc(dev);

	/* Wait for a previous command, if any, to finish. */
	while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO)
		;
	/* Bitwise OR: see rt_miibus_readreg() for why logical OR is wrong. */
	RT_WRITE(sc, MDIO_ACCESS,
	    MDIO_CMD_ONGO | MDIO_CMD_WR |
	    ((phy << MDIO_PHY_ADDR_SHIFT) & MDIO_PHY_ADDR_MASK) |
	    ((reg << MDIO_PHYREG_ADDR_SHIFT) & MDIO_PHYREG_ADDR_MASK) |
	    (val & MDIO_PHY_DATA_MASK));
	while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO)
		;

	return (0);
}

/*
 * rt_miibus_statchg - miibus status-change method.
 *
 * When the media status reports both IFM_ACTIVE and IFM_AVALID and the
 * active subtype is 10baseT or 100baseTX, bit 0 of sc->flags is set.
 */
void
rt_miibus_statchg(device_t dev)
{
	struct rt_softc *sc = device_get_softc(dev);
	struct mii_data *mii;

	mii = device_get_softc(sc->rt_miibus);

	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
	    (IFM_ACTIVE | IFM_AVALID)) {
		switch (IFM_SUBTYPE(mii->mii_media_active)) {
		case IFM_10_T:
		case IFM_100_TX:
			/* XXX check link here */
			sc->flags |= 1;
			break;
		default:
			break;
		}
	}
}
#endif /* IF_RT_PHY_SUPPORT */
rt_driver = { "rt", rt_dev_methods, sizeof(struct rt_softc) }; static devclass_t rt_dev_class; DRIVER_MODULE(rt, nexus, rt_driver, rt_dev_class, 0, 0); MODULE_DEPEND(rt, ether, 1, 1, 1); MODULE_DEPEND(rt, miibus, 1, 1, 1); Index: stable/9/sys/dev/sound/pci/hda/hdaa.c =================================================================== --- stable/9/sys/dev/sound/pci/hda/hdaa.c (revision 273911) +++ stable/9/sys/dev/sound/pci/hda/hdaa.c (revision 273912) @@ -1,6799 +1,6799 @@ /*- * Copyright (c) 2006 Stephane E. Potvin * Copyright (c) 2006 Ariff Abdullah * Copyright (c) 2008-2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Intel High Definition Audio (Audio function) driver for FreeBSD. 
/*
 * Printable names for the 6-bit pin location value, indexed by that value.
 * Locations without a symbolic name use their own hex index as the name so
 * that a name printed by the driver can be parsed back to the same index.
 * The entry for 0x27 previously read "0x07", which broke that round trip.
 */
static const char *HDA_LOCS[64] = {
	/* 0x00 - 0x0f */
	"0x00", "Rear", "Front", "Left", "Right", "Top", "Bottom", "Rear-panel",
	"Drive-bay", "0x09", "0x0a", "0x0b", "0x0c", "0x0d", "0x0e", "0x0f",
	/* 0x10 - 0x1f */
	"Internal", "0x11", "0x12", "0x13", "0x14", "0x15", "0x16", "Riser",
	"0x18", "Onboard", "0x1a", "0x1b", "0x1c", "0x1d", "0x1e", "0x1f",
	/* 0x20 - 0x2f */
	"External", "Ext-Rear", "Ext-Front", "Ext-Left",
	"Ext-Right", "Ext-Top", "Ext-Bottom", "0x27",
	"0x28", "0x29", "0x2a", "0x2b", "0x2c", "0x2d", "0x2e", "0x2f",
	/* 0x30 - 0x3f */
	"Other", "0x31", "0x32", "0x33", "0x34", "0x35", "Other-Bott", "Lid-In",
	"Lid-Out", "0x39", "0x3a", "0x3b", "0x3c", "0x3d", "0x3e", "0x3f" };
/ 5 */ { 11025, 1, 0x4000, 0x0000, 0x0300 }, /* (44100 * 1) / 4 */ { 12600, 0, 0x4000, 0x0800, 0x0600 }, /* (44100 * 2) / 7 */ { 14700, 0, 0x4000, 0x0000, 0x0200 }, /* (44100 * 1) / 3 */ { 17640, 0, 0x4000, 0x0800, 0x0400 }, /* (44100 * 2) / 5 */ { 18900, 0, 0x4000, 0x1000, 0x0600 }, /* (44100 * 3) / 7 */ { 22050, 1, 0x4000, 0x0000, 0x0100 }, /* (44100 * 1) / 2 */ { 25200, 0, 0x4000, 0x1800, 0x0600 }, /* (44100 * 4) / 7 */ { 26460, 0, 0x4000, 0x1000, 0x0400 }, /* (44100 * 3) / 5 */ { 29400, 0, 0x4000, 0x0800, 0x0200 }, /* (44100 * 2) / 3 */ { 33075, 0, 0x4000, 0x1000, 0x0300 }, /* (44100 * 3) / 4 */ { 35280, 0, 0x4000, 0x1800, 0x0400 }, /* (44100 * 4) / 5 */ { 44100, 1, 0x4000, 0x0000, 0x0000 }, /* (44100 * 1) / 1 */ { 58800, 0, 0x4000, 0x1800, 0x0200 }, /* (44100 * 4) / 3 */ { 66150, 0, 0x4000, 0x1000, 0x0100 }, /* (44100 * 3) / 2 */ { 88200, 1, 0x4000, 0x0800, 0x0000 }, /* (44100 * 2) / 1 */ { 132300, 0, 0x4000, 0x1000, 0x0000 }, /* (44100 * 3) / 1 */ { 176400, 1, 0x4000, 0x1800, 0x0000 }, /* (44100 * 4) / 1 */ }; #define HDA_RATE_TAB_LEN (sizeof(hda_rate_tab) / sizeof(hda_rate_tab[0])) const static char *ossnames[] = SOUND_DEVICE_NAMES; /**************************************************************************** * Function prototypes ****************************************************************************/ static int hdaa_pcmchannel_setup(struct hdaa_chan *); static void hdaa_widget_connection_select(struct hdaa_widget *, uint8_t); static void hdaa_audio_ctl_amp_set(struct hdaa_audio_ctl *, uint32_t, int, int); static struct hdaa_audio_ctl *hdaa_audio_ctl_amp_get(struct hdaa_devinfo *, nid_t, int, int, int); static void hdaa_audio_ctl_amp_set_internal(struct hdaa_devinfo *, nid_t, int, int, int, int, int, int); static void hdaa_dump_pin_config(struct hdaa_widget *w, uint32_t conf); static char * hdaa_audio_ctl_ossmixer_mask2allname(uint32_t mask, char *buf, size_t len) { int i, first = 1; bzero(buf, len); for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if 
(mask & (1 << i)) { if (first == 0) strlcat(buf, ", ", len); strlcat(buf, ossnames[i], len); first = 0; } } return (buf); } static struct hdaa_audio_ctl * hdaa_audio_ctl_each(struct hdaa_devinfo *devinfo, int *index) { if (devinfo == NULL || index == NULL || devinfo->ctl == NULL || devinfo->ctlcnt < 1 || *index < 0 || *index >= devinfo->ctlcnt) return (NULL); return (&devinfo->ctl[(*index)++]); } static struct hdaa_audio_ctl * hdaa_audio_ctl_amp_get(struct hdaa_devinfo *devinfo, nid_t nid, int dir, int index, int cnt) { struct hdaa_audio_ctl *ctl; int i, found = 0; if (devinfo == NULL || devinfo->ctl == NULL) return (NULL); i = 0; while ((ctl = hdaa_audio_ctl_each(devinfo, &i)) != NULL) { if (ctl->enable == 0) continue; if (ctl->widget->nid != nid) continue; if (dir && ctl->ndir != dir) continue; if (index >= 0 && ctl->ndir == HDAA_CTL_IN && ctl->dir == ctl->ndir && ctl->index != index) continue; found++; if (found == cnt || cnt <= 0) return (ctl); } return (NULL); } /* * Headphones redirection change handler. */ static void hdaa_hpredir_handler(struct hdaa_widget *w) { struct hdaa_devinfo *devinfo = w->devinfo; struct hdaa_audio_as *as = &devinfo->as[w->bindas]; struct hdaa_widget *w1; struct hdaa_audio_ctl *ctl; uint32_t val; int j, connected = w->wclass.pin.connected; HDA_BOOTVERBOSE( device_printf((as->pdevinfo && as->pdevinfo->dev) ? as->pdevinfo->dev : devinfo->dev, "Redirect output to: %s\n", connected ? "headphones": "main"); ); /* (Un)Mute headphone pin. */ ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_IN, -1, 1); if (ctl != NULL && ctl->mute) { /* If pin has muter - use it. */ val = connected ? 0 : 1; if (val != ctl->forcemute) { ctl->forcemute = val; hdaa_audio_ctl_amp_set(ctl, HDAA_AMP_MUTE_DEFAULT, HDAA_AMP_VOL_DEFAULT, HDAA_AMP_VOL_DEFAULT); } } else { /* If there is no muter - disable pin output. 
*/ if (connected) val = w->wclass.pin.ctrl | HDA_CMD_SET_PIN_WIDGET_CTRL_OUT_ENABLE; else val = w->wclass.pin.ctrl & ~HDA_CMD_SET_PIN_WIDGET_CTRL_OUT_ENABLE; if (val != w->wclass.pin.ctrl) { w->wclass.pin.ctrl = val; hda_command(devinfo->dev, HDA_CMD_SET_PIN_WIDGET_CTRL(0, w->nid, w->wclass.pin.ctrl)); } } /* (Un)Mute other pins. */ for (j = 0; j < 15; j++) { if (as->pins[j] <= 0) continue; ctl = hdaa_audio_ctl_amp_get(devinfo, as->pins[j], HDAA_CTL_IN, -1, 1); if (ctl != NULL && ctl->mute) { /* If pin has muter - use it. */ val = connected ? 1 : 0; if (val == ctl->forcemute) continue; ctl->forcemute = val; hdaa_audio_ctl_amp_set(ctl, HDAA_AMP_MUTE_DEFAULT, HDAA_AMP_VOL_DEFAULT, HDAA_AMP_VOL_DEFAULT); continue; } /* If there is no muter - disable pin output. */ w1 = hdaa_widget_get(devinfo, as->pins[j]); if (w1 != NULL) { if (connected) val = w1->wclass.pin.ctrl & ~HDA_CMD_SET_PIN_WIDGET_CTRL_OUT_ENABLE; else val = w1->wclass.pin.ctrl | HDA_CMD_SET_PIN_WIDGET_CTRL_OUT_ENABLE; if (val != w1->wclass.pin.ctrl) { w1->wclass.pin.ctrl = val; hda_command(devinfo->dev, HDA_CMD_SET_PIN_WIDGET_CTRL(0, w1->nid, w1->wclass.pin.ctrl)); } } } } /* * Recording source change handler. */ static void hdaa_autorecsrc_handler(struct hdaa_audio_as *as, struct hdaa_widget *w) { struct hdaa_pcm_devinfo *pdevinfo = as->pdevinfo; struct hdaa_devinfo *devinfo; struct hdaa_widget *w1; int i, mask, fullmask, prio, bestprio; char buf[128]; if (!as->mixed || pdevinfo == NULL || pdevinfo->mixer == NULL) return; /* Don't touch anything if we asked not to. */ if (pdevinfo->autorecsrc == 0 || (pdevinfo->autorecsrc == 1 && w != NULL)) return; /* Don't touch anything if "mix" or "speaker" selected. */ if (pdevinfo->recsrc & (SOUND_MASK_IMIX | SOUND_MASK_SPEAKER)) return; /* Don't touch anything if several selected. 
*/ if (ffs(pdevinfo->recsrc) != fls(pdevinfo->recsrc)) return; devinfo = pdevinfo->devinfo; mask = fullmask = 0; bestprio = 0; for (i = 0; i < 16; i++) { if (as->pins[i] <= 0) continue; w1 = hdaa_widget_get(devinfo, as->pins[i]); if (w1 == NULL || w1->enable == 0) continue; if (w1->wclass.pin.connected == 0) continue; prio = (w1->wclass.pin.connected == 1) ? 2 : 1; if (prio < bestprio) continue; if (prio > bestprio) { mask = 0; bestprio = prio; } mask |= (1 << w1->ossdev); fullmask |= (1 << w1->ossdev); } if (mask == 0) return; /* Prefer newly connected input. */ if (w != NULL && (mask & (1 << w->ossdev))) mask = (1 << w->ossdev); /* Prefer previously selected input */ if (mask & pdevinfo->recsrc) mask &= pdevinfo->recsrc; /* Prefer mic. */ if (mask & SOUND_MASK_MIC) mask = SOUND_MASK_MIC; /* Prefer monitor (2nd mic). */ if (mask & SOUND_MASK_MONITOR) mask = SOUND_MASK_MONITOR; /* Just take first one. */ mask = (1 << (ffs(mask) - 1)); HDA_BOOTVERBOSE( hdaa_audio_ctl_ossmixer_mask2allname(mask, buf, sizeof(buf)); device_printf(pdevinfo->dev, "Automatically set rec source to: %s\n", buf); ); hdaa_unlock(devinfo); mix_setrecsrc(pdevinfo->mixer, mask); hdaa_lock(devinfo); } /* * Jack presence detection event handler. 
*/ static void hdaa_presence_handler(struct hdaa_widget *w) { struct hdaa_devinfo *devinfo = w->devinfo; struct hdaa_audio_as *as; uint32_t res; int connected, old; if (w->enable == 0 || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) return; if (HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP(w->wclass.pin.cap) == 0 || (HDA_CONFIG_DEFAULTCONF_MISC(w->wclass.pin.config) & 1) != 0) return; res = hda_command(devinfo->dev, HDA_CMD_GET_PIN_SENSE(0, w->nid)); connected = (res & HDA_CMD_GET_PIN_SENSE_PRESENCE_DETECT) != 0; if (devinfo->quirks & HDAA_QUIRK_SENSEINV) connected = !connected; old = w->wclass.pin.connected; if (connected == old) return; w->wclass.pin.connected = connected; HDA_BOOTVERBOSE( if (connected || old != 2) { device_printf(devinfo->dev, "Pin sense: nid=%d sence=0x%08x (%sconnected)\n", w->nid, res, !connected ? "dis" : ""); } ); as = &devinfo->as[w->bindas]; if (as->hpredir >= 0 && as->pins[15] == w->nid) hdaa_hpredir_handler(w); if (as->dir == HDAA_CTL_IN && old != 2) hdaa_autorecsrc_handler(as, w); } /* * Callback for poll based presence detection. 
*/ static void hdaa_jack_poll_callback(void *arg) { struct hdaa_devinfo *devinfo = arg; struct hdaa_widget *w; int i; hdaa_lock(devinfo); if (devinfo->poll_ival == 0) { hdaa_unlock(devinfo); return; } for (i = 0; i < devinfo->ascnt; i++) { if (devinfo->as[i].hpredir < 0) continue; w = hdaa_widget_get(devinfo, devinfo->as[i].pins[15]); if (w == NULL || w->enable == 0 || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; hdaa_presence_handler(w); } callout_reset(&devinfo->poll_jack, devinfo->poll_ival, hdaa_jack_poll_callback, devinfo); hdaa_unlock(devinfo); } static void hdaa_eld_dump(struct hdaa_widget *w) { struct hdaa_devinfo *devinfo = w->devinfo; device_t dev = devinfo->dev; uint8_t *sad; int len, mnl, i, sadc, fmt; if (w->eld == NULL || w->eld_len < 4) return; device_printf(dev, "ELD nid=%d: ELD_Ver=%u Baseline_ELD_Len=%u\n", w->nid, w->eld[0] >> 3, w->eld[2]); if ((w->eld[0] >> 3) != 0x02) return; len = min(w->eld_len, (u_int)w->eld[2] * 4); mnl = w->eld[4] & 0x1f; device_printf(dev, "ELD nid=%d: CEA_EDID_Ver=%u MNL=%u\n", w->nid, w->eld[4] >> 5, mnl); sadc = w->eld[5] >> 4; device_printf(dev, "ELD nid=%d: SAD_Count=%u Conn_Type=%u S_AI=%u HDCP=%u\n", w->nid, sadc, (w->eld[5] >> 2) & 0x3, (w->eld[5] >> 1) & 0x1, w->eld[5] & 0x1); device_printf(dev, "ELD nid=%d: Aud_Synch_Delay=%ums\n", w->nid, w->eld[6] * 2); device_printf(dev, "ELD nid=%d: Channels=0x%b\n", w->nid, w->eld[7], "\020\07RLRC\06FLRC\05RC\04RLR\03FC\02LFE\01FLR"); device_printf(dev, "ELD nid=%d: Port_ID=0x%02x%02x%02x%02x%02x%02x%02x%02x\n", w->nid, w->eld[8], w->eld[9], w->eld[10], w->eld[11], w->eld[12], w->eld[13], w->eld[14], w->eld[15]); device_printf(dev, "ELD nid=%d: Manufacturer_Name=0x%02x%02x\n", w->nid, w->eld[16], w->eld[17]); device_printf(dev, "ELD nid=%d: Product_Code=0x%02x%02x\n", w->nid, w->eld[18], w->eld[19]); device_printf(dev, "ELD nid=%d: Monitor_Name_String='%.*s'\n", w->nid, mnl, &w->eld[20]); for (i = 0; i < sadc; i++) { sad = &w->eld[20 + mnl + i * 3]; 
fmt = (sad[0] >> 3) & 0x0f; if (fmt == HDA_HDMI_CODING_TYPE_REF_CTX) { fmt = (sad[2] >> 3) & 0x1f; if (fmt < 1 || fmt > 3) fmt = 0; else fmt += 14; } device_printf(dev, "ELD nid=%d: %s %dch freqs=0x%b", w->nid, HDA_HDMI_CODING_TYPES[fmt], (sad[0] & 0x07) + 1, sad[1], "\020\007192\006176\00596\00488\00348\00244\00132"); switch (fmt) { case HDA_HDMI_CODING_TYPE_LPCM: printf(" sizes=0x%b", sad[2] & 0x07, "\020\00324\00220\00116"); break; case HDA_HDMI_CODING_TYPE_AC3: case HDA_HDMI_CODING_TYPE_MPEG1: case HDA_HDMI_CODING_TYPE_MP3: case HDA_HDMI_CODING_TYPE_MPEG2: case HDA_HDMI_CODING_TYPE_AACLC: case HDA_HDMI_CODING_TYPE_DTS: case HDA_HDMI_CODING_TYPE_ATRAC: printf(" max_bitrate=%d", sad[2] * 8000); break; case HDA_HDMI_CODING_TYPE_WMAPRO: printf(" profile=%d", sad[2] & 0x07); break; } printf("\n"); } } static void hdaa_eld_handler(struct hdaa_widget *w) { struct hdaa_devinfo *devinfo = w->devinfo; uint32_t res; int i; if (w->enable == 0 || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) return; if (HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP(w->wclass.pin.cap) == 0 || (HDA_CONFIG_DEFAULTCONF_MISC(w->wclass.pin.config) & 1) != 0) return; res = hda_command(devinfo->dev, HDA_CMD_GET_PIN_SENSE(0, w->nid)); if ((w->eld != 0) == ((res & HDA_CMD_GET_PIN_SENSE_ELD_VALID) != 0)) return; if (w->eld != NULL) { w->eld_len = 0; free(w->eld, M_HDAA); w->eld = NULL; } HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Pin sense: nid=%d sence=0x%08x " "(%sconnected, ELD %svalid)\n", w->nid, res, (res & HDA_CMD_GET_PIN_SENSE_PRESENCE_DETECT) ? "" : "dis", (res & HDA_CMD_GET_PIN_SENSE_ELD_VALID) ? 
"" : "in"); ); if ((res & HDA_CMD_GET_PIN_SENSE_ELD_VALID) == 0) return; res = hda_command(devinfo->dev, HDA_CMD_GET_HDMI_DIP_SIZE(0, w->nid, 0x08)); if (res == HDA_INVALID) return; w->eld_len = res & 0xff; if (w->eld_len != 0) w->eld = malloc(w->eld_len, M_HDAA, M_ZERO | M_NOWAIT); if (w->eld == NULL) { w->eld_len = 0; return; } for (i = 0; i < w->eld_len; i++) { res = hda_command(devinfo->dev, HDA_CMD_GET_HDMI_ELDD(0, w->nid, i)); if (res & 0x80000000) w->eld[i] = res & 0xff; } HDA_BOOTVERBOSE( hdaa_eld_dump(w); ); } /* * Pin sense initializer. */ static void hdaa_sense_init(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as; struct hdaa_widget *w; int i, poll = 0; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0 || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; if (HDA_PARAM_AUDIO_WIDGET_CAP_UNSOL_CAP(w->param.widget_cap)) { if (w->unsol < 0) w->unsol = HDAC_UNSOL_ALLOC( device_get_parent(devinfo->dev), devinfo->dev, w->nid); hda_command(devinfo->dev, HDA_CMD_SET_UNSOLICITED_RESPONSE(0, w->nid, HDA_CMD_SET_UNSOLICITED_RESPONSE_ENABLE | w->unsol)); } as = &devinfo->as[w->bindas]; if (as->hpredir >= 0 && as->pins[15] == w->nid) { if (HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP(w->wclass.pin.cap) == 0 || (HDA_CONFIG_DEFAULTCONF_MISC(w->wclass.pin.config) & 1) != 0) { device_printf(devinfo->dev, "No presence detection support at nid %d\n", w->nid); } else { if (w->unsol < 0) poll = 1; HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Headphones redirection for " "association %d nid=%d using %s.\n", w->bindas, w->nid, (w->unsol < 0) ? 
"polling" : "unsolicited responses"); ); }; } hdaa_presence_handler(w); if (!HDA_PARAM_PIN_CAP_DP(w->wclass.pin.cap) && !HDA_PARAM_PIN_CAP_HDMI(w->wclass.pin.cap)) continue; hdaa_eld_handler(w); } if (poll) { callout_reset(&devinfo->poll_jack, 1, hdaa_jack_poll_callback, devinfo); } } static void hdaa_sense_deinit(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w; int i; callout_stop(&devinfo->poll_jack); for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0 || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; if (w->unsol < 0) continue; hda_command(devinfo->dev, HDA_CMD_SET_UNSOLICITED_RESPONSE(0, w->nid, 0)); HDAC_UNSOL_FREE( device_get_parent(devinfo->dev), devinfo->dev, w->unsol); w->unsol = -1; } } uint32_t hdaa_widget_pin_patch(uint32_t config, const char *str) { char buf[256]; char *key, *value, *rest, *bad; int ival, i; strlcpy(buf, str, sizeof(buf)); rest = buf; while ((key = strsep(&rest, "=")) != NULL) { value = strsep(&rest, " \t"); if (value == NULL) break; ival = strtol(value, &bad, 10); if (strcmp(key, "seq") == 0) { config &= ~HDA_CONFIG_DEFAULTCONF_SEQUENCE_MASK; config |= ((ival << HDA_CONFIG_DEFAULTCONF_SEQUENCE_SHIFT) & HDA_CONFIG_DEFAULTCONF_SEQUENCE_MASK); } else if (strcmp(key, "as") == 0) { config &= ~HDA_CONFIG_DEFAULTCONF_ASSOCIATION_MASK; config |= ((ival << HDA_CONFIG_DEFAULTCONF_ASSOCIATION_SHIFT) & HDA_CONFIG_DEFAULTCONF_ASSOCIATION_MASK); } else if (strcmp(key, "misc") == 0) { config &= ~HDA_CONFIG_DEFAULTCONF_MISC_MASK; config |= ((ival << HDA_CONFIG_DEFAULTCONF_MISC_SHIFT) & HDA_CONFIG_DEFAULTCONF_MISC_MASK); } else if (strcmp(key, "color") == 0) { config &= ~HDA_CONFIG_DEFAULTCONF_COLOR_MASK; if (bad[0] == 0) { config |= ((ival << HDA_CONFIG_DEFAULTCONF_COLOR_SHIFT) & HDA_CONFIG_DEFAULTCONF_COLOR_MASK); }; for (i = 0; i < 16; i++) { if (strcasecmp(HDA_COLORS[i], value) == 0) { config |= (i << HDA_CONFIG_DEFAULTCONF_COLOR_SHIFT); break; } } } 
else if (strcmp(key, "ctype") == 0) { config &= ~HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE_MASK; if (bad[0] == 0) { config |= ((ival << HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE_SHIFT) & HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE_MASK); } for (i = 0; i < 16; i++) { if (strcasecmp(HDA_CONNECTORS[i], value) == 0) { config |= (i << HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE_SHIFT); break; } } } else if (strcmp(key, "device") == 0) { config &= ~HDA_CONFIG_DEFAULTCONF_DEVICE_MASK; if (bad[0] == 0) { config |= ((ival << HDA_CONFIG_DEFAULTCONF_DEVICE_SHIFT) & HDA_CONFIG_DEFAULTCONF_DEVICE_MASK); continue; }; for (i = 0; i < 16; i++) { if (strcasecmp(HDA_DEVS[i], value) == 0) { config |= (i << HDA_CONFIG_DEFAULTCONF_DEVICE_SHIFT); break; } } } else if (strcmp(key, "loc") == 0) { config &= ~HDA_CONFIG_DEFAULTCONF_LOCATION_MASK; if (bad[0] == 0) { config |= ((ival << HDA_CONFIG_DEFAULTCONF_LOCATION_SHIFT) & HDA_CONFIG_DEFAULTCONF_LOCATION_MASK); continue; } for (i = 0; i < 64; i++) { if (strcasecmp(HDA_LOCS[i], value) == 0) { config |= (i << HDA_CONFIG_DEFAULTCONF_LOCATION_SHIFT); break; } } } else if (strcmp(key, "conn") == 0) { config &= ~HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_MASK; if (bad[0] == 0) { config |= ((ival << HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_SHIFT) & HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_MASK); continue; }; for (i = 0; i < 4; i++) { if (strcasecmp(HDA_CONNS[i], value) == 0) { config |= (i << HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_SHIFT); break; } } } } return (config); } uint32_t hdaa_gpio_patch(uint32_t gpio, const char *str) { char buf[256]; char *key, *value, *rest; int ikey, i; strlcpy(buf, str, sizeof(buf)); rest = buf; while ((key = strsep(&rest, "=")) != NULL) { value = strsep(&rest, " \t"); if (value == NULL) break; ikey = strtol(key, NULL, 10); if (ikey < 0 || ikey > 7) continue; for (i = 0; i < 7; i++) { if (strcasecmp(HDA_GPIO_ACTIONS[i], value) == 0) { gpio &= ~HDAA_GPIO_MASK(ikey); gpio |= i << HDAA_GPIO_SHIFT(ikey); break; } } } return (gpio); } static void 
hdaa_local_patch_pin(struct hdaa_widget *w) { device_t dev = w->devinfo->dev; const char *res = NULL; uint32_t config, orig; char buf[32]; config = orig = w->wclass.pin.config; snprintf(buf, sizeof(buf), "cad%u.nid%u.config", hda_get_codec_id(dev), w->nid); if (resource_string_value(device_get_name( device_get_parent(device_get_parent(dev))), device_get_unit(device_get_parent(device_get_parent(dev))), buf, &res) == 0) { if (strncmp(res, "0x", 2) == 0) { config = strtol(res + 2, NULL, 16); } else { config = hdaa_widget_pin_patch(config, res); } } snprintf(buf, sizeof(buf), "nid%u.config", w->nid); if (resource_string_value(device_get_name(dev), device_get_unit(dev), buf, &res) == 0) { if (strncmp(res, "0x", 2) == 0) { config = strtol(res + 2, NULL, 16); } else { config = hdaa_widget_pin_patch(config, res); } } HDA_BOOTVERBOSE( if (config != orig) device_printf(w->devinfo->dev, "Patching pin config nid=%u 0x%08x -> 0x%08x\n", w->nid, orig, config); ); w->wclass.pin.newconf = w->wclass.pin.config = config; } static int hdaa_sysctl_config(SYSCTL_HANDLER_ARGS) { char buf[256]; int error; uint32_t conf; conf = *(uint32_t *)oidp->oid_arg1; snprintf(buf, sizeof(buf), "0x%08x as=%d seq=%d " "device=%s conn=%s ctype=%s loc=%s color=%s misc=%d", conf, HDA_CONFIG_DEFAULTCONF_ASSOCIATION(conf), HDA_CONFIG_DEFAULTCONF_SEQUENCE(conf), HDA_DEVS[HDA_CONFIG_DEFAULTCONF_DEVICE(conf)], HDA_CONNS[HDA_CONFIG_DEFAULTCONF_CONNECTIVITY(conf)], HDA_CONNECTORS[HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE(conf)], HDA_LOCS[HDA_CONFIG_DEFAULTCONF_LOCATION(conf)], HDA_COLORS[HDA_CONFIG_DEFAULTCONF_COLOR(conf)], HDA_CONFIG_DEFAULTCONF_MISC(conf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); if (strncmp(buf, "0x", 2) == 0) conf = strtol(buf + 2, NULL, 16); else conf = hdaa_widget_pin_patch(conf, buf); *(uint32_t *)oidp->oid_arg1 = conf; return (0); } static void hdaa_config_fetch(const char *str, uint32_t *on, uint32_t *off) { int i = 
0, j, k, len, inv; for (;;) { while (str[i] != '\0' && (str[i] == ',' || isspace(str[i]) != 0)) i++; if (str[i] == '\0') return; j = i; while (str[j] != '\0' && !(str[j] == ',' || isspace(str[j]) != 0)) j++; len = j - i; if (len > 2 && strncmp(str + i, "no", 2) == 0) inv = 2; else inv = 0; for (k = 0; len > inv && k < nitems(hdaa_quirks_tab); k++) { if (strncmp(str + i + inv, hdaa_quirks_tab[k].key, len - inv) != 0) continue; if (len - inv != strlen(hdaa_quirks_tab[k].key)) continue; if (inv == 0) { *on |= hdaa_quirks_tab[k].value; *off &= ~hdaa_quirks_tab[k].value; } else { *off |= hdaa_quirks_tab[k].value; *on &= ~hdaa_quirks_tab[k].value; } break; } i = j; } } static int hdaa_sysctl_quirks(SYSCTL_HANDLER_ARGS) { char buf[256]; int error, n = 0, i; uint32_t quirks, quirks_off; quirks = *(uint32_t *)oidp->oid_arg1; buf[0] = 0; for (i = 0; i < nitems(hdaa_quirks_tab); i++) { if ((quirks & hdaa_quirks_tab[i].value) != 0) n += snprintf(buf + n, sizeof(buf) - n, "%s%s", n != 0 ? "," : "", hdaa_quirks_tab[i].key); } error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); if (strncmp(buf, "0x", 2) == 0) quirks = strtol(buf + 2, NULL, 16); else { quirks = 0; hdaa_config_fetch(buf, &quirks, &quirks_off); } *(uint32_t *)oidp->oid_arg1 = quirks; return (0); } static void hdaa_local_patch(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w; const char *res = NULL; uint32_t quirks_on = 0, quirks_off = 0, x; int i; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL) continue; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) hdaa_local_patch_pin(w); } if (resource_string_value(device_get_name(devinfo->dev), device_get_unit(devinfo->dev), "config", &res) == 0) { if (res != NULL && strlen(res) > 0) hdaa_config_fetch(res, &quirks_on, &quirks_off); devinfo->quirks |= quirks_on; devinfo->quirks &= ~quirks_off; } if (devinfo->newquirks == -1) devinfo->newquirks 
= devinfo->quirks; else devinfo->quirks = devinfo->newquirks; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, "Config options: 0x%08x\n", devinfo->quirks); ); if (resource_string_value(device_get_name(devinfo->dev), device_get_unit(devinfo->dev), "gpio_config", &res) == 0) { if (strncmp(res, "0x", 2) == 0) { devinfo->gpio = strtol(res + 2, NULL, 16); } else { devinfo->gpio = hdaa_gpio_patch(devinfo->gpio, res); } } if (devinfo->newgpio == -1) devinfo->newgpio = devinfo->gpio; else devinfo->gpio = devinfo->newgpio; if (devinfo->newgpo == -1) devinfo->newgpo = devinfo->gpo; else devinfo->gpo = devinfo->newgpo; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, "GPIO config options:"); for (i = 0; i < 7; i++) { x = (devinfo->gpio & HDAA_GPIO_MASK(i)) >> HDAA_GPIO_SHIFT(i); if (x != 0) printf(" %d=%s", i, HDA_GPIO_ACTIONS[x]); } printf("\n"); ); } static void hdaa_widget_connection_parse(struct hdaa_widget *w) { uint32_t res; int i, j, max, ents, entnum; nid_t nid = w->nid; nid_t cnid, addcnid, prevcnid; w->nconns = 0; res = hda_command(w->devinfo->dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_CONN_LIST_LENGTH)); ents = HDA_PARAM_CONN_LIST_LENGTH_LIST_LENGTH(res); if (ents < 1) return; entnum = HDA_PARAM_CONN_LIST_LENGTH_LONG_FORM(res) ? 
2 : 4; max = (sizeof(w->conns) / sizeof(w->conns[0])) - 1; prevcnid = 0; #define CONN_RMASK(e) (1 << ((32 / (e)) - 1)) #define CONN_NMASK(e) (CONN_RMASK(e) - 1) #define CONN_RESVAL(r, e, n) ((r) >> ((32 / (e)) * (n))) #define CONN_RANGE(r, e, n) (CONN_RESVAL(r, e, n) & CONN_RMASK(e)) #define CONN_CNID(r, e, n) (CONN_RESVAL(r, e, n) & CONN_NMASK(e)) for (i = 0; i < ents; i += entnum) { res = hda_command(w->devinfo->dev, HDA_CMD_GET_CONN_LIST_ENTRY(0, nid, i)); for (j = 0; j < entnum; j++) { cnid = CONN_CNID(res, entnum, j); if (cnid == 0) { if (w->nconns < ents) device_printf(w->devinfo->dev, "WARNING: nid=%d has zero cnid " "entnum=%d j=%d index=%d " "entries=%d found=%d res=0x%08x\n", nid, entnum, j, i, ents, w->nconns, res); else goto getconns_out; } if (cnid < w->devinfo->startnode || cnid >= w->devinfo->endnode) { HDA_BOOTVERBOSE( device_printf(w->devinfo->dev, "WARNING: nid=%d has cnid outside " "of the AFG range j=%d " "entnum=%d index=%d res=0x%08x\n", nid, j, entnum, i, res); ); } if (CONN_RANGE(res, entnum, j) == 0) addcnid = cnid; else if (prevcnid == 0 || prevcnid >= cnid) { device_printf(w->devinfo->dev, "WARNING: Invalid child range " "nid=%d index=%d j=%d entnum=%d " "prevcnid=%d cnid=%d res=0x%08x\n", nid, i, j, entnum, prevcnid, cnid, res); addcnid = cnid; } else addcnid = prevcnid + 1; while (addcnid <= cnid) { if (w->nconns > max) { device_printf(w->devinfo->dev, "Adding %d (nid=%d): " "Max connection reached! 
max=%d\n", addcnid, nid, max + 1); goto getconns_out; } w->connsenable[w->nconns] = 1; w->conns[w->nconns++] = addcnid++; } prevcnid = cnid; } } getconns_out: return; } static void hdaa_widget_parse(struct hdaa_widget *w) { device_t dev = w->devinfo->dev; uint32_t wcap, cap; nid_t nid = w->nid; char buf[64]; w->param.widget_cap = wcap = hda_command(dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_AUDIO_WIDGET_CAP)); w->type = HDA_PARAM_AUDIO_WIDGET_CAP_TYPE(wcap); hdaa_widget_connection_parse(w); if (HDA_PARAM_AUDIO_WIDGET_CAP_OUT_AMP(wcap)) { if (HDA_PARAM_AUDIO_WIDGET_CAP_AMP_OVR(wcap)) w->param.outamp_cap = hda_command(dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_OUTPUT_AMP_CAP)); else w->param.outamp_cap = w->devinfo->outamp_cap; } else w->param.outamp_cap = 0; if (HDA_PARAM_AUDIO_WIDGET_CAP_IN_AMP(wcap)) { if (HDA_PARAM_AUDIO_WIDGET_CAP_AMP_OVR(wcap)) w->param.inamp_cap = hda_command(dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_INPUT_AMP_CAP)); else w->param.inamp_cap = w->devinfo->inamp_cap; } else w->param.inamp_cap = 0; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT || w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT) { if (HDA_PARAM_AUDIO_WIDGET_CAP_FORMAT_OVR(wcap)) { cap = hda_command(dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_SUPP_STREAM_FORMATS)); w->param.supp_stream_formats = (cap != 0) ? cap : w->devinfo->supp_stream_formats; cap = hda_command(dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_SUPP_PCM_SIZE_RATE)); w->param.supp_pcm_size_rate = (cap != 0) ? 
cap : w->devinfo->supp_pcm_size_rate; } else { w->param.supp_stream_formats = w->devinfo->supp_stream_formats; w->param.supp_pcm_size_rate = w->devinfo->supp_pcm_size_rate; } if (HDA_PARAM_AUDIO_WIDGET_CAP_STRIPE(w->param.widget_cap)) { w->wclass.conv.stripecap = hda_command(dev, HDA_CMD_GET_STRIPE_CONTROL(0, w->nid)) >> 20; } else w->wclass.conv.stripecap = 1; } else { w->param.supp_stream_formats = 0; w->param.supp_pcm_size_rate = 0; } if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) { w->wclass.pin.original = w->wclass.pin.newconf = w->wclass.pin.config = hda_command(dev, HDA_CMD_GET_CONFIGURATION_DEFAULT(0, w->nid)); w->wclass.pin.cap = hda_command(dev, HDA_CMD_GET_PARAMETER(0, w->nid, HDA_PARAM_PIN_CAP)); w->wclass.pin.ctrl = hda_command(dev, HDA_CMD_GET_PIN_WIDGET_CTRL(0, nid)); w->wclass.pin.connected = 2; if (HDA_PARAM_PIN_CAP_EAPD_CAP(w->wclass.pin.cap)) { w->param.eapdbtl = hda_command(dev, HDA_CMD_GET_EAPD_BTL_ENABLE(0, nid)); w->param.eapdbtl &= 0x7; w->param.eapdbtl |= HDA_CMD_SET_EAPD_BTL_ENABLE_EAPD; } else w->param.eapdbtl = HDA_INVALID; hdaa_unlock(w->devinfo); snprintf(buf, sizeof(buf), "nid%d_config", w->nid); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, &w->wclass.pin.newconf, sizeof(&w->wclass.pin.newconf), hdaa_sysctl_config, "A", "Current pin configuration"); snprintf(buf, sizeof(buf), "nid%d_original", w->nid); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &w->wclass.pin.original, sizeof(&w->wclass.pin.original), hdaa_sysctl_config, "A", "Original pin configuration"); hdaa_lock(w->devinfo); } w->unsol = -1; } static void hdaa_widget_postprocess(struct hdaa_widget *w) { const char *typestr; w->type = HDA_PARAM_AUDIO_WIDGET_CAP_TYPE(w->param.widget_cap); switch (w->type) { case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT: 
typestr = "audio output"; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT: typestr = "audio input"; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER: typestr = "audio mixer"; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR: typestr = "audio selector"; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX: typestr = "pin"; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_POWER_WIDGET: typestr = "power widget"; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_VOLUME_WIDGET: typestr = "volume widget"; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_BEEP_WIDGET: typestr = "beep widget"; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_VENDOR_WIDGET: typestr = "vendor widget"; break; default: typestr = "unknown type"; break; } strlcpy(w->name, typestr, sizeof(w->name)); if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) { uint32_t config; const char *devstr; int conn, color; config = w->wclass.pin.config; devstr = HDA_DEVS[(config & HDA_CONFIG_DEFAULTCONF_DEVICE_MASK) >> HDA_CONFIG_DEFAULTCONF_DEVICE_SHIFT]; conn = (config & HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_MASK) >> HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_SHIFT; color = (config & HDA_CONFIG_DEFAULTCONF_COLOR_MASK) >> HDA_CONFIG_DEFAULTCONF_COLOR_SHIFT; strlcat(w->name, ": ", sizeof(w->name)); strlcat(w->name, devstr, sizeof(w->name)); strlcat(w->name, " (", sizeof(w->name)); if (conn == 0 && color != 0 && color != 15) { strlcat(w->name, HDA_COLORS[color], sizeof(w->name)); strlcat(w->name, " ", sizeof(w->name)); } strlcat(w->name, HDA_CONNS[conn], sizeof(w->name)); strlcat(w->name, ")", sizeof(w->name)); } } struct hdaa_widget * hdaa_widget_get(struct hdaa_devinfo *devinfo, nid_t nid) { if (devinfo == NULL || devinfo->widget == NULL || nid < devinfo->startnode || nid >= devinfo->endnode) return (NULL); return (&devinfo->widget[nid - devinfo->startnode]); } static void hdaa_audio_ctl_amp_set_internal(struct hdaa_devinfo *devinfo, nid_t nid, int index, int lmute, int rmute, int left, int right, int dir) { 
uint16_t v = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, "Setting amplifier nid=%d index=%d %s mute=%d/%d vol=%d/%d\n", nid,index,dir ? "in" : "out",lmute,rmute,left,right); ); if (left != right || lmute != rmute) { v = (1 << (15 - dir)) | (1 << 13) | (index << 8) | (lmute << 7) | left; hda_command(devinfo->dev, HDA_CMD_SET_AMP_GAIN_MUTE(0, nid, v)); v = (1 << (15 - dir)) | (1 << 12) | (index << 8) | (rmute << 7) | right; } else v = (1 << (15 - dir)) | (3 << 12) | (index << 8) | (lmute << 7) | left; hda_command(devinfo->dev, HDA_CMD_SET_AMP_GAIN_MUTE(0, nid, v)); } static void hdaa_audio_ctl_amp_set(struct hdaa_audio_ctl *ctl, uint32_t mute, int left, int right) { nid_t nid; int lmute, rmute; nid = ctl->widget->nid; /* Save new values if valid. */ if (mute != HDAA_AMP_MUTE_DEFAULT) ctl->muted = mute; if (left != HDAA_AMP_VOL_DEFAULT) ctl->left = left; if (right != HDAA_AMP_VOL_DEFAULT) ctl->right = right; /* Prepare effective values */ if (ctl->forcemute) { lmute = 1; rmute = 1; left = 0; right = 0; } else { lmute = HDAA_AMP_LEFT_MUTED(ctl->muted); rmute = HDAA_AMP_RIGHT_MUTED(ctl->muted); left = ctl->left; right = ctl->right; } /* Apply effective values */ if (ctl->dir & HDAA_CTL_OUT) hdaa_audio_ctl_amp_set_internal(ctl->widget->devinfo, nid, ctl->index, lmute, rmute, left, right, 0); if (ctl->dir & HDAA_CTL_IN) hdaa_audio_ctl_amp_set_internal(ctl->widget->devinfo, nid, ctl->index, lmute, rmute, left, right, 1); } static void hdaa_widget_connection_select(struct hdaa_widget *w, uint8_t index) { if (w == NULL || w->nconns < 1 || index > (w->nconns - 1)) return; HDA_BOOTHVERBOSE( device_printf(w->devinfo->dev, "Setting selector nid=%d index=%d\n", w->nid, index); ); hda_command(w->devinfo->dev, HDA_CMD_SET_CONNECTION_SELECT_CONTROL(0, w->nid, index)); w->selconn = index; } /**************************************************************************** * Device Methods ****************************************************************************/ static void * 
hdaa_channel_init(kobj_t obj, void *data, struct snd_dbuf *b,
    struct pcm_channel *c, int dir)
{
	struct hdaa_chan *ch = data;
	struct hdaa_pcm_devinfo *pdevinfo = ch->pdevinfo;
	struct hdaa_devinfo *devinfo = pdevinfo->devinfo;
	int dmaflags;

	/*
	 * Channel init method: bind the channel to its sound buffer and PCM
	 * channel, seed the block geometry from the PCM device defaults and
	 * allocate the DMA buffer.  Returns the channel, or NULL when the
	 * buffer allocation fails.
	 */
	hdaa_lock(devinfo);
	if ((devinfo->quirks & HDAA_QUIRK_FIXEDRATE) != 0) {
		/* Fixed-rate quirk: advertise 48000 as the only rate. */
		ch->caps.maxspeed = 48000;
		ch->caps.minspeed = 48000;
		ch->pcmrates[0] = 48000;
		ch->pcmrates[1] = 0;
	}
	ch->dir = dir;
	ch->b = b;
	ch->c = c;
	ch->blkcnt = pdevinfo->chan_blkcnt;
	ch->blksz = pdevinfo->chan_size / pdevinfo->chan_blkcnt;
	hdaa_unlock(devinfo);

	/* The allocation is done after the codec lock has been dropped. */
	dmaflags = hda_get_dma_nocache(devinfo->dev) ? BUS_DMA_NOCACHE : 0;
	if (sndbuf_alloc(ch->b, bus_get_dma_tag(devinfo->dev), dmaflags,
	    pdevinfo->chan_size) != 0)
		return (NULL);

	return (ch);
}

/*
 * Channel setformat method: accept the format only if it appears in the
 * zero-terminated ch->caps.fmtlist.  Returns 0 on success, EINVAL otherwise.
 */
static int
hdaa_channel_setformat(kobj_t obj, void *data, uint32_t format)
{
	struct hdaa_chan *ch = data;
	int idx;

	for (idx = 0; ch->caps.fmtlist[idx] != 0; idx++) {
		if (ch->caps.fmtlist[idx] != format)
			continue;
		ch->fmt = format;
		return (0);
	}

	return (EINVAL);
}

static uint32_t
hdaa_channel_setspeed(kobj_t obj, void *data, uint32_t speed)
{
	struct hdaa_chan *ch = data;
	uint32_t spd = 0, threshold;
	int i;

	/* First look for equal or multiple frequency. */
	for (i = 0; ch->pcmrates[i] != 0; i++) {
		spd = ch->pcmrates[i];
		if (speed != 0 && spd / speed * speed == spd) {
			ch->spd = spd;
			return (spd);
		}
	}
	/* If no match, just find nearest. */
	for (i = 0; ch->pcmrates[i] != 0; i++) {
		spd = ch->pcmrates[i];
		threshold = spd + ((ch->pcmrates[i + 1] != 0) ?
((ch->pcmrates[i + 1] - spd) >> 1) : 0); if (speed < threshold) break; } ch->spd = spd; return (spd); } static uint16_t hdaa_stream_format(struct hdaa_chan *ch) { int i; uint16_t fmt; fmt = 0; if (ch->fmt & AFMT_S16_LE) fmt |= ch->bit16 << 4; else if (ch->fmt & AFMT_S32_LE) fmt |= ch->bit32 << 4; else fmt |= 1 << 4; for (i = 0; i < HDA_RATE_TAB_LEN; i++) { if (hda_rate_tab[i].valid && ch->spd == hda_rate_tab[i].rate) { fmt |= hda_rate_tab[i].base; fmt |= hda_rate_tab[i].mul; fmt |= hda_rate_tab[i].div; break; } } fmt |= (AFMT_CHANNEL(ch->fmt) - 1); return (fmt); } static int hdaa_allowed_stripes(uint16_t fmt) { static const int bits[8] = { 8, 16, 20, 24, 32, 32, 32, 32 }; int size; size = bits[(fmt >> 4) & 0x03]; size *= (fmt & 0x0f) + 1; size *= ((fmt >> 11) & 0x07) + 1; return (0xffffffffU >> (32 - fls(size / 8))); } static void hdaa_audio_setup(struct hdaa_chan *ch) { struct hdaa_audio_as *as = &ch->devinfo->as[ch->as]; struct hdaa_widget *w, *wp; int i, j, k, chn, cchn, totalchn, totalextchn, c; uint16_t fmt, dfmt; /* Mapping channel pairs to codec pins/converters. */ const static uint16_t convmap[2][5] = {{ 0x0010, 0x0001, 0x0201, 0x0231, 0x0231 }, /* 5.1 */ { 0x0010, 0x0001, 0x2001, 0x2031, 0x2431 }};/* 7.1 */ /* Mapping formats to HDMI channel allocations. */ const static uint8_t hdmica[2][8] = {{ 0x02, 0x00, 0x04, 0x08, 0x0a, 0x0e, 0x12, 0x12 }, /* x.0 */ { 0x01, 0x03, 0x01, 0x03, 0x09, 0x0b, 0x0f, 0x13 }}; /* x.1 */ /* Mapping formats to HDMI channels order. 
*/ const static uint32_t hdmich[2][8] = {{ 0xFFFF0F00, 0xFFFFFF10, 0xFFF2FF10, 0xFF32FF10, 0xFF324F10, 0xF5324F10, 0x54326F10, 0x54326F10 }, /* x.0 */ { 0xFFFFF000, 0xFFFF0100, 0xFFFFF210, 0xFFFF2310, 0xFF32F410, 0xFF324510, 0xF6324510, 0x76325410 }}; /* x.1 */ int convmapid = -1; nid_t nid; uint8_t csum; totalchn = AFMT_CHANNEL(ch->fmt); totalextchn = AFMT_EXTCHANNEL(ch->fmt); HDA_BOOTHVERBOSE( device_printf(ch->pdevinfo->dev, "PCMDIR_%s: Stream setup fmt=%08x (%d.%d) speed=%d\n", (ch->dir == PCMDIR_PLAY) ? "PLAY" : "REC", ch->fmt, totalchn - totalextchn, totalextchn, ch->spd); ); fmt = hdaa_stream_format(ch); /* Set channels to I/O converters mapping for known speaker setups. */ if ((as->pinset == 0x0007 || as->pinset == 0x0013)) /* Standard 5.1 */ convmapid = 0; else if (as->pinset == 0x0017) /* Standard 7.1 */ convmapid = 1; dfmt = HDA_CMD_SET_DIGITAL_CONV_FMT1_DIGEN; if (ch->fmt & AFMT_AC3) dfmt |= HDA_CMD_SET_DIGITAL_CONV_FMT1_NAUDIO; chn = 0; for (i = 0; ch->io[i] != -1; i++) { w = hdaa_widget_get(ch->devinfo, ch->io[i]); if (w == NULL) continue; /* If HP redirection is enabled, but failed to use same DAC, make last DAC to duplicate first one. */ if (as->fakeredir && i == (as->pincnt - 1)) { c = (ch->sid << 4); } else { /* Map channels to I/O converters, if set. 
*/ if (convmapid >= 0) chn = (((convmap[convmapid][totalchn / 2] >> i * 4) & 0xf) - 1) * 2; if (chn < 0 || chn >= totalchn) { c = 0; } else { c = (ch->sid << 4) | chn; } } hda_command(ch->devinfo->dev, HDA_CMD_SET_CONV_FMT(0, ch->io[i], fmt)); if (HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL(w->param.widget_cap)) { hda_command(ch->devinfo->dev, HDA_CMD_SET_DIGITAL_CONV_FMT1(0, ch->io[i], dfmt)); } hda_command(ch->devinfo->dev, HDA_CMD_SET_CONV_STREAM_CHAN(0, ch->io[i], c)); if (HDA_PARAM_AUDIO_WIDGET_CAP_STRIPE(w->param.widget_cap)) { hda_command(ch->devinfo->dev, HDA_CMD_SET_STRIPE_CONTROL(0, w->nid, ch->stripectl)); } cchn = HDA_PARAM_AUDIO_WIDGET_CAP_CC(w->param.widget_cap); if (cchn > 1 && chn < totalchn) { cchn = min(cchn, totalchn - chn - 1); hda_command(ch->devinfo->dev, HDA_CMD_SET_CONV_CHAN_COUNT(0, ch->io[i], cchn)); } HDA_BOOTHVERBOSE( device_printf(ch->pdevinfo->dev, "PCMDIR_%s: Stream setup nid=%d: " "fmt=0x%04x, dfmt=0x%04x, chan=0x%04x, " "chan_count=0x%02x, stripe=%d\n", (ch->dir == PCMDIR_PLAY) ? "PLAY" : "REC", ch->io[i], fmt, dfmt, c, cchn, ch->stripectl); ); for (j = 0; j < 16; j++) { if (as->dacs[ch->asindex][j] != ch->io[i]) continue; nid = as->pins[j]; wp = hdaa_widget_get(ch->devinfo, nid); if (wp == NULL) continue; if (!HDA_PARAM_PIN_CAP_DP(wp->wclass.pin.cap) && !HDA_PARAM_PIN_CAP_HDMI(wp->wclass.pin.cap)) continue; /* Set channel mapping. */ for (k = 0; k < 8; k++) { hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_CHAN_SLOT(0, nid, (((hdmich[totalextchn == 0 ? 0 : 1][totalchn - 1] >> (k * 4)) & 0xf) << 4) | k)); } /* * Enable High Bit Rate (HBR) Encoded Packet Type * (EPT), if supported and needed (8ch data). 
*/ if (HDA_PARAM_PIN_CAP_HDMI(wp->wclass.pin.cap) && HDA_PARAM_PIN_CAP_HBR(wp->wclass.pin.cap)) { wp->wclass.pin.ctrl &= ~HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK; if ((ch->fmt & AFMT_AC3) && (cchn == 7)) wp->wclass.pin.ctrl |= 0x03; hda_command(ch->devinfo->dev, HDA_CMD_SET_PIN_WIDGET_CTRL(0, nid, wp->wclass.pin.ctrl)); } /* Stop audio infoframe transmission. */ hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_INDEX(0, nid, 0x00)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_XMIT(0, nid, 0x00)); /* Clear audio infoframe buffer. */ hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_INDEX(0, nid, 0x00)); for (k = 0; k < 32; k++) hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x00)); /* Write HDMI/DisplayPort audio infoframe. */ hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_INDEX(0, nid, 0x00)); if (w->eld != NULL && w->eld_len >= 6 && ((w->eld[5] >> 2) & 0x3) == 1) { /* DisplayPort */ hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x84)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x1b)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x44)); } else { /* HDMI */ hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x84)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x01)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x0a)); csum = 0; csum -= 0x84 + 0x01 + 0x0a + (totalchn - 1) + hdmica[totalextchn == 0 ? 0 : 1][totalchn - 1]; hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, csum)); } hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, totalchn - 1)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x00)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, 0x00)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_DATA(0, nid, hdmica[totalextchn == 0 ? 0 : 1][totalchn - 1])); /* Start audio infoframe transmission. 
*/ hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_INDEX(0, nid, 0x00)); hda_command(ch->devinfo->dev, HDA_CMD_SET_HDMI_DIP_XMIT(0, nid, 0xc0)); } chn += cchn + 1; } } /* * Greatest Common Divisor. */ static unsigned gcd(unsigned a, unsigned b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static unsigned lcm(unsigned a, unsigned b) { return ((a * b) / gcd(a, b)); } static int hdaa_channel_setfragments(kobj_t obj, void *data, uint32_t blksz, uint32_t blkcnt) { struct hdaa_chan *ch = data; blksz -= blksz % lcm(HDA_DMA_ALIGNMENT, sndbuf_getalign(ch->b)); if (blksz > (sndbuf_getmaxsize(ch->b) / HDA_BDL_MIN)) blksz = sndbuf_getmaxsize(ch->b) / HDA_BDL_MIN; if (blksz < HDA_BLK_MIN) blksz = HDA_BLK_MIN; if (blkcnt > HDA_BDL_MAX) blkcnt = HDA_BDL_MAX; if (blkcnt < HDA_BDL_MIN) blkcnt = HDA_BDL_MIN; while ((blksz * blkcnt) > sndbuf_getmaxsize(ch->b)) { if ((blkcnt >> 1) >= HDA_BDL_MIN) blkcnt >>= 1; else if ((blksz >> 1) >= HDA_BLK_MIN) blksz >>= 1; else break; } if ((sndbuf_getblksz(ch->b) != blksz || sndbuf_getblkcnt(ch->b) != blkcnt) && sndbuf_resize(ch->b, blkcnt, blksz) != 0) device_printf(ch->devinfo->dev, "%s: failed blksz=%u blkcnt=%u\n", __func__, blksz, blkcnt); ch->blksz = sndbuf_getblksz(ch->b); ch->blkcnt = sndbuf_getblkcnt(ch->b); return (0); } static uint32_t hdaa_channel_setblocksize(kobj_t obj, void *data, uint32_t blksz) { struct hdaa_chan *ch = data; hdaa_channel_setfragments(obj, data, blksz, ch->pdevinfo->chan_blkcnt); return (ch->blksz); } static void hdaa_channel_stop(struct hdaa_chan *ch) { struct hdaa_devinfo *devinfo = ch->devinfo; struct hdaa_widget *w; int i; if ((ch->flags & HDAA_CHN_RUNNING) == 0) return; ch->flags &= ~HDAA_CHN_RUNNING; HDAC_STREAM_STOP(device_get_parent(devinfo->dev), devinfo->dev, ch->dir == PCMDIR_PLAY ? 
1 : 0, ch->sid); for (i = 0; ch->io[i] != -1; i++) { w = hdaa_widget_get(ch->devinfo, ch->io[i]); if (w == NULL) continue; if (HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL(w->param.widget_cap)) { hda_command(devinfo->dev, HDA_CMD_SET_DIGITAL_CONV_FMT1(0, ch->io[i], 0)); } hda_command(devinfo->dev, HDA_CMD_SET_CONV_STREAM_CHAN(0, ch->io[i], 0)); } HDAC_STREAM_FREE(device_get_parent(devinfo->dev), devinfo->dev, ch->dir == PCMDIR_PLAY ? 1 : 0, ch->sid); } static int hdaa_channel_start(struct hdaa_chan *ch) { struct hdaa_devinfo *devinfo = ch->devinfo; uint32_t fmt; fmt = hdaa_stream_format(ch); ch->stripectl = fls(ch->stripecap & hdaa_allowed_stripes(fmt)) - 1; ch->sid = HDAC_STREAM_ALLOC(device_get_parent(devinfo->dev), devinfo->dev, ch->dir == PCMDIR_PLAY ? 1 : 0, fmt, ch->stripectl, &ch->dmapos); if (ch->sid <= 0) return (EBUSY); hdaa_audio_setup(ch); HDAC_STREAM_RESET(device_get_parent(devinfo->dev), devinfo->dev, ch->dir == PCMDIR_PLAY ? 1 : 0, ch->sid); HDAC_STREAM_START(device_get_parent(devinfo->dev), devinfo->dev, ch->dir == PCMDIR_PLAY ? 1 : 0, ch->sid, sndbuf_getbufaddr(ch->b), ch->blksz, ch->blkcnt); ch->flags |= HDAA_CHN_RUNNING; return (0); } static int hdaa_channel_trigger(kobj_t obj, void *data, int go) { struct hdaa_chan *ch = data; int error = 0; if (!PCMTRIG_COMMON(go)) return (0); hdaa_lock(ch->devinfo); switch (go) { case PCMTRIG_START: error = hdaa_channel_start(ch); break; case PCMTRIG_STOP: case PCMTRIG_ABORT: hdaa_channel_stop(ch); break; default: break; } hdaa_unlock(ch->devinfo); return (error); } static uint32_t hdaa_channel_getptr(kobj_t obj, void *data) { struct hdaa_chan *ch = data; struct hdaa_devinfo *devinfo = ch->devinfo; uint32_t ptr; hdaa_lock(devinfo); if (ch->dmapos != NULL) { ptr = *(ch->dmapos); } else { ptr = HDAC_STREAM_GETPTR( device_get_parent(devinfo->dev), devinfo->dev, ch->dir == PCMDIR_PLAY ? 1 : 0, ch->sid); } hdaa_unlock(devinfo); /* * Round to available space and force 128 bytes aligment. 
*/
	ptr %= ch->blksz * ch->blkcnt;
	ptr &= HDA_BLK_ALIGN;

	return (ptr);
}

/*
 * Channel getcaps method: hand back the capability record embedded in the
 * channel structure that "data" points to.
 */
static struct pcmchan_caps *
hdaa_channel_getcaps(kobj_t obj, void *data)
{
	return (&((struct hdaa_chan *)data)->caps);
}

/* Method table binding the PCM channel interface to the handlers above. */
static kobj_method_t hdaa_channel_methods[] = {
	KOBJMETHOD(channel_init,		hdaa_channel_init),
	KOBJMETHOD(channel_setformat,		hdaa_channel_setformat),
	KOBJMETHOD(channel_setspeed,		hdaa_channel_setspeed),
	KOBJMETHOD(channel_setblocksize,	hdaa_channel_setblocksize),
	KOBJMETHOD(channel_setfragments,	hdaa_channel_setfragments),
	KOBJMETHOD(channel_trigger,		hdaa_channel_trigger),
	KOBJMETHOD(channel_getptr,		hdaa_channel_getptr),
	KOBJMETHOD(channel_getcaps,		hdaa_channel_getcaps),
	KOBJMETHOD_END
};
CHANNEL_DECLARE(hdaa_channel);

static int
hdaa_audio_ctl_ossmixer_init(struct snd_mixer *m)
{
	struct hdaa_pcm_devinfo *pdevinfo = mix_getdevinfo(m);
	struct hdaa_devinfo *devinfo = pdevinfo->devinfo;
	struct hdaa_widget *w, *cw;
	uint32_t mask, recmask;
	int i, j;

	hdaa_lock(devinfo);
	pdevinfo->mixer = m;

	/* Make sure that in case of soft volume it won't stay muted. */
	for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) {
		pdevinfo->left[i] = 100;
		pdevinfo->right[i] = 100;
	}

	/* Declare volume controls assigned to this association. */
	mask = pdevinfo->ossmask;
	if (pdevinfo->playas >= 0) {
		/* Declare EAPD as ogain control. */
		for (i = devinfo->startnode; i < devinfo->endnode; i++) {
			w = hdaa_widget_get(devinfo, i);
			if (w == NULL || w->enable == 0)
				continue;
			/* Only EAPD-capable pins bound to the playback association count. */
			if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX ||
			    w->param.eapdbtl == HDA_INVALID ||
			    w->bindas != pdevinfo->playas)
				continue;
			mask |= SOUND_MASK_OGAIN;
			break;
		}

		/* Declare soft PCM volume if needed.
*/ if ((mask & SOUND_MASK_PCM) == 0 || (devinfo->quirks & HDAA_QUIRK_SOFTPCMVOL) || pdevinfo->minamp[SOUND_MIXER_PCM] == pdevinfo->maxamp[SOUND_MIXER_PCM]) { mask |= SOUND_MASK_PCM; pcm_setflags(pdevinfo->dev, pcm_getflags(pdevinfo->dev) | SD_F_SOFTPCMVOL); HDA_BOOTHVERBOSE( device_printf(pdevinfo->dev, "Forcing Soft PCM volume\n"); ); } /* Declare master volume if needed. */ if ((mask & SOUND_MASK_VOLUME) == 0) { mask |= SOUND_MASK_VOLUME; mix_setparentchild(m, SOUND_MIXER_VOLUME, SOUND_MASK_PCM); mix_setrealdev(m, SOUND_MIXER_VOLUME, SOUND_MIXER_NONE); HDA_BOOTHVERBOSE( device_printf(pdevinfo->dev, "Forcing master volume with PCM\n"); ); } } /* Declare record sources available to this association. */ recmask = 0; if (pdevinfo->recas >= 0) { for (i = 0; i < 16; i++) { if (devinfo->as[pdevinfo->recas].dacs[0][i] < 0) continue; w = hdaa_widget_get(devinfo, devinfo->as[pdevinfo->recas].dacs[0][i]); if (w == NULL || w->enable == 0) continue; for (j = 0; j < w->nconns; j++) { if (w->connsenable[j] == 0) continue; cw = hdaa_widget_get(devinfo, w->conns[j]); if (cw == NULL || cw->enable == 0) continue; if (cw->bindas != pdevinfo->recas && cw->bindas != -2) continue; recmask |= cw->ossmask; } } } recmask &= (1 << SOUND_MIXER_NRDEVICES) - 1; mask &= (1 << SOUND_MIXER_NRDEVICES) - 1; pdevinfo->ossmask = mask; mix_setrecdevs(m, recmask); mix_setdevs(m, mask); hdaa_unlock(devinfo); return (0); } /* * Update amplification per pdevinfo per ossdev, calculate summary coefficient * and write it to codec, update *left and *right to reflect remaining error. 
*/
static void
hdaa_audio_ctl_dev_set(struct hdaa_audio_ctl *ctl, int ossdev, int mute,
    int *left, int *right)
{
	int dev, total_l, total_r, rest_l, rest_r, any_mute, val_l, val_r;

	/* Record this OSS device's contribution on the control. */
	ctl->devleft[ossdev] = *left;
	ctl->devright[ossdev] = *right;
	ctl->devmute[ossdev] = mute;

	/*
	 * Sum the contributions of all OSS devices, and separately the sum
	 * of every device except the one being set.
	 */
	any_mute = 0;
	total_l = 0;
	total_r = 0;
	rest_l = 0;
	rest_r = 0;
	for (dev = 0; dev < SOUND_MIXER_NRDEVICES; dev++) {
		total_l += ctl->devleft[dev];
		total_r += ctl->devright[dev];
		any_mute |= ctl->devmute[dev];
		if (dev != ossdev) {
			rest_l += ctl->devleft[dev];
			rest_r += ctl->devright[dev];
		}
	}

	/* Program the amplifier with the combined value. */
	val_l = QDB2VAL(ctl, total_l);
	val_r = QDB2VAL(ctl, total_r);
	hdaa_audio_ctl_amp_set(ctl, any_mute, val_l, val_r);

	/*
	 * Take what this control absorbed out of the caller's request; what
	 * remains in *left / *right is the error still to be applied.
	 */
	*left -= VAL2QDB(ctl, val_l) - VAL2QDB(ctl, QDB2VAL(ctl, rest_l));
	*right -= VAL2QDB(ctl, val_r) - VAL2QDB(ctl, QDB2VAL(ctl, rest_r));
}

/*
 * Trace signal from source, setting volumes on the way.
 */
static void
hdaa_audio_ctl_source_volume(struct hdaa_pcm_devinfo *pdevinfo,
    int ossdev, nid_t nid, int index, int mute, int left, int right, int depth)
{
	struct hdaa_devinfo *devinfo = pdevinfo->devinfo;
	struct hdaa_widget *w, *wc;
	struct hdaa_audio_ctl *ctl;
	int i, j, conns = 0;

	if (depth > HDA_PARSE_MAXDEPTH)
		return;

	w = hdaa_widget_get(devinfo, nid);
	if (w == NULL || w->enable == 0)
		return;

	/* Count number of active inputs. */
	if (depth > 0) {
		for (j = 0; j < w->nconns; j++) {
			if (!w->connsenable[j])
				continue;
			conns++;
		}
	}

	/* If this is not a first step - use input mixer.
	   Pins have common input ctl so care must be taken. */
	if (depth > 0 && (conns == 1 ||
	    w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX)) {
		ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_IN,
		    index, 1);
		if (ctl)
			hdaa_audio_ctl_dev_set(ctl, ossdev, mute, &left, &right);
	}

	/* If widget has own ossdev - not traverse it.
	   It will be traversed on it's own.
*/ if (w->ossdev >= 0 && depth > 0) return; /* We must not traverse pin */ if ((w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT || w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) && depth > 0) return; /* * If signals mixed, we can't assign controls farther. * Ignore this on depth zero. Caller must knows why. */ if (conns > 1 && (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER || w->selconn != index)) return; ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_OUT, -1, 1); if (ctl) hdaa_audio_ctl_dev_set(ctl, ossdev, mute, &left, &right); for (i = devinfo->startnode; i < devinfo->endnode; i++) { wc = hdaa_widget_get(devinfo, i); if (wc == NULL || wc->enable == 0) continue; for (j = 0; j < wc->nconns; j++) { if (wc->connsenable[j] && wc->conns[j] == nid) { hdaa_audio_ctl_source_volume(pdevinfo, ossdev, wc->nid, j, mute, left, right, depth + 1); } } } return; } /* * Trace signal from destination, setting volumes on the way. */ static void hdaa_audio_ctl_dest_volume(struct hdaa_pcm_devinfo *pdevinfo, int ossdev, nid_t nid, int index, int mute, int left, int right, int depth) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w, *wc; struct hdaa_audio_ctl *ctl; int i, j, consumers, cleft, cright; if (depth > HDA_PARSE_MAXDEPTH) return; w = hdaa_widget_get(devinfo, nid); if (w == NULL || w->enable == 0) return; if (depth > 0) { /* If this node produce output for several consumers, we can't touch it. */ consumers = 0; for (i = devinfo->startnode; i < devinfo->endnode; i++) { wc = hdaa_widget_get(devinfo, i); if (wc == NULL || wc->enable == 0) continue; for (j = 0; j < wc->nconns; j++) { if (wc->connsenable[j] && wc->conns[j] == nid) consumers++; } } /* The only exception is if real HP redirection is configured and this is a duplication point. XXX: Actually exception is not completely correct. XXX: Duplication point check is not perfect. 
*/ if ((consumers == 2 && (w->bindas < 0 || as[w->bindas].hpredir < 0 || as[w->bindas].fakeredir || (w->bindseqmask & (1 << 15)) == 0)) || consumers > 2) return; /* Else use it's output mixer. */ ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_OUT, -1, 1); if (ctl) hdaa_audio_ctl_dev_set(ctl, ossdev, mute, &left, &right); } /* We must not traverse pin */ if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX && depth > 0) return; for (i = 0; i < w->nconns; i++) { if (w->connsenable[i] == 0) continue; if (index >= 0 && i != index) continue; cleft = left; cright = right; ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_IN, i, 1); if (ctl) hdaa_audio_ctl_dev_set(ctl, ossdev, mute, &cleft, &cright); hdaa_audio_ctl_dest_volume(pdevinfo, ossdev, w->conns[i], -1, mute, cleft, cright, depth + 1); } } /* * Set volumes for the specified pdevinfo and ossdev. */ static void hdaa_audio_ctl_dev_volume(struct hdaa_pcm_devinfo *pdevinfo, unsigned dev) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_widget *w, *cw; uint32_t mute; int lvol, rvol; int i, j; mute = 0; if (pdevinfo->left[dev] == 0) { mute |= HDAA_AMP_MUTE_LEFT; lvol = -4000; } else lvol = ((pdevinfo->maxamp[dev] - pdevinfo->minamp[dev]) * pdevinfo->left[dev] + 50) / 100 + pdevinfo->minamp[dev]; if (pdevinfo->right[dev] == 0) { mute |= HDAA_AMP_MUTE_RIGHT; rvol = -4000; } else rvol = ((pdevinfo->maxamp[dev] - pdevinfo->minamp[dev]) * pdevinfo->right[dev] + 50) / 100 + pdevinfo->minamp[dev]; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->bindas < 0) { if (pdevinfo->index != 0) continue; } else { if (w->bindas != pdevinfo->playas && w->bindas != pdevinfo->recas) continue; } if (dev == SOUND_MIXER_RECLEV && w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT) { hdaa_audio_ctl_dest_volume(pdevinfo, dev, w->nid, -1, mute, lvol, rvol, 0); continue; } if (dev == SOUND_MIXER_VOLUME && w->type == 
HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX && devinfo->as[w->bindas].dir == HDAA_CTL_OUT) { hdaa_audio_ctl_dest_volume(pdevinfo, dev, w->nid, -1, mute, lvol, rvol, 0); continue; } if (dev == SOUND_MIXER_IGAIN && w->pflags & HDAA_ADC_MONITOR) { for (j = 0; j < w->nconns; j++) { if (!w->connsenable[j]) continue; cw = hdaa_widget_get(devinfo, w->conns[j]); if (cw == NULL || cw->enable == 0) continue; if (cw->bindas == -1) continue; if (cw->bindas >= 0 && devinfo->as[cw->bindas].dir != HDAA_CTL_IN) continue; hdaa_audio_ctl_dest_volume(pdevinfo, dev, w->nid, j, mute, lvol, rvol, 0); } continue; } if (w->ossdev != dev) continue; hdaa_audio_ctl_source_volume(pdevinfo, dev, w->nid, -1, mute, lvol, rvol, 0); if (dev == SOUND_MIXER_IMIX && (w->pflags & HDAA_IMIX_AS_DST)) hdaa_audio_ctl_dest_volume(pdevinfo, dev, w->nid, -1, mute, lvol, rvol, 0); } } /* * OSS Mixer set method. */ static int hdaa_audio_ctl_ossmixer_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { struct hdaa_pcm_devinfo *pdevinfo = mix_getdevinfo(m); struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_widget *w; int i; hdaa_lock(devinfo); /* Save new values. */ pdevinfo->left[dev] = left; pdevinfo->right[dev] = right; /* 'ogain' is the special case implemented with EAPD. 
*/ if (dev == SOUND_MIXER_OGAIN) { uint32_t orig; w = NULL; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX || w->param.eapdbtl == HDA_INVALID) continue; break; } if (i >= devinfo->endnode) { hdaa_unlock(devinfo); return (-1); } orig = w->param.eapdbtl; if (left == 0) w->param.eapdbtl &= ~HDA_CMD_SET_EAPD_BTL_ENABLE_EAPD; else w->param.eapdbtl |= HDA_CMD_SET_EAPD_BTL_ENABLE_EAPD; if (orig != w->param.eapdbtl) { uint32_t val; val = w->param.eapdbtl; if (devinfo->quirks & HDAA_QUIRK_EAPDINV) val ^= HDA_CMD_SET_EAPD_BTL_ENABLE_EAPD; hda_command(devinfo->dev, HDA_CMD_SET_EAPD_BTL_ENABLE(0, w->nid, val)); } hdaa_unlock(devinfo); return (left | (left << 8)); } /* Recalculate all controls related to this OSS device. */ hdaa_audio_ctl_dev_volume(pdevinfo, dev); hdaa_unlock(devinfo); return (left | (right << 8)); } /* * Set mixer settings to our own default values: * +20dB for mics, -10dB for analog vol, mute for igain, 0dB for others. */ static void hdaa_audio_ctl_set_defaults(struct hdaa_pcm_devinfo *pdevinfo) { int amp, vol, dev; for (dev = 0; dev < SOUND_MIXER_NRDEVICES; dev++) { if ((pdevinfo->ossmask & (1 << dev)) == 0) continue; /* If the value was overriden, leave it as is. 
*/ if (resource_int_value(device_get_name(pdevinfo->dev), device_get_unit(pdevinfo->dev), ossnames[dev], &vol) == 0) continue; vol = -1; if (dev == SOUND_MIXER_OGAIN) vol = 100; else if (dev == SOUND_MIXER_IGAIN) vol = 0; else if (dev == SOUND_MIXER_MIC || dev == SOUND_MIXER_MONITOR) amp = 20 * 4; /* +20dB */ else if (dev == SOUND_MIXER_VOLUME && !pdevinfo->digital) amp = -10 * 4; /* -10dB */ else amp = 0; if (vol < 0 && (pdevinfo->maxamp[dev] - pdevinfo->minamp[dev]) <= 0) { vol = 100; } else if (vol < 0) { vol = ((amp - pdevinfo->minamp[dev]) * 100 + (pdevinfo->maxamp[dev] - pdevinfo->minamp[dev]) / 2) / (pdevinfo->maxamp[dev] - pdevinfo->minamp[dev]); vol = imin(imax(vol, 1), 100); } mix_set(pdevinfo->mixer, dev, vol, vol); } } /* * Recursively commutate specified record source. */ static uint32_t hdaa_audio_ctl_recsel_comm(struct hdaa_pcm_devinfo *pdevinfo, uint32_t src, nid_t nid, int depth) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_widget *w, *cw; struct hdaa_audio_ctl *ctl; char buf[64]; int i, muted; uint32_t res = 0; if (depth > HDA_PARSE_MAXDEPTH) return (0); w = hdaa_widget_get(devinfo, nid); if (w == NULL || w->enable == 0) return (0); for (i = 0; i < w->nconns; i++) { if (w->connsenable[i] == 0) continue; cw = hdaa_widget_get(devinfo, w->conns[i]); if (cw == NULL || cw->enable == 0 || cw->bindas == -1) continue; /* Call recursively to trace signal to it's source if needed. */ if ((src & cw->ossmask) != 0) { if (cw->ossdev < 0) { res |= hdaa_audio_ctl_recsel_comm(pdevinfo, src, w->conns[i], depth + 1); } else { res |= cw->ossmask; } } /* We have two special cases: mixers and others (selectors). */ if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) { ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_IN, i, 1); if (ctl == NULL) continue; /* If we have input control on this node mute them * according to requested sources. */ muted = (src & cw->ossmask) ? 
0 : 1; if (muted != ctl->forcemute) { ctl->forcemute = muted; hdaa_audio_ctl_amp_set(ctl, HDAA_AMP_MUTE_DEFAULT, HDAA_AMP_VOL_DEFAULT, HDAA_AMP_VOL_DEFAULT); } HDA_BOOTHVERBOSE( device_printf(pdevinfo->dev, "Recsel (%s): nid %d source %d %s\n", hdaa_audio_ctl_ossmixer_mask2allname( src, buf, sizeof(buf)), nid, i, muted?"mute":"unmute"); ); } else { if (w->nconns == 1) break; if ((src & cw->ossmask) == 0) continue; /* If we found requested source - select it and exit. */ hdaa_widget_connection_select(w, i); HDA_BOOTHVERBOSE( device_printf(pdevinfo->dev, "Recsel (%s): nid %d source %d select\n", hdaa_audio_ctl_ossmixer_mask2allname( src, buf, sizeof(buf)), nid, i); ); break; } } return (res); } static uint32_t hdaa_audio_ctl_ossmixer_setrecsrc(struct snd_mixer *m, uint32_t src) { struct hdaa_pcm_devinfo *pdevinfo = mix_getdevinfo(m); struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_widget *w; struct hdaa_audio_as *as; struct hdaa_audio_ctl *ctl; struct hdaa_chan *ch; int i, j; uint32_t ret = 0xffffffff; hdaa_lock(devinfo); if (pdevinfo->recas < 0) { hdaa_unlock(devinfo); return (0); } as = &devinfo->as[pdevinfo->recas]; /* For non-mixed associations we always recording everything. */ if (!as->mixed) { hdaa_unlock(devinfo); return (mix_getrecdevs(m)); } /* Commutate requested recsrc for each ADC. */ for (j = 0; j < as->num_chans; j++) { ch = &devinfo->chans[as->chans[j]]; for (i = 0; ch->io[i] >= 0; i++) { w = hdaa_widget_get(devinfo, ch->io[i]); if (w == NULL || w->enable == 0) continue; ret &= hdaa_audio_ctl_recsel_comm(pdevinfo, src, ch->io[i], 0); } } if (ret == 0xffffffff) ret = 0; /* * Some controls could be shared. Reset volumes for controls * related to previously chosen devices, as they may no longer * affect the signal. 
*/ i = 0; while ((ctl = hdaa_audio_ctl_each(devinfo, &i)) != NULL) { if (ctl->enable == 0 || !(ctl->ossmask & pdevinfo->recsrc)) continue; if (!((pdevinfo->playas >= 0 && ctl->widget->bindas == pdevinfo->playas) || (pdevinfo->recas >= 0 && ctl->widget->bindas == pdevinfo->recas) || (pdevinfo->index == 0 && ctl->widget->bindas == -2))) continue; for (j = 0; j < SOUND_MIXER_NRDEVICES; j++) { if (pdevinfo->recsrc & (1 << j)) { ctl->devleft[j] = 0; ctl->devright[j] = 0; ctl->devmute[j] = 0; } } } /* * Some controls could be shared. Set volumes for controls * related to devices selected both previously and now. */ for (j = 0; j < SOUND_MIXER_NRDEVICES; j++) { if ((ret | pdevinfo->recsrc) & (1 << j)) hdaa_audio_ctl_dev_volume(pdevinfo, j); } pdevinfo->recsrc = ret; hdaa_unlock(devinfo); return (ret); } static kobj_method_t hdaa_audio_ctl_ossmixer_methods[] = { KOBJMETHOD(mixer_init, hdaa_audio_ctl_ossmixer_init), KOBJMETHOD(mixer_set, hdaa_audio_ctl_ossmixer_set), KOBJMETHOD(mixer_setrecsrc, hdaa_audio_ctl_ossmixer_setrecsrc), KOBJMETHOD_END }; MIXER_DECLARE(hdaa_audio_ctl_ossmixer); static void hdaa_dump_gpi(struct hdaa_devinfo *devinfo) { device_t dev = devinfo->dev; int i; uint32_t data, wake, unsol, sticky; if (HDA_PARAM_GPIO_COUNT_NUM_GPI(devinfo->gpio_cap) > 0) { data = hda_command(dev, HDA_CMD_GET_GPI_DATA(0, devinfo->nid)); wake = hda_command(dev, HDA_CMD_GET_GPI_WAKE_ENABLE_MASK(0, devinfo->nid)); unsol = hda_command(dev, HDA_CMD_GET_GPI_UNSOLICITED_ENABLE_MASK(0, devinfo->nid)); sticky = hda_command(dev, HDA_CMD_GET_GPI_STICKY_MASK(0, devinfo->nid)); for (i = 0; i < HDA_PARAM_GPIO_COUNT_NUM_GPI(devinfo->gpio_cap); i++) { device_printf(dev, " GPI%d:%s%s%s state=%d", i, (sticky & (1 << i)) ? " sticky" : "", (unsol & (1 << i)) ? " unsol" : "", (wake & (1 << i)) ? 
" wake" : "", (data >> i) & 1); } } } static void hdaa_dump_gpio(struct hdaa_devinfo *devinfo) { device_t dev = devinfo->dev; int i; uint32_t data, dir, enable, wake, unsol, sticky; if (HDA_PARAM_GPIO_COUNT_NUM_GPIO(devinfo->gpio_cap) > 0) { data = hda_command(dev, HDA_CMD_GET_GPIO_DATA(0, devinfo->nid)); enable = hda_command(dev, HDA_CMD_GET_GPIO_ENABLE_MASK(0, devinfo->nid)); dir = hda_command(dev, HDA_CMD_GET_GPIO_DIRECTION(0, devinfo->nid)); wake = hda_command(dev, HDA_CMD_GET_GPIO_WAKE_ENABLE_MASK(0, devinfo->nid)); unsol = hda_command(dev, HDA_CMD_GET_GPIO_UNSOLICITED_ENABLE_MASK(0, devinfo->nid)); sticky = hda_command(dev, HDA_CMD_GET_GPIO_STICKY_MASK(0, devinfo->nid)); for (i = 0; i < HDA_PARAM_GPIO_COUNT_NUM_GPIO(devinfo->gpio_cap); i++) { device_printf(dev, " GPIO%d: ", i); if ((enable & (1 << i)) == 0) { printf("disabled\n"); continue; } if ((dir & (1 << i)) == 0) { printf("input%s%s%s", (sticky & (1 << i)) ? " sticky" : "", (unsol & (1 << i)) ? " unsol" : "", (wake & (1 << i)) ? 
" wake" : ""); } else printf("output"); printf(" state=%d\n", (data >> i) & 1); } } } static void hdaa_dump_gpo(struct hdaa_devinfo *devinfo) { device_t dev = devinfo->dev; int i; uint32_t data; if (HDA_PARAM_GPIO_COUNT_NUM_GPO(devinfo->gpio_cap) > 0) { data = hda_command(dev, HDA_CMD_GET_GPO_DATA(0, devinfo->nid)); for (i = 0; i < HDA_PARAM_GPIO_COUNT_NUM_GPO(devinfo->gpio_cap); i++) { device_printf(dev, " GPO%d: state=%d", i, (data >> i) & 1); } } } static void hdaa_audio_parse(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w; uint32_t res; int i; nid_t nid; nid = devinfo->nid; res = hda_command(devinfo->dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_GPIO_COUNT)); devinfo->gpio_cap = res; HDA_BOOTVERBOSE( device_printf(devinfo->dev, "NumGPIO=%d NumGPO=%d " "NumGPI=%d GPIWake=%d GPIUnsol=%d\n", HDA_PARAM_GPIO_COUNT_NUM_GPIO(devinfo->gpio_cap), HDA_PARAM_GPIO_COUNT_NUM_GPO(devinfo->gpio_cap), HDA_PARAM_GPIO_COUNT_NUM_GPI(devinfo->gpio_cap), HDA_PARAM_GPIO_COUNT_GPI_WAKE(devinfo->gpio_cap), HDA_PARAM_GPIO_COUNT_GPI_UNSOL(devinfo->gpio_cap)); hdaa_dump_gpi(devinfo); hdaa_dump_gpio(devinfo); hdaa_dump_gpo(devinfo); ); res = hda_command(devinfo->dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_SUPP_STREAM_FORMATS)); devinfo->supp_stream_formats = res; res = hda_command(devinfo->dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_SUPP_PCM_SIZE_RATE)); devinfo->supp_pcm_size_rate = res; res = hda_command(devinfo->dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_OUTPUT_AMP_CAP)); devinfo->outamp_cap = res; res = hda_command(devinfo->dev, HDA_CMD_GET_PARAMETER(0, nid, HDA_PARAM_INPUT_AMP_CAP)); devinfo->inamp_cap = res; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL) device_printf(devinfo->dev, "Ghost widget! 
nid=%d!\n", i); else { w->devinfo = devinfo; w->nid = i; w->enable = 1; w->selconn = -1; w->pflags = 0; w->ossdev = -1; w->bindas = -1; w->param.eapdbtl = HDA_INVALID; hdaa_widget_parse(w); } } } static void hdaa_audio_postprocess(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w; int i; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL) continue; hdaa_widget_postprocess(w); } } static void hdaa_audio_ctl_parse(struct hdaa_devinfo *devinfo) { struct hdaa_audio_ctl *ctls; struct hdaa_widget *w, *cw; int i, j, cnt, max, ocap, icap; int mute, offset, step, size; /* XXX This is redundant */ max = 0; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->param.outamp_cap != 0) max++; if (w->param.inamp_cap != 0) { switch (w->type) { case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR: case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER: for (j = 0; j < w->nconns; j++) { cw = hdaa_widget_get(devinfo, w->conns[j]); if (cw == NULL || cw->enable == 0) continue; max++; } break; default: max++; break; } } } devinfo->ctlcnt = max; if (max < 1) return; ctls = (struct hdaa_audio_ctl *)malloc( sizeof(*ctls) * max, M_HDAA, M_ZERO | M_NOWAIT); if (ctls == NULL) { /* Blekh! 
*/ device_printf(devinfo->dev, "unable to allocate ctls!\n"); devinfo->ctlcnt = 0; return; } cnt = 0; for (i = devinfo->startnode; cnt < max && i < devinfo->endnode; i++) { if (cnt >= max) { device_printf(devinfo->dev, "%s: Ctl overflow!\n", __func__); break; } w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; ocap = w->param.outamp_cap; icap = w->param.inamp_cap; if (ocap != 0) { mute = HDA_PARAM_OUTPUT_AMP_CAP_MUTE_CAP(ocap); step = HDA_PARAM_OUTPUT_AMP_CAP_NUMSTEPS(ocap); size = HDA_PARAM_OUTPUT_AMP_CAP_STEPSIZE(ocap); offset = HDA_PARAM_OUTPUT_AMP_CAP_OFFSET(ocap); /*if (offset > step) { HDA_BOOTVERBOSE( device_printf(devinfo->dev, "BUGGY outamp: nid=%d " "[offset=%d > step=%d]\n", w->nid, offset, step); ); offset = step; }*/ ctls[cnt].enable = 1; ctls[cnt].widget = w; ctls[cnt].mute = mute; ctls[cnt].step = step; ctls[cnt].size = size; ctls[cnt].offset = offset; ctls[cnt].left = offset; ctls[cnt].right = offset; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX || w->waspin) ctls[cnt].ndir = HDAA_CTL_IN; else ctls[cnt].ndir = HDAA_CTL_OUT; ctls[cnt++].dir = HDAA_CTL_OUT; } if (icap != 0) { mute = HDA_PARAM_OUTPUT_AMP_CAP_MUTE_CAP(icap); step = HDA_PARAM_OUTPUT_AMP_CAP_NUMSTEPS(icap); size = HDA_PARAM_OUTPUT_AMP_CAP_STEPSIZE(icap); offset = HDA_PARAM_OUTPUT_AMP_CAP_OFFSET(icap); /*if (offset > step) { HDA_BOOTVERBOSE( device_printf(devinfo->dev, "BUGGY inamp: nid=%d " "[offset=%d > step=%d]\n", w->nid, offset, step); ); offset = step; }*/ switch (w->type) { case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR: case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER: for (j = 0; j < w->nconns; j++) { if (cnt >= max) { device_printf(devinfo->dev, "%s: Ctl overflow!\n", __func__); break; } cw = hdaa_widget_get(devinfo, w->conns[j]); if (cw == NULL || cw->enable == 0) continue; ctls[cnt].enable = 1; ctls[cnt].widget = w; ctls[cnt].childwidget = cw; ctls[cnt].index = j; ctls[cnt].mute = mute; ctls[cnt].step = step; ctls[cnt].size = size; 
ctls[cnt].offset = offset; ctls[cnt].left = offset; ctls[cnt].right = offset; ctls[cnt].ndir = HDAA_CTL_IN; ctls[cnt++].dir = HDAA_CTL_IN; } break; default: if (cnt >= max) { device_printf(devinfo->dev, "%s: Ctl overflow!\n", __func__); break; } ctls[cnt].enable = 1; ctls[cnt].widget = w; ctls[cnt].mute = mute; ctls[cnt].step = step; ctls[cnt].size = size; ctls[cnt].offset = offset; ctls[cnt].left = offset; ctls[cnt].right = offset; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) ctls[cnt].ndir = HDAA_CTL_OUT; else ctls[cnt].ndir = HDAA_CTL_IN; ctls[cnt++].dir = HDAA_CTL_IN; break; } } } devinfo->ctl = ctls; } static void hdaa_audio_as_parse(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as; struct hdaa_widget *w; int i, j, cnt, max, type, dir, assoc, seq, first, hpredir; /* Count present associations */ max = 0; for (j = 1; j < 16; j++) { for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; if (HDA_CONFIG_DEFAULTCONF_ASSOCIATION(w->wclass.pin.config) != j) continue; max++; if (j != 15) /* There could be many 1-pin assocs #15 */ break; } } devinfo->ascnt = max; if (max < 1) return; as = (struct hdaa_audio_as *)malloc( sizeof(*as) * max, M_HDAA, M_ZERO | M_NOWAIT); if (as == NULL) { /* Blekh! */ device_printf(devinfo->dev, "unable to allocate assocs!\n"); devinfo->ascnt = 0; return; } for (i = 0; i < max; i++) { as[i].hpredir = -1; as[i].digital = 0; as[i].num_chans = 1; as[i].location = -1; } /* Scan associations skipping as=0. 
*/ cnt = 0; for (j = 1; j < 16; j++) { first = 16; hpredir = 0; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; assoc = HDA_CONFIG_DEFAULTCONF_ASSOCIATION(w->wclass.pin.config); seq = HDA_CONFIG_DEFAULTCONF_SEQUENCE(w->wclass.pin.config); if (assoc != j) { continue; } KASSERT(cnt < max, ("%s: Associations owerflow (%d of %d)", __func__, cnt, max)); type = w->wclass.pin.config & HDA_CONFIG_DEFAULTCONF_DEVICE_MASK; /* Get pin direction. */ if (type == HDA_CONFIG_DEFAULTCONF_DEVICE_LINE_OUT || type == HDA_CONFIG_DEFAULTCONF_DEVICE_SPEAKER || type == HDA_CONFIG_DEFAULTCONF_DEVICE_HP_OUT || type == HDA_CONFIG_DEFAULTCONF_DEVICE_SPDIF_OUT || type == HDA_CONFIG_DEFAULTCONF_DEVICE_DIGITAL_OTHER_OUT) dir = HDAA_CTL_OUT; else dir = HDAA_CTL_IN; /* If this is a first pin - create new association. */ if (as[cnt].pincnt == 0) { as[cnt].enable = 1; as[cnt].index = j; as[cnt].dir = dir; } if (seq < first) first = seq; /* Check association correctness. */ if (as[cnt].pins[seq] != 0) { device_printf(devinfo->dev, "%s: Duplicate pin %d (%d) " "in association %d! Disabling association.\n", __func__, seq, w->nid, j); as[cnt].enable = 0; } if (dir != as[cnt].dir) { device_printf(devinfo->dev, "%s: Pin %d has wrong " "direction for association %d! Disabling " "association.\n", __func__, w->nid, j); as[cnt].enable = 0; } if (HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL(w->param.widget_cap)) { as[cnt].digital |= 0x1; if (HDA_PARAM_PIN_CAP_HDMI(w->wclass.pin.cap)) as[cnt].digital |= 0x2; if (HDA_PARAM_PIN_CAP_DP(w->wclass.pin.cap)) as[cnt].digital |= 0x4; } if (as[cnt].location == -1) { as[cnt].location = HDA_CONFIG_DEFAULTCONF_LOCATION(w->wclass.pin.config); } else if (as[cnt].location != HDA_CONFIG_DEFAULTCONF_LOCATION(w->wclass.pin.config)) { as[cnt].location = -2; } /* Headphones with seq=15 may mean redirection. 
*/ if (type == HDA_CONFIG_DEFAULTCONF_DEVICE_HP_OUT && seq == 15) hpredir = 1; as[cnt].pins[seq] = w->nid; as[cnt].pincnt++; /* Association 15 is a multiple unassociated pins. */ if (j == 15) cnt++; } if (j != 15 && as[cnt].pincnt > 0) { if (hpredir && as[cnt].pincnt > 1) as[cnt].hpredir = first; cnt++; } } for (i = 0; i < max; i++) { if (as[i].dir == HDAA_CTL_IN && (as[i].pincnt == 1 || as[i].pins[14] > 0 || as[i].pins[15] > 0)) as[i].mixed = 1; } HDA_BOOTVERBOSE( device_printf(devinfo->dev, "%d associations found:\n", max); for (i = 0; i < max; i++) { device_printf(devinfo->dev, "Association %d (%d) %s%s:\n", i, as[i].index, (as[i].dir == HDAA_CTL_IN)?"in":"out", as[i].enable?"":" (disabled)"); for (j = 0; j < 16; j++) { if (as[i].pins[j] == 0) continue; device_printf(devinfo->dev, " Pin nid=%d seq=%d\n", as[i].pins[j], j); } } ); devinfo->as = as; } /* * Trace path from DAC to pin. */ static nid_t hdaa_audio_trace_dac(struct hdaa_devinfo *devinfo, int as, int seq, nid_t nid, int dupseq, int min, int only, int depth) { struct hdaa_widget *w; int i, im = -1; nid_t m = 0, ret; if (depth > HDA_PARSE_MAXDEPTH) return (0); w = hdaa_widget_get(devinfo, nid); if (w == NULL || w->enable == 0) return (0); HDA_BOOTHVERBOSE( if (!only) { device_printf(devinfo->dev, " %*stracing via nid %d\n", depth + 1, "", w->nid); } ); /* Use only unused widgets */ if (w->bindas >= 0 && w->bindas != as) { HDA_BOOTHVERBOSE( if (!only) { device_printf(devinfo->dev, " %*snid %d busy by association %d\n", depth + 1, "", w->nid, w->bindas); } ); return (0); } if (dupseq < 0) { if (w->bindseqmask != 0) { HDA_BOOTHVERBOSE( if (!only) { device_printf(devinfo->dev, " %*snid %d busy by seqmask %x\n", depth + 1, "", w->nid, w->bindseqmask); } ); return (0); } } else { /* If this is headphones - allow duplicate first pin. 
*/ if (w->bindseqmask != 0 && (w->bindseqmask & (1 << dupseq)) == 0) { HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " %*snid %d busy by seqmask %x\n", depth + 1, "", w->nid, w->bindseqmask); ); return (0); } } switch (w->type) { case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT: /* Do not traverse input. AD1988 has digital monitor for which we are not ready. */ break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT: /* If we are tracing HP take only dac of first pin. */ if ((only == 0 || only == w->nid) && (w->nid >= min) && (dupseq < 0 || w->nid == devinfo->as[as].dacs[0][dupseq])) m = w->nid; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX: if (depth > 0) break; /* Fall */ default: /* Find reachable DACs with smallest nid respecting constraints. */ for (i = 0; i < w->nconns; i++) { if (w->connsenable[i] == 0) continue; if (w->selconn != -1 && w->selconn != i) continue; if ((ret = hdaa_audio_trace_dac(devinfo, as, seq, w->conns[i], dupseq, min, only, depth + 1)) != 0) { if (m == 0 || ret < m) { m = ret; im = i; } if (only || dupseq >= 0) break; } } if (im >= 0 && only && ((w->nconns > 1 && w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) || w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR)) w->selconn = im; break; } if (m && only) { w->bindas = as; w->bindseqmask |= (1 << seq); } HDA_BOOTHVERBOSE( if (!only) { device_printf(devinfo->dev, " %*snid %d returned %d\n", depth + 1, "", w->nid, m); } ); return (m); } /* * Trace path from widget to ADC. 
*/
/*
 * hdaa_audio_trace_adc() starts at widget 'nid' and recursively visits
 * every enabled widget that lists the current widget among its enabled
 * connections, until an audio input widget is reached.
 *
 * as, seq    - association index and sequence bit recorded on the path
 *              when 'only' is non-zero.
 * mixed      - when zero, widgets that already have a sequence bound are
 *              rejected.
 * min        - smallest acceptable ADC nid.
 * only       - when non-zero, accept only this ADC nid and mark the path
 *              (bindas, bindseqmask, selconn).
 * length     - out: depth at which the returned ADC was found.
 * onlylength - when non-zero, accept the ADC only at exactly this depth.
 *
 * Returns the nid of the chosen ADC (the smallest one, ties broken by the
 * shorter path), or 0 when none is reachable.
 */
static nid_t
hdaa_audio_trace_adc(struct hdaa_devinfo *devinfo, int as, int seq, nid_t nid,
    int mixed, int min, int only, int depth, int *length, int onlylength)
{
	struct hdaa_widget *w, *wc;
	int i, j, im, lm = HDA_PARSE_MAXDEPTH;
	nid_t m = 0, ret;

	/* Bound the recursion. */
	if (depth > HDA_PARSE_MAXDEPTH)
		return (0);
	w = hdaa_widget_get(devinfo, nid);
	if (w == NULL || w->enable == 0)
		return (0);
	HDA_BOOTHVERBOSE(
		device_printf(devinfo->dev,
		    " %*stracing via nid %d\n",
			depth + 1, "", w->nid);
	);
	/* Use only unused widgets */
	if (w->bindas >= 0 && w->bindas != as) {
		HDA_BOOTHVERBOSE(
			device_printf(devinfo->dev,
			    " %*snid %d busy by association %d\n",
				depth + 1, "", w->nid, w->bindas);
		);
		return (0);
	}
	if (!mixed && w->bindseqmask != 0) {
		HDA_BOOTHVERBOSE(
			device_printf(devinfo->dev,
			    " %*snid %d busy by seqmask %x\n",
				depth + 1, "", w->nid, w->bindseqmask);
		);
		return (0);
	}
	switch (w->type) {
	case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT:
		/* Reached an ADC: accept it if it meets all constraints. */
		if ((only == 0 || only == w->nid) && (w->nid >= min) &&
		    (onlylength == 0 || onlylength == depth)) {
			m = w->nid;
			*length = depth;
		}
		break;
	case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX:
		/* A pin is traversed only as the starting point. */
		if (depth > 0)
			break;
		/* Fall */
	default:
		/* Try to find reachable ADCs with specified nid. */
		for (j = devinfo->startnode; j < devinfo->endnode; j++) {
			wc = hdaa_widget_get(devinfo, j);
			if (wc == NULL || wc->enable == 0)
				continue;
			/* im: connection index of wc chosen for this path. */
			im = -1;
			for (i = 0; i < wc->nconns; i++) {
				if (wc->connsenable[i] == 0)
					continue;
				if (wc->conns[i] != nid)
					continue;
				if ((ret = hdaa_audio_trace_adc(devinfo, as, seq,
				    j, mixed, min, only, depth + 1,
				    length, onlylength)) != 0) {
					/*
					 * Keep the smallest ADC nid and, for
					 * equal nids, the shortest path;
					 * otherwise restore the best length.
					 */
					if (m == 0 || ret < m ||
					    (ret == m && *length < lm)) {
						m = ret;
						im = i;
						lm = *length;
					} else
						*length = lm;
					if (only)
						break;
				}
			}
			/* When marking, switch multi-input non-mixers and selectors. */
			if (im >= 0 && only && ((wc->nconns > 1 &&
			    wc->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) ||
			    wc->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR))
				wc->selconn = im;
		}
		break;
	}
	/* Marking pass: bind this widget to the association and sequence. */
	if (m && only) {
		w->bindas = as;
		w->bindseqmask |= (1 << seq);
	}
	HDA_BOOTHVERBOSE(
		device_printf(devinfo->dev,
		    " %*snid %d returned %d\n",
			depth + 1, "", w->nid, m);
	);
	return (m);
}

/*
 * Erase trace path of the specified association.
 *
 * With seq >= 0 only that sequence bit is cleared from each enabled widget
 * bound to 'as', and the widget is fully unbound once no bits remain.
 * With seq < 0 every such widget is unbound unconditionally.
 */
static void
hdaa_audio_undo_trace(struct hdaa_devinfo *devinfo, int as, int seq)
{
	struct hdaa_widget *w;
	int i;

	for (i = devinfo->startnode; i < devinfo->endnode; i++) {
		w = hdaa_widget_get(devinfo, i);
		if (w == NULL || w->enable == 0)
			continue;
		if (w->bindas == as) {
			if (seq >= 0) {
				w->bindseqmask &= ~(1 << seq);
				if (w->bindseqmask == 0) {
					w->bindas = -1;
					w->selconn = -1;
				}
			} else {
				w->bindas = -1;
				w->bindseqmask = 0;
				w->selconn = -1;
			}
		}
	}
}

/*
 * Trace association path from DAC to output
 *
 * Handles the first populated pin at or after sequence 'seq' and recurses
 * for the following ones.  If a later pin cannot be traced, the path of
 * this pin is undone and the trace is retried with the next higher DAC
 * nid.  Returns 1 when every remaining pin was traced, 0 otherwise.
 */
static int
hdaa_audio_trace_as_out(struct hdaa_devinfo *devinfo, int as, int seq)
{
	struct hdaa_audio_as *ases = devinfo->as;
	int i, hpredir;
	nid_t min, res;

	/* Find next pin */
	for (i = seq; i < 16 && ases[as].pins[i] == 0; i++)
		;
	/* Check if there is no any left. If so - we succeeded. */
	if (i == 16)
		return (1);

	/* Headphone redirection applies only to seq 15 without fakeredir. */
	hpredir = (i == 15 && ases[as].fakeredir == 0)?ases[as].hpredir:-1;
	min = 0;
	do {
		HDA_BOOTHVERBOSE(
			device_printf(devinfo->dev,
			    " Tracing pin %d with min nid %d",
			    ases[as].pins[i], min);
			if (hpredir >= 0)
				printf(" and hpredir %d", hpredir);
			printf("\n");
		);
		/* Trace this pin taking min nid into account. */
		res = hdaa_audio_trace_dac(devinfo, as, i,
		    ases[as].pins[i], hpredir, min, 0, 0);
		if (res == 0) {
			/* If we failed - return to previous and redo it. */
			HDA_BOOTVERBOSE(
				device_printf(devinfo->dev,
				    " Unable to trace pin %d seq %d with min "
				    "nid %d",
				    ases[as].pins[i], i, min);
				if (hpredir >= 0)
					printf(" and hpredir %d", hpredir);
				printf("\n");
			);
			return (0);
		}
		HDA_BOOTVERBOSE(
			device_printf(devinfo->dev,
			    " Pin %d traced to DAC %d",
			    ases[as].pins[i], res);
			if (hpredir >= 0)
				printf(" and hpredir %d", hpredir);
			if (ases[as].fakeredir)
				printf(" with fake redirection");
			printf("\n");
		);
		/* Trace again to mark the path */
		hdaa_audio_trace_dac(devinfo, as, i,
		    ases[as].pins[i], hpredir, min, res, 0);
		ases[as].dacs[0][i] = res;
		/* We succeeded, so call next. */
		if (hdaa_audio_trace_as_out(devinfo, as, i + 1))
			return (1);
		/* If next failed, we should retry with next min */
		hdaa_audio_undo_trace(devinfo, as, i);
		ases[as].dacs[0][i] = 0;
		min = res + 1;
	} while (1);
}

/*
 * Check equivalency of two DACs.
*/ static int hdaa_audio_dacs_equal(struct hdaa_widget *w1, struct hdaa_widget *w2) { struct hdaa_devinfo *devinfo = w1->devinfo; struct hdaa_widget *w3; int i, j, c1, c2; if (memcmp(&w1->param, &w2->param, sizeof(w1->param))) return (0); for (i = devinfo->startnode; i < devinfo->endnode; i++) { w3 = hdaa_widget_get(devinfo, i); if (w3 == NULL || w3->enable == 0) continue; if (w3->bindas != w1->bindas) continue; if (w3->nconns == 0) continue; c1 = c2 = -1; for (j = 0; j < w3->nconns; j++) { if (w3->connsenable[j] == 0) continue; if (w3->conns[j] == w1->nid) c1 = j; if (w3->conns[j] == w2->nid) c2 = j; } if (c1 < 0) continue; if (c2 < 0) return (0); if (w3->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) return (0); } return (1); } /* * Check equivalency of two ADCs. */ static int hdaa_audio_adcs_equal(struct hdaa_widget *w1, struct hdaa_widget *w2) { struct hdaa_devinfo *devinfo = w1->devinfo; struct hdaa_widget *w3, *w4; int i; if (memcmp(&w1->param, &w2->param, sizeof(w1->param))) return (0); if (w1->nconns != 1 || w2->nconns != 1) return (0); if (w1->conns[0] == w2->conns[0]) return (1); w3 = hdaa_widget_get(devinfo, w1->conns[0]); if (w3 == NULL || w3->enable == 0) return (0); w4 = hdaa_widget_get(devinfo, w2->conns[0]); if (w4 == NULL || w4->enable == 0) return (0); if (w3->bindas == w4->bindas && w3->bindseqmask == w4->bindseqmask) return (1); if (w4->bindas >= 0) return (0); if (w3->type != w4->type) return (0); if (memcmp(&w3->param, &w4->param, sizeof(w3->param))) return (0); if (w3->nconns != w4->nconns) return (0); for (i = 0; i < w3->nconns; i++) { if (w3->conns[i] != w4->conns[i]) return (0); } return (1); } /* * Look for equivalent DAC/ADC to implement second channel. 
*/ static void hdaa_audio_adddac(struct hdaa_devinfo *devinfo, int asid) { struct hdaa_audio_as *as = &devinfo->as[asid]; struct hdaa_widget *w1, *w2; int i, pos; nid_t nid1, nid2; HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Looking for additional %sC " "for association %d (%d)\n", (as->dir == HDAA_CTL_OUT) ? "DA" : "AD", asid, as->index); ); /* Find the exisitng DAC position and return if found more the one. */ pos = -1; for (i = 0; i < 16; i++) { if (as->dacs[0][i] <= 0) continue; if (pos >= 0 && as->dacs[0][i] != as->dacs[0][pos]) return; pos = i; } nid1 = as->dacs[0][pos]; w1 = hdaa_widget_get(devinfo, nid1); w2 = NULL; for (nid2 = devinfo->startnode; nid2 < devinfo->endnode; nid2++) { w2 = hdaa_widget_get(devinfo, nid2); if (w2 == NULL || w2->enable == 0) continue; if (w2->bindas >= 0) continue; if (w1->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT) { if (w2->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT) continue; if (hdaa_audio_dacs_equal(w1, w2)) break; } else { if (w2->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT) continue; if (hdaa_audio_adcs_equal(w1, w2)) break; } } if (nid2 >= devinfo->endnode) return; w2->bindas = w1->bindas; w2->bindseqmask = w1->bindseqmask; if (w1->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT) { HDA_BOOTVERBOSE( device_printf(devinfo->dev, " ADC %d considered equal to ADC %d\n", nid2, nid1); ); w1 = hdaa_widget_get(devinfo, w1->conns[0]); w2 = hdaa_widget_get(devinfo, w2->conns[0]); w2->bindas = w1->bindas; w2->bindseqmask = w1->bindseqmask; } else { HDA_BOOTVERBOSE( device_printf(devinfo->dev, " DAC %d considered equal to DAC %d\n", nid2, nid1); ); } for (i = 0; i < 16; i++) { if (as->dacs[0][i] <= 0) continue; as->dacs[as->num_chans][i] = nid2; } as->num_chans++; } /* * Trace association path from input to ADC */ static int hdaa_audio_trace_as_in(struct hdaa_devinfo *devinfo, int as) { struct hdaa_audio_as *ases = devinfo->as; struct hdaa_widget *w; int i, j, k, length; for (j = devinfo->startnode; j 
< devinfo->endnode; j++) { w = hdaa_widget_get(devinfo, j); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT) continue; if (w->bindas >= 0 && w->bindas != as) continue; /* Find next pin */ for (i = 0; i < 16; i++) { if (ases[as].pins[i] == 0) continue; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Tracing pin %d to ADC %d\n", ases[as].pins[i], j); ); /* Trace this pin taking goal into account. */ if (hdaa_audio_trace_adc(devinfo, as, i, ases[as].pins[i], 1, 0, j, 0, &length, 0) == 0) { /* If we failed - return to previous and redo it. */ HDA_BOOTVERBOSE( device_printf(devinfo->dev, " Unable to trace pin %d to ADC %d, undo traces\n", ases[as].pins[i], j); ); hdaa_audio_undo_trace(devinfo, as, -1); for (k = 0; k < 16; k++) ases[as].dacs[0][k] = 0; break; } HDA_BOOTVERBOSE( device_printf(devinfo->dev, " Pin %d traced to ADC %d\n", ases[as].pins[i], j); ); ases[as].dacs[0][i] = j; } if (i == 16) return (1); } return (0); } /* * Trace association path from input to multiple ADCs */ static int hdaa_audio_trace_as_in_mch(struct hdaa_devinfo *devinfo, int as, int seq) { struct hdaa_audio_as *ases = devinfo->as; int i, length; nid_t min, res; /* Find next pin */ for (i = seq; i < 16 && ases[as].pins[i] == 0; i++) ; /* Check if there is no any left. If so - we succeeded. */ if (i == 16) return (1); min = 0; do { HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Tracing pin %d with min nid %d", ases[as].pins[i], min); printf("\n"); ); /* Trace this pin taking min nid into account. */ res = hdaa_audio_trace_adc(devinfo, as, i, ases[as].pins[i], 0, min, 0, 0, &length, 0); if (res == 0) { /* If we failed - return to previous and redo it. 
*/ HDA_BOOTVERBOSE( device_printf(devinfo->dev, " Unable to trace pin %d seq %d with min " "nid %d", ases[as].pins[i], i, min); printf("\n"); ); return (0); } HDA_BOOTVERBOSE( device_printf(devinfo->dev, " Pin %d traced to ADC %d\n", ases[as].pins[i], res); ); /* Trace again to mark the path */ hdaa_audio_trace_adc(devinfo, as, i, ases[as].pins[i], 0, min, res, 0, &length, length); ases[as].dacs[0][i] = res; /* We succeeded, so call next. */ if (hdaa_audio_trace_as_in_mch(devinfo, as, i + 1)) return (1); /* If next failed, we should retry with next min */ hdaa_audio_undo_trace(devinfo, as, i); ases[as].dacs[0][i] = 0; min = res + 1; } while (1); } /* * Trace input monitor path from mixer to output association. */ static int hdaa_audio_trace_to_out(struct hdaa_devinfo *devinfo, nid_t nid, int depth) { struct hdaa_audio_as *ases = devinfo->as; struct hdaa_widget *w, *wc; int i, j; nid_t res = 0; if (depth > HDA_PARSE_MAXDEPTH) return (0); w = hdaa_widget_get(devinfo, nid); if (w == NULL || w->enable == 0) return (0); HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " %*stracing via nid %d\n", depth + 1, "", w->nid); ); /* Use only unused widgets */ if (depth > 0 && w->bindas != -1) { if (w->bindas < 0 || ases[w->bindas].dir == HDAA_CTL_OUT) { HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " %*snid %d found output association %d\n", depth + 1, "", w->nid, w->bindas); ); if (w->bindas >= 0) w->pflags |= HDAA_ADC_MONITOR; return (1); } else { HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " %*snid %d busy by input association %d\n", depth + 1, "", w->nid, w->bindas); ); return (0); } } switch (w->type) { case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT: /* Do not traverse input. AD1988 has digital monitor for which we are not ready. */ break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX: if (depth > 0) break; /* Fall */ default: /* Try to find reachable ADCs with specified nid. 
*/ for (j = devinfo->startnode; j < devinfo->endnode; j++) { wc = hdaa_widget_get(devinfo, j); if (wc == NULL || wc->enable == 0) continue; for (i = 0; i < wc->nconns; i++) { if (wc->connsenable[i] == 0) continue; if (wc->conns[i] != nid) continue; if (hdaa_audio_trace_to_out(devinfo, j, depth + 1) != 0) { res = 1; if (wc->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR && wc->selconn == -1) wc->selconn = i; } } } break; } if (res && w->bindas == -1) w->bindas = -2; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " %*snid %d returned %d\n", depth + 1, "", w->nid, res); ); return (res); } /* * Trace extra associations (beeper, monitor) */ static void hdaa_audio_trace_as_extra(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w; int j; /* Input monitor */ /* Find mixer associated with input, but supplying signal for output associations. Hope it will be input monitor. */ HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Tracing input monitor\n"); ); for (j = devinfo->startnode; j < devinfo->endnode; j++) { w = hdaa_widget_get(devinfo, j); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) continue; if (w->bindas < 0 || as[w->bindas].dir != HDAA_CTL_IN) continue; HDA_BOOTVERBOSE( device_printf(devinfo->dev, " Tracing nid %d to out\n", j); ); if (hdaa_audio_trace_to_out(devinfo, w->nid, 0)) { HDA_BOOTVERBOSE( device_printf(devinfo->dev, " nid %d is input monitor\n", w->nid); ); w->ossdev = SOUND_MIXER_IMIX; } } /* Other inputs monitor */ /* Find input pins supplying signal for output associations. Hope it will be input monitoring. 
*/ HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Tracing other input monitors\n"); ); for (j = devinfo->startnode; j < devinfo->endnode; j++) { w = hdaa_widget_get(devinfo, j); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; if (w->bindas < 0 || as[w->bindas].dir != HDAA_CTL_IN) continue; HDA_BOOTVERBOSE( device_printf(devinfo->dev, " Tracing nid %d to out\n", j); ); if (hdaa_audio_trace_to_out(devinfo, w->nid, 0)) { HDA_BOOTVERBOSE( device_printf(devinfo->dev, " nid %d is input monitor\n", w->nid); ); } } /* Beeper */ HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Tracing beeper\n"); ); for (j = devinfo->startnode; j < devinfo->endnode; j++) { w = hdaa_widget_get(devinfo, j); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_BEEP_WIDGET) continue; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Tracing nid %d to out\n", j); ); if (hdaa_audio_trace_to_out(devinfo, w->nid, 0)) { HDA_BOOTVERBOSE( device_printf(devinfo->dev, " nid %d traced to out\n", j); ); } w->bindas = -2; } } /* * Bind assotiations to PCM channels */ static void hdaa_audio_bind_as(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; int i, j, cnt = 0, free; for (j = 0; j < devinfo->ascnt; j++) { if (as[j].enable) cnt += as[j].num_chans; } if (devinfo->num_chans == 0) { devinfo->chans = (struct hdaa_chan *)malloc( sizeof(struct hdaa_chan) * cnt, M_HDAA, M_ZERO | M_NOWAIT); if (devinfo->chans == NULL) { device_printf(devinfo->dev, "Channels memory allocation failed!\n"); return; } } else { devinfo->chans = (struct hdaa_chan *)realloc(devinfo->chans, sizeof(struct hdaa_chan) * (devinfo->num_chans + cnt), M_HDAA, M_ZERO | M_NOWAIT); if (devinfo->chans == NULL) { devinfo->num_chans = 0; device_printf(devinfo->dev, "Channels memory allocation failed!\n"); return; } /* Fixup relative pointers after realloc */ for (j = 0; j < devinfo->num_chans; j++) devinfo->chans[j].caps.fmtlist = 
devinfo->chans[j].fmtlist; } free = devinfo->num_chans; devinfo->num_chans += cnt; for (j = free; j < free + cnt; j++) { devinfo->chans[j].devinfo = devinfo; devinfo->chans[j].as = -1; } /* Assign associations in order of their numbers, */ for (j = 0; j < devinfo->ascnt; j++) { if (as[j].enable == 0) continue; for (i = 0; i < as[j].num_chans; i++) { devinfo->chans[free].as = j; devinfo->chans[free].asindex = i; devinfo->chans[free].dir = (as[j].dir == HDAA_CTL_IN) ? PCMDIR_REC : PCMDIR_PLAY; hdaa_pcmchannel_setup(&devinfo->chans[free]); as[j].chans[i] = free; free++; } } } static void hdaa_audio_disable_nonaudio(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w; int i; /* Disable power and volume widgets. */ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_POWER_WIDGET || w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_VOLUME_WIDGET) { w->enable = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling nid %d due to it's" " non-audio type.\n", w->nid); ); } } } static void hdaa_audio_disable_useless(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w, *cw; struct hdaa_audio_ctl *ctl; int done, found, i, j, k; /* Disable useless pins. 
*/ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) { if ((w->wclass.pin.config & HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_MASK) == HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_NONE) { w->enable = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling pin nid %d due" " to None connectivity.\n", w->nid); ); } else if ((w->wclass.pin.config & HDA_CONFIG_DEFAULTCONF_ASSOCIATION_MASK) == 0) { w->enable = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling unassociated" " pin nid %d.\n", w->nid); ); } } } do { done = 1; /* Disable and mute controls for disabled widgets. */ i = 0; while ((ctl = hdaa_audio_ctl_each(devinfo, &i)) != NULL) { if (ctl->enable == 0) continue; if (ctl->widget->enable == 0 || (ctl->childwidget != NULL && ctl->childwidget->enable == 0)) { ctl->forcemute = 1; ctl->muted = HDAA_AMP_MUTE_ALL; ctl->left = 0; ctl->right = 0; ctl->enable = 0; if (ctl->ndir == HDAA_CTL_IN) ctl->widget->connsenable[ctl->index] = 0; done = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling ctl %d nid %d cnid %d due" " to disabled widget.\n", i, ctl->widget->nid, (ctl->childwidget != NULL)? ctl->childwidget->nid:-1); ); } } /* Disable useless widgets. */ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; /* Disable inputs with disabled child widgets. */ for (j = 0; j < w->nconns; j++) { if (w->connsenable[j]) { cw = hdaa_widget_get(devinfo, w->conns[j]); if (cw == NULL || cw->enable == 0) { w->connsenable[j] = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling nid %d connection %d due" " to disabled child widget.\n", i, j); ); } } } if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR && w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) continue; /* Disable mixers and selectors without inputs. 
*/ found = 0; for (j = 0; j < w->nconns; j++) { if (w->connsenable[j]) { found = 1; break; } } if (found == 0) { w->enable = 0; done = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling nid %d due to all it's" " inputs disabled.\n", w->nid); ); } /* Disable nodes without consumers. */ if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR && w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) continue; found = 0; for (k = devinfo->startnode; k < devinfo->endnode; k++) { cw = hdaa_widget_get(devinfo, k); if (cw == NULL || cw->enable == 0) continue; for (j = 0; j < cw->nconns; j++) { if (cw->connsenable[j] && cw->conns[j] == i) { found = 1; break; } } } if (found == 0) { w->enable = 0; done = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling nid %d due to all it's" " consumers disabled.\n", w->nid); ); } } } while (done == 0); } static void hdaa_audio_disable_unas(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w, *cw; struct hdaa_audio_ctl *ctl; int i, j, k; /* Disable unassosiated widgets. */ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->bindas == -1) { w->enable = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling unassociated nid %d.\n", w->nid); ); } } /* Disable input connections on input pin and * output on output. 
*/ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; if (w->bindas < 0) continue; if (as[w->bindas].dir == HDAA_CTL_IN) { for (j = 0; j < w->nconns; j++) { if (w->connsenable[j] == 0) continue; w->connsenable[j] = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling connection to input pin " "nid %d conn %d.\n", i, j); ); } ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_IN, -1, 1); if (ctl && ctl->enable) { ctl->forcemute = 1; ctl->muted = HDAA_AMP_MUTE_ALL; ctl->left = 0; ctl->right = 0; ctl->enable = 0; } } else { ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_OUT, -1, 1); if (ctl && ctl->enable) { ctl->forcemute = 1; ctl->muted = HDAA_AMP_MUTE_ALL; ctl->left = 0; ctl->right = 0; ctl->enable = 0; } for (k = devinfo->startnode; k < devinfo->endnode; k++) { cw = hdaa_widget_get(devinfo, k); if (cw == NULL || cw->enable == 0) continue; for (j = 0; j < cw->nconns; j++) { if (cw->connsenable[j] && cw->conns[j] == i) { cw->connsenable[j] = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling connection from output pin " "nid %d conn %d cnid %d.\n", k, j, i); ); if (cw->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX && cw->nconns > 1) continue; ctl = hdaa_audio_ctl_amp_get(devinfo, k, HDAA_CTL_IN, j, 1); if (ctl && ctl->enable) { ctl->forcemute = 1; ctl->muted = HDAA_AMP_MUTE_ALL; ctl->left = 0; ctl->right = 0; ctl->enable = 0; } } } } } } } static void hdaa_audio_disable_notselected(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w; int i, j; /* On playback path we can safely disable all unseleted inputs. 
*/ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->nconns <= 1) continue; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) continue; if (w->bindas < 0 || as[w->bindas].dir == HDAA_CTL_IN) continue; for (j = 0; j < w->nconns; j++) { if (w->connsenable[j] == 0) continue; if (w->selconn < 0 || w->selconn == j) continue; w->connsenable[j] = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling unselected connection " "nid %d conn %d.\n", i, j); ); } } } static void hdaa_audio_disable_crossas(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *ases = devinfo->as; struct hdaa_widget *w, *cw; struct hdaa_audio_ctl *ctl; int i, j; /* Disable crossassociatement and unwanted crosschannel connections. */ /* ... using selectors */ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->nconns <= 1) continue; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) continue; /* Allow any -> mix */ if (w->bindas == -2) continue; for (j = 0; j < w->nconns; j++) { if (w->connsenable[j] == 0) continue; cw = hdaa_widget_get(devinfo, w->conns[j]); if (cw == NULL || w->enable == 0) continue; /* Allow mix -> out. */ if (cw->bindas == -2 && w->bindas >= 0 && ases[w->bindas].dir == HDAA_CTL_OUT) continue; /* Allow mix -> mixed-in. */ if (cw->bindas == -2 && w->bindas >= 0 && ases[w->bindas].mixed) continue; /* Allow in -> mix. */ if ((w->pflags & HDAA_ADC_MONITOR) && cw->bindas >= 0 && ases[cw->bindas].dir == HDAA_CTL_IN) continue; /* Allow if have common as/seqs. */ if (w->bindas == cw->bindas && (w->bindseqmask & cw->bindseqmask) != 0) continue; w->connsenable[j] = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling crossassociatement connection " "nid %d conn %d cnid %d.\n", i, j, cw->nid); ); } } /* ... 
using controls */ i = 0; while ((ctl = hdaa_audio_ctl_each(devinfo, &i)) != NULL) { if (ctl->enable == 0 || ctl->childwidget == NULL) continue; /* Allow any -> mix */ if (ctl->widget->bindas == -2) continue; /* Allow mix -> out. */ if (ctl->childwidget->bindas == -2 && ctl->widget->bindas >= 0 && ases[ctl->widget->bindas].dir == HDAA_CTL_OUT) continue; /* Allow mix -> mixed-in. */ if (ctl->childwidget->bindas == -2 && ctl->widget->bindas >= 0 && ases[ctl->widget->bindas].mixed) continue; /* Allow in -> mix. */ if ((ctl->widget->pflags & HDAA_ADC_MONITOR) && ctl->childwidget->bindas >= 0 && ases[ctl->childwidget->bindas].dir == HDAA_CTL_IN) continue; /* Allow if have common as/seqs. */ if (ctl->widget->bindas == ctl->childwidget->bindas && (ctl->widget->bindseqmask & ctl->childwidget->bindseqmask) != 0) continue; ctl->forcemute = 1; ctl->muted = HDAA_AMP_MUTE_ALL; ctl->left = 0; ctl->right = 0; ctl->enable = 0; if (ctl->ndir == HDAA_CTL_IN) ctl->widget->connsenable[ctl->index] = 0; HDA_BOOTHVERBOSE( device_printf(devinfo->dev, " Disabling crossassociatement connection " "ctl %d nid %d cnid %d.\n", i, ctl->widget->nid, ctl->childwidget->nid); ); } } /* * Find controls to control amplification for source and calculate possible * amplification range. */ static int hdaa_audio_ctl_source_amp(struct hdaa_devinfo *devinfo, nid_t nid, int index, int ossdev, int ctlable, int depth, int *minamp, int *maxamp) { struct hdaa_widget *w, *wc; struct hdaa_audio_ctl *ctl; int i, j, conns = 0, tminamp, tmaxamp, cminamp, cmaxamp, found = 0; if (depth > HDA_PARSE_MAXDEPTH) return (found); w = hdaa_widget_get(devinfo, nid); if (w == NULL || w->enable == 0) return (found); /* Count number of active inputs. */ if (depth > 0) { for (j = 0; j < w->nconns; j++) { if (!w->connsenable[j]) continue; conns++; } } /* If this is not a first step - use input mixer. Pins have common input ctl so care must be taken. 
*/ if (depth > 0 && ctlable && (conns == 1 || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX)) { ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_IN, index, 1); if (ctl) { ctl->ossmask |= (1 << ossdev); found++; if (*minamp == *maxamp) { *minamp += MINQDB(ctl); *maxamp += MAXQDB(ctl); } } } /* If widget has own ossdev - not traverse it. It will be traversed on it's own. */ if (w->ossdev >= 0 && depth > 0) return (found); /* We must not traverse pin */ if ((w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT || w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) && depth > 0) return (found); /* record that this widget exports such signal, */ w->ossmask |= (1 << ossdev); /* * If signals mixed, we can't assign controls farther. * Ignore this on depth zero. Caller must knows why. */ if (conns > 1 && w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) ctlable = 0; if (ctlable) { ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_OUT, -1, 1); if (ctl) { ctl->ossmask |= (1 << ossdev); found++; if (*minamp == *maxamp) { *minamp += MINQDB(ctl); *maxamp += MAXQDB(ctl); } } } cminamp = cmaxamp = 0; for (i = devinfo->startnode; i < devinfo->endnode; i++) { wc = hdaa_widget_get(devinfo, i); if (wc == NULL || wc->enable == 0) continue; for (j = 0; j < wc->nconns; j++) { if (wc->connsenable[j] && wc->conns[j] == nid) { tminamp = tmaxamp = 0; found += hdaa_audio_ctl_source_amp(devinfo, wc->nid, j, ossdev, ctlable, depth + 1, &tminamp, &tmaxamp); if (cminamp == 0 && cmaxamp == 0) { cminamp = tminamp; cmaxamp = tmaxamp; } else if (tminamp != tmaxamp) { cminamp = imax(cminamp, tminamp); cmaxamp = imin(cmaxamp, tmaxamp); } } } } if (*minamp == *maxamp && cminamp < cmaxamp) { *minamp += cminamp; *maxamp += cmaxamp; } return (found); } /* * Find controls to control amplification for destination and calculate * possible amplification range. 
*/ static int hdaa_audio_ctl_dest_amp(struct hdaa_devinfo *devinfo, nid_t nid, int index, int ossdev, int depth, int *minamp, int *maxamp) { struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w, *wc; struct hdaa_audio_ctl *ctl; int i, j, consumers, tminamp, tmaxamp, cminamp, cmaxamp, found = 0; if (depth > HDA_PARSE_MAXDEPTH) return (found); w = hdaa_widget_get(devinfo, nid); if (w == NULL || w->enable == 0) return (found); if (depth > 0) { /* If this node produce output for several consumers, we can't touch it. */ consumers = 0; for (i = devinfo->startnode; i < devinfo->endnode; i++) { wc = hdaa_widget_get(devinfo, i); if (wc == NULL || wc->enable == 0) continue; for (j = 0; j < wc->nconns; j++) { if (wc->connsenable[j] && wc->conns[j] == nid) consumers++; } } /* The only exception is if real HP redirection is configured and this is a duplication point. XXX: Actually exception is not completely correct. XXX: Duplication point check is not perfect. */ if ((consumers == 2 && (w->bindas < 0 || as[w->bindas].hpredir < 0 || as[w->bindas].fakeredir || (w->bindseqmask & (1 << 15)) == 0)) || consumers > 2) return (found); /* Else use it's output mixer. 
*/ ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_OUT, -1, 1); if (ctl) { ctl->ossmask |= (1 << ossdev); found++; if (*minamp == *maxamp) { *minamp += MINQDB(ctl); *maxamp += MAXQDB(ctl); } } } /* We must not traverse pin */ if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX && depth > 0) return (found); cminamp = cmaxamp = 0; for (i = 0; i < w->nconns; i++) { if (w->connsenable[i] == 0) continue; if (index >= 0 && i != index) continue; tminamp = tmaxamp = 0; ctl = hdaa_audio_ctl_amp_get(devinfo, w->nid, HDAA_CTL_IN, i, 1); if (ctl) { ctl->ossmask |= (1 << ossdev); found++; if (*minamp == *maxamp) { tminamp += MINQDB(ctl); tmaxamp += MAXQDB(ctl); } } found += hdaa_audio_ctl_dest_amp(devinfo, w->conns[i], -1, ossdev, depth + 1, &tminamp, &tmaxamp); if (cminamp == 0 && cmaxamp == 0) { cminamp = tminamp; cmaxamp = tmaxamp; } else if (tminamp != tmaxamp) { cminamp = imax(cminamp, tminamp); cmaxamp = imin(cmaxamp, tmaxamp); } } if (*minamp == *maxamp && cminamp < cmaxamp) { *minamp += cminamp; *maxamp += cmaxamp; } return (found); } /* * Assign OSS names to sound sources */ static void hdaa_audio_assign_names(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w; int i, j; int type = -1, use, used = 0; static const int types[7][13] = { { SOUND_MIXER_LINE, SOUND_MIXER_LINE1, SOUND_MIXER_LINE2, SOUND_MIXER_LINE3, -1 }, /* line */ { SOUND_MIXER_MONITOR, SOUND_MIXER_MIC, -1 }, /* int mic */ { SOUND_MIXER_MIC, SOUND_MIXER_MONITOR, -1 }, /* ext mic */ { SOUND_MIXER_CD, -1 }, /* cd */ { SOUND_MIXER_SPEAKER, -1 }, /* speaker */ { SOUND_MIXER_DIGITAL1, SOUND_MIXER_DIGITAL2, SOUND_MIXER_DIGITAL3, -1 }, /* digital */ { SOUND_MIXER_LINE, SOUND_MIXER_LINE1, SOUND_MIXER_LINE2, SOUND_MIXER_LINE3, SOUND_MIXER_PHONEIN, SOUND_MIXER_PHONEOUT, SOUND_MIXER_VIDEO, SOUND_MIXER_RADIO, SOUND_MIXER_DIGITAL1, SOUND_MIXER_DIGITAL2, SOUND_MIXER_DIGITAL3, SOUND_MIXER_MONITOR, -1 } /* others */ }; /* Surely known names */ for (i = 
devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->bindas == -1) continue; use = -1; switch (w->type) { case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX: if (as[w->bindas].dir == HDAA_CTL_OUT) break; type = -1; switch (w->wclass.pin.config & HDA_CONFIG_DEFAULTCONF_DEVICE_MASK) { case HDA_CONFIG_DEFAULTCONF_DEVICE_LINE_IN: type = 0; break; case HDA_CONFIG_DEFAULTCONF_DEVICE_MIC_IN: if ((w->wclass.pin.config & HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_MASK) == HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_JACK) break; type = 1; break; case HDA_CONFIG_DEFAULTCONF_DEVICE_CD: type = 3; break; case HDA_CONFIG_DEFAULTCONF_DEVICE_SPEAKER: type = 4; break; case HDA_CONFIG_DEFAULTCONF_DEVICE_SPDIF_IN: case HDA_CONFIG_DEFAULTCONF_DEVICE_DIGITAL_OTHER_IN: type = 5; break; } if (type == -1) break; j = 0; while (types[type][j] >= 0 && (used & (1 << types[type][j])) != 0) { j++; } if (types[type][j] >= 0) use = types[type][j]; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT: use = SOUND_MIXER_PCM; break; case HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_BEEP_WIDGET: use = SOUND_MIXER_SPEAKER; break; default: break; } if (use >= 0) { w->ossdev = use; used |= (1 << use); } } /* Semi-known names */ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->ossdev >= 0) continue; if (w->bindas == -1) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; if (as[w->bindas].dir == HDAA_CTL_OUT) continue; type = -1; switch (w->wclass.pin.config & HDA_CONFIG_DEFAULTCONF_DEVICE_MASK) { case HDA_CONFIG_DEFAULTCONF_DEVICE_LINE_OUT: case HDA_CONFIG_DEFAULTCONF_DEVICE_SPEAKER: case HDA_CONFIG_DEFAULTCONF_DEVICE_HP_OUT: case HDA_CONFIG_DEFAULTCONF_DEVICE_AUX: type = 0; break; case HDA_CONFIG_DEFAULTCONF_DEVICE_MIC_IN: type = 2; break; case HDA_CONFIG_DEFAULTCONF_DEVICE_SPDIF_OUT: case 
HDA_CONFIG_DEFAULTCONF_DEVICE_DIGITAL_OTHER_OUT: type = 5; break; } if (type == -1) break; j = 0; while (types[type][j] >= 0 && (used & (1 << types[type][j])) != 0) { j++; } if (types[type][j] >= 0) { w->ossdev = types[type][j]; used |= (1 << types[type][j]); } } /* Others */ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->ossdev >= 0) continue; if (w->bindas == -1) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; if (as[w->bindas].dir == HDAA_CTL_OUT) continue; j = 0; while (types[6][j] >= 0 && (used & (1 << types[6][j])) != 0) { j++; } if (types[6][j] >= 0) { w->ossdev = types[6][j]; used |= (1 << types[6][j]); } } } static void hdaa_audio_build_tree(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; int j, res; /* Trace all associations in order of their numbers. */ for (j = 0; j < devinfo->ascnt; j++) { if (as[j].enable == 0) continue; HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Tracing association %d (%d)\n", j, as[j].index); ); if (as[j].dir == HDAA_CTL_OUT) { retry: res = hdaa_audio_trace_as_out(devinfo, j, 0); if (res == 0 && as[j].hpredir >= 0 && as[j].fakeredir == 0) { /* If CODEC can't do analog HP redirection try to make it using one more DAC. */ as[j].fakeredir = 1; goto retry; } } else if (as[j].mixed) res = hdaa_audio_trace_as_in(devinfo, j); else res = hdaa_audio_trace_as_in_mch(devinfo, j, 0); if (res) { HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Association %d (%d) trace succeeded\n", j, as[j].index); ); } else { HDA_BOOTVERBOSE( device_printf(devinfo->dev, "Association %d (%d) trace failed\n", j, as[j].index); ); as[j].enable = 0; } } /* Look for additional DACs/ADCs. */ for (j = 0; j < devinfo->ascnt; j++) { if (as[j].enable == 0) continue; hdaa_audio_adddac(devinfo, j); } /* Trace mixer and beeper pseudo associations. 
*/ hdaa_audio_trace_as_extra(devinfo); } /* * Store in pdevinfo new data about whether and how we can control signal * for OSS device to/from specified widget. */ static void hdaa_adjust_amp(struct hdaa_widget *w, int ossdev, int found, int minamp, int maxamp) { struct hdaa_devinfo *devinfo = w->devinfo; struct hdaa_pcm_devinfo *pdevinfo; if (w->bindas >= 0) pdevinfo = devinfo->as[w->bindas].pdevinfo; else pdevinfo = &devinfo->devs[0]; if (found) pdevinfo->ossmask |= (1 << ossdev); if (minamp == 0 && maxamp == 0) return; if (pdevinfo->minamp[ossdev] == 0 && pdevinfo->maxamp[ossdev] == 0) { pdevinfo->minamp[ossdev] = minamp; pdevinfo->maxamp[ossdev] = maxamp; } else { pdevinfo->minamp[ossdev] = imax(pdevinfo->minamp[ossdev], minamp); pdevinfo->maxamp[ossdev] = imin(pdevinfo->maxamp[ossdev], maxamp); } } /* * Trace signals from/to all possible sources/destionstions to find possible * recording sources, OSS device control ranges and to assign controls. */ static void hdaa_audio_assign_mixers(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w, *cw; int i, j, minamp, maxamp, found; /* Assign mixers to the tree. 
*/ for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; minamp = maxamp = 0; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT || w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_BEEP_WIDGET || (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX && as[w->bindas].dir == HDAA_CTL_IN)) { if (w->ossdev < 0) continue; found = hdaa_audio_ctl_source_amp(devinfo, w->nid, -1, w->ossdev, 1, 0, &minamp, &maxamp); hdaa_adjust_amp(w, w->ossdev, found, minamp, maxamp); } else if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT) { found = hdaa_audio_ctl_dest_amp(devinfo, w->nid, -1, SOUND_MIXER_RECLEV, 0, &minamp, &maxamp); hdaa_adjust_amp(w, SOUND_MIXER_RECLEV, found, minamp, maxamp); } else if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX && as[w->bindas].dir == HDAA_CTL_OUT) { found = hdaa_audio_ctl_dest_amp(devinfo, w->nid, -1, SOUND_MIXER_VOLUME, 0, &minamp, &maxamp); hdaa_adjust_amp(w, SOUND_MIXER_VOLUME, found, minamp, maxamp); } if (w->ossdev == SOUND_MIXER_IMIX) { minamp = maxamp = 0; found = hdaa_audio_ctl_source_amp(devinfo, w->nid, -1, w->ossdev, 1, 0, &minamp, &maxamp); if (minamp == maxamp) { /* If we are unable to control input monitor as source - try to control it as destination. 
*/ found += hdaa_audio_ctl_dest_amp(devinfo, w->nid, -1, w->ossdev, 0, &minamp, &maxamp); w->pflags |= HDAA_IMIX_AS_DST; } hdaa_adjust_amp(w, w->ossdev, found, minamp, maxamp); } if (w->pflags & HDAA_ADC_MONITOR) { for (j = 0; j < w->nconns; j++) { if (!w->connsenable[j]) continue; cw = hdaa_widget_get(devinfo, w->conns[j]); if (cw == NULL || cw->enable == 0) continue; if (cw->bindas == -1) continue; if (cw->bindas >= 0 && as[cw->bindas].dir != HDAA_CTL_IN) continue; minamp = maxamp = 0; found = hdaa_audio_ctl_dest_amp(devinfo, w->nid, j, SOUND_MIXER_IGAIN, 0, &minamp, &maxamp); hdaa_adjust_amp(w, SOUND_MIXER_IGAIN, found, minamp, maxamp); } } } } static void hdaa_audio_prepare_pin_ctrl(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w; uint32_t pincap; int i; for (i = 0; i < devinfo->nodecnt; i++) { w = &devinfo->widget[i]; if (w == NULL) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX && w->waspin == 0) continue; pincap = w->wclass.pin.cap; /* Disable everything. */ w->wclass.pin.ctrl &= ~( HDA_CMD_SET_PIN_WIDGET_CTRL_HPHN_ENABLE | HDA_CMD_SET_PIN_WIDGET_CTRL_OUT_ENABLE | HDA_CMD_SET_PIN_WIDGET_CTRL_IN_ENABLE | HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK); if (w->enable == 0) { /* Pin is unused so left it disabled. */ continue; } else if (w->waspin) { /* Enable input for beeper input. */ w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_IN_ENABLE; } else if (w->bindas < 0 || as[w->bindas].enable == 0) { /* Pin is unused so left it disabled. */ continue; } else if (as[w->bindas].dir == HDAA_CTL_IN) { /* Input pin, configure for input. 
*/ if (HDA_PARAM_PIN_CAP_INPUT_CAP(pincap)) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_IN_ENABLE; if ((devinfo->quirks & HDAA_QUIRK_IVREF100) && HDA_PARAM_PIN_CAP_VREF_CTRL_100(pincap)) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE( HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_100); else if ((devinfo->quirks & HDAA_QUIRK_IVREF80) && HDA_PARAM_PIN_CAP_VREF_CTRL_80(pincap)) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE( HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_80); else if ((devinfo->quirks & HDAA_QUIRK_IVREF50) && HDA_PARAM_PIN_CAP_VREF_CTRL_50(pincap)) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE( HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_50); } else { /* Output pin, configure for output. */ if (HDA_PARAM_PIN_CAP_OUTPUT_CAP(pincap)) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_OUT_ENABLE; if (HDA_PARAM_PIN_CAP_HEADPHONE_CAP(pincap) && (w->wclass.pin.config & HDA_CONFIG_DEFAULTCONF_DEVICE_MASK) == HDA_CONFIG_DEFAULTCONF_DEVICE_HP_OUT) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_HPHN_ENABLE; if ((devinfo->quirks & HDAA_QUIRK_OVREF100) && HDA_PARAM_PIN_CAP_VREF_CTRL_100(pincap)) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE( HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_100); else if ((devinfo->quirks & HDAA_QUIRK_OVREF80) && HDA_PARAM_PIN_CAP_VREF_CTRL_80(pincap)) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE( HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_80); else if ((devinfo->quirks & HDAA_QUIRK_OVREF50) && HDA_PARAM_PIN_CAP_VREF_CTRL_50(pincap)) w->wclass.pin.ctrl |= HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE( HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_50); } } } static void hdaa_audio_ctl_commit(struct hdaa_devinfo *devinfo) { struct hdaa_audio_ctl *ctl; int i, z; i = 0; while ((ctl = hdaa_audio_ctl_each(devinfo, &i)) != NULL) { if (ctl->enable == 0 || ctl->ossmask != 0) { /* Mute disabled and mixer controllable controls. * Last will be initialized by mixer_init(). * This expected to reduce click on startup. 
*/ hdaa_audio_ctl_amp_set(ctl, HDAA_AMP_MUTE_ALL, 0, 0); continue; } /* Init fixed controls to 0dB amplification. */ z = ctl->offset; if (z > ctl->step) z = ctl->step; hdaa_audio_ctl_amp_set(ctl, HDAA_AMP_MUTE_NONE, z, z); } } static void hdaa_gpio_commit(struct hdaa_devinfo *devinfo) { uint32_t gdata, gmask, gdir; int i, numgpio; numgpio = HDA_PARAM_GPIO_COUNT_NUM_GPIO(devinfo->gpio_cap); if (devinfo->gpio != 0 && numgpio != 0) { gdata = hda_command(devinfo->dev, HDA_CMD_GET_GPIO_DATA(0, devinfo->nid)); gmask = hda_command(devinfo->dev, HDA_CMD_GET_GPIO_ENABLE_MASK(0, devinfo->nid)); gdir = hda_command(devinfo->dev, HDA_CMD_GET_GPIO_DIRECTION(0, devinfo->nid)); for (i = 0; i < numgpio; i++) { if ((devinfo->gpio & HDAA_GPIO_MASK(i)) == HDAA_GPIO_SET(i)) { gdata |= (1 << i); gmask |= (1 << i); gdir |= (1 << i); } else if ((devinfo->gpio & HDAA_GPIO_MASK(i)) == HDAA_GPIO_CLEAR(i)) { gdata &= ~(1 << i); gmask |= (1 << i); gdir |= (1 << i); } else if ((devinfo->gpio & HDAA_GPIO_MASK(i)) == HDAA_GPIO_DISABLE(i)) { gmask &= ~(1 << i); } else if ((devinfo->gpio & HDAA_GPIO_MASK(i)) == HDAA_GPIO_INPUT(i)) { gmask |= (1 << i); gdir &= ~(1 << i); } } HDA_BOOTVERBOSE( device_printf(devinfo->dev, "GPIO commit\n"); ); hda_command(devinfo->dev, HDA_CMD_SET_GPIO_ENABLE_MASK(0, devinfo->nid, gmask)); hda_command(devinfo->dev, HDA_CMD_SET_GPIO_DIRECTION(0, devinfo->nid, gdir)); hda_command(devinfo->dev, HDA_CMD_SET_GPIO_DATA(0, devinfo->nid, gdata)); HDA_BOOTVERBOSE( hdaa_dump_gpio(devinfo); ); } } static void hdaa_gpo_commit(struct hdaa_devinfo *devinfo) { uint32_t gdata; int i, numgpo; numgpo = HDA_PARAM_GPIO_COUNT_NUM_GPO(devinfo->gpio_cap); if (devinfo->gpo != 0 && numgpo != 0) { gdata = hda_command(devinfo->dev, HDA_CMD_GET_GPO_DATA(0, devinfo->nid)); for (i = 0; i < numgpo; i++) { if ((devinfo->gpio & HDAA_GPIO_MASK(i)) == HDAA_GPIO_SET(i)) { gdata |= (1 << i); } else if ((devinfo->gpio & HDAA_GPIO_MASK(i)) == HDAA_GPIO_CLEAR(i)) { gdata &= ~(1 << i); } } HDA_BOOTVERBOSE( 
device_printf(devinfo->dev, "GPO commit\n"); ); hda_command(devinfo->dev, HDA_CMD_SET_GPO_DATA(0, devinfo->nid, gdata)); HDA_BOOTVERBOSE( hdaa_dump_gpo(devinfo); ); } } static void hdaa_audio_commit(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w; int i; /* Commit controls. */ hdaa_audio_ctl_commit(devinfo); /* Commit selectors, pins and EAPD. */ for (i = 0; i < devinfo->nodecnt; i++) { w = &devinfo->widget[i]; if (w == NULL) continue; if (w->selconn == -1) w->selconn = 0; if (w->nconns > 0) hdaa_widget_connection_select(w, w->selconn); if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX || w->waspin) { hda_command(devinfo->dev, HDA_CMD_SET_PIN_WIDGET_CTRL(0, w->nid, w->wclass.pin.ctrl)); } if (w->param.eapdbtl != HDA_INVALID) { uint32_t val; val = w->param.eapdbtl; if (devinfo->quirks & HDAA_QUIRK_EAPDINV) val ^= HDA_CMD_SET_EAPD_BTL_ENABLE_EAPD; hda_command(devinfo->dev, HDA_CMD_SET_EAPD_BTL_ENABLE(0, w->nid, val)); } } hdaa_gpio_commit(devinfo); hdaa_gpo_commit(devinfo); } static void hdaa_powerup(struct hdaa_devinfo *devinfo) { int i; hda_command(devinfo->dev, HDA_CMD_SET_POWER_STATE(0, devinfo->nid, HDA_CMD_POWER_STATE_D0)); DELAY(100); for (i = devinfo->startnode; i < devinfo->endnode; i++) { hda_command(devinfo->dev, HDA_CMD_SET_POWER_STATE(0, i, HDA_CMD_POWER_STATE_D0)); } DELAY(1000); } static int hdaa_pcmchannel_setup(struct hdaa_chan *ch) { struct hdaa_devinfo *devinfo = ch->devinfo; struct hdaa_audio_as *as = devinfo->as; struct hdaa_widget *w; uint32_t cap, fmtcap, pcmcap; int i, j, ret, channels, onlystereo; uint16_t pinset; ch->caps = hdaa_caps; ch->caps.fmtlist = ch->fmtlist; ch->bit16 = 1; ch->bit32 = 0; ch->pcmrates[0] = 48000; ch->pcmrates[1] = 0; ch->stripecap = 0xff; ret = 0; channels = 0; onlystereo = 1; pinset = 0; fmtcap = devinfo->supp_stream_formats; pcmcap = devinfo->supp_pcm_size_rate; for (i = 0; i < 16; i++) { /* Check as is correct */ if (ch->as < 0) break; /* Cound only present DACs */ if (as[ch->as].dacs[ch->asindex][i] 
<= 0) continue; /* Ignore duplicates */ for (j = 0; j < ret; j++) { if (ch->io[j] == as[ch->as].dacs[ch->asindex][i]) break; } if (j < ret) continue; w = hdaa_widget_get(devinfo, as[ch->as].dacs[ch->asindex][i]); if (w == NULL || w->enable == 0) continue; cap = w->param.supp_stream_formats; if (!HDA_PARAM_SUPP_STREAM_FORMATS_PCM(cap) && !HDA_PARAM_SUPP_STREAM_FORMATS_AC3(cap)) continue; /* Many CODECs does not declare AC3 support on SPDIF. I don't beleave that they doesn't support it! */ if (HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL(w->param.widget_cap)) cap |= HDA_PARAM_SUPP_STREAM_FORMATS_AC3_MASK; if (ret == 0) { fmtcap = cap; pcmcap = w->param.supp_pcm_size_rate; } else { fmtcap &= cap; pcmcap &= w->param.supp_pcm_size_rate; } ch->io[ret++] = as[ch->as].dacs[ch->asindex][i]; ch->stripecap &= w->wclass.conv.stripecap; /* Do not count redirection pin/dac channels. */ if (i == 15 && as[ch->as].hpredir >= 0) continue; channels += HDA_PARAM_AUDIO_WIDGET_CAP_CC(w->param.widget_cap) + 1; if (HDA_PARAM_AUDIO_WIDGET_CAP_CC(w->param.widget_cap) != 1) onlystereo = 0; pinset |= (1 << i); } ch->io[ret] = -1; ch->channels = channels; if (as[ch->as].fakeredir) ret--; /* Standard speaks only about stereo pins and playback, ... */ if ((!onlystereo) || as[ch->as].mixed) pinset = 0; /* ..., but there it gives us info about speakers layout. 
*/ as[ch->as].pinset = pinset; ch->supp_stream_formats = fmtcap; ch->supp_pcm_size_rate = pcmcap; /* * 8bit = 0 * 16bit = 1 * 20bit = 2 * 24bit = 3 * 32bit = 4 */ if (ret > 0) { i = 0; if (HDA_PARAM_SUPP_STREAM_FORMATS_PCM(fmtcap)) { if (HDA_PARAM_SUPP_PCM_SIZE_RATE_16BIT(pcmcap)) ch->bit16 = 1; else if (HDA_PARAM_SUPP_PCM_SIZE_RATE_8BIT(pcmcap)) ch->bit16 = 0; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_24BIT(pcmcap)) ch->bit32 = 3; else if (HDA_PARAM_SUPP_PCM_SIZE_RATE_20BIT(pcmcap)) ch->bit32 = 2; else if (HDA_PARAM_SUPP_PCM_SIZE_RATE_32BIT(pcmcap)) ch->bit32 = 4; if (!(devinfo->quirks & HDAA_QUIRK_FORCESTEREO)) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 1, 0); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 1, 0); } if (channels >= 2) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 2, 0); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 2, 0); } if (channels >= 3 && !onlystereo) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 3, 0); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 3, 0); ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 3, 1); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 3, 1); } if (channels >= 4) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 4, 0); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 4, 0); if (!onlystereo) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 4, 1); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 4, 1); } } if (channels >= 5 && !onlystereo) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 5, 0); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 5, 0); ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 5, 1); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 5, 1); } if (channels >= 6) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 6, 1); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 6, 1); if (!onlystereo) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 6, 0); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 6, 0); } } if (channels >= 7 && !onlystereo) { 
ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 7, 0); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 7, 0); ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 7, 1); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 7, 1); } if (channels >= 8) { ch->fmtlist[i++] = SND_FORMAT(AFMT_S16_LE, 8, 1); if (ch->bit32) ch->fmtlist[i++] = SND_FORMAT(AFMT_S32_LE, 8, 1); } } if (HDA_PARAM_SUPP_STREAM_FORMATS_AC3(fmtcap)) { ch->fmtlist[i++] = SND_FORMAT(AFMT_AC3, 2, 0); if (channels >= 8) { ch->fmtlist[i++] = SND_FORMAT(AFMT_AC3, 8, 0); ch->fmtlist[i++] = SND_FORMAT(AFMT_AC3, 8, 1); } } ch->fmtlist[i] = 0; i = 0; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_8KHZ(pcmcap)) ch->pcmrates[i++] = 8000; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_11KHZ(pcmcap)) ch->pcmrates[i++] = 11025; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_16KHZ(pcmcap)) ch->pcmrates[i++] = 16000; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_22KHZ(pcmcap)) ch->pcmrates[i++] = 22050; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_32KHZ(pcmcap)) ch->pcmrates[i++] = 32000; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_44KHZ(pcmcap)) ch->pcmrates[i++] = 44100; /* if (HDA_PARAM_SUPP_PCM_SIZE_RATE_48KHZ(pcmcap)) */ ch->pcmrates[i++] = 48000; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_88KHZ(pcmcap)) ch->pcmrates[i++] = 88200; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_96KHZ(pcmcap)) ch->pcmrates[i++] = 96000; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_176KHZ(pcmcap)) ch->pcmrates[i++] = 176400; if (HDA_PARAM_SUPP_PCM_SIZE_RATE_192KHZ(pcmcap)) ch->pcmrates[i++] = 192000; /* if (HDA_PARAM_SUPP_PCM_SIZE_RATE_384KHZ(pcmcap)) */ ch->pcmrates[i] = 0; if (i > 0) { ch->caps.minspeed = ch->pcmrates[0]; ch->caps.maxspeed = ch->pcmrates[i - 1]; } } return (ret); } static void hdaa_prepare_pcms(struct hdaa_devinfo *devinfo) { struct hdaa_audio_as *as = devinfo->as; int i, j, k, apdev = 0, ardev = 0, dpdev = 0, drdev = 0; for (i = 0; i < devinfo->ascnt; i++) { if (as[i].enable == 0) continue; if (as[i].dir == HDAA_CTL_IN) { if (as[i].digital) drdev++; else ardev++; } else { if (as[i].digital) dpdev++; else apdev++; } } 
devinfo->num_devs = max(ardev, apdev) + max(drdev, dpdev); devinfo->devs = (struct hdaa_pcm_devinfo *)malloc( devinfo->num_devs * sizeof(struct hdaa_pcm_devinfo), M_HDAA, M_ZERO | M_NOWAIT); if (devinfo->devs == NULL) { device_printf(devinfo->dev, "Unable to allocate memory for devices\n"); return; } for (i = 0; i < devinfo->num_devs; i++) { devinfo->devs[i].index = i; devinfo->devs[i].devinfo = devinfo; devinfo->devs[i].playas = -1; devinfo->devs[i].recas = -1; devinfo->devs[i].digital = 255; } for (i = 0; i < devinfo->ascnt; i++) { if (as[i].enable == 0) continue; for (j = 0; j < devinfo->num_devs; j++) { if (devinfo->devs[j].digital != 255 && (!devinfo->devs[j].digital) != (!as[i].digital)) continue; if (as[i].dir == HDAA_CTL_IN) { if (devinfo->devs[j].recas >= 0) continue; devinfo->devs[j].recas = i; } else { if (devinfo->devs[j].playas >= 0) continue; devinfo->devs[j].playas = i; } as[i].pdevinfo = &devinfo->devs[j]; for (k = 0; k < as[i].num_chans; k++) { devinfo->chans[as[i].chans[k]].pdevinfo = &devinfo->devs[j]; } devinfo->devs[j].digital = as[i].digital; break; } } } static void hdaa_create_pcms(struct hdaa_devinfo *devinfo) { int i; for (i = 0; i < devinfo->num_devs; i++) { struct hdaa_pcm_devinfo *pdevinfo = &devinfo->devs[i]; pdevinfo->dev = device_add_child(devinfo->dev, "pcm", -1); device_set_ivars(pdevinfo->dev, (void *)pdevinfo); } } static void hdaa_dump_ctls(struct hdaa_pcm_devinfo *pdevinfo, const char *banner, uint32_t flag) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_audio_ctl *ctl; char buf[64]; int i, j, printed; if (flag == 0) { flag = ~(SOUND_MASK_VOLUME | SOUND_MASK_PCM | SOUND_MASK_CD | SOUND_MASK_LINE | SOUND_MASK_RECLEV | SOUND_MASK_MIC | SOUND_MASK_SPEAKER | SOUND_MASK_IGAIN | SOUND_MASK_OGAIN | SOUND_MASK_IMIX | SOUND_MASK_MONITOR); } for (j = 0; j < SOUND_MIXER_NRDEVICES; j++) { if ((flag & (1 << j)) == 0) continue; i = 0; printed = 0; while ((ctl = hdaa_audio_ctl_each(devinfo, &i)) != NULL) { if (ctl->enable == 
0 || ctl->widget->enable == 0) continue; if (!((pdevinfo->playas >= 0 && ctl->widget->bindas == pdevinfo->playas) || (pdevinfo->recas >= 0 && ctl->widget->bindas == pdevinfo->recas) || (ctl->widget->bindas == -2 && pdevinfo->index == 0))) continue; if ((ctl->ossmask & (1 << j)) == 0) continue; if (printed == 0) { device_printf(pdevinfo->dev, "\n"); if (banner != NULL) { device_printf(pdevinfo->dev, "%s", banner); } else { device_printf(pdevinfo->dev, "Unknown Ctl"); } printf(" (OSS: %s)", hdaa_audio_ctl_ossmixer_mask2allname(1 << j, buf, sizeof(buf))); if (pdevinfo->ossmask & (1 << j)) { printf(": %+d/%+ddB\n", pdevinfo->minamp[j] / 4, pdevinfo->maxamp[j] / 4); } else printf("\n"); device_printf(pdevinfo->dev, " |\n"); printed = 1; } device_printf(pdevinfo->dev, " +- ctl %2d (nid %3d %s", i, ctl->widget->nid, (ctl->ndir == HDAA_CTL_IN)?"in ":"out"); if (ctl->ndir == HDAA_CTL_IN && ctl->ndir == ctl->dir) printf(" %2d): ", ctl->index); else printf("): "); if (ctl->step > 0) { printf("%+d/%+ddB (%d steps)%s\n", MINQDB(ctl) / 4, MAXQDB(ctl) / 4, ctl->step + 1, ctl->mute?" 
+ mute":""); } else printf("%s\n", ctl->mute?"mute":""); } } } static void hdaa_dump_audio_formats(device_t dev, uint32_t fcap, uint32_t pcmcap) { uint32_t cap; cap = fcap; if (cap != 0) { device_printf(dev, " Stream cap: 0x%08x\n", cap); device_printf(dev, " "); if (HDA_PARAM_SUPP_STREAM_FORMATS_AC3(cap)) printf(" AC3"); if (HDA_PARAM_SUPP_STREAM_FORMATS_FLOAT32(cap)) printf(" FLOAT32"); if (HDA_PARAM_SUPP_STREAM_FORMATS_PCM(cap)) printf(" PCM"); printf("\n"); } cap = pcmcap; if (cap != 0) { device_printf(dev, " PCM cap: 0x%08x\n", cap); device_printf(dev, " "); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_8BIT(cap)) printf(" 8"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_16BIT(cap)) printf(" 16"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_20BIT(cap)) printf(" 20"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_24BIT(cap)) printf(" 24"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_32BIT(cap)) printf(" 32"); printf(" bits,"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_8KHZ(cap)) printf(" 8"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_11KHZ(cap)) printf(" 11"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_16KHZ(cap)) printf(" 16"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_22KHZ(cap)) printf(" 22"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_32KHZ(cap)) printf(" 32"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_44KHZ(cap)) printf(" 44"); printf(" 48"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_88KHZ(cap)) printf(" 88"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_96KHZ(cap)) printf(" 96"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_176KHZ(cap)) printf(" 176"); if (HDA_PARAM_SUPP_PCM_SIZE_RATE_192KHZ(cap)) printf(" 192"); printf(" KHz\n"); } } static void hdaa_dump_pin(struct hdaa_widget *w) { uint32_t pincap; pincap = w->wclass.pin.cap; device_printf(w->devinfo->dev, " Pin cap: 0x%08x\n", pincap); device_printf(w->devinfo->dev, " "); if (HDA_PARAM_PIN_CAP_IMP_SENSE_CAP(pincap)) printf(" ISC"); if (HDA_PARAM_PIN_CAP_TRIGGER_REQD(pincap)) printf(" TRQD"); if (HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP(pincap)) printf(" PDC"); if (HDA_PARAM_PIN_CAP_HEADPHONE_CAP(pincap)) printf(" HP"); if 
(HDA_PARAM_PIN_CAP_OUTPUT_CAP(pincap)) printf(" OUT"); if (HDA_PARAM_PIN_CAP_INPUT_CAP(pincap)) printf(" IN"); if (HDA_PARAM_PIN_CAP_BALANCED_IO_PINS(pincap)) printf(" BAL"); if (HDA_PARAM_PIN_CAP_HDMI(pincap)) printf(" HDMI"); if (HDA_PARAM_PIN_CAP_VREF_CTRL(pincap)) { printf(" VREF["); if (HDA_PARAM_PIN_CAP_VREF_CTRL_50(pincap)) printf(" 50"); if (HDA_PARAM_PIN_CAP_VREF_CTRL_80(pincap)) printf(" 80"); if (HDA_PARAM_PIN_CAP_VREF_CTRL_100(pincap)) printf(" 100"); if (HDA_PARAM_PIN_CAP_VREF_CTRL_GROUND(pincap)) printf(" GROUND"); if (HDA_PARAM_PIN_CAP_VREF_CTRL_HIZ(pincap)) printf(" HIZ"); printf(" ]"); } if (HDA_PARAM_PIN_CAP_EAPD_CAP(pincap)) printf(" EAPD"); if (HDA_PARAM_PIN_CAP_DP(pincap)) printf(" DP"); if (HDA_PARAM_PIN_CAP_HBR(pincap)) printf(" HBR"); printf("\n"); device_printf(w->devinfo->dev, " Pin config: 0x%08x\n", w->wclass.pin.config); device_printf(w->devinfo->dev, " Pin control: 0x%08x", w->wclass.pin.ctrl); if (w->wclass.pin.ctrl & HDA_CMD_SET_PIN_WIDGET_CTRL_HPHN_ENABLE) printf(" HP"); if (w->wclass.pin.ctrl & HDA_CMD_SET_PIN_WIDGET_CTRL_IN_ENABLE) printf(" IN"); if (w->wclass.pin.ctrl & HDA_CMD_SET_PIN_WIDGET_CTRL_OUT_ENABLE) printf(" OUT"); if (HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL(w->param.widget_cap)) { if ((w->wclass.pin.ctrl & HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK) == 0x03) printf(" HBR"); else if ((w->wclass.pin.ctrl & HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK) != 0) printf(" EPTs"); } else { if ((w->wclass.pin.ctrl & HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK) != 0) printf(" VREFs"); } printf("\n"); } static void hdaa_dump_pin_config(struct hdaa_widget *w, uint32_t conf) { device_printf(w->devinfo->dev, "%2d %08x %-2d %-2d " "%-13s %-5s %-7s %-10s %-7s %d%s\n", w->nid, conf, HDA_CONFIG_DEFAULTCONF_ASSOCIATION(conf), HDA_CONFIG_DEFAULTCONF_SEQUENCE(conf), HDA_DEVS[HDA_CONFIG_DEFAULTCONF_DEVICE(conf)], HDA_CONNS[HDA_CONFIG_DEFAULTCONF_CONNECTIVITY(conf)], HDA_CONNECTORS[HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE(conf)], 
HDA_LOCS[HDA_CONFIG_DEFAULTCONF_LOCATION(conf)], HDA_COLORS[HDA_CONFIG_DEFAULTCONF_COLOR(conf)], HDA_CONFIG_DEFAULTCONF_MISC(conf), (w->enable == 0)?" DISA":""); } static void hdaa_dump_pin_configs(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w; int i; device_printf(devinfo->dev, "nid 0x as seq " "device conn jack loc color misc\n"); for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; hdaa_dump_pin_config(w, w->wclass.pin.config); } } static void hdaa_dump_amp(device_t dev, uint32_t cap, const char *banner) { device_printf(dev, " %s amp: 0x%08x\n", banner, cap); device_printf(dev, " " "mute=%d step=%d size=%d offset=%d\n", HDA_PARAM_OUTPUT_AMP_CAP_MUTE_CAP(cap), HDA_PARAM_OUTPUT_AMP_CAP_NUMSTEPS(cap), HDA_PARAM_OUTPUT_AMP_CAP_STEPSIZE(cap), HDA_PARAM_OUTPUT_AMP_CAP_OFFSET(cap)); } static void hdaa_dump_nodes(struct hdaa_devinfo *devinfo) { struct hdaa_widget *w, *cw; char buf[64]; int i, j; device_printf(devinfo->dev, "\n"); device_printf(devinfo->dev, "Default Parameter\n"); device_printf(devinfo->dev, "-----------------\n"); hdaa_dump_audio_formats(devinfo->dev, devinfo->supp_stream_formats, devinfo->supp_pcm_size_rate); device_printf(devinfo->dev, " IN amp: 0x%08x\n", devinfo->inamp_cap); device_printf(devinfo->dev, " OUT amp: 0x%08x\n", devinfo->outamp_cap); for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL) { device_printf(devinfo->dev, "Ghost widget nid=%d\n", i); continue; } device_printf(devinfo->dev, "\n"); device_printf(devinfo->dev, " nid: %d%s\n", w->nid, (w->enable == 0) ? 
" [DISABLED]" : ""); device_printf(devinfo->dev, " Name: %s\n", w->name); device_printf(devinfo->dev, " Widget cap: 0x%08x\n", w->param.widget_cap); if (w->param.widget_cap & 0x0ee1) { device_printf(devinfo->dev, " "); if (HDA_PARAM_AUDIO_WIDGET_CAP_LR_SWAP(w->param.widget_cap)) printf(" LRSWAP"); if (HDA_PARAM_AUDIO_WIDGET_CAP_POWER_CTRL(w->param.widget_cap)) printf(" PWR"); if (HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL(w->param.widget_cap)) printf(" DIGITAL"); if (HDA_PARAM_AUDIO_WIDGET_CAP_UNSOL_CAP(w->param.widget_cap)) printf(" UNSOL"); if (HDA_PARAM_AUDIO_WIDGET_CAP_PROC_WIDGET(w->param.widget_cap)) printf(" PROC"); if (HDA_PARAM_AUDIO_WIDGET_CAP_STRIPE(w->param.widget_cap)) printf(" STRIPE(x%d)", 1 << (fls(w->wclass.conv.stripecap) - 1)); j = HDA_PARAM_AUDIO_WIDGET_CAP_CC(w->param.widget_cap); if (j == 1) printf(" STEREO"); else if (j > 1) printf(" %dCH", j + 1); printf("\n"); } if (w->bindas != -1) { device_printf(devinfo->dev, " Association: %d (0x%08x)\n", w->bindas, w->bindseqmask); } if (w->ossmask != 0 || w->ossdev >= 0) { device_printf(devinfo->dev, " OSS: %s", hdaa_audio_ctl_ossmixer_mask2allname(w->ossmask, buf, sizeof(buf))); if (w->ossdev >= 0) printf(" (%s)", ossnames[w->ossdev]); printf("\n"); } if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT || w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT) { hdaa_dump_audio_formats(devinfo->dev, w->param.supp_stream_formats, w->param.supp_pcm_size_rate); } else if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX || w->waspin) hdaa_dump_pin(w); if (w->param.eapdbtl != HDA_INVALID) device_printf(devinfo->dev, " EAPD: 0x%08x\n", w->param.eapdbtl); if (HDA_PARAM_AUDIO_WIDGET_CAP_OUT_AMP(w->param.widget_cap) && w->param.outamp_cap != 0) hdaa_dump_amp(devinfo->dev, w->param.outamp_cap, "Output"); if (HDA_PARAM_AUDIO_WIDGET_CAP_IN_AMP(w->param.widget_cap) && w->param.inamp_cap != 0) hdaa_dump_amp(devinfo->dev, w->param.inamp_cap, " Input"); if (w->nconns > 0) { device_printf(devinfo->dev, " 
connections: %d\n", w->nconns); device_printf(devinfo->dev, " |\n"); } for (j = 0; j < w->nconns; j++) { cw = hdaa_widget_get(devinfo, w->conns[j]); device_printf(devinfo->dev, " + %s<- nid=%d [%s]", (w->connsenable[j] == 0)?"[DISABLED] ":"", w->conns[j], (cw == NULL) ? "GHOST!" : cw->name); if (cw == NULL) printf(" [UNKNOWN]"); else if (cw->enable == 0) printf(" [DISABLED]"); if (w->nconns > 1 && w->selconn == j && w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER) printf(" (selected)"); printf("\n"); } } } static void hdaa_dump_dst_nid(struct hdaa_pcm_devinfo *pdevinfo, nid_t nid, int depth) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_widget *w, *cw; char buf[64]; int i, printed = 0; if (depth > HDA_PARSE_MAXDEPTH) return; w = hdaa_widget_get(devinfo, nid); if (w == NULL || w->enable == 0) return; if (depth == 0) device_printf(pdevinfo->dev, "%*s", 4, ""); else device_printf(pdevinfo->dev, "%*s + <- ", 4 + (depth - 1) * 7, ""); printf("nid=%d [%s]", w->nid, w->name); if (depth > 0) { if (w->ossmask == 0) { printf("\n"); return; } printf(" [src: %s]", hdaa_audio_ctl_ossmixer_mask2allname( w->ossmask, buf, sizeof(buf))); if (w->ossdev >= 0) { printf("\n"); return; } } printf("\n"); for (i = 0; i < w->nconns; i++) { if (w->connsenable[i] == 0) continue; cw = hdaa_widget_get(devinfo, w->conns[i]); if (cw == NULL || cw->enable == 0 || cw->bindas == -1) continue; if (printed == 0) { device_printf(pdevinfo->dev, "%*s |\n", 4 + (depth) * 7, ""); printed = 1; } hdaa_dump_dst_nid(pdevinfo, w->conns[i], depth + 1); } } static void hdaa_dump_dac(struct hdaa_pcm_devinfo *pdevinfo) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_audio_as *as; struct hdaa_widget *w; int i, printed = 0; if (pdevinfo->playas < 0) return; as = &devinfo->as[pdevinfo->playas]; for (i = 0; i < 16; i++) { if (as->pins[i] <= 0) continue; w = hdaa_widget_get(devinfo, as->pins[i]); if (w == NULL || w->enable == 0) continue; if (printed == 0) { printed = 1; 
device_printf(pdevinfo->dev, "\n"); device_printf(pdevinfo->dev, "Playback:\n"); } device_printf(pdevinfo->dev, "\n"); hdaa_dump_dst_nid(pdevinfo, as->pins[i], 0); } } static void hdaa_dump_adc(struct hdaa_pcm_devinfo *pdevinfo) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_widget *w; int i; int printed = 0; if (pdevinfo->recas < 0) return; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT) continue; if (w->bindas != pdevinfo->recas) continue; if (printed == 0) { printed = 1; device_printf(pdevinfo->dev, "\n"); device_printf(pdevinfo->dev, "Record:\n"); } device_printf(pdevinfo->dev, "\n"); hdaa_dump_dst_nid(pdevinfo, i, 0); } } static void hdaa_dump_mix(struct hdaa_pcm_devinfo *pdevinfo) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_widget *w; int i; int printed = 0; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0) continue; if (w->ossdev != SOUND_MIXER_IMIX) continue; if (w->bindas != pdevinfo->recas) continue; if (printed == 0) { printed = 1; device_printf(pdevinfo->dev, "\n"); device_printf(pdevinfo->dev, "Input Mix:\n"); } device_printf(pdevinfo->dev, "\n"); hdaa_dump_dst_nid(pdevinfo, i, 0); } } static void hdaa_dump_pcmchannels(struct hdaa_pcm_devinfo *pdevinfo) { struct hdaa_devinfo *devinfo = pdevinfo->devinfo; nid_t *nids; int chid, i; if (pdevinfo->playas >= 0) { device_printf(pdevinfo->dev, "\n"); device_printf(pdevinfo->dev, "Playback:\n"); device_printf(pdevinfo->dev, "\n"); chid = devinfo->as[pdevinfo->playas].chans[0]; hdaa_dump_audio_formats(pdevinfo->dev, devinfo->chans[chid].supp_stream_formats, devinfo->chans[chid].supp_pcm_size_rate); for (i = 0; i < devinfo->as[pdevinfo->playas].num_chans; i++) { chid = devinfo->as[pdevinfo->playas].chans[i]; device_printf(pdevinfo->dev, " DAC:"); for (nids = 
devinfo->chans[chid].io; *nids != -1; nids++) printf(" %d", *nids); printf("\n"); } } if (pdevinfo->recas >= 0) { device_printf(pdevinfo->dev, "\n"); device_printf(pdevinfo->dev, "Record:\n"); device_printf(pdevinfo->dev, "\n"); chid = devinfo->as[pdevinfo->recas].chans[0]; hdaa_dump_audio_formats(pdevinfo->dev, devinfo->chans[chid].supp_stream_formats, devinfo->chans[chid].supp_pcm_size_rate); for (i = 0; i < devinfo->as[pdevinfo->recas].num_chans; i++) { chid = devinfo->as[pdevinfo->recas].chans[i]; device_printf(pdevinfo->dev, " DAC:"); for (nids = devinfo->chans[chid].io; *nids != -1; nids++) printf(" %d", *nids); printf("\n"); } } } static void hdaa_pindump(device_t dev) { struct hdaa_devinfo *devinfo = device_get_softc(dev); struct hdaa_widget *w; uint32_t res, pincap, delay; int i; device_printf(dev, "Dumping AFG pins:\n"); device_printf(dev, "nid 0x as seq " "device conn jack loc color misc\n"); for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; hdaa_dump_pin_config(w, w->wclass.pin.config); pincap = w->wclass.pin.cap; device_printf(dev, " Caps: %2s %3s %2s %4s %4s", HDA_PARAM_PIN_CAP_INPUT_CAP(pincap)?"IN":"", HDA_PARAM_PIN_CAP_OUTPUT_CAP(pincap)?"OUT":"", HDA_PARAM_PIN_CAP_HEADPHONE_CAP(pincap)?"HP":"", HDA_PARAM_PIN_CAP_EAPD_CAP(pincap)?"EAPD":"", HDA_PARAM_PIN_CAP_VREF_CTRL(pincap)?"VREF":""); if (HDA_PARAM_PIN_CAP_IMP_SENSE_CAP(pincap) || HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP(pincap)) { if (HDA_PARAM_PIN_CAP_TRIGGER_REQD(pincap)) { delay = 0; hda_command(dev, HDA_CMD_SET_PIN_SENSE(0, w->nid, 0)); do { res = hda_command(dev, HDA_CMD_GET_PIN_SENSE(0, w->nid)); if (res != 0x7fffffff && res != 0xffffffff) break; DELAY(10); } while (++delay < 10000); } else { delay = 0; res = hda_command(dev, HDA_CMD_GET_PIN_SENSE(0, w->nid)); } printf(" Sense: 0x%08x (%sconnected%s)", res, (res & HDA_CMD_GET_PIN_SENSE_PRESENCE_DETECT) ? 
"" : "dis", (HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL(w->param.widget_cap) && (res & HDA_CMD_GET_PIN_SENSE_ELD_VALID)) ? ", ELD valid" : ""); if (delay > 0) printf(" delay %dus", delay * 10); } printf("\n"); } device_printf(dev, "NumGPIO=%d NumGPO=%d NumGPI=%d GPIWake=%d GPIUnsol=%d\n", HDA_PARAM_GPIO_COUNT_NUM_GPIO(devinfo->gpio_cap), HDA_PARAM_GPIO_COUNT_NUM_GPO(devinfo->gpio_cap), HDA_PARAM_GPIO_COUNT_NUM_GPI(devinfo->gpio_cap), HDA_PARAM_GPIO_COUNT_GPI_WAKE(devinfo->gpio_cap), HDA_PARAM_GPIO_COUNT_GPI_UNSOL(devinfo->gpio_cap)); hdaa_dump_gpi(devinfo); hdaa_dump_gpio(devinfo); hdaa_dump_gpo(devinfo); } static void hdaa_configure(device_t dev) { struct hdaa_devinfo *devinfo = device_get_softc(dev); struct hdaa_audio_ctl *ctl; int i; HDA_BOOTHVERBOSE( device_printf(dev, "Applying built-in patches...\n"); ); hdaa_patch(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Applying local patches...\n"); ); hdaa_local_patch(devinfo); hdaa_audio_postprocess(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Parsing Ctls...\n"); ); hdaa_audio_ctl_parse(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Disabling nonaudio...\n"); ); hdaa_audio_disable_nonaudio(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Disabling useless...\n"); ); hdaa_audio_disable_useless(devinfo); HDA_BOOTVERBOSE( device_printf(dev, "Patched pins configuration:\n"); hdaa_dump_pin_configs(devinfo); ); HDA_BOOTHVERBOSE( device_printf(dev, "Parsing pin associations...\n"); ); hdaa_audio_as_parse(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Building AFG tree...\n"); ); hdaa_audio_build_tree(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Disabling unassociated " "widgets...\n"); ); hdaa_audio_disable_unas(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Disabling nonselected " "inputs...\n"); ); hdaa_audio_disable_notselected(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Disabling useless...\n"); ); hdaa_audio_disable_useless(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Disabling " "crossassociatement 
connections...\n"); ); hdaa_audio_disable_crossas(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Disabling useless...\n"); ); hdaa_audio_disable_useless(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Binding associations to channels...\n"); ); hdaa_audio_bind_as(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Assigning names to signal sources...\n"); ); hdaa_audio_assign_names(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Preparing PCM devices...\n"); ); hdaa_prepare_pcms(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Assigning mixers to the tree...\n"); ); hdaa_audio_assign_mixers(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Preparing pin controls...\n"); ); hdaa_audio_prepare_pin_ctrl(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "AFG commit...\n"); ); hdaa_audio_commit(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Applying direct built-in patches...\n"); ); hdaa_patch_direct(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Pin sense init...\n"); ); hdaa_sense_init(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Creating PCM devices...\n"); ); hdaa_create_pcms(devinfo); HDA_BOOTVERBOSE( if (devinfo->quirks != 0) { device_printf(dev, "FG config/quirks:"); for (i = 0; i < nitems(hdaa_quirks_tab); i++) { if ((devinfo->quirks & hdaa_quirks_tab[i].value) == hdaa_quirks_tab[i].value) printf(" %s", hdaa_quirks_tab[i].key); } printf("\n"); } device_printf(dev, "\n"); device_printf(dev, "+-------------------+\n"); device_printf(dev, "| DUMPING HDA NODES |\n"); device_printf(dev, "+-------------------+\n"); hdaa_dump_nodes(devinfo); ); HDA_BOOTHVERBOSE( device_printf(dev, "\n"); device_printf(dev, "+------------------------+\n"); device_printf(dev, "| DUMPING HDA AMPLIFIERS |\n"); device_printf(dev, "+------------------------+\n"); device_printf(dev, "\n"); i = 0; while ((ctl = hdaa_audio_ctl_each(devinfo, &i)) != NULL) { device_printf(dev, "%3d: nid %3d %s (%s) index %d", i, (ctl->widget != NULL) ? 
ctl->widget->nid : -1, (ctl->ndir == HDAA_CTL_IN)?"in ":"out", (ctl->dir == HDAA_CTL_IN)?"in ":"out", ctl->index); if (ctl->childwidget != NULL) printf(" cnid %3d", ctl->childwidget->nid); else printf(" "); printf(" ossmask=0x%08x\n", ctl->ossmask); device_printf(dev, " mute: %d step: %3d size: %3d off: %3d%s\n", ctl->mute, ctl->step, ctl->size, ctl->offset, (ctl->enable == 0) ? " [DISABLED]" : ((ctl->ossmask == 0) ? " [UNUSED]" : "")); } ); HDA_BOOTVERBOSE( device_printf(dev, "\n"); ); } static void hdaa_unconfigure(device_t dev) { struct hdaa_devinfo *devinfo = device_get_softc(dev); struct hdaa_widget *w; int i, j; HDA_BOOTHVERBOSE( device_printf(dev, "Pin sense deinit...\n"); ); hdaa_sense_deinit(devinfo); free(devinfo->ctl, M_HDAA); devinfo->ctl = NULL; devinfo->ctlcnt = 0; free(devinfo->as, M_HDAA); devinfo->as = NULL; devinfo->ascnt = 0; free(devinfo->devs, M_HDAA); devinfo->devs = NULL; devinfo->num_devs = 0; free(devinfo->chans, M_HDAA); devinfo->chans = NULL; devinfo->num_chans = 0; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL) continue; w->enable = 1; w->selconn = -1; w->pflags = 0; w->bindas = -1; w->bindseqmask = 0; w->ossdev = -1; w->ossmask = 0; for (j = 0; j < w->nconns; j++) w->connsenable[j] = 1; if (w->type == HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) w->wclass.pin.config = w->wclass.pin.newconf; if (w->eld != NULL) { w->eld_len = 0; free(w->eld, M_HDAA); w->eld = NULL; } } } static int hdaa_sysctl_gpi_state(SYSCTL_HANDLER_ARGS) { struct hdaa_devinfo *devinfo = oidp->oid_arg1; device_t dev = devinfo->dev; char buf[256]; int n = 0, i, numgpi; uint32_t data = 0; buf[0] = 0; hdaa_lock(devinfo); numgpi = HDA_PARAM_GPIO_COUNT_NUM_GPI(devinfo->gpio_cap); if (numgpi > 0) { data = hda_command(dev, HDA_CMD_GET_GPI_DATA(0, devinfo->nid)); } hdaa_unlock(devinfo); for (i = 0; i < numgpi; i++) { n += snprintf(buf + n, sizeof(buf) - n, "%s%d=%d", n != 0 ? 
" " : "", i, ((data >> i) & 1)); } return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int hdaa_sysctl_gpio_state(SYSCTL_HANDLER_ARGS) { struct hdaa_devinfo *devinfo = oidp->oid_arg1; device_t dev = devinfo->dev; char buf[256]; int n = 0, i, numgpio; uint32_t data = 0, enable = 0, dir = 0; buf[0] = 0; hdaa_lock(devinfo); numgpio = HDA_PARAM_GPIO_COUNT_NUM_GPIO(devinfo->gpio_cap); if (numgpio > 0) { data = hda_command(dev, HDA_CMD_GET_GPIO_DATA(0, devinfo->nid)); enable = hda_command(dev, HDA_CMD_GET_GPIO_ENABLE_MASK(0, devinfo->nid)); dir = hda_command(dev, HDA_CMD_GET_GPIO_DIRECTION(0, devinfo->nid)); } hdaa_unlock(devinfo); for (i = 0; i < numgpio; i++) { n += snprintf(buf + n, sizeof(buf) - n, "%s%d=", n != 0 ? " " : "", i); if ((enable & (1 << i)) == 0) { n += snprintf(buf + n, sizeof(buf) - n, "disabled"); continue; } n += snprintf(buf + n, sizeof(buf) - n, "%sput(%d)", ((dir >> i) & 1) ? "out" : "in", ((data >> i) & 1)); } return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int hdaa_sysctl_gpio_config(SYSCTL_HANDLER_ARGS) { struct hdaa_devinfo *devinfo = oidp->oid_arg1; char buf[256]; int error, n = 0, i, numgpio; uint32_t gpio, x; gpio = devinfo->newgpio; numgpio = HDA_PARAM_GPIO_COUNT_NUM_GPIO(devinfo->gpio_cap); buf[0] = 0; for (i = 0; i < numgpio; i++) { x = (gpio & HDAA_GPIO_MASK(i)) >> HDAA_GPIO_SHIFT(i); n += snprintf(buf + n, sizeof(buf) - n, "%s%d=%s", n != 0 ? 
" " : "", i, HDA_GPIO_ACTIONS[x]); } error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); if (strncmp(buf, "0x", 2) == 0) gpio = strtol(buf + 2, NULL, 16); else gpio = hdaa_gpio_patch(gpio, buf); hdaa_lock(devinfo); devinfo->newgpio = devinfo->gpio = gpio; hdaa_gpio_commit(devinfo); hdaa_unlock(devinfo); return (0); } static int hdaa_sysctl_gpo_state(SYSCTL_HANDLER_ARGS) { struct hdaa_devinfo *devinfo = oidp->oid_arg1; device_t dev = devinfo->dev; char buf[256]; int n = 0, i, numgpo; uint32_t data = 0; buf[0] = 0; hdaa_lock(devinfo); numgpo = HDA_PARAM_GPIO_COUNT_NUM_GPO(devinfo->gpio_cap); if (numgpo > 0) { data = hda_command(dev, HDA_CMD_GET_GPO_DATA(0, devinfo->nid)); } hdaa_unlock(devinfo); for (i = 0; i < numgpo; i++) { n += snprintf(buf + n, sizeof(buf) - n, "%s%d=%d", n != 0 ? " " : "", i, ((data >> i) & 1)); } return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int hdaa_sysctl_gpo_config(SYSCTL_HANDLER_ARGS) { struct hdaa_devinfo *devinfo = oidp->oid_arg1; char buf[256]; int error, n = 0, i, numgpo; uint32_t gpo, x; gpo = devinfo->newgpo; numgpo = HDA_PARAM_GPIO_COUNT_NUM_GPO(devinfo->gpio_cap); buf[0] = 0; for (i = 0; i < numgpo; i++) { x = (gpo & HDAA_GPIO_MASK(i)) >> HDAA_GPIO_SHIFT(i); n += snprintf(buf + n, sizeof(buf) - n, "%s%d=%s", n != 0 ? 
" " : "", i, HDA_GPIO_ACTIONS[x]); } error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); if (strncmp(buf, "0x", 2) == 0) gpo = strtol(buf + 2, NULL, 16); else gpo = hdaa_gpio_patch(gpo, buf); hdaa_lock(devinfo); devinfo->newgpo = devinfo->gpo = gpo; hdaa_gpo_commit(devinfo); hdaa_unlock(devinfo); return (0); } static int hdaa_sysctl_reconfig(SYSCTL_HANDLER_ARGS) { device_t dev; struct hdaa_devinfo *devinfo; int error, val; dev = oidp->oid_arg1; devinfo = device_get_softc(dev); if (devinfo == NULL) return (EINVAL); val = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL || val == 0) return (error); HDA_BOOTHVERBOSE( device_printf(dev, "Reconfiguration...\n"); ); if ((error = device_delete_children(dev)) != 0) return (error); hdaa_lock(devinfo); hdaa_unconfigure(dev); hdaa_configure(dev); hdaa_unlock(devinfo); bus_generic_attach(dev); HDA_BOOTHVERBOSE( device_printf(dev, "Reconfiguration done\n"); ); return (0); } static int hdaa_suspend(device_t dev) { struct hdaa_devinfo *devinfo = device_get_softc(dev); int i; HDA_BOOTHVERBOSE( device_printf(dev, "Suspend...\n"); ); hdaa_lock(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Stop streams...\n"); ); for (i = 0; i < devinfo->num_chans; i++) { if (devinfo->chans[i].flags & HDAA_CHN_RUNNING) { devinfo->chans[i].flags |= HDAA_CHN_SUSPEND; hdaa_channel_stop(&devinfo->chans[i]); } } HDA_BOOTHVERBOSE( device_printf(dev, "Power down FG" " nid=%d to the D3 state...\n", devinfo->nid); ); hda_command(devinfo->dev, HDA_CMD_SET_POWER_STATE(0, devinfo->nid, HDA_CMD_POWER_STATE_D3)); callout_stop(&devinfo->poll_jack); hdaa_unlock(devinfo); callout_drain(&devinfo->poll_jack); HDA_BOOTHVERBOSE( device_printf(dev, "Suspend done\n"); ); return (0); } static int hdaa_resume(device_t dev) { struct hdaa_devinfo *devinfo = device_get_softc(dev); int i; HDA_BOOTHVERBOSE( device_printf(dev, "Resume...\n"); ); hdaa_lock(devinfo); 
HDA_BOOTHVERBOSE( device_printf(dev, "Power up audio FG nid=%d...\n", devinfo->nid); ); hdaa_powerup(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "AFG commit...\n"); ); hdaa_audio_commit(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Applying direct built-in patches...\n"); ); hdaa_patch_direct(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Pin sense init...\n"); ); hdaa_sense_init(devinfo); hdaa_unlock(devinfo); for (i = 0; i < devinfo->num_devs; i++) { struct hdaa_pcm_devinfo *pdevinfo = &devinfo->devs[i]; HDA_BOOTHVERBOSE( device_printf(pdevinfo->dev, "OSS mixer reinitialization...\n"); ); if (mixer_reinit(pdevinfo->dev) == -1) device_printf(pdevinfo->dev, "unable to reinitialize the mixer\n"); } hdaa_lock(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Start streams...\n"); ); for (i = 0; i < devinfo->num_chans; i++) { if (devinfo->chans[i].flags & HDAA_CHN_SUSPEND) { devinfo->chans[i].flags &= ~HDAA_CHN_SUSPEND; hdaa_channel_start(&devinfo->chans[i]); } } hdaa_unlock(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Resume done\n"); ); return (0); } static int hdaa_probe(device_t dev) { const char *pdesc; char buf[128]; if (hda_get_node_type(dev) != HDA_PARAM_FCT_GRP_TYPE_NODE_TYPE_AUDIO) return (ENXIO); pdesc = device_get_desc(device_get_parent(dev)); snprintf(buf, sizeof(buf), "%.*s Audio Function Group", (int)(strlen(pdesc) - 10), pdesc); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int hdaa_attach(device_t dev) { struct hdaa_devinfo *devinfo = device_get_softc(dev); uint32_t res; nid_t nid = hda_get_node_id(dev); devinfo->dev = dev; devinfo->lock = HDAC_GET_MTX(device_get_parent(dev), dev); devinfo->nid = nid; devinfo->newquirks = -1; devinfo->newgpio = -1; devinfo->newgpo = -1; callout_init(&devinfo->poll_jack, CALLOUT_MPSAFE); devinfo->poll_ival = hz; hdaa_lock(devinfo); res = hda_command(dev, HDA_CMD_GET_PARAMETER(0 , nid, HDA_PARAM_SUB_NODE_COUNT)); hdaa_unlock(devinfo); devinfo->nodecnt = HDA_PARAM_SUB_NODE_COUNT_TOTAL(res); 
devinfo->startnode = HDA_PARAM_SUB_NODE_COUNT_START(res); devinfo->endnode = devinfo->startnode + devinfo->nodecnt; HDA_BOOTVERBOSE( device_printf(dev, "Subsystem ID: 0x%08x\n", hda_get_subsystem_id(dev)); ); HDA_BOOTHVERBOSE( device_printf(dev, "Audio Function Group at nid=%d: %d subnodes %d-%d\n", nid, devinfo->nodecnt, devinfo->startnode, devinfo->endnode - 1); ); if (devinfo->nodecnt > 0) devinfo->widget = (struct hdaa_widget *)malloc( sizeof(*(devinfo->widget)) * devinfo->nodecnt, M_HDAA, M_WAITOK | M_ZERO); else devinfo->widget = NULL; hdaa_lock(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Powering up...\n"); ); hdaa_powerup(devinfo); HDA_BOOTHVERBOSE( device_printf(dev, "Parsing audio FG...\n"); ); hdaa_audio_parse(devinfo); HDA_BOOTVERBOSE( device_printf(dev, "Original pins configuration:\n"); hdaa_dump_pin_configs(devinfo); ); hdaa_configure(dev); hdaa_unlock(devinfo); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "config", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, &devinfo->newquirks, sizeof(&devinfo->newquirks), hdaa_sysctl_quirks, "A", "Configuration options"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "gpi_state", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, devinfo, sizeof(devinfo), hdaa_sysctl_gpi_state, "A", "GPI state"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "gpio_state", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, devinfo, sizeof(devinfo), hdaa_sysctl_gpio_state, "A", "GPIO state"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "gpio_config", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, devinfo, sizeof(devinfo), hdaa_sysctl_gpio_config, "A", "GPIO configuration"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "gpo_state", CTLTYPE_STRING | CTLFLAG_RD | 
CTLFLAG_MPSAFE, devinfo, sizeof(devinfo), hdaa_sysctl_gpo_state, "A", "GPO state"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "gpo_config", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, devinfo, sizeof(devinfo), hdaa_sysctl_gpo_config, "A", "GPO configuration"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "reconfig", CTLTYPE_INT | CTLFLAG_RW, dev, sizeof(dev), hdaa_sysctl_reconfig, "I", "Reprocess configuration"); bus_generic_attach(dev); return (0); } static int hdaa_detach(device_t dev) { struct hdaa_devinfo *devinfo = device_get_softc(dev); int error; if ((error = device_delete_children(dev)) != 0) return (error); hdaa_lock(devinfo); hdaa_unconfigure(dev); devinfo->poll_ival = 0; callout_stop(&devinfo->poll_jack); hdaa_unlock(devinfo); callout_drain(&devinfo->poll_jack); free(devinfo->widget, M_HDAA); return (0); } static int hdaa_print_child(device_t dev, device_t child) { struct hdaa_devinfo *devinfo = device_get_softc(dev); struct hdaa_pcm_devinfo *pdevinfo = (struct hdaa_pcm_devinfo *)device_get_ivars(child); struct hdaa_audio_as *as; int retval, first = 1, i; retval = bus_print_child_header(dev, child); retval += printf(" at nid "); if (pdevinfo->playas >= 0) { as = &devinfo->as[pdevinfo->playas]; for (i = 0; i < 16; i++) { if (as->pins[i] <= 0) continue; retval += printf("%s%d", first ? "" : ",", as->pins[i]); first = 0; } } if (pdevinfo->recas >= 0) { if (pdevinfo->playas >= 0) { retval += printf(" and "); first = 1; } as = &devinfo->as[pdevinfo->recas]; for (i = 0; i < 16; i++) { if (as->pins[i] <= 0) continue; retval += printf("%s%d", first ? 
"" : ",", as->pins[i]); first = 0; } } retval += bus_print_child_footer(dev, child); return (retval); } static int hdaa_child_location_str(device_t dev, device_t child, char *buf, size_t buflen) { struct hdaa_devinfo *devinfo = device_get_softc(dev); struct hdaa_pcm_devinfo *pdevinfo = (struct hdaa_pcm_devinfo *)device_get_ivars(child); struct hdaa_audio_as *as; int first = 1, i, len = 0; len += snprintf(buf + len, buflen - len, "nid="); if (pdevinfo->playas >= 0) { as = &devinfo->as[pdevinfo->playas]; for (i = 0; i < 16; i++) { if (as->pins[i] <= 0) continue; len += snprintf(buf + len, buflen - len, "%s%d", first ? "" : ",", as->pins[i]); first = 0; } } if (pdevinfo->recas >= 0) { as = &devinfo->as[pdevinfo->recas]; for (i = 0; i < 16; i++) { if (as->pins[i] <= 0) continue; len += snprintf(buf + len, buflen - len, "%s%d", first ? "" : ",", as->pins[i]); first = 0; } } return (0); } static void hdaa_stream_intr(device_t dev, int dir, int stream) { struct hdaa_devinfo *devinfo = device_get_softc(dev); struct hdaa_chan *ch; int i; for (i = 0; i < devinfo->num_chans; i++) { ch = &devinfo->chans[i]; if (!(ch->flags & HDAA_CHN_RUNNING)) continue; if (ch->dir == ((dir == 1) ? 
PCMDIR_PLAY : PCMDIR_REC) && ch->sid == stream) { hdaa_unlock(devinfo); chn_intr(ch->c); hdaa_lock(devinfo); } } } static void hdaa_unsol_intr(device_t dev, uint32_t resp) { struct hdaa_devinfo *devinfo = device_get_softc(dev); struct hdaa_widget *w; int i, tag, flags; HDA_BOOTHVERBOSE( device_printf(dev, "Unsolicited response %08x\n", resp); ); tag = resp >> 26; for (i = devinfo->startnode; i < devinfo->endnode; i++) { w = hdaa_widget_get(devinfo, i); if (w == NULL || w->enable == 0 || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; if (w->unsol != tag) continue; if (HDA_PARAM_PIN_CAP_DP(w->wclass.pin.cap) || HDA_PARAM_PIN_CAP_HDMI(w->wclass.pin.cap)) flags = resp & 0x03; else flags = 0x01; if (flags & 0x01) hdaa_presence_handler(w); if (flags & 0x02) hdaa_eld_handler(w); } } static device_method_t hdaa_methods[] = { /* device interface */ DEVMETHOD(device_probe, hdaa_probe), DEVMETHOD(device_attach, hdaa_attach), DEVMETHOD(device_detach, hdaa_detach), DEVMETHOD(device_suspend, hdaa_suspend), DEVMETHOD(device_resume, hdaa_resume), /* Bus interface */ DEVMETHOD(bus_print_child, hdaa_print_child), DEVMETHOD(bus_child_location_str, hdaa_child_location_str), DEVMETHOD(hdac_stream_intr, hdaa_stream_intr), DEVMETHOD(hdac_unsol_intr, hdaa_unsol_intr), DEVMETHOD(hdac_pindump, hdaa_pindump), DEVMETHOD_END }; static driver_t hdaa_driver = { "hdaa", hdaa_methods, sizeof(struct hdaa_devinfo), }; static devclass_t hdaa_devclass; DRIVER_MODULE(snd_hda, hdacc, hdaa_driver, hdaa_devclass, NULL, NULL); static void hdaa_chan_formula(struct hdaa_devinfo *devinfo, int asid, char *buf, int buflen) { struct hdaa_audio_as *as; int c; as = &devinfo->as[asid]; c = devinfo->chans[as->chans[0]].channels; if (c == 1) snprintf(buf, buflen, "mono"); else if (c == 2) { if (as->hpredir < 0) buf[0] = 0; else snprintf(buf, buflen, "2.0"); } else if (as->pinset == 0x0003) snprintf(buf, buflen, "3.1"); else if (as->pinset == 0x0005 || as->pinset == 0x0011) snprintf(buf, buflen, 
"4.0"); else if (as->pinset == 0x0007 || as->pinset == 0x0013) snprintf(buf, buflen, "5.1"); else if (as->pinset == 0x0017) snprintf(buf, buflen, "7.1"); else snprintf(buf, buflen, "%dch", c); if (as->hpredir >= 0) strlcat(buf, "+HP", buflen); } static int hdaa_chan_type(struct hdaa_devinfo *devinfo, int asid) { struct hdaa_audio_as *as; struct hdaa_widget *w; int i, t = -1, t1; as = &devinfo->as[asid]; for (i = 0; i < 16; i++) { w = hdaa_widget_get(devinfo, as->pins[i]); if (w == NULL || w->enable == 0 || w->type != HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX) continue; t1 = HDA_CONFIG_DEFAULTCONF_DEVICE(w->wclass.pin.config); if (t == -1) t = t1; else if (t != t1) { t = -2; break; } } return (t); } static int hdaa_sysctl_32bit(SYSCTL_HANDLER_ARGS) { struct hdaa_audio_as *as = (struct hdaa_audio_as *)oidp->oid_arg1; struct hdaa_pcm_devinfo *pdevinfo = as->pdevinfo; struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_chan *ch; int error, val, i; uint32_t pcmcap; ch = &devinfo->chans[as->chans[0]]; val = (ch->bit32 == 4) ? 32 : ((ch->bit32 == 3) ? 24 : ((ch->bit32 == 2) ? 
20 : 0)); error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); pcmcap = ch->supp_pcm_size_rate; if (val == 32 && HDA_PARAM_SUPP_PCM_SIZE_RATE_32BIT(pcmcap)) ch->bit32 = 4; else if (val == 24 && HDA_PARAM_SUPP_PCM_SIZE_RATE_24BIT(pcmcap)) ch->bit32 = 3; else if (val == 20 && HDA_PARAM_SUPP_PCM_SIZE_RATE_20BIT(pcmcap)) ch->bit32 = 2; else return (EINVAL); for (i = 1; i < as->num_chans; i++) devinfo->chans[as->chans[i]].bit32 = ch->bit32; return (0); } static int hdaa_pcm_probe(device_t dev) { struct hdaa_pcm_devinfo *pdevinfo = (struct hdaa_pcm_devinfo *)device_get_ivars(dev); struct hdaa_devinfo *devinfo = pdevinfo->devinfo; const char *pdesc; char chans1[8], chans2[8]; char buf[128]; int loc1, loc2, t1, t2; if (pdevinfo->playas >= 0) loc1 = devinfo->as[pdevinfo->playas].location; else loc1 = devinfo->as[pdevinfo->recas].location; if (pdevinfo->recas >= 0) loc2 = devinfo->as[pdevinfo->recas].location; else loc2 = loc1; if (loc1 != loc2) loc1 = -2; if (loc1 >= 0 && HDA_LOCS[loc1][0] == '0') loc1 = -2; chans1[0] = 0; chans2[0] = 0; t1 = t2 = -1; if (pdevinfo->playas >= 0) { hdaa_chan_formula(devinfo, pdevinfo->playas, chans1, sizeof(chans1)); t1 = hdaa_chan_type(devinfo, pdevinfo->playas); } if (pdevinfo->recas >= 0) { hdaa_chan_formula(devinfo, pdevinfo->recas, chans2, sizeof(chans2)); t2 = hdaa_chan_type(devinfo, pdevinfo->recas); } if (chans1[0] != 0 || chans2[0] != 0) { if (chans1[0] == 0 && pdevinfo->playas >= 0) snprintf(chans1, sizeof(chans1), "2.0"); else if (chans2[0] == 0 && pdevinfo->recas >= 0) snprintf(chans2, sizeof(chans2), "2.0"); if (strcmp(chans1, chans2) == 0) chans2[0] = 0; } if (t1 == -1) t1 = t2; else if (t2 == -1) t2 = t1; if (t1 != t2) t1 = -2; if (pdevinfo->digital) t1 = -2; pdesc = device_get_desc(device_get_parent(dev)); snprintf(buf, sizeof(buf), "%.*s (%s%s%s%s%s%s%s%s%s)", (int)(strlen(pdesc) - 21), pdesc, loc1 >= 0 ? HDA_LOCS[loc1] : "", loc1 >= 0 ? 
" " : "", (pdevinfo->digital == 0x7)?"HDMI/DP": ((pdevinfo->digital == 0x5)?"DisplayPort": ((pdevinfo->digital == 0x3)?"HDMI": ((pdevinfo->digital)?"Digital":"Analog"))), chans1[0] ? " " : "", chans1, chans2[0] ? "/" : "", chans2, t1 >= 0 ? " " : "", t1 >= 0 ? HDA_DEVS[t1] : ""); device_set_desc_copy(dev, buf); return (BUS_PROBE_SPECIFIC); } static int hdaa_pcm_attach(device_t dev) { struct hdaa_pcm_devinfo *pdevinfo = (struct hdaa_pcm_devinfo *)device_get_ivars(dev); struct hdaa_devinfo *devinfo = pdevinfo->devinfo; struct hdaa_audio_as *as; struct snddev_info *d; char status[SND_STATUSLEN]; int i; pdevinfo->chan_size = pcm_getbuffersize(dev, HDA_BUFSZ_MIN, HDA_BUFSZ_DEFAULT, HDA_BUFSZ_MAX); HDA_BOOTVERBOSE( device_printf(dev, "+--------------------------------------+\n"); device_printf(dev, "| DUMPING PCM Playback/Record Channels |\n"); device_printf(dev, "+--------------------------------------+\n"); hdaa_dump_pcmchannels(pdevinfo); device_printf(dev, "\n"); device_printf(dev, "+-------------------------------+\n"); device_printf(dev, "| DUMPING Playback/Record Paths |\n"); device_printf(dev, "+-------------------------------+\n"); hdaa_dump_dac(pdevinfo); hdaa_dump_adc(pdevinfo); hdaa_dump_mix(pdevinfo); device_printf(dev, "\n"); device_printf(dev, "+-------------------------+\n"); device_printf(dev, "| DUMPING Volume Controls |\n"); device_printf(dev, "+-------------------------+\n"); hdaa_dump_ctls(pdevinfo, "Master Volume", SOUND_MASK_VOLUME); hdaa_dump_ctls(pdevinfo, "PCM Volume", SOUND_MASK_PCM); hdaa_dump_ctls(pdevinfo, "CD Volume", SOUND_MASK_CD); hdaa_dump_ctls(pdevinfo, "Microphone Volume", SOUND_MASK_MIC); hdaa_dump_ctls(pdevinfo, "Microphone2 Volume", SOUND_MASK_MONITOR); hdaa_dump_ctls(pdevinfo, "Line-in Volume", SOUND_MASK_LINE); hdaa_dump_ctls(pdevinfo, "Speaker/Beep Volume", SOUND_MASK_SPEAKER); hdaa_dump_ctls(pdevinfo, "Recording Level", SOUND_MASK_RECLEV); hdaa_dump_ctls(pdevinfo, "Input Mix Level", SOUND_MASK_IMIX); hdaa_dump_ctls(pdevinfo, 
"Input Monitoring Level", SOUND_MASK_IGAIN); hdaa_dump_ctls(pdevinfo, NULL, 0); device_printf(dev, "\n"); ); if (resource_int_value(device_get_name(dev), device_get_unit(dev), "blocksize", &i) == 0 && i > 0) { i &= HDA_BLK_ALIGN; if (i < HDA_BLK_MIN) i = HDA_BLK_MIN; pdevinfo->chan_blkcnt = pdevinfo->chan_size / i; i = 0; while (pdevinfo->chan_blkcnt >> i) i++; pdevinfo->chan_blkcnt = 1 << (i - 1); if (pdevinfo->chan_blkcnt < HDA_BDL_MIN) pdevinfo->chan_blkcnt = HDA_BDL_MIN; else if (pdevinfo->chan_blkcnt > HDA_BDL_MAX) pdevinfo->chan_blkcnt = HDA_BDL_MAX; } else pdevinfo->chan_blkcnt = HDA_BDL_DEFAULT; /* * We don't register interrupt handler with snd_setup_intr * in pcm device. Mark pcm device as MPSAFE manually. */ pcm_setflags(dev, pcm_getflags(dev) | SD_F_MPSAFE); HDA_BOOTHVERBOSE( device_printf(dev, "OSS mixer initialization...\n"); ); if (mixer_init(dev, &hdaa_audio_ctl_ossmixer_class, pdevinfo) != 0) device_printf(dev, "Can't register mixer\n"); HDA_BOOTHVERBOSE( device_printf(dev, "Registering PCM channels...\n"); ); if (pcm_register(dev, pdevinfo, (pdevinfo->playas >= 0)?1:0, (pdevinfo->recas >= 0)?1:0) != 0) device_printf(dev, "Can't register PCM\n"); pdevinfo->registered++; d = device_get_softc(dev); if (pdevinfo->playas >= 0) { as = &devinfo->as[pdevinfo->playas]; for (i = 0; i < as->num_chans; i++) pcm_addchan(dev, PCMDIR_PLAY, &hdaa_channel_class, &devinfo->chans[as->chans[i]]); SYSCTL_ADD_PROC(&d->play_sysctl_ctx, SYSCTL_CHILDREN(d->play_sysctl_tree), OID_AUTO, "32bit", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, as, sizeof(as), hdaa_sysctl_32bit, "I", "Resolution of 32bit samples (20/24/32bit)"); } if (pdevinfo->recas >= 0) { as = &devinfo->as[pdevinfo->recas]; for (i = 0; i < as->num_chans; i++) pcm_addchan(dev, PCMDIR_REC, &hdaa_channel_class, &devinfo->chans[as->chans[i]]); SYSCTL_ADD_PROC(&d->rec_sysctl_ctx, SYSCTL_CHILDREN(d->rec_sysctl_tree), OID_AUTO, "32bit", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, as, sizeof(as), hdaa_sysctl_32bit, "I", 
"Resolution of 32bit samples (20/24/32bit)"); pdevinfo->autorecsrc = 2; resource_int_value(device_get_name(dev), device_get_unit(dev), "rec.autosrc", &pdevinfo->autorecsrc); SYSCTL_ADD_INT(&d->rec_sysctl_ctx, SYSCTL_CHILDREN(d->rec_sysctl_tree), OID_AUTO, - "autosrc", CTLTYPE_INT | CTLFLAG_RW, + "autosrc", CTLFLAG_RW, &pdevinfo->autorecsrc, 0, "Automatic recording source selection"); } if (pdevinfo->mixer != NULL) { hdaa_audio_ctl_set_defaults(pdevinfo); if (pdevinfo->recas >= 0) { as = &devinfo->as[pdevinfo->recas]; hdaa_lock(devinfo); hdaa_autorecsrc_handler(as, NULL); hdaa_unlock(devinfo); } } snprintf(status, SND_STATUSLEN, "on %s %s", device_get_nameunit(device_get_parent(dev)), PCM_KLDSTRING(snd_hda)); pcm_setstatus(dev, status); return (0); } static int hdaa_pcm_detach(device_t dev) { struct hdaa_pcm_devinfo *pdevinfo = (struct hdaa_pcm_devinfo *)device_get_ivars(dev); int err; if (pdevinfo->registered > 0) { err = pcm_unregister(dev); if (err != 0) return (err); } return (0); } static device_method_t hdaa_pcm_methods[] = { /* device interface */ DEVMETHOD(device_probe, hdaa_pcm_probe), DEVMETHOD(device_attach, hdaa_pcm_attach), DEVMETHOD(device_detach, hdaa_pcm_detach), DEVMETHOD_END }; static driver_t hdaa_pcm_driver = { "pcm", hdaa_pcm_methods, PCM_SOFTC_SIZE, }; DRIVER_MODULE(snd_hda_pcm, hdaa, hdaa_pcm_driver, pcm_devclass, NULL, NULL); MODULE_DEPEND(snd_hda, sound, SOUND_MINVER, SOUND_PREFVER, SOUND_MAXVER); MODULE_VERSION(snd_hda, 1); Index: stable/9/sys/dev/usb/wlan/if_run.c =================================================================== --- stable/9/sys/dev/usb/wlan/if_run.c (revision 273911) +++ stable/9/sys/dev/usb/wlan/if_run.c (revision 273912) Property changes on: stable/9/sys/dev/usb/wlan/if_run.c ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/dev/usb/wlan/if_run.c:r263710,273377-273378,273423,273455 Index: stable/9/sys/dev/usb/wlan/if_runreg.h 
=================================================================== --- stable/9/sys/dev/usb/wlan/if_runreg.h (revision 273911) +++ stable/9/sys/dev/usb/wlan/if_runreg.h (revision 273912) Property changes on: stable/9/sys/dev/usb/wlan/if_runreg.h ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/dev/usb/wlan/if_runreg.h:r263710,273377-273378,273423,273455 Index: stable/9/sys/dev/vxge/vxge.c =================================================================== --- stable/9/sys/dev/vxge/vxge.c (revision 273911) +++ stable/9/sys/dev/vxge/vxge.c (revision 273912) @@ -1,4201 +1,4201 @@ /*- * Copyright(c) 2002-2011 Exar Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification are permitted provided the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Exar Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ #include static int vxge_pci_bd_no = -1; static u32 vxge_drv_copyright = 0; static u32 vxge_dev_ref_count = 0; static u32 vxge_dev_req_reboot = 0; static int vpath_selector[VXGE_HAL_MAX_VIRTUAL_PATHS] = \ {0, 1, 3, 3, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15, 31}; /* * vxge_probe * Probes for x3100 devices */ int vxge_probe(device_t ndev) { int err = ENXIO; u16 pci_bd_no = 0; u16 pci_vendor_id = 0; u16 pci_device_id = 0; char adapter_name[64]; pci_vendor_id = pci_get_vendor(ndev); if (pci_vendor_id != VXGE_PCI_VENDOR_ID) goto _exit0; pci_device_id = pci_get_device(ndev); if (pci_device_id == VXGE_PCI_DEVICE_ID_TITAN_1) { pci_bd_no = (pci_get_bus(ndev) | pci_get_slot(ndev)); snprintf(adapter_name, sizeof(adapter_name), VXGE_ADAPTER_NAME, pci_get_revid(ndev)); device_set_desc_copy(ndev, adapter_name); if (!vxge_drv_copyright) { device_printf(ndev, VXGE_COPYRIGHT); vxge_drv_copyright = 1; } if (vxge_dev_req_reboot == 0) { vxge_pci_bd_no = pci_bd_no; err = BUS_PROBE_DEFAULT; } else { if (pci_bd_no != vxge_pci_bd_no) { vxge_pci_bd_no = pci_bd_no; err = BUS_PROBE_DEFAULT; } } } _exit0: return (err); } /* * vxge_attach * Connects driver to the system if probe was success @ndev handle */ int vxge_attach(device_t ndev) { int err = 0; vxge_dev_t *vdev; vxge_hal_device_t *hldev = NULL; vxge_hal_device_attr_t device_attr; vxge_free_resources_e error_level = VXGE_FREE_NONE; vxge_hal_status_e status = VXGE_HAL_OK; /* Get per-ndev buffer */ 
vdev = (vxge_dev_t *) device_get_softc(ndev); if (!vdev) goto _exit0; bzero(vdev, sizeof(vxge_dev_t)); vdev->ndev = ndev; strlcpy(vdev->ndev_name, "vxge", sizeof(vdev->ndev_name)); err = vxge_driver_config(vdev); if (err != 0) goto _exit0; /* Initialize HAL driver */ status = vxge_driver_init(vdev); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "Failed to initialize driver\n"); goto _exit0; } /* Enable PCI bus-master */ pci_enable_busmaster(ndev); /* Allocate resources */ err = vxge_alloc_resources(vdev); if (err != 0) { device_printf(vdev->ndev, "resource allocation failed\n"); goto _exit0; } err = vxge_device_hw_info_get(vdev); if (err != 0) { error_level = VXGE_FREE_BAR2; goto _exit0; } /* Get firmware default values for Device Configuration */ vxge_hal_device_config_default_get(vdev->device_config); /* Customize Device Configuration based on User request */ vxge_vpath_config(vdev); /* Allocate ISR resources */ err = vxge_alloc_isr_resources(vdev); if (err != 0) { error_level = VXGE_FREE_ISR_RESOURCE; device_printf(vdev->ndev, "isr resource allocation failed\n"); goto _exit0; } /* HAL attributes */ device_attr.bar0 = (u8 *) vdev->pdev->bar_info[0]; device_attr.bar1 = (u8 *) vdev->pdev->bar_info[1]; device_attr.bar2 = (u8 *) vdev->pdev->bar_info[2]; device_attr.regh0 = (vxge_bus_res_t *) vdev->pdev->reg_map[0]; device_attr.regh1 = (vxge_bus_res_t *) vdev->pdev->reg_map[1]; device_attr.regh2 = (vxge_bus_res_t *) vdev->pdev->reg_map[2]; device_attr.irqh = (pci_irq_h) vdev->config.isr_info[0].irq_handle; device_attr.cfgh = vdev->pdev; device_attr.pdev = vdev->pdev; /* Initialize HAL Device */ status = vxge_hal_device_initialize((vxge_hal_device_h *) &hldev, &device_attr, vdev->device_config); if (status != VXGE_HAL_OK) { error_level = VXGE_FREE_ISR_RESOURCE; device_printf(vdev->ndev, "hal device initialization failed\n"); goto _exit0; } vdev->devh = hldev; vxge_hal_device_private_set(hldev, vdev); if (vdev->is_privilaged) { err = vxge_firmware_verify(vdev); 
if (err != 0) { vxge_dev_req_reboot = 1; error_level = VXGE_FREE_TERMINATE_DEVICE; goto _exit0; } } /* Allocate memory for vpath */ vdev->vpaths = (vxge_vpath_t *) vxge_mem_alloc(vdev->no_of_vpath * sizeof(vxge_vpath_t)); if (vdev->vpaths == NULL) { error_level = VXGE_FREE_TERMINATE_DEVICE; device_printf(vdev->ndev, "vpath memory allocation failed\n"); goto _exit0; } vdev->no_of_func = 1; if (vdev->is_privilaged) { vxge_hal_func_mode_count(vdev->devh, vdev->config.hw_info.function_mode, &vdev->no_of_func); vxge_bw_priority_config(vdev); } /* Initialize mutexes */ vxge_mutex_init(vdev); /* Initialize Media */ vxge_media_init(vdev); err = vxge_ifp_setup(ndev); if (err != 0) { error_level = VXGE_FREE_MEDIA; device_printf(vdev->ndev, "setting up interface failed\n"); goto _exit0; } err = vxge_isr_setup(vdev); if (err != 0) { error_level = VXGE_FREE_INTERFACE; device_printf(vdev->ndev, "failed to associate interrupt handler with device\n"); goto _exit0; } vxge_device_hw_info_print(vdev); vdev->is_active = TRUE; _exit0: if (error_level) { vxge_free_resources(ndev, error_level); err = ENXIO; } return (err); } /* * vxge_detach * Detaches driver from the Kernel subsystem */ int vxge_detach(device_t ndev) { vxge_dev_t *vdev; vdev = (vxge_dev_t *) device_get_softc(ndev); if (vdev->is_active) { vdev->is_active = FALSE; vxge_stop(vdev); vxge_free_resources(ndev, VXGE_FREE_ALL); } return (0); } /* * vxge_shutdown * To shutdown device before system shutdown */ int vxge_shutdown(device_t ndev) { vxge_dev_t *vdev = (vxge_dev_t *) device_get_softc(ndev); vxge_stop(vdev); return (0); } /* * vxge_init * Initialize the interface */ void vxge_init(void *vdev_ptr) { vxge_dev_t *vdev = (vxge_dev_t *) vdev_ptr; VXGE_DRV_LOCK(vdev); vxge_init_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_init_locked * Initialize the interface */ void vxge_init_locked(vxge_dev_t *vdev) { int i, err = EINVAL; vxge_hal_device_t *hldev = vdev->devh; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_vpath_h 
vpath_handle; ifnet_t ifp = vdev->ifp; /* If device is in running state, initializing is not required */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) goto _exit0; VXGE_DRV_LOCK_ASSERT(vdev); /* Opening vpaths */ err = vxge_vpath_open(vdev); if (err != 0) goto _exit1; if (vdev->config.rth_enable) { status = vxge_rth_config(vdev); if (status != VXGE_HAL_OK) goto _exit1; } for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; /* check initial mtu before enabling the device */ status = vxge_hal_device_mtu_check(vpath_handle, ifp->if_mtu); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "invalid mtu size %ld specified\n", ifp->if_mtu); goto _exit1; } status = vxge_hal_vpath_mtu_set(vpath_handle, ifp->if_mtu); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "setting mtu in device failed\n"); goto _exit1; } } /* Enable HAL device */ status = vxge_hal_device_enable(hldev); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to enable device\n"); goto _exit1; } if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) vxge_msix_enable(vdev); /* Checksum capability */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; /* Enabling mcast for all vpath */ vxge_hal_vpath_mcast_enable(vpath_handle); /* Enabling bcast for all vpath */ status = vxge_hal_vpath_bcast_enable(vpath_handle); if (status != VXGE_HAL_OK) device_printf(vdev->ndev, "can't enable bcast on vpath (%d)\n", i); } /* Enable interrupts */ vxge_hal_device_intr_enable(vdev->devh); for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; bzero(&(vdev->vpaths[i].driver_stats), sizeof(vxge_drv_stats_t)); status = 
vxge_hal_vpath_enable(vpath_handle); if (status != VXGE_HAL_OK) goto _exit2; } vxge_os_mdelay(1000); /* Device is initialized */ vdev->is_initialized = TRUE; /* Now inform the stack we're ready */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_RUNNING; goto _exit0; _exit2: vxge_hal_device_intr_disable(vdev->devh); vxge_hal_device_disable(hldev); _exit1: vxge_vpath_close(vdev); _exit0: return; } /* * vxge_driver_init * Initializes HAL driver */ vxge_hal_status_e vxge_driver_init(vxge_dev_t *vdev) { vxge_hal_uld_cbs_t uld_callbacks; vxge_hal_driver_config_t driver_config; vxge_hal_status_e status = VXGE_HAL_OK; /* Initialize HAL driver */ if (!vxge_dev_ref_count) { bzero(&uld_callbacks, sizeof(vxge_hal_uld_cbs_t)); bzero(&driver_config, sizeof(vxge_hal_driver_config_t)); uld_callbacks.link_up = vxge_link_up; uld_callbacks.link_down = vxge_link_down; uld_callbacks.crit_err = vxge_crit_error; uld_callbacks.sched_timer = NULL; uld_callbacks.xpak_alarm_log = NULL; status = vxge_hal_driver_initialize(&driver_config, &uld_callbacks); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to initialize driver\n"); goto _exit0; } } vxge_hal_driver_debug_set(VXGE_TRACE); vxge_dev_ref_count++; _exit0: return (status); } /* * vxge_driver_config */ int vxge_driver_config(vxge_dev_t *vdev) { int i, err = 0; char temp_buffer[30]; vxge_bw_info_t bw_info; VXGE_GET_PARAM("hint.vxge.0.no_of_vpath", vdev->config, no_of_vpath, VXGE_DEFAULT_USER_HARDCODED); if (vdev->config.no_of_vpath == VXGE_DEFAULT_USER_HARDCODED) vdev->config.no_of_vpath = mp_ncpus; if (vdev->config.no_of_vpath <= 0) { err = EINVAL; device_printf(vdev->ndev, "Failed to load driver, \ invalid config : \'no_of_vpath\'\n"); goto _exit0; } VXGE_GET_PARAM("hint.vxge.0.intr_coalesce", vdev->config, intr_coalesce, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.rth_enable", vdev->config, rth_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.rth_bkt_sz", vdev->config, 
rth_bkt_sz, VXGE_DEFAULT_RTH_BUCKET_SIZE); VXGE_GET_PARAM("hint.vxge.0.lro_enable", vdev->config, lro_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.tso_enable", vdev->config, tso_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.tx_steering", vdev->config, tx_steering, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.msix_enable", vdev->config, intr_mode, VXGE_HAL_INTR_MODE_MSIX); VXGE_GET_PARAM("hint.vxge.0.ifqmaxlen", vdev->config, ifq_maxlen, VXGE_DEFAULT_CONFIG_IFQ_MAXLEN); VXGE_GET_PARAM("hint.vxge.0.port_mode", vdev->config, port_mode, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.port_mode == VXGE_DEFAULT_USER_HARDCODED) vdev->config.port_mode = VXGE_DEFAULT_CONFIG_VALUE; VXGE_GET_PARAM("hint.vxge.0.l2_switch", vdev->config, l2_switch, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.l2_switch == VXGE_DEFAULT_USER_HARDCODED) vdev->config.l2_switch = VXGE_DEFAULT_CONFIG_VALUE; VXGE_GET_PARAM("hint.vxge.0.fw_upgrade", vdev->config, fw_option, VXGE_FW_UPGRADE_ALL); VXGE_GET_PARAM("hint.vxge.0.low_latency", vdev->config, low_latency, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.func_mode", vdev->config, function_mode, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.function_mode == VXGE_DEFAULT_USER_HARDCODED) vdev->config.function_mode = VXGE_DEFAULT_CONFIG_VALUE; if (!(is_multi_func(vdev->config.function_mode) || is_single_func(vdev->config.function_mode))) vdev->config.function_mode = VXGE_DEFAULT_CONFIG_VALUE; for (i = 0; i < VXGE_HAL_MAX_FUNCTIONS; i++) { bw_info.func_id = i; sprintf(temp_buffer, "hint.vxge.0.bandwidth_%d", i); VXGE_GET_PARAM(temp_buffer, bw_info, bandwidth, VXGE_DEFAULT_USER_HARDCODED); if (bw_info.bandwidth == VXGE_DEFAULT_USER_HARDCODED) bw_info.bandwidth = VXGE_HAL_VPATH_BW_LIMIT_DEFAULT; sprintf(temp_buffer, "hint.vxge.0.priority_%d", i); VXGE_GET_PARAM(temp_buffer, bw_info, priority, VXGE_DEFAULT_USER_HARDCODED); if (bw_info.priority == VXGE_DEFAULT_USER_HARDCODED) bw_info.priority = 
VXGE_HAL_VPATH_PRIORITY_DEFAULT; vxge_os_memcpy(&vdev->config.bw_info[i], &bw_info, sizeof(vxge_bw_info_t)); } _exit0: return (err); } /* * vxge_stop */ void vxge_stop(vxge_dev_t *vdev) { VXGE_DRV_LOCK(vdev); vxge_stop_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_stop_locked * Common code for both stop and part of reset. * disables device, interrupts and closes vpaths handle */ void vxge_stop_locked(vxge_dev_t *vdev) { u64 adapter_status = 0; vxge_hal_status_e status; vxge_hal_device_t *hldev = vdev->devh; ifnet_t ifp = vdev->ifp; VXGE_DRV_LOCK_ASSERT(vdev); /* If device is not in "Running" state, return */ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; /* Set appropriate flags */ vdev->is_initialized = FALSE; hldev->link_state = VXGE_HAL_LINK_NONE; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); /* Disable interrupts */ vxge_hal_device_intr_disable(hldev); /* Disable HAL device */ status = vxge_hal_device_disable(hldev); if (status != VXGE_HAL_OK) { vxge_hal_device_status(hldev, &adapter_status); device_printf(vdev->ndev, "adapter status: 0x%llx\n", adapter_status); } /* reset vpaths */ vxge_vpath_reset(vdev); vxge_os_mdelay(1000); /* Close Vpaths */ vxge_vpath_close(vdev); } void vxge_send(ifnet_t ifp) { vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; vpath = &(vdev->vpaths[0]); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (VXGE_TX_TRYLOCK(vpath)) { vxge_send_locked(ifp, vpath); VXGE_TX_UNLOCK(vpath); } } } static inline void vxge_send_locked(ifnet_t ifp, vxge_vpath_t *vpath) { mbuf_t m_head = NULL; vxge_dev_t *vdev = vpath->vdev; VXGE_TX_LOCK_ASSERT(vpath); if ((!vdev->is_initialized) || ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING)) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (vxge_xmit(ifp, vpath, &m_head)) { if (m_head == NULL) break; ifp->if_drv_flags |= 
IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); VXGE_DRV_STATS(vpath, tx_again); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } } #if __FreeBSD_version >= 800000 int vxge_mq_send(ifnet_t ifp, mbuf_t m_head) { int i = 0, err = 0; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; if (vdev->config.tx_steering) { i = vxge_vpath_get(vdev, m_head); } else if ((m_head->m_flags & M_FLOWID) != 0) { i = m_head->m_pkthdr.flowid % vdev->no_of_vpath; } vpath = &(vdev->vpaths[i]); if (VXGE_TX_TRYLOCK(vpath)) { err = vxge_mq_send_locked(ifp, vpath, m_head); VXGE_TX_UNLOCK(vpath); } else err = drbr_enqueue(ifp, vpath->br, m_head); return (err); } static inline int vxge_mq_send_locked(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t m_head) { int err = 0; mbuf_t next = NULL; vxge_dev_t *vdev = vpath->vdev; VXGE_TX_LOCK_ASSERT(vpath); if ((!vdev->is_initialized) || ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING)) { err = drbr_enqueue(ifp, vpath->br, m_head); goto _exit0; } if (m_head == NULL) { next = drbr_dequeue(ifp, vpath->br); } else if (drbr_needs_enqueue(ifp, vpath->br)) { if ((err = drbr_enqueue(ifp, vpath->br, m_head)) != 0) goto _exit0; next = drbr_dequeue(ifp, vpath->br); } else next = m_head; /* Process the queue */ while (next != NULL) { if ((err = vxge_xmit(ifp, vpath, &next)) != 0) { if (next == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; err = drbr_enqueue(ifp, vpath->br, next); VXGE_DRV_STATS(vpath, tx_again); break; } ifp->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) ifp->if_omcasts++; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; next = drbr_dequeue(ifp, vpath->br); } _exit0: return (err); } void vxge_mq_qflush(ifnet_t ifp) { int i; mbuf_t m_head; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; for (i = 0; i < vdev->no_of_vpath; i++) { vpath 
= &(vdev->vpaths[i]); if (!vpath->handle) continue; VXGE_TX_LOCK(vpath); while ((m_head = buf_ring_dequeue_sc(vpath->br)) != NULL) vxge_free_packet(m_head); VXGE_TX_UNLOCK(vpath); } if_qflush(ifp); } #endif static inline int vxge_xmit(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t *m_headp) { int err, num_segs = 0; u32 txdl_avail, dma_index, tagged = 0; dma_addr_t dma_addr; bus_size_t dma_sizes; void *dtr_priv; vxge_txdl_priv_t *txdl_priv; vxge_hal_txdl_h txdlh; vxge_hal_status_e status; vxge_dev_t *vdev = vpath->vdev; VXGE_DRV_STATS(vpath, tx_xmit); txdl_avail = vxge_hal_fifo_free_txdl_count_get(vpath->handle); if (txdl_avail < VXGE_TX_LOW_THRESHOLD) { VXGE_DRV_STATS(vpath, tx_low_dtr_cnt); err = ENOBUFS; goto _exit0; } /* Reserve descriptors */ status = vxge_hal_fifo_txdl_reserve(vpath->handle, &txdlh, &dtr_priv); if (status != VXGE_HAL_OK) { VXGE_DRV_STATS(vpath, tx_reserve_failed); err = ENOBUFS; goto _exit0; } /* Update Tx private structure for this descriptor */ txdl_priv = (vxge_txdl_priv_t *) dtr_priv; /* * Map the packet for DMA. * Returns number of segments through num_segs. 
*/ err = vxge_dma_mbuf_coalesce(vpath->dma_tag_tx, txdl_priv->dma_map, m_headp, txdl_priv->dma_buffers, &num_segs); if (vpath->driver_stats.tx_max_frags < num_segs) vpath->driver_stats.tx_max_frags = num_segs; if (err == ENOMEM) { VXGE_DRV_STATS(vpath, tx_no_dma_setup); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); goto _exit0; } else if (err != 0) { vxge_free_packet(*m_headp); VXGE_DRV_STATS(vpath, tx_no_dma_setup); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); goto _exit0; } txdl_priv->mbuf_pkt = *m_headp; /* Set VLAN tag in descriptor only if this packet has it */ if ((*m_headp)->m_flags & M_VLANTAG) vxge_hal_fifo_txdl_vlan_set(txdlh, (*m_headp)->m_pkthdr.ether_vtag); /* Set descriptor buffer for header and each fragment/segment */ for (dma_index = 0; dma_index < num_segs; dma_index++) { dma_sizes = txdl_priv->dma_buffers[dma_index].ds_len; dma_addr = htole64(txdl_priv->dma_buffers[dma_index].ds_addr); vxge_hal_fifo_txdl_buffer_set(vpath->handle, txdlh, dma_index, dma_addr, dma_sizes); } /* Pre-write Sync of mapping */ bus_dmamap_sync(vpath->dma_tag_tx, txdl_priv->dma_map, BUS_DMASYNC_PREWRITE); if ((*m_headp)->m_pkthdr.csum_flags & CSUM_TSO) { if ((*m_headp)->m_pkthdr.tso_segsz) { VXGE_DRV_STATS(vpath, tx_tso); vxge_hal_fifo_txdl_lso_set(txdlh, VXGE_HAL_FIFO_LSO_FRM_ENCAP_AUTO, (*m_headp)->m_pkthdr.tso_segsz); } } /* Checksum */ if (ifp->if_hwassist > 0) { vxge_hal_fifo_txdl_cksum_set_bits(txdlh, VXGE_HAL_FIFO_TXD_TX_CKO_IPV4_EN | VXGE_HAL_FIFO_TXD_TX_CKO_TCP_EN | VXGE_HAL_FIFO_TXD_TX_CKO_UDP_EN); } if ((vxge_hal_device_check_id(vdev->devh) == VXGE_HAL_CARD_TITAN_1A) && (vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 0))) tagged = 1; vxge_hal_fifo_txdl_post(vpath->handle, txdlh, tagged); VXGE_DRV_STATS(vpath, tx_posted); _exit0: return (err); } /* * vxge_tx_replenish * Allocate buffers and set them into descriptors for later use */ /* ARGSUSED */ vxge_hal_status_e vxge_tx_replenish(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, u32 
dtr_index, void *userdata, vxge_hal_reopen_e reopen) { int err = 0; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_txdl_priv_t *txdl_priv = (vxge_txdl_priv_t *) dtr_priv; err = bus_dmamap_create(vpath->dma_tag_tx, BUS_DMA_NOWAIT, &txdl_priv->dma_map); return ((err == 0) ? VXGE_HAL_OK : VXGE_HAL_FAIL); } /* * vxge_tx_compl * If the interrupt is due to Tx completion, free the sent buffer */ vxge_hal_status_e vxge_tx_compl(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, vxge_hal_fifo_tcode_e t_code, void *userdata) { vxge_hal_status_e status = VXGE_HAL_OK; vxge_txdl_priv_t *txdl_priv; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_dev_t *vdev = vpath->vdev; ifnet_t ifp = vdev->ifp; VXGE_TX_LOCK(vpath); /* * For each completed descriptor * Get private structure, free buffer, do unmapping, and free descriptor */ do { VXGE_DRV_STATS(vpath, tx_compl); if (t_code != VXGE_HAL_FIFO_T_CODE_OK) { device_printf(vdev->ndev, "tx transfer code %d\n", t_code); ifp->if_oerrors++; VXGE_DRV_STATS(vpath, tx_tcode); vxge_hal_fifo_handle_tcode(vpath_handle, txdlh, t_code); } ifp->if_opackets++; txdl_priv = (vxge_txdl_priv_t *) dtr_priv; bus_dmamap_unload(vpath->dma_tag_tx, txdl_priv->dma_map); vxge_free_packet(txdl_priv->mbuf_pkt); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); } while (vxge_hal_fifo_txdl_next_completed(vpath_handle, &txdlh, &dtr_priv, &t_code) == VXGE_HAL_OK); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; VXGE_TX_UNLOCK(vpath); return (status); } /* ARGSUSED */ void vxge_tx_term(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, vxge_hal_txdl_state_e state, void *userdata, vxge_hal_reopen_e reopen) { vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_txdl_priv_t *txdl_priv = (vxge_txdl_priv_t *) dtr_priv; if (state != VXGE_HAL_TXDL_STATE_POSTED) return; if (txdl_priv != NULL) { bus_dmamap_sync(vpath->dma_tag_tx, txdl_priv->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(vpath->dma_tag_tx, txdl_priv->dma_map); 
bus_dmamap_destroy(vpath->dma_tag_tx, txdl_priv->dma_map); vxge_free_packet(txdl_priv->mbuf_pkt); } /* Free the descriptor */ vxge_hal_fifo_txdl_free(vpath->handle, txdlh); } /* * vxge_rx_replenish * Allocate buffers and set them into descriptors for later use */ /* ARGSUSED */ vxge_hal_status_e vxge_rx_replenish(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, u32 dtr_index, void *userdata, vxge_hal_reopen_e reopen) { int err = 0; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; /* Create DMA map for these descriptors */ err = bus_dmamap_create(vpath->dma_tag_rx, BUS_DMA_NOWAIT, &rxd_priv->dma_map); if (err == 0) { if (vxge_rx_rxd_1b_set(vpath, rxdh, dtr_priv)) { bus_dmamap_destroy(vpath->dma_tag_rx, rxd_priv->dma_map); status = VXGE_HAL_FAIL; } } return (status); } /* * vxge_rx_compl */ vxge_hal_status_e vxge_rx_compl(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, u8 t_code, void *userdata) { mbuf_t mbuf_up; vxge_rxd_priv_t *rxd_priv; vxge_hal_ring_rxd_info_t ext_info; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_dev_t *vdev = vpath->vdev; struct lro_entry *queued = NULL; struct lro_ctrl *lro = &vpath->lro; /* get the interface pointer */ ifnet_t ifp = vdev->ifp; do { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { vxge_hal_ring_rxd_post(vpath_handle, rxdh); status = VXGE_HAL_FAIL; break; } VXGE_DRV_STATS(vpath, rx_compl); rxd_priv = (vxge_rxd_priv_t *) dtr_priv; /* Gets details of mbuf i.e., packet length */ vxge_rx_rxd_1b_get(vpath, rxdh, dtr_priv); /* * Prepare one buffer to send it to upper layer Since upper * layer frees the buffer do not use rxd_priv->mbuf_pkt. 
* Meanwhile prepare a new buffer, do mapping, use with the * current descriptor and post descriptor back to ring vpath */ mbuf_up = rxd_priv->mbuf_pkt; if (t_code != VXGE_HAL_RING_RXD_T_CODE_OK) { ifp->if_ierrors++; VXGE_DRV_STATS(vpath, rx_tcode); status = vxge_hal_ring_handle_tcode(vpath_handle, rxdh, t_code); /* * If transfer code is not for unknown protocols and * vxge_hal_device_handle_tcode is NOT returned * VXGE_HAL_OK * drop this packet and increment rx_tcode stats */ if ((status != VXGE_HAL_OK) && (t_code != VXGE_HAL_RING_T_CODE_L3_PKT_ERR)) { vxge_free_packet(mbuf_up); vxge_hal_ring_rxd_post(vpath_handle, rxdh); continue; } } if (vxge_rx_rxd_1b_set(vpath, rxdh, dtr_priv)) { /* * If unable to allocate buffer, post descriptor back * to vpath for future processing of same packet. */ vxge_hal_ring_rxd_post(vpath_handle, rxdh); continue; } /* Get the extended information */ vxge_hal_ring_rxd_1b_info_get(vpath_handle, rxdh, &ext_info); /* post descriptor with newly allocated mbuf back to vpath */ vxge_hal_ring_rxd_post(vpath_handle, rxdh); vpath->rxd_posted++; if (vpath->rxd_posted % VXGE_RXD_REPLENISH_COUNT == 0) vxge_hal_ring_rxd_post_post_db(vpath_handle); /* * Set successfully computed checksums in the mbuf. * Leave the rest to the stack to be reverified. 
*/ vxge_rx_checksum(ext_info, mbuf_up); #if __FreeBSD_version >= 800000 mbuf_up->m_flags |= M_FLOWID; mbuf_up->m_pkthdr.flowid = vpath->vp_index; #endif /* Post-Read sync for buffers */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); vxge_rx_input(ifp, mbuf_up, vpath); } while (vxge_hal_ring_rxd_next_completed(vpath_handle, &rxdh, &dtr_priv, &t_code) == VXGE_HAL_OK); /* Flush any outstanding LRO work */ if (vpath->lro_enable && vpath->lro.lro_cnt) { while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } } return (status); } static inline void vxge_rx_input(ifnet_t ifp, mbuf_t mbuf_up, vxge_vpath_t *vpath) { if (vpath->lro_enable && vpath->lro.lro_cnt) { if (tcp_lro_rx(&vpath->lro, mbuf_up, 0) == 0) return; } (*ifp->if_input) (ifp, mbuf_up); } static inline void vxge_rx_checksum(vxge_hal_ring_rxd_info_t ext_info, mbuf_t mbuf_up) { if (!(ext_info.proto & VXGE_HAL_FRAME_PROTO_IP_FRAG) && (ext_info.proto & VXGE_HAL_FRAME_PROTO_TCP_OR_UDP) && ext_info.l3_cksum_valid && ext_info.l4_cksum_valid) { mbuf_up->m_pkthdr.csum_data = htons(0xffff); mbuf_up->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mbuf_up->m_pkthdr.csum_flags |= CSUM_IP_VALID; mbuf_up->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } else { if (ext_info.vlan) { mbuf_up->m_pkthdr.ether_vtag = ext_info.vlan; mbuf_up->m_flags |= M_VLANTAG; } } } /* * vxge_rx_term During unload terminate and free all descriptors * @vpath_handle Rx vpath Handle @rxdh Rx Descriptor Handle @state Descriptor * State @userdata Per-adapter Data @reopen vpath open/reopen option */ /* ARGSUSED */ void vxge_rx_term(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, vxge_hal_rxd_state_e state, void *userdata, vxge_hal_reopen_e reopen) { vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; if (state != VXGE_HAL_RXD_STATE_POSTED) return; if (rxd_priv != NULL) 
{ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(vpath->dma_tag_rx, rxd_priv->dma_map); bus_dmamap_destroy(vpath->dma_tag_rx, rxd_priv->dma_map); vxge_free_packet(rxd_priv->mbuf_pkt); } /* Free the descriptor */ vxge_hal_ring_rxd_free(vpath_handle, rxdh); } /* * vxge_rx_rxd_1b_get * Get descriptors of packet to send up */ void vxge_rx_rxd_1b_get(vxge_vpath_t *vpath, vxge_hal_rxd_h rxdh, void *dtr_priv) { vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; mbuf_t mbuf_up = rxd_priv->mbuf_pkt; /* Retrieve data from completed descriptor */ vxge_hal_ring_rxd_1b_get(vpath->handle, rxdh, &rxd_priv->dma_addr[0], (u32 *) &rxd_priv->dma_sizes[0]); /* Update newly created buffer to be sent up with packet length */ mbuf_up->m_len = rxd_priv->dma_sizes[0]; mbuf_up->m_pkthdr.len = rxd_priv->dma_sizes[0]; mbuf_up->m_next = NULL; } /* * vxge_rx_rxd_1b_set * Allocates new mbufs to be placed into descriptors */ int vxge_rx_rxd_1b_set(vxge_vpath_t *vpath, vxge_hal_rxd_h rxdh, void *dtr_priv) { int num_segs, err = 0; mbuf_t mbuf_pkt; bus_dmamap_t dma_map; bus_dma_segment_t dma_buffers[1]; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; vxge_dev_t *vdev = vpath->vdev; mbuf_pkt = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, vdev->rx_mbuf_sz); if (!mbuf_pkt) { err = ENOBUFS; VXGE_DRV_STATS(vpath, rx_no_buf); device_printf(vdev->ndev, "out of memory to allocate mbuf\n"); goto _exit0; } /* Update mbuf's length, packet length and receive interface */ mbuf_pkt->m_len = vdev->rx_mbuf_sz; mbuf_pkt->m_pkthdr.len = vdev->rx_mbuf_sz; mbuf_pkt->m_pkthdr.rcvif = vdev->ifp; /* Load DMA map */ err = vxge_dma_mbuf_coalesce(vpath->dma_tag_rx, vpath->extra_dma_map, &mbuf_pkt, dma_buffers, &num_segs); if (err != 0) { VXGE_DRV_STATS(vpath, rx_map_fail); vxge_free_packet(mbuf_pkt); goto _exit0; } /* Unload DMA map of mbuf in current descriptor */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); 
bus_dmamap_unload(vpath->dma_tag_rx, rxd_priv->dma_map); /* Update descriptor private data */ dma_map = rxd_priv->dma_map; rxd_priv->mbuf_pkt = mbuf_pkt; rxd_priv->dma_addr[0] = htole64(dma_buffers->ds_addr); rxd_priv->dma_map = vpath->extra_dma_map; vpath->extra_dma_map = dma_map; /* Pre-Read/Write sync */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Set descriptor buffer */ vxge_hal_ring_rxd_1b_set(rxdh, rxd_priv->dma_addr[0], vdev->rx_mbuf_sz); _exit0: return (err); } /* * vxge_link_up * Callback for Link-up indication from HAL */ /* ARGSUSED */ void vxge_link_up(vxge_hal_device_h devh, void *userdata) { int i; vxge_vpath_t *vpath; vxge_hal_device_hw_info_t *hw_info; vxge_dev_t *vdev = (vxge_dev_t *) userdata; hw_info = &vdev->config.hw_info; ifnet_t ifp = vdev->ifp; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_tti_ci_set(vpath->handle); vxge_hal_vpath_rti_ci_set(vpath->handle); } } if (vdev->is_privilaged && (hw_info->ports > 1)) { vxge_active_port_update(vdev); device_printf(vdev->ndev, "Active Port : %lld\n", vdev->active_port); } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } /* * vxge_link_down * Callback for Link-down indication from HAL */ /* ARGSUSED */ void vxge_link_down(vxge_hal_device_h devh, void *userdata) { int i; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) userdata; ifnet_t ifp = vdev->ifp; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_tti_ci_reset(vpath->handle); vxge_hal_vpath_rti_ci_reset(vpath->handle); } } ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_DOWN); } /* * vxge_reset */ void vxge_reset(vxge_dev_t *vdev) { if (!vdev->is_initialized) return; VXGE_DRV_LOCK(vdev); vxge_stop_locked(vdev); vxge_init_locked(vdev); 
VXGE_DRV_UNLOCK(vdev); } /* * vxge_crit_error * Callback for Critical error indication from HAL */ /* ARGSUSED */ void vxge_crit_error(vxge_hal_device_h devh, void *userdata, vxge_hal_event_e type, u64 serr_data) { vxge_dev_t *vdev = (vxge_dev_t *) userdata; ifnet_t ifp = vdev->ifp; switch (type) { case VXGE_HAL_EVENT_SERR: case VXGE_HAL_EVENT_KDFCCTL: case VXGE_HAL_EVENT_CRITICAL: vxge_hal_device_intr_disable(vdev->devh); ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_DOWN); break; default: break; } } /* * vxge_ifp_setup */ int vxge_ifp_setup(device_t ndev) { ifnet_t ifp; int i, j, err = 0; vxge_dev_t *vdev = (vxge_dev_t *) device_get_softc(ndev); for (i = 0, j = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (!bVAL1(vdev->config.hw_info.vpath_mask, i)) continue; if (j >= vdev->no_of_vpath) break; vdev->vpaths[j].vp_id = i; vdev->vpaths[j].vp_index = j; vdev->vpaths[j].vdev = vdev; vdev->vpaths[j].is_configured = TRUE; vxge_os_memcpy((u8 *) vdev->vpaths[j].mac_addr, (u8 *) (vdev->config.hw_info.mac_addrs[i]), (size_t) ETHER_ADDR_LEN); j++; } /* Get interface ifnet structure for this Ether device */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(vdev->ndev, "memory allocation for ifnet failed\n"); err = ENXIO; goto _exit0; } vdev->ifp = ifp; /* Initialize interface ifnet structure */ if_initname(ifp, device_get_name(ndev), device_get_unit(ndev)); ifp->if_mtu = ETHERMTU; ifp->if_baudrate = VXGE_BAUDRATE; ifp->if_init = vxge_init; ifp->if_softc = vdev; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = vxge_ioctl; ifp->if_start = vxge_send; #if __FreeBSD_version >= 800000 ifp->if_transmit = vxge_mq_send; ifp->if_qflush = vxge_mq_qflush; #endif ifp->if_snd.ifq_drv_maxlen = max(vdev->config.ifq_maxlen, ifqmaxlen); IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); /* IFQ_SET_READY(&ifp->if_snd); */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | 
IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_JUMBO_MTU; if (vdev->config.tso_enable) vxge_tso_config(vdev); if (vdev->config.lro_enable) ifp->if_capabilities |= IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; strlcpy(vdev->ndev_name, device_get_nameunit(ndev), sizeof(vdev->ndev_name)); /* Attach the interface */ ether_ifattach(ifp, vdev->vpaths[0].mac_addr); _exit0: return (err); } /* * vxge_isr_setup * Register isr functions */ int vxge_isr_setup(vxge_dev_t *vdev) { int i, irq_rid, err = 0; vxge_vpath_t *vpath; void *isr_func_arg; void (*isr_func_ptr) (void *); switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: err = bus_setup_intr(vdev->ndev, vdev->config.isr_info[0].irq_res, (INTR_TYPE_NET | INTR_MPSAFE), vxge_isr_filter, vxge_isr_line, vdev, &vdev->config.isr_info[0].irq_handle); break; case VXGE_HAL_INTR_MODE_MSIX: for (i = 0; i < vdev->intr_count; i++) { irq_rid = vdev->config.isr_info[i].irq_rid; vpath = &vdev->vpaths[irq_rid / 4]; if ((irq_rid % 4) == 2) { isr_func_ptr = vxge_isr_msix; isr_func_arg = (void *) vpath; } else if ((irq_rid % 4) == 3) { isr_func_ptr = vxge_isr_msix_alarm; isr_func_arg = (void *) vpath; } else break; err = bus_setup_intr(vdev->ndev, vdev->config.isr_info[i].irq_res, (INTR_TYPE_NET | INTR_MPSAFE), NULL, (void *) isr_func_ptr, (void *) isr_func_arg, &vdev->config.isr_info[i].irq_handle); if (err != 0) break; } if (err != 0) { /* Teardown interrupt handler */ while (--i > 0) bus_teardown_intr(vdev->ndev, vdev->config.isr_info[i].irq_res, vdev->config.isr_info[i].irq_handle); } break; } return (err); } /* * vxge_isr_filter * ISR filter function - filter interrupts from other shared devices */ int vxge_isr_filter(void *handle) { u64 val64 = 0; vxge_dev_t *vdev = (vxge_dev_t *) handle; __hal_device_t *hldev = (__hal_device_t *) vdev->devh; vxge_hal_common_reg_t *common_reg = (vxge_hal_common_reg_t *) (hldev->common_reg); val64 = 
vxge_os_pio_mem_read64(vdev->pdev, (vdev->devh)->regh0, &common_reg->titan_general_int_status); return ((val64) ? FILTER_SCHEDULE_THREAD : FILTER_STRAY); } /* * vxge_isr_line * Interrupt service routine for Line interrupts */ void vxge_isr_line(void *vdev_ptr) { vxge_dev_t *vdev = (vxge_dev_t *) vdev_ptr; vxge_hal_device_handle_irq(vdev->devh, 0); } void vxge_isr_msix(void *vpath_ptr) { u32 got_rx = 0; u32 got_tx = 0; __hal_virtualpath_t *hal_vpath; vxge_vpath_t *vpath = (vxge_vpath_t *) vpath_ptr; vxge_dev_t *vdev = vpath->vdev; hal_vpath = ((__hal_vpath_handle_t *) vpath->handle)->vpath; VXGE_DRV_STATS(vpath, isr_msix); VXGE_HAL_DEVICE_STATS_SW_INFO_TRAFFIC_INTR(vdev->devh); vxge_hal_vpath_mf_msix_mask(vpath->handle, vpath->msix_vec); /* processing rx */ vxge_hal_vpath_poll_rx(vpath->handle, &got_rx); /* processing tx */ if (hal_vpath->vp_config->fifo.enable) { vxge_intr_coalesce_tx(vpath); vxge_hal_vpath_poll_tx(vpath->handle, &got_tx); } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec); } void vxge_isr_msix_alarm(void *vpath_ptr) { int i; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) vpath_ptr; vxge_dev_t *vdev = vpath->vdev; VXGE_HAL_DEVICE_STATS_SW_INFO_NOT_TRAFFIC_INTR(vdev->devh); /* Process alarms in each vpath */ for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_mf_msix_mask(vpath->handle, vpath->msix_vec_alarm); status = vxge_hal_vpath_alarm_process(vpath->handle, 0); if ((status == VXGE_HAL_ERR_EVENT_SLOT_FREEZE) || (status == VXGE_HAL_ERR_EVENT_SERR)) { device_printf(vdev->ndev, "processing alarms urecoverable error %x\n", status); /* Stop the driver */ vdev->is_initialized = FALSE; break; } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec_alarm); } } /* * vxge_msix_enable */ vxge_hal_status_e vxge_msix_enable(vxge_dev_t *vdev) { int i, first_vp_id, msix_id; vxge_vpath_t *vpath; vxge_hal_status_e status = VXGE_HAL_OK; /* * Unmasking and Setting MSIX vectors 
before enabling interrupts * tim[] : 0 - Tx ## 1 - Rx ## 2 - UMQ-DMQ ## 0 - BITMAP */ int tim[4] = {0, 1, 0, 0}; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = vdev->vpaths + i; first_vp_id = vdev->vpaths[0].vp_id; msix_id = vpath->vp_id * VXGE_HAL_VPATH_MSIX_ACTIVE; tim[1] = vpath->msix_vec = msix_id + 1; vpath->msix_vec_alarm = first_vp_id * VXGE_HAL_VPATH_MSIX_ACTIVE + VXGE_HAL_VPATH_MSIX_ALARM_ID; status = vxge_hal_vpath_mf_msix_set(vpath->handle, tim, VXGE_HAL_VPATH_MSIX_ALARM_ID); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to set msix vectors to vpath\n"); break; } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec); vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec_alarm); } return (status); } /* * vxge_media_init * Initializes, adds and sets media */ void vxge_media_init(vxge_dev_t *vdev) { ifmedia_init(&vdev->media, IFM_IMASK, vxge_media_change, vxge_media_status); /* Add supported media */ ifmedia_add(&vdev->media, IFM_ETHER | vdev->ifm_optics | IFM_FDX, 0, NULL); /* Set media */ ifmedia_add(&vdev->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&vdev->media, IFM_ETHER | IFM_AUTO); } /* * vxge_media_status * Callback for interface media settings */ void vxge_media_status(ifnet_t ifp, struct ifmediareq *ifmr) { vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; vxge_hal_device_t *hldev = vdev->devh; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* set link state */ if (vxge_hal_device_link_state_get(hldev) == VXGE_HAL_LINK_UP) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= vdev->ifm_optics | IFM_FDX; if_link_state_change(ifp, LINK_STATE_UP); } } /* * vxge_media_change * Media change driver callback */ int vxge_media_change(ifnet_t ifp) { vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; struct ifmedia *ifmediap = &vdev->media; return (IFM_TYPE(ifmediap->ifm_media) != IFM_ETHER ? 
/*
 * Allocate PCI resources
 *
 * Allocates the HAL device-config buffer, the PCI info structure and the
 * resources for BARs 0, 1 and 2, in that order.  On the first failure
 * vxge_free_resources() is called with the level matching the failed
 * step and the error is returned; 0 is returned on success.
 */
int
vxge_alloc_resources(vxge_dev_t *vdev)
{
	static const vxge_free_resources_e bar_level[] = {
		VXGE_FREE_BAR0, VXGE_FREE_BAR1, VXGE_FREE_BAR2
	};
	int bar, err = 0;
	device_t ndev = vdev->ndev;
	vxge_pci_info_t *pci_info = NULL;
	vxge_free_resources_e error_level = VXGE_FREE_NONE;

	/* Buffer for the HAL device configuration */
	vdev->device_config = (vxge_hal_device_config_t *)
	    vxge_mem_alloc(sizeof(vxge_hal_device_config_t));
	if (vdev->device_config == NULL) {
		err = ENOMEM;
		error_level = VXGE_DISABLE_PCI_BUSMASTER;
		device_printf(ndev,
		    "failed to allocate memory for device config\n");
		goto _exit0;
	}

	pci_info = (vxge_pci_info_t *) vxge_mem_alloc(sizeof(vxge_pci_info_t));
	if (pci_info == NULL) {
		err = ENOMEM;
		error_level = VXGE_FREE_DEVICE_CONFIG;
		device_printf(ndev,
		    "failed to allocate memory for pci info\n");
		goto _exit0;
	}
	pci_info->ndev = ndev;
	vdev->pdev = pci_info;

	/* BARs 0..2; stop at the first one that cannot be set up */
	for (bar = 0; bar < 3; bar++) {
		err = vxge_alloc_bar_resources(vdev, bar);
		if (err != 0) {
			error_level = bar_level[bar];
			break;
		}
	}

_exit0:
	if (error_level)
		vxge_free_resources(ndev, error_level);

	return (err);
}
0 : (i * 2)); pci_info->bar_info[i] = bus_alloc_resource_any(vdev->ndev, SYS_RES_MEMORY, &res_id, RF_ACTIVE); if (pci_info->bar_info[i] == NULL) { device_printf(vdev->ndev, "failed to allocate memory for bus resources\n"); err = ENOMEM; goto _exit0; } pci_info->reg_map[i] = (vxge_bus_res_t *) vxge_mem_alloc(sizeof(vxge_bus_res_t)); if (pci_info->reg_map[i] == NULL) { device_printf(vdev->ndev, "failed to allocate memory bar resources\n"); err = ENOMEM; goto _exit0; } ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_space_tag = rman_get_bustag(pci_info->bar_info[i]); ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_space_handle = rman_get_bushandle(pci_info->bar_info[i]); ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bar_start_addr = pci_info->bar_info[i]; ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_res_len = rman_get_size(pci_info->bar_info[i]); _exit0: return (err); } /* * vxge_alloc_isr_resources */ int vxge_alloc_isr_resources(vxge_dev_t *vdev) { int i, err = 0, irq_rid; int msix_vec_reqd, intr_count, msix_count; int intr_mode = VXGE_HAL_INTR_MODE_IRQLINE; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { /* MSI-X messages supported by device */ intr_count = pci_msix_count(vdev->ndev); if (intr_count) { msix_vec_reqd = 4 * vdev->no_of_vpath; if (intr_count >= msix_vec_reqd) { intr_count = msix_vec_reqd; err = pci_alloc_msix(vdev->ndev, &intr_count); if (err == 0) intr_mode = VXGE_HAL_INTR_MODE_MSIX; } if ((err != 0) || (intr_count < msix_vec_reqd)) { device_printf(vdev->ndev, "Unable to allocate " "msi/x vectors switching to INTA mode\n"); } } } err = 0; vdev->intr_count = 0; vdev->config.intr_mode = intr_mode; switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: vdev->config.isr_info[0].irq_rid = 0; vdev->config.isr_info[0].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[0].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[0].irq_res == NULL) { device_printf(vdev->ndev, "failed to allocate 
line interrupt resource\n"); err = ENOMEM; goto _exit0; } vdev->intr_count++; break; case VXGE_HAL_INTR_MODE_MSIX: msix_count = 0; for (i = 0; i < vdev->no_of_vpath; i++) { irq_rid = i * 4; vdev->config.isr_info[msix_count].irq_rid = irq_rid + 2; vdev->config.isr_info[msix_count].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[msix_count].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[msix_count].irq_res == NULL) { device_printf(vdev->ndev, "allocating bus resource (rid %d) failed\n", vdev->config.isr_info[msix_count].irq_rid); err = ENOMEM; goto _exit0; } vdev->intr_count++; err = bus_bind_intr(vdev->ndev, vdev->config.isr_info[msix_count].irq_res, (i % mp_ncpus)); if (err != 0) break; msix_count++; } vdev->config.isr_info[msix_count].irq_rid = 3; vdev->config.isr_info[msix_count].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[msix_count].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[msix_count].irq_res == NULL) { device_printf(vdev->ndev, "allocating bus resource (rid %d) failed\n", vdev->config.isr_info[msix_count].irq_rid); err = ENOMEM; goto _exit0; } vdev->intr_count++; err = bus_bind_intr(vdev->ndev, vdev->config.isr_info[msix_count].irq_res, (i % mp_ncpus)); break; } vdev->device_config->intr_mode = vdev->config.intr_mode; _exit0: return (err); } /* * vxge_free_resources * Undo what-all we did during load/attach */ void vxge_free_resources(device_t ndev, vxge_free_resources_e vxge_free_resource) { int i; vxge_dev_t *vdev; vdev = (vxge_dev_t *) device_get_softc(ndev); switch (vxge_free_resource) { case VXGE_FREE_ALL: for (i = 0; i < vdev->intr_count; i++) { bus_teardown_intr(ndev, vdev->config.isr_info[i].irq_res, vdev->config.isr_info[i].irq_handle); } /* FALLTHROUGH */ case VXGE_FREE_INTERFACE: ether_ifdetach(vdev->ifp); bus_generic_detach(ndev); if_free(vdev->ifp); /* FALLTHROUGH */ case VXGE_FREE_MEDIA: ifmedia_removeall(&vdev->media); /* FALLTHROUGH */ 
case VXGE_FREE_MUTEX: vxge_mutex_destroy(vdev); /* FALLTHROUGH */ case VXGE_FREE_VPATH: vxge_mem_free(vdev->vpaths, vdev->no_of_vpath * sizeof(vxge_vpath_t)); /* FALLTHROUGH */ case VXGE_FREE_TERMINATE_DEVICE: if (vdev->devh != NULL) { vxge_hal_device_private_set(vdev->devh, 0); vxge_hal_device_terminate(vdev->devh); } /* FALLTHROUGH */ case VXGE_FREE_ISR_RESOURCE: vxge_free_isr_resources(vdev); /* FALLTHROUGH */ case VXGE_FREE_BAR2: vxge_free_bar_resources(vdev, 2); /* FALLTHROUGH */ case VXGE_FREE_BAR1: vxge_free_bar_resources(vdev, 1); /* FALLTHROUGH */ case VXGE_FREE_BAR0: vxge_free_bar_resources(vdev, 0); /* FALLTHROUGH */ case VXGE_FREE_PCI_INFO: vxge_mem_free(vdev->pdev, sizeof(vxge_pci_info_t)); /* FALLTHROUGH */ case VXGE_FREE_DEVICE_CONFIG: vxge_mem_free(vdev->device_config, sizeof(vxge_hal_device_config_t)); /* FALLTHROUGH */ case VXGE_DISABLE_PCI_BUSMASTER: pci_disable_busmaster(ndev); /* FALLTHROUGH */ case VXGE_FREE_TERMINATE_DRIVER: if (vxge_dev_ref_count) { --vxge_dev_ref_count; if (0 == vxge_dev_ref_count) vxge_hal_driver_terminate(); } /* FALLTHROUGH */ default: case VXGE_FREE_NONE: break; /* NOTREACHED */ } } void vxge_free_isr_resources(vxge_dev_t *vdev) { int i; switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: if (vdev->config.isr_info[0].irq_res) { bus_release_resource(vdev->ndev, SYS_RES_IRQ, vdev->config.isr_info[0].irq_rid, vdev->config.isr_info[0].irq_res); vdev->config.isr_info[0].irq_res = NULL; } break; case VXGE_HAL_INTR_MODE_MSIX: for (i = 0; i < vdev->intr_count; i++) { if (vdev->config.isr_info[i].irq_res) { bus_release_resource(vdev->ndev, SYS_RES_IRQ, vdev->config.isr_info[i].irq_rid, vdev->config.isr_info[i].irq_res); vdev->config.isr_info[i].irq_res = NULL; } } if (vdev->intr_count) pci_release_msi(vdev->ndev); break; } } void vxge_free_bar_resources(vxge_dev_t *vdev, int i) { int res_id = 0; vxge_pci_info_t *pci_info = vdev->pdev; res_id = PCIR_BAR((i == 0) ? 
0 : (i * 2)); if (pci_info->bar_info[i]) bus_release_resource(vdev->ndev, SYS_RES_MEMORY, res_id, pci_info->bar_info[i]); vxge_mem_free(pci_info->reg_map[i], sizeof(vxge_bus_res_t)); } /* * vxge_init_mutex * Initializes mutexes used in driver */ void vxge_mutex_init(vxge_dev_t *vdev) { int i; snprintf(vdev->mtx_drv_name, sizeof(vdev->mtx_drv_name), "%s_drv", vdev->ndev_name); mtx_init(&vdev->mtx_drv, vdev->mtx_drv_name, MTX_NETWORK_LOCK, MTX_DEF); for (i = 0; i < vdev->no_of_vpath; i++) { snprintf(vdev->vpaths[i].mtx_tx_name, sizeof(vdev->vpaths[i].mtx_tx_name), "%s_tx_%d", vdev->ndev_name, i); mtx_init(&vdev->vpaths[i].mtx_tx, vdev->vpaths[i].mtx_tx_name, NULL, MTX_DEF); } } /* * vxge_mutex_destroy * Destroys mutexes used in driver */ void vxge_mutex_destroy(vxge_dev_t *vdev) { int i; for (i = 0; i < vdev->no_of_vpath; i++) VXGE_TX_LOCK_DESTROY(&(vdev->vpaths[i])); VXGE_DRV_LOCK_DESTROY(vdev); } /* * vxge_rth_config */ vxge_hal_status_e vxge_rth_config(vxge_dev_t *vdev) { int i; vxge_hal_vpath_h vpath_handle; vxge_hal_rth_hash_types_t hash_types; vxge_hal_status_e status = VXGE_HAL_OK; u8 mtable[256] = {0}; /* Filling matable with bucket-to-vpath mapping */ vdev->config.rth_bkt_sz = VXGE_DEFAULT_RTH_BUCKET_SIZE; for (i = 0; i < (1 << vdev->config.rth_bkt_sz); i++) mtable[i] = i % vdev->no_of_vpath; /* Fill RTH hash types */ hash_types.hash_type_tcpipv4_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV4; hash_types.hash_type_tcpipv6_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV6; hash_types.hash_type_tcpipv6ex_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV6_EX; hash_types.hash_type_ipv4_en = VXGE_HAL_RING_HASH_TYPE_IPV4; hash_types.hash_type_ipv6_en = VXGE_HAL_RING_HASH_TYPE_IPV6; hash_types.hash_type_ipv6ex_en = VXGE_HAL_RING_HASH_TYPE_IPV6_EX; /* set indirection table, bucket-to-vpath mapping */ status = vxge_hal_vpath_rts_rth_itable_set(vdev->vpath_handles, vdev->no_of_vpath, mtable, ((u32) (1 << vdev->config.rth_bkt_sz))); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "rth 
configuration failed\n"); goto _exit0; } for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; status = vxge_hal_vpath_rts_rth_set(vpath_handle, RTH_ALG_JENKINS, &hash_types, vdev->config.rth_bkt_sz, TRUE); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "rth configuration failed for vpath (%d)\n", vdev->vpaths[i].vp_id); break; } } _exit0: return (status); } /* * vxge_vpath_config * Sets HAL parameter values from kenv */ void vxge_vpath_config(vxge_dev_t *vdev) { int i; u32 no_of_vpath = 0; vxge_hal_vp_config_t *vp_config; vxge_hal_device_config_t *device_config = vdev->device_config; device_config->debug_level = VXGE_TRACE; device_config->debug_mask = VXGE_COMPONENT_ALL; device_config->device_poll_millis = VXGE_DEFAULT_DEVICE_POLL_MILLIS; vdev->config.no_of_vpath = min(vdev->config.no_of_vpath, vdev->max_supported_vpath); for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { vp_config = &(device_config->vp_config[i]); vp_config->fifo.enable = VXGE_HAL_FIFO_DISABLE; vp_config->ring.enable = VXGE_HAL_RING_DISABLE; } for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (no_of_vpath >= vdev->config.no_of_vpath) break; if (!bVAL1(vdev->config.hw_info.vpath_mask, i)) continue; no_of_vpath++; vp_config = &(device_config->vp_config[i]); vp_config->mtu = VXGE_HAL_DEFAULT_MTU; vp_config->ring.enable = VXGE_HAL_RING_ENABLE; vp_config->ring.post_mode = VXGE_HAL_RING_POST_MODE_DOORBELL; vp_config->ring.buffer_mode = VXGE_HAL_RING_RXD_BUFFER_MODE_1; vp_config->ring.ring_length = vxge_ring_length_get(VXGE_HAL_RING_RXD_BUFFER_MODE_1); vp_config->ring.scatter_mode = VXGE_HAL_RING_SCATTER_MODE_A; vp_config->rpa_all_vid_en = VXGE_DEFAULT_ALL_VID_ENABLE; vp_config->rpa_strip_vlan_tag = VXGE_DEFAULT_STRIP_VLAN_TAG; vp_config->rpa_ucast_all_addr_en = VXGE_HAL_VPATH_RPA_UCAST_ALL_ADDR_DISABLE; vp_config->rti.intr_enable = VXGE_HAL_TIM_INTR_ENABLE; vp_config->rti.txfrm_cnt_en = VXGE_HAL_TXFRM_CNT_EN_ENABLE; 
vp_config->rti.util_sel = VXGE_HAL_TIM_UTIL_SEL_LEGACY_RX_NET_UTIL; vp_config->rti.uec_a = VXGE_DEFAULT_RTI_RX_UFC_A; vp_config->rti.uec_b = VXGE_DEFAULT_RTI_RX_UFC_B; vp_config->rti.uec_c = VXGE_DEFAULT_RTI_RX_UFC_C; vp_config->rti.uec_d = VXGE_DEFAULT_RTI_RX_UFC_D; vp_config->rti.urange_a = VXGE_DEFAULT_RTI_RX_URANGE_A; vp_config->rti.urange_b = VXGE_DEFAULT_RTI_RX_URANGE_B; vp_config->rti.urange_c = VXGE_DEFAULT_RTI_RX_URANGE_C; vp_config->rti.timer_ac_en = VXGE_HAL_TIM_TIMER_AC_ENABLE; vp_config->rti.timer_ci_en = VXGE_HAL_TIM_TIMER_CI_ENABLE; vp_config->rti.btimer_val = (VXGE_DEFAULT_RTI_BTIMER_VAL * 1000) / 272; vp_config->rti.rtimer_val = (VXGE_DEFAULT_RTI_RTIMER_VAL * 1000) / 272; vp_config->rti.ltimer_val = (VXGE_DEFAULT_RTI_LTIMER_VAL * 1000) / 272; if ((no_of_vpath > 1) && (VXGE_DEFAULT_CONFIG_MQ_ENABLE == 0)) continue; vp_config->fifo.enable = VXGE_HAL_FIFO_ENABLE; vp_config->fifo.max_aligned_frags = VXGE_DEFAULT_FIFO_ALIGNED_FRAGS; vp_config->tti.intr_enable = VXGE_HAL_TIM_INTR_ENABLE; vp_config->tti.txfrm_cnt_en = VXGE_HAL_TXFRM_CNT_EN_ENABLE; vp_config->tti.util_sel = VXGE_HAL_TIM_UTIL_SEL_LEGACY_TX_NET_UTIL; vp_config->tti.uec_a = VXGE_DEFAULT_TTI_TX_UFC_A; vp_config->tti.uec_b = VXGE_DEFAULT_TTI_TX_UFC_B; vp_config->tti.uec_c = VXGE_DEFAULT_TTI_TX_UFC_C; vp_config->tti.uec_d = VXGE_DEFAULT_TTI_TX_UFC_D; vp_config->tti.urange_a = VXGE_DEFAULT_TTI_TX_URANGE_A; vp_config->tti.urange_b = VXGE_DEFAULT_TTI_TX_URANGE_B; vp_config->tti.urange_c = VXGE_DEFAULT_TTI_TX_URANGE_C; vp_config->tti.timer_ac_en = VXGE_HAL_TIM_TIMER_AC_ENABLE; vp_config->tti.timer_ci_en = VXGE_HAL_TIM_TIMER_CI_ENABLE; vp_config->tti.btimer_val = (VXGE_DEFAULT_TTI_BTIMER_VAL * 1000) / 272; vp_config->tti.rtimer_val = (VXGE_DEFAULT_TTI_RTIMER_VAL * 1000) / 272; vp_config->tti.ltimer_val = (VXGE_DEFAULT_TTI_LTIMER_VAL * 1000) / 272; } vdev->no_of_vpath = no_of_vpath; if (vdev->no_of_vpath == 1) vdev->config.tx_steering = 0; if (vdev->config.rth_enable && (vdev->no_of_vpath > 1)) { 
device_config->rth_en = VXGE_HAL_RTH_ENABLE; device_config->rth_it_type = VXGE_HAL_RTH_IT_TYPE_MULTI_IT; } vdev->config.rth_enable = device_config->rth_en; } /* * vxge_vpath_cb_fn * Virtual path Callback function */ /* ARGSUSED */ static vxge_hal_status_e vxge_vpath_cb_fn(vxge_hal_client_h client_handle, vxge_hal_up_msg_h msgh, vxge_hal_message_type_e msg_type, vxge_hal_obj_id_t obj_id, vxge_hal_result_e result, vxge_hal_opaque_handle_t *opaque_handle) { return (VXGE_HAL_OK); } /* * vxge_vpath_open */ int vxge_vpath_open(vxge_dev_t *vdev) { int i, err = EINVAL; u64 func_id; vxge_vpath_t *vpath; vxge_hal_vpath_attr_t vpath_attr; vxge_hal_status_e status = VXGE_HAL_OK; struct lro_ctrl *lro = NULL; bzero(&vpath_attr, sizeof(vxge_hal_vpath_attr_t)); for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); lro = &vpath->lro; /* Vpath vpath_attr: FIFO */ vpath_attr.vp_id = vpath->vp_id; vpath_attr.fifo_attr.callback = vxge_tx_compl; vpath_attr.fifo_attr.txdl_init = vxge_tx_replenish; vpath_attr.fifo_attr.txdl_term = vxge_tx_term; vpath_attr.fifo_attr.userdata = vpath; vpath_attr.fifo_attr.per_txdl_space = sizeof(vxge_txdl_priv_t); /* Vpath vpath_attr: Ring */ vpath_attr.ring_attr.callback = vxge_rx_compl; vpath_attr.ring_attr.rxd_init = vxge_rx_replenish; vpath_attr.ring_attr.rxd_term = vxge_rx_term; vpath_attr.ring_attr.userdata = vpath; vpath_attr.ring_attr.per_rxd_space = sizeof(vxge_rxd_priv_t); err = vxge_dma_tags_create(vpath); if (err != 0) { device_printf(vdev->ndev, "failed to create dma tags\n"); break; } #if __FreeBSD_version >= 800000 vpath->br = buf_ring_alloc(VXGE_DEFAULT_BR_SIZE, M_DEVBUF, M_WAITOK, &vpath->mtx_tx); if (vpath->br == NULL) { err = ENOMEM; break; } #endif status = vxge_hal_vpath_open(vdev->devh, &vpath_attr, (vxge_hal_vpath_callback_f) vxge_vpath_cb_fn, NULL, &vpath->handle); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to open vpath (%d)\n", vpath->vp_id); err = EPERM; break; } vpath->is_open = TRUE; 
vdev->vpath_handles[i] = vpath->handle; vpath->tx_ticks = ticks; vpath->rx_ticks = ticks; vpath->tti_rtimer_val = VXGE_DEFAULT_TTI_RTIMER_VAL; vpath->tti_rtimer_val = VXGE_DEFAULT_TTI_RTIMER_VAL; vpath->tx_intr_coalesce = vdev->config.intr_coalesce; vpath->rx_intr_coalesce = vdev->config.intr_coalesce; func_id = vdev->config.hw_info.func_id; if (vdev->config.low_latency && (vdev->config.bw_info[func_id].priority == VXGE_DEFAULT_VPATH_PRIORITY_HIGH)) { vpath->tx_intr_coalesce = 0; } if (vdev->ifp->if_capenable & IFCAP_LRO) { err = tcp_lro_init(lro); if (err != 0) { device_printf(vdev->ndev, "LRO Initialization failed!\n"); break; } vpath->lro_enable = TRUE; lro->ifp = vdev->ifp; } } return (err); } void vxge_tso_config(vxge_dev_t *vdev) { u32 func_id, priority; vxge_hal_status_e status = VXGE_HAL_OK; vdev->ifp->if_capabilities |= IFCAP_TSO4; status = vxge_bw_priority_get(vdev, NULL); if (status == VXGE_HAL_OK) { func_id = vdev->config.hw_info.func_id; priority = vdev->config.bw_info[func_id].priority; if (priority != VXGE_DEFAULT_VPATH_PRIORITY_HIGH) vdev->ifp->if_capabilities &= ~IFCAP_TSO4; } #if __FreeBSD_version >= 800000 if (vdev->ifp->if_capabilities & IFCAP_TSO4) vdev->ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif } vxge_hal_status_e vxge_bw_priority_get(vxge_dev_t *vdev, vxge_bw_info_t *bw_info) { u32 priority, bandwidth; u32 vpath_count; u64 func_id, func_mode, vpath_list[VXGE_HAL_MAX_VIRTUAL_PATHS]; vxge_hal_status_e status = VXGE_HAL_OK; func_id = vdev->config.hw_info.func_id; if (bw_info) { func_id = bw_info->func_id; func_mode = vdev->config.hw_info.function_mode; if ((is_single_func(func_mode)) && (func_id > 0)) return (VXGE_HAL_FAIL); } if (vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 0)) { status = vxge_hal_vf_rx_bw_get(vdev->devh, func_id, &bandwidth, &priority); } else { status = vxge_hal_get_vpath_list(vdev->devh, func_id, vpath_list, &vpath_count); if (status == VXGE_HAL_OK) { status = vxge_hal_bw_priority_get(vdev->devh, vpath_list[0], 
&bandwidth, &priority); } } if (status == VXGE_HAL_OK) { if (bw_info) { bw_info->priority = priority; bw_info->bandwidth = bandwidth; } else { vdev->config.bw_info[func_id].priority = priority; vdev->config.bw_info[func_id].bandwidth = bandwidth; } } return (status); } /* * close vpaths */ void vxge_vpath_close(vxge_dev_t *vdev) { int i; vxge_vpath_t *vpath; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); if (vpath->handle) vxge_hal_vpath_close(vpath->handle); #if __FreeBSD_version >= 800000 if (vpath->br != NULL) buf_ring_free(vpath->br, M_DEVBUF); #endif /* Free LRO memory */ if (vpath->lro_enable) tcp_lro_free(&vpath->lro); if (vpath->dma_tag_rx) { bus_dmamap_destroy(vpath->dma_tag_rx, vpath->extra_dma_map); bus_dma_tag_destroy(vpath->dma_tag_rx); } if (vpath->dma_tag_tx) bus_dma_tag_destroy(vpath->dma_tag_tx); vpath->handle = NULL; vpath->is_open = FALSE; } } /* * reset vpaths */ void vxge_vpath_reset(vxge_dev_t *vdev) { int i; vxge_hal_vpath_h vpath_handle; vxge_hal_status_e status = VXGE_HAL_OK; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; status = vxge_hal_vpath_reset(vpath_handle); if (status != VXGE_HAL_OK) device_printf(vdev->ndev, "failed to reset vpath :%d\n", i); } } static inline int vxge_vpath_get(vxge_dev_t *vdev, mbuf_t mhead) { struct tcphdr *th = NULL; struct udphdr *uh = NULL; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; struct ether_vlan_header *eth = NULL; void *ulp = NULL; int ehdrlen, iphlen = 0; u8 ipproto = 0; u16 etype, src_port, dst_port; u16 queue_len, counter = 0; src_port = dst_port = 0; queue_len = vdev->no_of_vpath; eth = mtod(mhead, struct ether_vlan_header *); if (eth->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eth->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eth->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } switch (etype) { case ETHERTYPE_IP: ip = (struct ip *) (mhead->m_data + 
ehdrlen); iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; th = (struct tcphdr *) ((caddr_t)ip + iphlen); uh = (struct udphdr *) ((caddr_t)ip + iphlen); break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *) (mhead->m_data + ehdrlen); iphlen = sizeof(struct ip6_hdr); ipproto = ip6->ip6_nxt; ulp = mtod(mhead, char *) + iphlen; th = ((struct tcphdr *) (ulp)); uh = ((struct udphdr *) (ulp)); break; default: break; } switch (ipproto) { case IPPROTO_TCP: src_port = th->th_sport; dst_port = th->th_dport; break; case IPPROTO_UDP: src_port = uh->uh_sport; dst_port = uh->uh_dport; break; default: break; } counter = (ntohs(src_port) + ntohs(dst_port)) & vpath_selector[queue_len - 1]; if (counter >= queue_len) counter = queue_len - 1; return (counter); } static inline vxge_hal_vpath_h vxge_vpath_handle_get(vxge_dev_t *vdev, int i) { return (vdev->vpaths[i].is_open ? vdev->vpaths[i].handle : NULL); } int vxge_firmware_verify(vxge_dev_t *vdev) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; if (vdev->fw_upgrade) { status = vxge_firmware_upgrade(vdev); if (status == VXGE_HAL_OK) { err = ENXIO; goto _exit0; } } if ((vdev->config.function_mode != VXGE_DEFAULT_CONFIG_VALUE) && (vdev->config.hw_info.function_mode != (u64) vdev->config.function_mode)) { status = vxge_func_mode_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } /* l2_switch configuration */ active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_L2SwitchEnabled, &active_config); if (status == VXGE_HAL_OK) { vdev->l2_switch = active_config; if (vdev->config.l2_switch != VXGE_DEFAULT_CONFIG_VALUE) { if (vdev->config.l2_switch != active_config) { status = vxge_l2switch_mode_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } } } if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { if (vxge_port_mode_update(vdev) == ENXIO) err = ENXIO; } _exit0: if (err == ENXIO) device_printf(vdev->ndev, "PLEASE POWER CYCLE THE SYSTEM\n"); return 
(err); } vxge_hal_status_e vxge_firmware_upgrade(vxge_dev_t *vdev) { u8 *fw_buffer; u32 fw_size; vxge_hal_device_hw_info_t *hw_info; vxge_hal_status_e status = VXGE_HAL_OK; hw_info = &vdev->config.hw_info; fw_size = sizeof(VXGE_FW_ARRAY_NAME); fw_buffer = (u8 *) VXGE_FW_ARRAY_NAME; device_printf(vdev->ndev, "Current firmware version : %s (%s)\n", hw_info->fw_version.version, hw_info->fw_date.date); device_printf(vdev->ndev, "Upgrading firmware to %d.%d.%d\n", VXGE_MIN_FW_MAJOR_VERSION, VXGE_MIN_FW_MINOR_VERSION, VXGE_MIN_FW_BUILD_NUMBER); /* Call HAL API to upgrade firmware */ status = vxge_hal_mrpcim_fw_upgrade(vdev->pdev, (pci_reg_h) vdev->pdev->reg_map[0], (u8 *) vdev->pdev->bar_info[0], fw_buffer, fw_size); device_printf(vdev->ndev, "firmware upgrade %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); return (status); } vxge_hal_status_e vxge_func_mode_set(vxge_dev_t *vdev) { u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_mrpcim_pcie_func_mode_set(vdev->devh, vdev->config.function_mode); device_printf(vdev->ndev, "function mode change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) { vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); /* * If in MF + DP mode * if user changes to SF, change port_mode to single port mode */ if (((is_multi_func(vdev->config.hw_info.function_mode)) && is_single_func(vdev->config.function_mode)) && (active_config == VXGE_HAL_DP_NP_MODE_DUAL_PORT)) { vdev->config.port_mode = VXGE_HAL_DP_NP_MODE_SINGLE_PORT; status = vxge_port_mode_set(vdev); } } return (status); } vxge_hal_status_e vxge_port_mode_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_port_mode(vdev->devh, vdev->config.port_mode); device_printf(vdev->ndev, "port mode change %s\n", (status == VXGE_HAL_OK) ? 
"successful" : "failed"); if (status == VXGE_HAL_OK) { vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); /* Configure vpath_mapping for active-active mode only */ if (vdev->config.port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) { status = vxge_hal_config_vpath_map(vdev->devh, VXGE_DUAL_PORT_MAP); device_printf(vdev->ndev, "dual port map change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); } } return (status); } int vxge_port_mode_update(vxge_dev_t *vdev) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; if ((vdev->config.port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) && is_single_func(vdev->config.hw_info.function_mode)) { device_printf(vdev->ndev, "Adapter in SF mode, dual port mode is not allowed\n"); err = EPERM; goto _exit0; } active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } vdev->port_mode = active_config; if (vdev->config.port_mode != VXGE_DEFAULT_CONFIG_VALUE) { if (vdev->config.port_mode != vdev->port_mode) { status = vxge_port_mode_set(vdev); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } err = ENXIO; vdev->port_mode = vdev->config.port_mode; } } active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_BehaviourOnFail, &active_config); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } vdev->port_failure = active_config; /* * active/active mode : set to NoMove * active/passive mode: set to Failover-Failback */ if (vdev->port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) vdev->config.port_failure = VXGE_HAL_XMAC_NWIF_OnFailure_NoMove; else if (vdev->port_mode == VXGE_HAL_DP_NP_MODE_ACTIVE_PASSIVE) vdev->config.port_failure = VXGE_HAL_XMAC_NWIF_OnFailure_OtherPortBackOnRestore; if ((vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) && (vdev->config.port_failure 
!= vdev->port_failure)) { status = vxge_port_behavior_on_failure_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } _exit0: return (err); } vxge_hal_status_e vxge_port_mode_get(vxge_dev_t *vdev, vxge_port_info_t *port_info) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); if (status != VXGE_HAL_OK) { err = ENXIO; goto _exit0; } port_info->port_mode = active_config; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_BehaviourOnFail, &active_config); if (status != VXGE_HAL_OK) { err = ENXIO; goto _exit0; } port_info->port_failure = active_config; _exit0: return (err); } vxge_hal_status_e vxge_port_behavior_on_failure_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_behavior_on_failure(vdev->devh, vdev->config.port_failure); device_printf(vdev->ndev, "port behaviour on failure change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); return (status); } void vxge_active_port_update(vxge_dev_t *vdev) { u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_ActivePort, &active_config); if (status == VXGE_HAL_OK) vdev->active_port = active_config; } vxge_hal_status_e vxge_l2switch_mode_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_l2switch_mode(vdev->devh, vdev->config.l2_switch); device_printf(vdev->ndev, "L2 switch %s\n", (status == VXGE_HAL_OK) ? (vdev->config.l2_switch) ? 
"enable" : "disable" : "change failed"); if (status == VXGE_HAL_OK) vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); return (status); } /* * vxge_promisc_set * Enable Promiscuous Mode */ void vxge_promisc_set(vxge_dev_t *vdev) { int i; ifnet_t ifp; vxge_hal_vpath_h vpath_handle; if (!vdev->is_initialized) return; ifp = vdev->ifp; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; if (ifp->if_flags & IFF_PROMISC) vxge_hal_vpath_promisc_enable(vpath_handle); else vxge_hal_vpath_promisc_disable(vpath_handle); } } /* * vxge_change_mtu * Change interface MTU to a requested valid size */ int vxge_change_mtu(vxge_dev_t *vdev, unsigned long new_mtu) { int err = EINVAL; if ((new_mtu < VXGE_HAL_MIN_MTU) || (new_mtu > VXGE_HAL_MAX_MTU)) goto _exit0; (vdev->ifp)->if_mtu = new_mtu; device_printf(vdev->ndev, "MTU changed to %ld\n", (vdev->ifp)->if_mtu); if (vdev->is_initialized) { if_down(vdev->ifp); vxge_reset(vdev); if_up(vdev->ifp); } err = 0; _exit0: return (err); } /* * Creates DMA tags for both Tx and Rx */ int vxge_dma_tags_create(vxge_vpath_t *vpath) { int err = 0; bus_size_t max_size, boundary; vxge_dev_t *vdev = vpath->vdev; ifnet_t ifp = vdev->ifp; max_size = ifp->if_mtu + VXGE_HAL_MAC_HEADER_MAX_SIZE + VXGE_HAL_HEADER_ETHERNET_II_802_3_ALIGN; VXGE_BUFFER_ALIGN(max_size, 128) if (max_size <= MCLBYTES) vdev->rx_mbuf_sz = MCLBYTES; else vdev->rx_mbuf_sz = (max_size > MJUMPAGESIZE) ? MJUM9BYTES : MJUMPAGESIZE; boundary = (max_size > PAGE_SIZE) ? 
0 : PAGE_SIZE; /* DMA tag for Tx */ err = bus_dma_tag_create( bus_get_dma_tag(vdev->ndev), 1, PAGE_SIZE, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, VXGE_TSO_SIZE, VXGE_MAX_SEGS, PAGE_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &(vpath->dma_tag_tx)); if (err != 0) goto _exit0; /* DMA tag for Rx */ err = bus_dma_tag_create( bus_get_dma_tag(vdev->ndev), 1, boundary, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, vdev->rx_mbuf_sz, 1, vdev->rx_mbuf_sz, BUS_DMA_ALLOCNOW, NULL, NULL, &(vpath->dma_tag_rx)); if (err != 0) goto _exit1; /* Create DMA map for this descriptor */ err = bus_dmamap_create(vpath->dma_tag_rx, BUS_DMA_NOWAIT, &vpath->extra_dma_map); if (err == 0) goto _exit0; bus_dma_tag_destroy(vpath->dma_tag_rx); _exit1: bus_dma_tag_destroy(vpath->dma_tag_tx); _exit0: return (err); } static inline int vxge_dma_mbuf_coalesce(bus_dma_tag_t dma_tag_tx, bus_dmamap_t dma_map, mbuf_t * m_headp, bus_dma_segment_t * dma_buffers, int *num_segs) { int err = 0; mbuf_t mbuf_pkt = NULL; retry: err = bus_dmamap_load_mbuf_sg(dma_tag_tx, dma_map, *m_headp, dma_buffers, num_segs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* try to defrag, too many segments */ mbuf_pkt = m_defrag(*m_headp, M_NOWAIT); if (mbuf_pkt == NULL) { err = ENOBUFS; goto _exit0; } *m_headp = mbuf_pkt; goto retry; } _exit0: return (err); } int vxge_device_hw_info_get(vxge_dev_t *vdev) { int i, err = ENXIO; u64 vpath_mask = 0; u32 max_supported_vpath = 0; u32 fw_ver_maj_min; vxge_firmware_upgrade_e fw_option; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_device_hw_info_t *hw_info; status = vxge_hal_device_hw_info_get(vdev->pdev, (pci_reg_h) vdev->pdev->reg_map[0], (u8 *) vdev->pdev->bar_info[0], &vdev->config.hw_info); if (status != VXGE_HAL_OK) goto _exit0; hw_info = &vdev->config.hw_info; vpath_mask = hw_info->vpath_mask; if (vpath_mask == 0) { device_printf(vdev->ndev, "No vpaths available in device\n"); goto _exit0; } fw_option = vdev->config.fw_option; /* Check how many vpaths are available */ for (i = 0; i 
< VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (!((vpath_mask) & mBIT(i))) continue; max_supported_vpath++; } vdev->max_supported_vpath = max_supported_vpath; status = vxge_hal_device_is_privileged(hw_info->host_type, hw_info->func_id); vdev->is_privilaged = (status == VXGE_HAL_OK) ? TRUE : FALSE; vdev->hw_fw_version = VXGE_FW_VERSION( hw_info->fw_version.major, hw_info->fw_version.minor, hw_info->fw_version.build); fw_ver_maj_min = VXGE_FW_MAJ_MIN_VERSION(hw_info->fw_version.major, hw_info->fw_version.minor); if ((fw_option >= VXGE_FW_UPGRADE_FORCE) || (vdev->hw_fw_version != VXGE_DRV_FW_VERSION)) { /* For fw_ver 1.8.1 and above ignore build number. */ if ((fw_option == VXGE_FW_UPGRADE_ALL) && ((vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 1)) && (fw_ver_maj_min == VXGE_DRV_FW_MAJ_MIN_VERSION))) { goto _exit1; } if (vdev->hw_fw_version < VXGE_BASE_FW_VERSION) { device_printf(vdev->ndev, "Upgrade driver through vxge_update, " "Unable to load the driver.\n"); goto _exit0; } vdev->fw_upgrade = TRUE; } _exit1: err = 0; _exit0: return (err); } /* * vxge_device_hw_info_print * Print device and driver information */ void vxge_device_hw_info_print(vxge_dev_t *vdev) { u32 i; device_t ndev; struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; char pmd_type[2][VXGE_PMD_INFO_LEN]; vxge_hal_device_t *hldev; vxge_hal_device_hw_info_t *hw_info; vxge_hal_device_pmd_info_t *pmd_port; hldev = vdev->devh; ndev = vdev->ndev; ctx = device_get_sysctl_ctx(ndev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ndev)); hw_info = &(vdev->config.hw_info); snprintf(vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], sizeof(vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION]), "%d.%d.%d.%d", XGELL_VERSION_MAJOR, XGELL_VERSION_MINOR, XGELL_VERSION_FIX, XGELL_VERSION_BUILD); /* Print PCI-e bus type/speed/width info */ snprintf(vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], sizeof(vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO]), "x%d", hldev->link_width); if (hldev->link_width <= 
VXGE_HAL_PCI_E_LINK_WIDTH_X4) device_printf(ndev, "For optimal performance a x8 " "PCI-Express slot is required.\n"); vxge_null_terminate((char *) hw_info->serial_number, sizeof(hw_info->serial_number)); vxge_null_terminate((char *) hw_info->part_number, sizeof(hw_info->part_number)); snprintf(vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], sizeof(vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO]), "%s", hw_info->serial_number); snprintf(vdev->config.nic_attr[VXGE_PRINT_PART_NO], sizeof(vdev->config.nic_attr[VXGE_PRINT_PART_NO]), "%s", hw_info->part_number); snprintf(vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], sizeof(vdev->config.nic_attr[VXGE_PRINT_FW_VERSION]), "%s", hw_info->fw_version.version); snprintf(vdev->config.nic_attr[VXGE_PRINT_FW_DATE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FW_DATE]), "%s", hw_info->fw_date.date); pmd_port = &(hw_info->pmd_port0); for (i = 0; i < hw_info->ports; i++) { vxge_pmd_port_type_get(vdev, pmd_port->type, pmd_type[i], sizeof(pmd_type[i])); strncpy(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i], "vendor=??, sn=??, pn=??, type=??", sizeof(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i])); vxge_null_terminate(pmd_port->vendor, sizeof(pmd_port->vendor)); if (strlen(pmd_port->vendor) == 0) { pmd_port = &(hw_info->pmd_port1); continue; } vxge_null_terminate(pmd_port->ser_num, sizeof(pmd_port->ser_num)); vxge_null_terminate(pmd_port->part_num, sizeof(pmd_port->part_num)); snprintf(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i], sizeof(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i]), "vendor=%s, sn=%s, pn=%s, type=%s", pmd_port->vendor, pmd_port->ser_num, pmd_port->part_num, pmd_type[i]); pmd_port = &(hw_info->pmd_port1); } switch (hw_info->function_mode) { case VXGE_HAL_PCIE_FUNC_MODE_SF1_VP17: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Single Function - 1 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case 
VXGE_HAL_PCIE_FUNC_MODE_MF2_VP8: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 2 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF4_VP4: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 4 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF8_VP2: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 8 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF8P_VP2: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function (DirectIO) - 8 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; } snprintf(vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_INTR_MODE]), "%s", ((vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) ? "MSI-X" : "INTA")); snprintf(vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], sizeof(vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT]), "%d", vdev->no_of_vpath); snprintf(vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE], sizeof(vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE]), "%lu", vdev->ifp->if_mtu); snprintf(vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_LRO_MODE]), "%s", ((vdev->config.lro_enable) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_RTH_MODE]), "%s", ((vdev->config.rth_enable) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_TSO_MODE]), "%s", ((vdev->ifp->if_capenable & IFCAP_TSO4) ? 
"Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], sizeof(vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE]), "%s", ((hw_info->ports == 1) ? "Single Port" : "Dual Port")); if (vdev->is_privilaged) { if (hw_info->ports > 1) { snprintf(vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_PORT_MODE]), "%s", vxge_port_mode[vdev->port_mode]); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) snprintf(vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], sizeof(vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE]), "%s", vxge_port_failure[vdev->port_failure]); vxge_active_port_update(vdev); snprintf(vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT], sizeof(vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT]), "%lld", vdev->active_port); } if (!is_single_func(hw_info->function_mode)) { snprintf(vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE]), "%s", ((vdev->l2_switch) ? "Enabled" : "Disabled")); } } device_printf(ndev, "Driver version\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION]); device_printf(ndev, "Serial number\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO]); device_printf(ndev, "Part number\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PART_NO]); device_printf(ndev, "Firmware version\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FW_VERSION]); device_printf(ndev, "Firmware date\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FW_DATE]); device_printf(ndev, "Link width\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO]); if (vdev->is_privilaged) { device_printf(ndev, "Function mode\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]); } device_printf(ndev, "Interrupt type\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_INTR_MODE]); device_printf(ndev, "VPath(s) opened\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT]); device_printf(ndev, "Adapter Type\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE]); device_printf(ndev, "PMD Port 0\t: %s\n", 
vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0]); if (hw_info->ports > 1) { device_printf(ndev, "PMD Port 1\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1]); if (vdev->is_privilaged) { device_printf(ndev, "Port Mode\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PORT_MODE]); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) device_printf(ndev, "Port Failure\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE]); device_printf(vdev->ndev, "Active Port\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT]); } } if (vdev->is_privilaged && !is_single_func(hw_info->function_mode)) { device_printf(vdev->ndev, "L2 Switch\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE]); } device_printf(ndev, "MTU is %s\n", vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE]); device_printf(ndev, "LRO %s\n", vdev->config.nic_attr[VXGE_PRINT_LRO_MODE]); device_printf(ndev, "RTH %s\n", vdev->config.nic_attr[VXGE_PRINT_RTH_MODE]); device_printf(ndev, "TSO %s\n", vdev->config.nic_attr[VXGE_PRINT_TSO_MODE]); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Driver version", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], + vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], 0, "Driver version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Serial number", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], + vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], 0, "Serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Part number", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_PART_NO], + vdev->config.nic_attr[VXGE_PRINT_PART_NO], 0, "Part number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Firmware version", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], + vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], 0, "Firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Firmware date", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_FW_DATE], + vdev->config.nic_attr[VXGE_PRINT_FW_DATE], 0, "Firmware date"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Link 
width", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], + vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], 0, "Link width"); if (vdev->is_privilaged) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Function mode", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], + vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], 0, "Function mode"); } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Interrupt type", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], + vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], 0, "Interrupt type"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "VPath(s) opened", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], + vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], 0, "VPath(s) opened"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Adapter Type", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], + vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], 0, "Adapter Type"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pmd port 0", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0], + vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0], 0, "pmd port"); if (hw_info->ports > 1) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pmd port 1", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1], + vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1], 0, "pmd port"); if (vdev->is_privilaged) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Port Mode", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], + vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], 0, "Port Mode"); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Port Failure", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], + vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], 0, "Port Failure"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "L2 Switch", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], + vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], 0, "L2 Switch"); } } SYSCTL_ADD_STRING(ctx, 
children, OID_AUTO, "LRO mode", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], + vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], 0, "LRO mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "RTH mode", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], + vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], 0, "RTH mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "TSO mode", CTLFLAG_RD, - &vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], + vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], 0, "TSO mode"); } void vxge_pmd_port_type_get(vxge_dev_t *vdev, u32 port_type, char *ifm_name, u8 ifm_len) { vdev->ifm_optics = IFM_UNKNOWN; switch (port_type) { case VXGE_HAL_DEVICE_PMD_TYPE_10G_SR: vdev->ifm_optics = IFM_10G_SR; strlcpy(ifm_name, "10GbE SR", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_LR: vdev->ifm_optics = IFM_10G_LR; strlcpy(ifm_name, "10GbE LR", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_LRM: vdev->ifm_optics = IFM_10G_LRM; strlcpy(ifm_name, "10GbE LRM", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_DIRECT: vdev->ifm_optics = IFM_10G_TWINAX; strlcpy(ifm_name, "10GbE DA (Direct Attached)", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_CX4: vdev->ifm_optics = IFM_10G_CX4; strlcpy(ifm_name, "10GbE CX4", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_BASE_T: #if __FreeBSD_version >= 800000 vdev->ifm_optics = IFM_10G_T; #endif strlcpy(ifm_name, "10GbE baseT", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_OTHER: strlcpy(ifm_name, "10GbE Other", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_SX: vdev->ifm_optics = IFM_1000_SX; strlcpy(ifm_name, "1GbE SX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_LX: vdev->ifm_optics = IFM_1000_LX; strlcpy(ifm_name, "1GbE LX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_CX: vdev->ifm_optics = IFM_1000_CX; strlcpy(ifm_name, "1GbE CX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_BASE_T: vdev->ifm_optics = IFM_1000_T; strlcpy(ifm_name, "1GbE baseT", ifm_len); break; case 
VXGE_HAL_DEVICE_PMD_TYPE_1G_DIRECT: strlcpy(ifm_name, "1GbE DA (Direct Attached)", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_CX4: strlcpy(ifm_name, "1GbE CX4", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_OTHER: strlcpy(ifm_name, "1GbE Other", ifm_len); break; default: case VXGE_HAL_DEVICE_PMD_TYPE_UNKNOWN: strlcpy(ifm_name, "UNSUP", ifm_len); break; } } u32 vxge_ring_length_get(u32 buffer_mode) { return (VXGE_DEFAULT_RING_BLOCK * vxge_hal_ring_rxds_per_block_get(buffer_mode)); } /* * Removes trailing spaces padded * and NULL terminates strings */ static inline void vxge_null_terminate(char *str, size_t len) { len--; while (*str && (*str != ' ') && (len != 0)) ++str; --len; if (*str) *str = '\0'; } /* * vxge_ioctl * Callback to control the device */ int vxge_ioctl(ifnet_t ifp, u_long command, caddr_t data) { int mask, err = 0; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; if (!vdev->is_active) return (EBUSY); switch (command) { /* Set/Get ifnet address */ case SIOCSIFADDR: case SIOCGIFADDR: ether_ioctl(ifp, command, data); break; /* Set Interface MTU */ case SIOCSIFMTU: err = vxge_change_mtu(vdev, (unsigned long)ifr->ifr_mtu); break; /* Set Interface Flags */ case SIOCSIFFLAGS: VXGE_DRV_LOCK(vdev); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ vdev->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) vxge_promisc_set(vdev); } else { vxge_init_locked(vdev); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vxge_stop_locked(vdev); } vdev->if_flags = ifp->if_flags; VXGE_DRV_UNLOCK(vdev); break; /* Add/delete multicast address */ case SIOCADDMULTI: case SIOCDELMULTI: break; /* Get/Set Interface Media */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: err = ifmedia_ioctl(ifp, ifr, &vdev->media, command); break; /* Set Capabilities */ case SIOCSIFCAP: VXGE_DRV_LOCK(vdev); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; 
ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if ((ifp->if_capenable & IFCAP_TSO) && !(ifp->if_capenable & IFCAP_TXCSUM)) { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO Disabled\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO) { if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= CSUM_TSO; if_printf(ifp, "TSO Enabled\n"); } else { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "Enable tx checksum offload \ first.\n"); err = EAGAIN; } } else { ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO Disabled\n"); } } if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_MTU) ifp->if_capenable ^= IFCAP_VLAN_MTU; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #if __FreeBSD_version >= 800000 if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; #endif #if defined(VLAN_CAPABILITIES) VLAN_CAPABILITIES(ifp); #endif VXGE_DRV_UNLOCK(vdev); break; case SIOCGPRIVATE_0: VXGE_DRV_LOCK(vdev); err = vxge_ioctl_stats(vdev, ifr); VXGE_DRV_UNLOCK(vdev); break; case SIOCGPRIVATE_1: VXGE_DRV_LOCK(vdev); err = vxge_ioctl_regs(vdev, ifr); VXGE_DRV_UNLOCK(vdev); break; default: err = ether_ioctl(ifp, command, data); break; } return (err); } /* * vxge_ioctl_regs * IOCTL to get registers */ int vxge_ioctl_regs(vxge_dev_t *vdev, struct ifreq *ifr) { u64 value = 0x0; u32 vp_id = 0; u32 offset, reqd_size = 0; int i, err = EINVAL; char *command = (char *) ifr->ifr_data; void *reg_info = (void *) ifr->ifr_data; vxge_vpath_t *vpath; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_mgmt_reg_type_e regs_type; switch (*command) { case vxge_hal_mgmt_reg_type_pcicfgmgmt: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_pcicfgmgmt_reg_t); regs_type = 
vxge_hal_mgmt_reg_type_pcicfgmgmt; } break; case vxge_hal_mgmt_reg_type_mrpcim: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_mrpcim_reg_t); regs_type = vxge_hal_mgmt_reg_type_mrpcim; } break; case vxge_hal_mgmt_reg_type_srpcim: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_srpcim_reg_t); regs_type = vxge_hal_mgmt_reg_type_srpcim; } break; case vxge_hal_mgmt_reg_type_memrepair: if (vdev->is_privilaged) { /* reqd_size = sizeof(vxge_hal_memrepair_reg_t); */ regs_type = vxge_hal_mgmt_reg_type_memrepair; } break; case vxge_hal_mgmt_reg_type_legacy: reqd_size = sizeof(vxge_hal_legacy_reg_t); regs_type = vxge_hal_mgmt_reg_type_legacy; break; case vxge_hal_mgmt_reg_type_toc: reqd_size = sizeof(vxge_hal_toc_reg_t); regs_type = vxge_hal_mgmt_reg_type_toc; break; case vxge_hal_mgmt_reg_type_common: reqd_size = sizeof(vxge_hal_common_reg_t); regs_type = vxge_hal_mgmt_reg_type_common; break; case vxge_hal_mgmt_reg_type_vpmgmt: reqd_size = sizeof(vxge_hal_vpmgmt_reg_t); regs_type = vxge_hal_mgmt_reg_type_vpmgmt; vpath = &(vdev->vpaths[*((u32 *) reg_info + 1)]); vp_id = vpath->vp_id; break; case vxge_hal_mgmt_reg_type_vpath: reqd_size = sizeof(vxge_hal_vpath_reg_t); regs_type = vxge_hal_mgmt_reg_type_vpath; vpath = &(vdev->vpaths[*((u32 *) reg_info + 1)]); vp_id = vpath->vp_id; break; case VXGE_GET_VPATH_COUNT: *((u32 *) reg_info) = vdev->no_of_vpath; err = 0; break; default: reqd_size = 0; break; } if (reqd_size) { for (i = 0, offset = 0; offset < reqd_size; i++, offset += 0x0008) { value = 0x0; status = vxge_hal_mgmt_reg_read(vdev->devh, regs_type, vp_id, offset, &value); err = (status != VXGE_HAL_OK) ? 
EINVAL : 0; if (err == EINVAL) break; *((u64 *) ((u64 *) reg_info + i)) = value; } } return (err); } /* * vxge_ioctl_stats * IOCTL to get statistics */ int vxge_ioctl_stats(vxge_dev_t *vdev, struct ifreq *ifr) { int i, retsize, err = EINVAL; u32 bufsize; vxge_vpath_t *vpath; vxge_bw_info_t *bw_info; vxge_port_info_t *port_info; vxge_drv_stats_t *drv_stat; char *buffer = NULL; char *command = (char *) ifr->ifr_data; vxge_hal_status_e status = VXGE_HAL_OK; switch (*command) { case VXGE_GET_PCI_CONF: bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_pci_config_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) err = copyout(buffer, ifr->ifr_data, retsize); else device_printf(vdev->ndev, "failed pciconfig statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_MRPCIM_STATS: if (!vdev->is_privilaged) break; bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_stats_mrpcim_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) err = copyout(buffer, ifr->ifr_data, retsize); else device_printf(vdev->ndev, "failed mrpcim statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DEVICE_STATS: bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_stats_device_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) err = copyout(buffer, ifr->ifr_data, retsize); else device_printf(vdev->ndev, "failed device statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DEVICE_HWINFO: bufsize = sizeof(vxge_device_hw_info_t); buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { vxge_os_memcpy( &(((vxge_device_hw_info_t *) buffer)->hw_info), &vdev->config.hw_info, sizeof(vxge_hal_device_hw_info_t)); ((vxge_device_hw_info_t *) buffer)->port_mode = 
vdev->port_mode; ((vxge_device_hw_info_t *) buffer)->port_failure = vdev->port_failure; err = copyout(buffer, ifr->ifr_data, bufsize); if (err != 0) device_printf(vdev->ndev, "failed device hardware info query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DRIVER_STATS: bufsize = sizeof(vxge_drv_stats_t) * vdev->no_of_vpath; drv_stat = (vxge_drv_stats_t *) vxge_mem_alloc(bufsize); if (drv_stat != NULL) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vpath->driver_stats.rx_lro_queued += vpath->lro.lro_queued; vpath->driver_stats.rx_lro_flushed += vpath->lro.lro_flushed; vxge_os_memcpy(&drv_stat[i], &(vpath->driver_stats), sizeof(vxge_drv_stats_t)); } err = copyout(drv_stat, ifr->ifr_data, bufsize); if (err != 0) device_printf(vdev->ndev, "failed driver statistics query\n"); vxge_mem_free(drv_stat, bufsize); } break; case VXGE_GET_BANDWIDTH: bw_info = (vxge_bw_info_t *) ifr->ifr_data; if ((vdev->config.hw_info.func_id != 0) && (vdev->hw_fw_version < VXGE_FW_VERSION(1, 8, 0))) break; if (vdev->config.hw_info.func_id != 0) bw_info->func_id = vdev->config.hw_info.func_id; status = vxge_bw_priority_get(vdev, bw_info); if (status != VXGE_HAL_OK) break; err = copyout(bw_info, ifr->ifr_data, sizeof(vxge_bw_info_t)); break; case VXGE_SET_BANDWIDTH: if (vdev->is_privilaged) err = vxge_bw_priority_set(vdev, ifr); break; case VXGE_SET_PORT_MODE: if (vdev->is_privilaged) { if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { port_info = (vxge_port_info_t *) ifr->ifr_data; vdev->config.port_mode = port_info->port_mode; err = vxge_port_mode_update(vdev); if (err != ENXIO) err = VXGE_HAL_FAIL; else { err = VXGE_HAL_OK; device_printf(vdev->ndev, "PLEASE POWER CYCLE THE SYSTEM\n"); } } } break; case VXGE_GET_PORT_MODE: if (vdev->is_privilaged) { if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { port_info = (vxge_port_info_t *) ifr->ifr_data; err = vxge_port_mode_get(vdev, port_info); if (err == VXGE_HAL_OK) { err = copyout(port_info, 
ifr->ifr_data, sizeof(vxge_port_info_t)); } } } break; default: break; } return (err); } int vxge_bw_priority_config(vxge_dev_t *vdev) { u32 i; int err = EINVAL; for (i = 0; i < vdev->no_of_func; i++) { err = vxge_bw_priority_update(vdev, i, TRUE); if (err != 0) break; } return (err); } int vxge_bw_priority_set(vxge_dev_t *vdev, struct ifreq *ifr) { int err; u32 func_id; vxge_bw_info_t *bw_info; bw_info = (vxge_bw_info_t *) ifr->ifr_data; func_id = bw_info->func_id; vdev->config.bw_info[func_id].priority = bw_info->priority; vdev->config.bw_info[func_id].bandwidth = bw_info->bandwidth; err = vxge_bw_priority_update(vdev, func_id, FALSE); return (err); } int vxge_bw_priority_update(vxge_dev_t *vdev, u32 func_id, bool binit) { u32 i, set = 0; u32 bandwidth, priority, vpath_count; u64 vpath_list[VXGE_HAL_MAX_VIRTUAL_PATHS]; vxge_hal_device_t *hldev; vxge_hal_vp_config_t *vp_config; vxge_hal_status_e status = VXGE_HAL_OK; hldev = vdev->devh; status = vxge_hal_get_vpath_list(vdev->devh, func_id, vpath_list, &vpath_count); if (status != VXGE_HAL_OK) return (status); for (i = 0; i < vpath_count; i++) { vp_config = &(hldev->config.vp_config[vpath_list[i]]); /* Configure Bandwidth */ if (vdev->config.bw_info[func_id].bandwidth != VXGE_HAL_VPATH_BW_LIMIT_DEFAULT) { set = 1; bandwidth = vdev->config.bw_info[func_id].bandwidth; if (bandwidth < VXGE_HAL_VPATH_BW_LIMIT_MIN || bandwidth > VXGE_HAL_VPATH_BW_LIMIT_MAX) { bandwidth = VXGE_HAL_VPATH_BW_LIMIT_DEFAULT; } vp_config->bandwidth = bandwidth; } /* * If b/w limiting is enabled on any of the * VFs, then for remaining VFs set the priority to 3 * and b/w limiting to max i.e 10 Gb) */ if (vp_config->bandwidth == VXGE_HAL_VPATH_BW_LIMIT_DEFAULT) vp_config->bandwidth = VXGE_HAL_VPATH_BW_LIMIT_MAX; if (binit && vdev->config.low_latency) { if (func_id == 0) vdev->config.bw_info[func_id].priority = VXGE_DEFAULT_VPATH_PRIORITY_HIGH; } /* Configure Priority */ if (vdev->config.bw_info[func_id].priority != 
VXGE_HAL_VPATH_PRIORITY_DEFAULT) { set = 1; priority = vdev->config.bw_info[func_id].priority; if (priority < VXGE_HAL_VPATH_PRIORITY_MIN || priority > VXGE_HAL_VPATH_PRIORITY_MAX) { priority = VXGE_HAL_VPATH_PRIORITY_DEFAULT; } vp_config->priority = priority; } else if (vdev->config.low_latency) { set = 1; vp_config->priority = VXGE_DEFAULT_VPATH_PRIORITY_LOW; } if (set == 1) { status = vxge_hal_rx_bw_priority_set(vdev->devh, vpath_list[i]); if (status != VXGE_HAL_OK) break; if (vpath_list[i] < VXGE_HAL_TX_BW_VPATH_LIMIT) { status = vxge_hal_tx_bw_priority_set( vdev->devh, vpath_list[i]); if (status != VXGE_HAL_OK) break; } } } return ((status == VXGE_HAL_OK) ? 0 : EINVAL); } /* * vxge_intr_coalesce_tx * Changes interrupt coalescing if the interrupts are not within a range * Return Value: Nothing */ void vxge_intr_coalesce_tx(vxge_vpath_t *vpath) { u32 timer; if (!vpath->tx_intr_coalesce) return; vpath->tx_interrupts++; if (ticks > vpath->tx_ticks + hz/100) { vpath->tx_ticks = ticks; timer = vpath->tti_rtimer_val; if (vpath->tx_interrupts > VXGE_MAX_TX_INTERRUPT_COUNT) { if (timer != VXGE_TTI_RTIMER_ADAPT_VAL) { vpath->tti_rtimer_val = VXGE_TTI_RTIMER_ADAPT_VAL; vxge_hal_vpath_dynamic_tti_rtimer_set( vpath->handle, vpath->tti_rtimer_val); } } else { if (timer != 0) { vpath->tti_rtimer_val = 0; vxge_hal_vpath_dynamic_tti_rtimer_set( vpath->handle, vpath->tti_rtimer_val); } } vpath->tx_interrupts = 0; } } /* * vxge_intr_coalesce_rx * Changes interrupt coalescing if the interrupts are not within a range * Return Value: Nothing */ void vxge_intr_coalesce_rx(vxge_vpath_t *vpath) { u32 timer; if (!vpath->rx_intr_coalesce) return; vpath->rx_interrupts++; if (ticks > vpath->rx_ticks + hz/100) { vpath->rx_ticks = ticks; timer = vpath->rti_rtimer_val; if (vpath->rx_interrupts > VXGE_MAX_RX_INTERRUPT_COUNT) { if (timer != VXGE_RTI_RTIMER_ADAPT_VAL) { vpath->rti_rtimer_val = VXGE_RTI_RTIMER_ADAPT_VAL; vxge_hal_vpath_dynamic_rti_rtimer_set( vpath->handle, 
vpath->rti_rtimer_val); } } else { if (timer != 0) { vpath->rti_rtimer_val = 0; vxge_hal_vpath_dynamic_rti_rtimer_set( vpath->handle, vpath->rti_rtimer_val); } } vpath->rx_interrupts = 0; } } /* * vxge_methods FreeBSD device interface entry points */ static device_method_t vxge_methods[] = { DEVMETHOD(device_probe, vxge_probe), DEVMETHOD(device_attach, vxge_attach), DEVMETHOD(device_detach, vxge_detach), DEVMETHOD(device_shutdown, vxge_shutdown), {0, 0} }; static driver_t vxge_driver = { "vxge", vxge_methods, sizeof(vxge_dev_t), }; static devclass_t vxge_devclass; DRIVER_MODULE(vxge, pci, vxge_driver, vxge_devclass, 0, 0); Index: stable/9/sys/dev/xen/netfront/netfront.c =================================================================== --- stable/9/sys/dev/xen/netfront/netfront.c (revision 273911) +++ stable/9/sys/dev/xen/netfront/netfront.c (revision 273912) @@ -1,2237 +1,2237 @@ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 700000 #include #include #endif #include #include #include /* for DELAY */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xenbus_if.h" /* Features supported by all backends. TSO and LRO can be negotiated */ #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) #if __FreeBSD_version >= 700000 /* * Should the driver do LRO on the RX end * this can be toggled on the fly, but the * interface must be reset (down/up) for it * to take effect. 
*/ static int xn_enable_lro = 1; TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); #else #define IFCAP_TSO4 0 #define CSUM_TSO 0 #endif #ifdef CONFIG_XEN static int MODPARM_rx_copy = 0; module_param_named(rx_copy, MODPARM_rx_copy, bool, 0); MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)"); static int MODPARM_rx_flip = 0; module_param_named(rx_flip, MODPARM_rx_flip, bool, 0); MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)"); #else static const int MODPARM_rx_copy = 1; static const int MODPARM_rx_flip = 0; #endif /** * \brief The maximum allowed data fragments in a single transmit * request. * * This limit is imposed by the backend driver. We assume here that * we are dealing with a Linux driver domain and have set our limit * to mirror the Linux MAX_SKB_FRAGS constant. */ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) #define NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES) #define RX_COPY_THRESHOLD 256 #define net_ratelimit() 0 struct netfront_info; struct netfront_rx_info; static void xn_txeof(struct netfront_info *); static void xn_rxeof(struct netfront_info *); static void network_alloc_rx_buffers(struct netfront_info *); static void xn_tick_locked(struct netfront_info *); static void xn_tick(void *); static void xn_intr(void *); static inline int xn_count_frags(struct mbuf *m); static int xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head); static void xn_start_locked(struct ifnet *); static void xn_start(struct ifnet *); static int xn_ioctl(struct ifnet *, u_long, caddr_t); static void xn_ifinit_locked(struct netfront_info *); static void xn_ifinit(void *); static void xn_stop(struct netfront_info *); static void xn_query_features(struct netfront_info *np); static int xn_configure_features(struct netfront_info *np); #ifdef notyet static void xn_watchdog(struct ifnet *); #endif #ifdef notyet static void netfront_closing(device_t dev); #endif static void netif_free(struct 
netfront_info *info); static int netfront_detach(device_t dev); static int talk_to_backend(device_t dev, struct netfront_info *info); static int create_netdev(device_t dev); static void netif_disconnect_backend(struct netfront_info *info); static int setup_device(device_t dev, struct netfront_info *info); static void free_ring(int *ref, void *ring_ptr_ref); static int xn_ifmedia_upd(struct ifnet *ifp); static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); /* Xenolinux helper functions */ int network_connect(struct netfront_info *); static void xn_free_rx_ring(struct netfront_info *); static void xn_free_tx_ring(struct netfront_info *); static int xennet_get_responses(struct netfront_info *np, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list, int *pages_flipped_p); #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) #define INVALID_P2M_ENTRY (~0UL) /* * Mbuf pointers. We need these to keep track of the virtual addresses * of our mbuf chains since we can only convert from virtual to physical, * not the other way around. The size must track the free index arrays. 
*/ struct xn_chain_data { struct mbuf *xn_tx_chain[NET_TX_RING_SIZE+1]; int xn_tx_chain_cnt; struct mbuf *xn_rx_chain[NET_RX_RING_SIZE+1]; }; struct net_device_stats { u_long rx_packets; /* total packets received */ u_long tx_packets; /* total packets transmitted */ u_long rx_bytes; /* total bytes received */ u_long tx_bytes; /* total bytes transmitted */ u_long rx_errors; /* bad packets received */ u_long tx_errors; /* packet transmit problems */ u_long rx_dropped; /* no space in linux buffers */ u_long tx_dropped; /* no space available in linux */ u_long multicast; /* multicast packets received */ u_long collisions; /* detailed rx_errors: */ u_long rx_length_errors; u_long rx_over_errors; /* receiver ring buff overflow */ u_long rx_crc_errors; /* recved pkt with crc error */ u_long rx_frame_errors; /* recv'd frame alignment error */ u_long rx_fifo_errors; /* recv'r fifo overrun */ u_long rx_missed_errors; /* receiver missed packet */ /* detailed tx_errors */ u_long tx_aborted_errors; u_long tx_carrier_errors; u_long tx_fifo_errors; u_long tx_heartbeat_errors; u_long tx_window_errors; /* for cslip etc */ u_long rx_compressed; u_long tx_compressed; }; struct netfront_info { struct ifnet *xn_ifp; #if __FreeBSD_version >= 700000 struct lro_ctrl xn_lro; #endif struct net_device_stats stats; u_int tx_full; netif_tx_front_ring_t tx; netif_rx_front_ring_t rx; struct mtx tx_lock; struct mtx rx_lock; struct mtx sc_lock; u_int handle; u_int irq; u_int copying_receiver; u_int carrier; u_int maxfrags; /* Receive-ring batched refills. 
*/ #define RX_MIN_TARGET 32 #define RX_MAX_TARGET NET_RX_RING_SIZE int rx_min_target; int rx_max_target; int rx_target; grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; device_t xbdev; int tx_ring_ref; int rx_ring_ref; uint8_t mac[ETHER_ADDR_LEN]; struct xn_chain_data xn_cdata; /* mbufs */ struct mbuf_head xn_rx_batch; /* head of the batch queue */ int xn_if_flags; struct callout xn_stat_ch; u_long rx_pfn_array[NET_RX_RING_SIZE]; multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; mmu_update_t rx_mmu[NET_RX_RING_SIZE]; struct ifmedia sc_media; }; #define rx_mbufs xn_cdata.xn_rx_chain #define tx_mbufs xn_cdata.xn_tx_chain #define XN_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \ mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF); \ mtx_init(&(_sc)->sc_lock, #_name"_sc", "netfront softc lock", MTX_DEF) #define XN_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_lock) #define XN_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_lock) #define XN_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_lock) #define XN_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_lock) #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); #define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED); #define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED); #define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_lock); \ mtx_destroy(&(_sc)->tx_lock); \ mtx_destroy(&(_sc)->sc_lock); struct netfront_rx_info { struct netif_rx_response rx; struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; #define netfront_carrier_on(netif) ((netif)->carrier = 1) #define netfront_carrier_off(netif) ((netif)->carrier = 0) #define netfront_carrier_ok(netif) ((netif)->carrier) /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. 
*/ static inline void add_id_to_freelist(struct mbuf **list, uintptr_t id) { KASSERT(id != 0, ("%s: the head item (0) must always be free.", __func__)); list[id] = list[0]; list[0] = (struct mbuf *)id; } static inline unsigned short get_id_from_freelist(struct mbuf **list) { uintptr_t id; id = (uintptr_t)list[0]; KASSERT(id != 0, ("%s: the head item (0) must always remain free.", __func__)); list[0] = list[id]; return (id); } static inline int xennet_rxidx(RING_IDX idx) { return idx & (NET_RX_RING_SIZE - 1); } static inline struct mbuf * xennet_get_rx_mbuf(struct netfront_info *np, RING_IDX ri) { int i = xennet_rxidx(ri); struct mbuf *m; m = np->rx_mbufs[i]; np->rx_mbufs[i] = NULL; return (m); } static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri) { int i = xennet_rxidx(ri); grant_ref_t ref = np->grant_rx_ref[i]; KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); np->grant_rx_ref[i] = GRANT_REF_INVALID; return ref; } #define IPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #ifdef INVARIANTS #define WPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #else #define WPRINTK(fmt, args...) #endif #ifdef DEBUG #define DPRINTK(fmt, args...) \ printf("[XEN] %s: " fmt, __func__, ##args) #else #define DPRINTK(fmt, args...) #endif /** * Read the 'mac' node at the given device's node in the store, and parse that * as colon-separated octets, placing result the given mac array. mac must be * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). * Return 0 on success, or errno on error. */ static int xen_net_read_mac(device_t dev, uint8_t mac[]) { int error, i; char *s, *e, *macstr; const char *path; path = xenbus_get_node(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); if (error == ENOENT) { /* * Deal with missing mac XenStore nodes on devices with * HVM emulation (the 'ioemu' configuration attribute) * enabled. 
* * The HVM emulator may execute in a stub device model * domain which lacks the permission, only given to Dom0, * to update the guest's XenStore tree. For this reason, * the HVM emulator doesn't even attempt to write the * front-side mac node, even when operating in Dom0. * However, there should always be a mac listed in the * backend tree. Fallback to this version if our query * of the front side XenStore location doesn't find * anything. */ path = xenbus_get_otherend_path(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); } if (error != 0) { xenbus_dev_fatal(dev, error, "parsing %s/mac", path); return (error); } s = macstr; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac[i] = strtoul(s, &e, 16); if (s == e || (e[0] != ':' && e[0] != 0)) { free(macstr, M_XENBUS); return (ENOENT); } s = &e[1]; } free(macstr, M_XENBUS); return (0); } /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and * inform the backend of the appropriate details for those. Switch to * Connected state. 
*/ static int netfront_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { device_set_desc(dev, "Virtual Network Interface"); return (0); } return (ENXIO); } static int netfront_attach(device_t dev) { int err; err = create_netdev(dev); if (err) { xenbus_dev_fatal(dev, err, "creating netdev"); return (err); } #if __FreeBSD_version >= 700000 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW, + OID_AUTO, "enable_lro", CTLFLAG_RW, &xn_enable_lro, 0, "Large Receive Offload"); #endif return (0); } static int netfront_suspend(device_t dev) { struct netfront_info *info = device_get_softc(dev); XN_RX_LOCK(info); XN_TX_LOCK(info); netfront_carrier_off(info); XN_TX_UNLOCK(info); XN_RX_UNLOCK(info); return (0); } /** * We are reconnecting to the backend, due to a suspend/resume, or a backend * driver restart. We tear down our netif structure and recreate it, but * leave the device-layer structures intact so that this is transparent to the * rest of the kernel. */ static int netfront_resume(device_t dev) { struct netfront_info *info = device_get_softc(dev); netif_disconnect_backend(info); return (0); } /* Common code used when first setting up, and when resuming. */ static int talk_to_backend(device_t dev, struct netfront_info *info) { const char *message; struct xs_transaction xst; const char *node = xenbus_get_node(dev); int err; err = xen_net_read_mac(dev, info->mac); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", node); goto out; } /* Create shared ring, alloc event channel. 
*/ err = setup_device(dev, info); if (err) goto out; again: err = xs_transaction_start(&xst); if (err) { xenbus_dev_fatal(dev, err, "starting transaction"); goto destroy_ring; } err = xs_printf(xst, node, "tx-ring-ref","%u", info->tx_ring_ref); if (err) { message = "writing tx ring-ref"; goto abort_transaction; } err = xs_printf(xst, node, "rx-ring-ref","%u", info->rx_ring_ref); if (err) { message = "writing rx ring-ref"; goto abort_transaction; } err = xs_printf(xst, node, "event-channel", "%u", irq_to_evtchn_port(info->irq)); if (err) { message = "writing event-channel"; goto abort_transaction; } err = xs_printf(xst, node, "request-rx-copy", "%u", info->copying_receiver); if (err) { message = "writing request-rx-copy"; goto abort_transaction; } err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); if (err) { message = "writing feature-rx-notify"; goto abort_transaction; } err = xs_printf(xst, node, "feature-sg", "%d", 1); if (err) { message = "writing feature-sg"; goto abort_transaction; } #if __FreeBSD_version >= 700000 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); if (err) { message = "writing feature-gso-tcpv4"; goto abort_transaction; } #endif err = xs_transaction_end(xst, 0); if (err) { if (err == EAGAIN) goto again; xenbus_dev_fatal(dev, err, "completing transaction"); goto destroy_ring; } return 0; abort_transaction: xs_transaction_end(xst, 1); xenbus_dev_fatal(dev, err, "%s", message); destroy_ring: netif_free(info); out: return err; } static int setup_device(device_t dev, struct netfront_info *info) { netif_tx_sring_t *txs; netif_rx_sring_t *rxs; int error; struct ifnet *ifp; ifp = info->xn_ifp; info->tx_ring_ref = GRANT_REF_INVALID; info->rx_ring_ref = GRANT_REF_INVALID; info->rx.sring = NULL; info->tx.sring = NULL; info->irq = 0; txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!txs) { error = ENOMEM; xenbus_dev_fatal(dev, error, "allocating tx ring page"); goto fail; } SHARED_RING_INIT(txs); 
FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref); if (error) goto fail; rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!rxs) { error = ENOMEM; xenbus_dev_fatal(dev, error, "allocating rx ring page"); goto fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref); if (error) goto fail; error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq); if (error) { xenbus_dev_fatal(dev, error, "bind_evtchn_to_irqhandler failed"); goto fail; } return (0); fail: netif_free(info); return (error); } #ifdef INET /** * If this interface has an ipv4 address, send an arp for it. This * helps to get the network going again after migrating hosts. */ static void netfront_send_fake_arp(device_t dev, struct netfront_info *info) { struct ifnet *ifp; struct ifaddr *ifa; ifp = info->xn_ifp; TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) { arp_ifinit(ifp, ifa); } } } #endif /** * Callback received when the backend's state changes. 
*/ static void netfront_backend_changed(device_t dev, XenbusState newstate) { struct netfront_info *sc = device_get_softc(dev); DPRINTK("newstate=%d\n", newstate); switch (newstate) { case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateConnected: case XenbusStateUnknown: case XenbusStateClosed: case XenbusStateReconfigured: case XenbusStateReconfiguring: break; case XenbusStateInitWait: if (xenbus_get_state(dev) != XenbusStateInitialising) break; if (network_connect(sc) != 0) break; xenbus_set_state(dev, XenbusStateConnected); #ifdef INET netfront_send_fake_arp(dev, sc); #endif break; case XenbusStateClosing: xenbus_set_state(dev, XenbusStateClosed); break; } } static void xn_free_rx_ring(struct netfront_info *sc) { #if 0 int i; for (i = 0; i < NET_RX_RING_SIZE; i++) { if (sc->xn_cdata.rx_mbufs[i] != NULL) { m_freem(sc->rx_mbufs[i]); sc->rx_mbufs[i] = NULL; } } sc->rx.rsp_cons = 0; sc->xn_rx_if->req_prod = 0; sc->xn_rx_if->event = sc->rx.rsp_cons ; #endif } static void xn_free_tx_ring(struct netfront_info *sc) { #if 0 int i; for (i = 0; i < NET_TX_RING_SIZE; i++) { if (sc->tx_mbufs[i] != NULL) { m_freem(sc->tx_mbufs[i]); sc->xn_cdata.xn_tx_chain[i] = NULL; } } return; #endif } /** * \brief Verify that there is sufficient space in the Tx ring * buffer for a maximally sized request to be enqueued. * * A transmit request requires a transmit descriptor for each packet * fragment, plus up to 2 entries for "options" (e.g. TSO). */ static inline int xn_tx_slot_available(struct netfront_info *np) { return (RING_FREE_REQUESTS(&np->tx) > (MAX_TX_REQ_FRAGS + 2)); } static void netif_release_tx_bufs(struct netfront_info *np) { int i; for (i = 1; i <= NET_TX_RING_SIZE; i++) { struct mbuf *m; m = np->tx_mbufs[i]; /* * We assume that no kernel addresses are * less than NET_TX_RING_SIZE. Any entry * in the table that is below this number * must be an index from free-list tracking. 
*/
		if (((uintptr_t)m) <= NET_TX_RING_SIZE)
			continue;
		gnttab_end_foreign_access_ref(np->grant_tx_ref[i]);
		gnttab_release_grant_reference(&np->gref_tx_head,
		    np->grant_tx_ref[i]);
		np->grant_tx_ref[i] = GRANT_REF_INVALID;
		add_id_to_freelist(np->tx_mbufs, i);
		np->xn_cdata.xn_tx_chain_cnt--;
		if (np->xn_cdata.xn_tx_chain_cnt < 0) {
			panic("%s: tx_chain_cnt must be >= 0", __func__);
		}
		m_free(m);
	}
}

/*
 * Refill the receive ring.
 *
 * Allocates page-sized cluster mbufs into the xn_rx_batch queue until
 * rx_target buffers are outstanding, then (if enough were gathered) hands
 * the whole batch to the backend: each buffer gets a ring slot and a grant
 * reference, either as a page transfer (copying_receiver == 0) or as a
 * foreign-access grant (copying receiver).  Does nothing while the carrier
 * is down.
 */
static void
network_alloc_rx_buffers(struct netfront_info *sc)
{
	int otherend_id = xenbus_get_otherend_id(sc->xbdev);
	unsigned short id;
	struct mbuf *m_new;
	int i, batch_target, notify;
	RING_IDX req_prod;
	struct xen_memory_reservation reservation;
	grant_ref_t ref;
	int nr_flips;
	netif_rx_request_t *req;
	vm_offset_t vaddr;
	u_long pfn;

	req_prod = sc->rx.req_prod_pvt;

	if (unlikely(sc->carrier == 0))
		return;

	/*
	 * Allocate mbufs greedily, even though we batch updates to the
	 * receive ring. This creates a less bursty demand on the memory
	 * allocator, and so should reduce the chance of failed allocation
	 * requests both for ourself and for other kernel subsystems.
	 *
	 * Here we attempt to maintain rx_target buffers in flight, counting
	 * buffers that we have yet to process in the receive ring.
	 */
	batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
	for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
		MGETHDR(m_new, M_NOWAIT, MT_DATA);
		if (m_new == NULL) {
			printf("%s: MGETHDR failed\n", __func__);
			goto no_mbuf;
		}

		m_cljget(m_new, M_NOWAIT, MJUMPAGESIZE);
		if ((m_new->m_flags & M_EXT) == 0) {
			printf("%s: m_cljget failed\n", __func__);
			m_freem(m_new);

no_mbuf:
			/*
			 * Allocation failed: push what is already queued
			 * (i counts queued mbufs), otherwise give up for now.
			 */
			if (i != 0)
				goto refill;
			/*
			 * XXX set timer
			 */
			break;
		}
		m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;

		/* queue the mbufs allocated */
		mbufq_tail(&sc->xn_rx_batch, m_new);
	}

	/*
	 * If we've allocated at least half of our target number of entries,
	 * submit them to the backend - we have enough to make the overhead
	 * of submission worthwhile.  Otherwise wait for more mbufs and
	 * request entries to become available.
	 */
	if (i < (sc->rx_target/2)) {
		/* Still publish requests that were queued but not pushed. */
		if (req_prod >sc->rx.sring->req_prod)
			goto push;
		return;
	}

	/*
	 * Double floating fill target if we risked having the backend
	 * run out of empty buffers for receive traffic.  We define "running
	 * low" as having less than a fourth of our target buffers free
	 * at the time we refilled the queue.
	 */
	if ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) {
		sc->rx_target *= 2;
		if (sc->rx_target > sc->rx_max_target)
			sc->rx_target = sc->rx_max_target;
	}

refill:
	/* Drain the batch queue; i counts requests, nr_flips page transfers. */
	for (nr_flips = i = 0; ; i++) {
		if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
			break;

		/* Stash the buffer's page frame number in the mbuf. */
		m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)(
		    vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);

		id = xennet_rxidx(req_prod + i);

		KASSERT(sc->rx_mbufs[id] == NULL, ("non-NULL xm_rx_chain"));
		sc->rx_mbufs[id] = m_new;

		ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
		KASSERT(ref != GNTTAB_LIST_END,
		    ("reserved grant references exhuasted"));
		sc->grant_rx_ref[id] = ref;

		vaddr = mtod(m_new, vm_offset_t);
		pfn = vtophys(vaddr) >> PAGE_SHIFT;
		req = RING_GET_REQUEST(&sc->rx, req_prod + i);

		if (sc->copying_receiver == 0) {
			gnttab_grant_foreign_transfer_ref(ref,
			    otherend_id, pfn);
			sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
				/* Remove this page before passing
				 * back to Xen.
				 */
				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
				MULTI_update_va_mapping(&sc->rx_mcl[i],
				    vaddr, 0, 0);
			}
			nr_flips++;
		} else {
			gnttab_grant_foreign_access_ref(ref,
			    otherend_id,
			    PFNTOMFN(pfn), 0);
		}
		req->id = id;
		req->gref = ref;

		sc->rx_pfn_array[i] =
		    vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
	}

	KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
	KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
	/*
	 * We may have allocated buffers which have entries outstanding
	 * in the page * update queue -- make sure we flush those first!
	 */
	PT_UPDATES_FLUSH();
	if (nr_flips != 0) {
#ifdef notyet
		/* Tell the ballon driver what is going on.
*/ balloon_update_driver_allowance(i); #endif set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array); reservation.nr_extents = i; reservation.extent_order = 0; reservation.address_bits = 0; reservation.domid = DOMID_SELF; if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* After all PTEs have been zapped, flush the TLB. */ sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; /* Give away a batch of pages. */ sc->rx_mcl[i].op = __HYPERVISOR_memory_op; sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation; sc->rx_mcl[i].args[1] = (u_long)&reservation; /* Zap PTEs and give away pages in one big multicall. */ (void)HYPERVISOR_multicall(sc->rx_mcl, i+1); if (unlikely(sc->rx_mcl[i].result != i || HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != i)) panic("%s: unable to reduce memory " "reservation\n", __func__); } } else { wmb(); } /* Above is a suitable barrier to ensure backend will see requests. */ sc->rx.req_prod_pvt = req_prod + i; push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); if (notify) notify_remote_via_irq(sc->irq); } static void xn_rxeof(struct netfront_info *np) { struct ifnet *ifp; #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) struct lro_ctrl *lro = &np->xn_lro; struct lro_entry *queued; #endif struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; multicall_entry_t *mcl; struct mbuf *m; struct mbuf_head rxq, errq; int err, pages_flipped = 0, work_to_do; do { XN_RX_LOCK_ASSERT(np); if (!netfront_carrier_ok(np)) return; mbufq_init(&errq); mbufq_init(&rxq); ifp = np->xn_ifp; rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ i = np->rx.rsp_cons; while ((i != rp)) { memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); memset(extras, 0, sizeof(rinfo.extras)); m = NULL; err = xennet_get_responses(np, &rinfo, rp, &i, &m, &pages_flipped); if (unlikely(err)) { if (m) mbufq_tail(&errq, m); np->stats.rx_errors++; continue; } m->m_pkthdr.rcvif = ifp; if ( rx->flags & NETRXF_data_validated ) { /* Tell the stack the checksums are okay */ /* * XXX this isn't necessarily the case - need to add * check */ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } np->stats.rx_packets++; np->stats.rx_bytes += m->m_pkthdr.len; mbufq_tail(&rxq, m); np->rx.rsp_cons = i; } if (pages_flipped) { /* Some pages are no longer absent... */ #ifdef notyet balloon_update_driver_allowance(-pages_flipped); #endif /* Do all the remapping work, and M->P updates, in one big * hypercall. */ if (!!xen_feature(XENFEAT_auto_translated_physmap)) { mcl = np->rx_mcl + pages_flipped; mcl->op = __HYPERVISOR_mmu_update; mcl->args[0] = (u_long)np->rx_mmu; mcl->args[1] = pages_flipped; mcl->args[2] = 0; mcl->args[3] = DOMID_SELF; (void)HYPERVISOR_multicall(np->rx_mcl, pages_flipped + 1); } } while ((m = mbufq_dequeue(&errq))) m_freem(m); /* * Process all the mbufs after the remapping is complete. * Break the mbuf chain first though. */ while ((m = mbufq_dequeue(&rxq)) != NULL) { ifp->if_ipackets++; /* * Do we really need to drop the rx lock? */ XN_RX_UNLOCK(np); #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) /* Use LRO if possible */ if ((ifp->if_capenable & IFCAP_LRO) == 0 || lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { /* * If LRO fails, pass up to the stack * directly. 
*/ (*ifp->if_input)(ifp, m); } #else (*ifp->if_input)(ifp, m); #endif XN_RX_LOCK(np); } np->rx.rsp_cons = i; #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) /* * Flush any outstanding LRO work */ while (!SLIST_EMPTY(&lro->lro_active)) { queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif #if 0 /* If we get a callback with very few responses, reduce fill target. */ /* NB. Note exponential increase, linear decrease. */ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) np->rx_target = np->rx_min_target; #endif network_alloc_rx_buffers(np); RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do); } while (work_to_do); } static void xn_txeof(struct netfront_info *np) { RING_IDX i, prod; unsigned short id; struct ifnet *ifp; netif_tx_response_t *txr; struct mbuf *m; XN_TX_LOCK_ASSERT(np); if (!netfront_carrier_ok(np)) return; ifp = np->xn_ifp; do { prod = np->tx.sring->rsp_prod; rmb(); /* Ensure we see responses up to 'rp'. */ for (i = np->tx.rsp_cons; i != prod; i++) { txr = RING_GET_RESPONSE(&np->tx, i); if (txr->status == NETIF_RSP_NULL) continue; if (txr->status != NETIF_RSP_OKAY) { printf("%s: WARNING: response is %d!\n", __func__, txr->status); } id = txr->id; m = np->tx_mbufs[id]; KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); KASSERT((uintptr_t)m > NET_TX_RING_SIZE, ("mbuf already on the free list, but we're " "trying to free it again!")); M_ASSERTVALID(m); /* * Increment packet count if this is the last * mbuf of the chain. 
*/
			if (!m->m_next)
				ifp->if_opackets++;
			/* The backend must have dropped its mapping by now. */
			if (unlikely(gnttab_query_foreign_access(
			    np->grant_tx_ref[id]) != 0)) {
				panic("%s: grant id %u still in use by the "
				    "backend", __func__, id);
			}
			gnttab_end_foreign_access_ref(
			    np->grant_tx_ref[id]);
			gnttab_release_grant_reference(
			    &np->gref_tx_head, np->grant_tx_ref[id]);
			np->grant_tx_ref[id] = GRANT_REF_INVALID;

			np->tx_mbufs[id] = NULL;
			add_id_to_freelist(np->tx_mbufs, id);
			np->xn_cdata.xn_tx_chain_cnt--;
			m_free(m);
			/* Only mark the queue active if we've freed up at least one slot to try */
			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		}
		np->tx.rsp_cons = prod;

		/*
		 * Set a new event, then check for race with update of
		 * tx_cons. Note that it is essential to schedule a
		 * callback, no matter how few buffers are pending. Even if
		 * there is space in the transmit ring, higher layers may
		 * be blocked because too much data is outstanding: in such
		 * cases notification from Xen is likely to be the only kick
		 * that we'll get.
		 */
		np->tx.sring->rsp_event =
		    prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;

		mb();
	} while (prod != np->tx.sring->rsp_prod);

	if (np->tx_full &&
	    ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
		np->tx_full = 0;
#if 0
		if (np->user_state == UST_OPEN)
			netif_wake_queue(dev);
#endif
	}
}

/*
 * Interrupt handler bound to the backend's event channel (xsc is the
 * struct netfront_info).  Reaps transmit completions if any are pending,
 * always runs the receive path, and finally restarts transmission when the
 * interface is running and its send queue is not empty.  The tx and rx
 * work is done under the respective tx/rx locks.
 */
static void
xn_intr(void *xsc)
{
	struct netfront_info *np = xsc;
	struct ifnet *ifp = np->xn_ifp;

#if 0
	if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
	    likely(netfront_carrier_ok(np)) &&
	    ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
#endif
	if (RING_HAS_UNCONSUMED_RESPONSES(&np->tx)) {
		XN_TX_LOCK(np);
		xn_txeof(np);
		XN_TX_UNLOCK(np);
	}

	XN_RX_LOCK(np);
	xn_rxeof(np);
	XN_RX_UNLOCK(np);

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		xn_start(ifp);
}

/*
 * Re-post an rx buffer (mbuf plus its grant reference) at the current
 * private request producer index and advance that index.
 */
static void
xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
	grant_ref_t ref)
{
	int new = xennet_rxidx(np->rx.req_prod_pvt);

	KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL"));
	np->rx_mbufs[new] = m;
	np->grant_rx_ref[new] = ref;
RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
	np->rx.req_prod_pvt++;
}

/*
 * Consume the "extra info" slots that follow an rx response.
 *
 * Advances *cons over each extra slot, copies every valid one into
 * extras[type - 1], and re-posts the buffer that occupied the slot via
 * xennet_move_rx_slot().  Stops when a slot without
 * XEN_NETIF_EXTRA_FLAG_MORE is seen.
 *
 * Returns 0 on success, or EINVAL if the ring ran out of responses
 * (*cons + 1 == rp) or a slot carried an invalid type.
 */
static int
xennet_get_extras(struct netfront_info *np,
    struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons)
{
	struct netif_extra_info *extra;

	int err = 0;

	do {
		struct mbuf *m;
		grant_ref_t ref;

		if (unlikely(*cons + 1 == rp)) {
#if 0
			if (net_ratelimit())
				WPRINTK("Missing extra info\n");
#endif
			err = EINVAL;
			break;
		}

		extra = (struct netif_extra_info *)
		    RING_GET_RESPONSE(&np->rx, ++(*cons));

		if (unlikely(!extra->type ||
		    extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
#if 0
			if (net_ratelimit())
				WPRINTK("Invalid extra type: %d\n",
				    extra->type);
#endif
			err = EINVAL;
		} else {
			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		}

		/* The slot carried no packet data; give its buffer back. */
		m = xennet_get_rx_mbuf(np, *cons);
		ref = xennet_get_rx_ref(np, *cons);
		xennet_move_rx_slot(np, m, ref);
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return err;
}

/*
 * Assemble one received packet, which may span several ring responses,
 * into an mbuf chain.
 *
 * On entry rinfo->rx holds a copy of the response at *cons.  The function
 * follows NETRXF_more_data across subsequent responses, linking each
 * slot's mbuf onto the chain and ending or releasing the slot's grant.
 * In page-flipping mode (copying_receiver == 0) it also queues the remap
 * and machine-to-physical updates in np->rx_mcl / np->rx_mmu for the
 * caller to submit.
 *
 * Outputs: *list receives the head of the chain (may be NULL), *cons is
 * advanced past every consumed response and *pages_flipped_p is updated.
 * Returns 0 on success or an errno value (EINVAL, ENOMEM, ENOENT, or the
 * result of xennet_get_extras()).
 */
static int
xennet_get_responses(struct netfront_info *np,
	struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons,
	struct mbuf  **list,
	int *pages_flipped_p)
{
	int pages_flipped = *pages_flipped_p;
	struct mmu_update *mmu;
	struct multicall_entry *mcl;
	struct netif_rx_response *rx = &rinfo->rx;
	struct netif_extra_info *extras = rinfo->extras;
	struct mbuf *m, *m0, *m_prev;
	grant_ref_t ref = xennet_get_rx_ref(np, *cons);
	RING_IDX ref_cons = *cons;
	int frags = 1;
	int err = 0;
	u_long ret;

	m0 = m = m_prev = xennet_get_rx_mbuf(np, *cons);

	if (rx->flags & NETRXF_extra_info) {
		err = xennet_get_extras(np, extras, rp, cons);
	}

	if (m0 != NULL) {
		m0->m_pkthdr.len = 0;
		m0->m_next = NULL;
	}

	for (;;) {
		u_long mfn;

#if 0
		DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n",
			rx->status, rx->offset, frags);
#endif
		if (unlikely(rx->status < 0 ||
		    rx->offset + rx->status > PAGE_SIZE)) {

#if 0
			if (net_ratelimit())
				WPRINTK("rx->offset: %x, size: %u\n",
				    rx->offset, rx->status);
#endif
			/* Bad fragment: recycle its buffer and drop it. */
			xennet_move_rx_slot(np, m, ref);
			if (m0 == m)
				m0 = NULL;
			m = NULL;
			err = EINVAL;
			goto next_skip_queue;
		}

		/*
		 * This definitely indicates a bug, either in this driver or in
		 * the backend driver. In future this should flag the bad
		 * situation to the system controller to reboot the backend.
		 */
		if (ref == GRANT_REF_INVALID) {

#if 0
			if (net_ratelimit())
				WPRINTK("Bad rx response id %d.\n", rx->id);
#endif
			printf("%s: Bad rx response id %d.\n", __func__,rx->id);
			err = EINVAL;
			goto next;
		}

		if (!np->copying_receiver) {
			/* Memory pressure, insufficient buffer
			 * headroom, ...
			 */
			if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
				WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
				    rx->id, rx->status);
				xennet_move_rx_slot(np, m, ref);
				err = ENOMEM;
				goto next;
			}

			if (!xen_feature( XENFEAT_auto_translated_physmap)) {
				/* Remap the page. */
				void *vaddr = mtod(m, void *);
				uint32_t pfn;

				mcl = np->rx_mcl + pages_flipped;
				mmu = np->rx_mmu + pages_flipped;

				MULTI_update_va_mapping(mcl, (u_long)vaddr,
				    (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW |
				    PG_V | PG_M | PG_A, 0);
				pfn = (uintptr_t)m->m_ext.ext_arg1;
				mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
				    MMU_MACHPHYS_UPDATE;
				mmu->val = pfn;

				set_phys_to_machine(pfn, mfn);
			}
			pages_flipped++;
		} else {
			ret = gnttab_end_foreign_access_ref(ref);
			KASSERT(ret, ("ret != 0"));
		}

		gnttab_release_grant_reference(&np->gref_rx_head, ref);

next:
		if (m == NULL)
			break;

		m->m_len = rx->status;
		m->m_data += rx->offset;
		m0->m_pkthdr.len += rx->status;

next_skip_queue:
		if (!(rx->flags & NETRXF_more_data))
			break;

		if (*cons + frags == rp) {
			if (net_ratelimit())
				WPRINTK("Need more frags\n");
			err = ENOENT;
			printf("%s: cons %u frags %u rp %u, not enough frags\n",
			       __func__, *cons, frags, rp);
			break;
		}
		/*
		 * Note that m can be NULL, if rx->status < 0 or if
		 * rx->offset + rx->status > PAGE_SIZE above.
		 */
		m_prev = m;

		rx = RING_GET_RESPONSE(&np->rx, *cons + frags);
		m = xennet_get_rx_mbuf(np, *cons + frags);

		/*
		 * m_prev == NULL can happen if rx->status < 0 or if
		 * rx->offset + * rx->status > PAGE_SIZE above.
		 */
		if (m_prev != NULL)
			m_prev->m_next = m;

		/*
		 * m0 can be NULL if rx->status < 0 or if * rx->offset +
		 * rx->status > PAGE_SIZE above.
		 */
		if (m0 == NULL)
			m0 = m;
		m->m_next = NULL;
		ref = xennet_get_rx_ref(np, *cons + frags);
		ref_cons = *cons + frags;
		frags++;
	}
	*list = m0;
	*cons += frags;
	*pages_flipped_p = pages_flipped;

	return (err);
}

/*
 * Periodic tick body; the rx lock must be held.  Currently it only
 * re-arms the callout to fire again after hz ticks.
 */
static void
xn_tick_locked(struct netfront_info *sc)
{
	XN_RX_LOCK_ASSERT(sc);
	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);

	/* XXX placeholder for printing debug information */
}

/* Callout entry point: run xn_tick_locked() under the rx lock. */
static void
xn_tick(void *xsc)
{
	struct netfront_info *sc;

	sc = xsc;
	XN_RX_LOCK(sc);
	xn_tick_locked(sc);
	XN_RX_UNLOCK(sc);
}

/**
 * \brief Count the number of fragments in an mbuf chain.
 *
 * Surprisingly, there isn't an M* macro for this.
 *
 * \return  Number of mbufs linked through m_next, 0 for a NULL chain.
 */
static inline int
xn_count_frags(struct mbuf *m)
{
	int nfrags;

	for (nfrags = 0; m != NULL; m = m->m_next)
		nfrags++;

	return (nfrags);
}

/**
 * Given an mbuf chain, make sure we have enough room and then push
 * it onto the transmit ring.
 *
 * Returns 0 on success.  On failure the chain has been freed and
 * EMSGSIZE is returned.
 */
static int
xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head)
{
	struct ifnet *ifp;
	struct mbuf *m;
	u_int nfrags;
	netif_extra_info_t *extra;
	int otherend_id;

	ifp = sc->xn_ifp;

	/**
	 * Defragment the mbuf if necessary.
	 */
	nfrags = xn_count_frags(m_head);

	/*
	 * Check to see whether this request is longer than netback
	 * can handle, and try to defrag it.
	 */
	/**
	 * It is a bit lame, but the netback driver in Linux can't
	 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of
	 * the Linux network stack.
	 */
	if (nfrags > sc->maxfrags) {
		m = m_defrag(m_head, M_NOWAIT);
		if (!m) {
			/*
			 * Defrag failed, so free the mbuf and
			 * therefore drop the packet.
			 */
			m_freem(m_head);
			return (EMSGSIZE);
		}
		m_head = m;
	}

	/* Determine how many fragments now exist */
	nfrags = xn_count_frags(m_head);

	/*
	 * Check to see whether the defragmented packet has too many
	 * segments for the Linux netback driver.
*/
	/**
	 * The FreeBSD TCP stack, with TSO enabled, can produce a chain
	 * of mbufs longer than Linux can handle.  Make sure we don't
	 * pass a too-long chain over to the other side by dropping the
	 * packet.  It doesn't look like there is currently a way to
	 * tell the TCP stack to generate a shorter chain of packets.
	 */
	if (nfrags > MAX_TX_REQ_FRAGS) {
#ifdef DEBUG
		printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback "
		       "won't be able to handle it, dropping\n",
		       __func__, nfrags, MAX_TX_REQ_FRAGS);
#endif
		m_freem(m_head);
		return (EMSGSIZE);
	}

	/*
	 * This check should be redundant.  We've already verified that we
	 * have enough slots in the ring to handle a packet of maximum
	 * size, and that our packet is less than the maximum size.  Keep
	 * it in here as an assert for now just to make certain that
	 * xn_tx_chain_cnt is accurate.
	 */
	KASSERT((sc->xn_cdata.xn_tx_chain_cnt + nfrags) <= NET_TX_RING_SIZE,
	    ("%s: xn_tx_chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE "
	    "(%d)!", __func__, (int) sc->xn_cdata.xn_tx_chain_cnt,
	    (int) nfrags, (int) NET_TX_RING_SIZE));

	/*
	 * Start packing the mbufs in this chain into
	 * the fragment pointers. Stop when we run out
	 * of fragments or hit the end of the mbuf chain.
	 */
	m = m_head;
	extra = NULL;
	otherend_id = xenbus_get_otherend_id(sc->xbdev);
	for (m = m_head; m; m = m->m_next) {
		netif_tx_request_t *tx;
		uintptr_t id;
		grant_ref_t ref;
		u_long mfn; /* XXX Wrong type? */

		tx = RING_GET_REQUEST(&sc->tx, sc->tx.req_prod_pvt);
		/* Slot 0 of tx_mbufs is the free-list head, never a valid id. */
		id = get_id_from_freelist(sc->tx_mbufs);
		if (id == 0)
			panic("%s: was allocated the freelist head!\n",
			    __func__);
		sc->xn_cdata.xn_tx_chain_cnt++;
		if (sc->xn_cdata.xn_tx_chain_cnt > NET_TX_RING_SIZE)
			panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
			    __func__);
		sc->tx_mbufs[id] = m;
		tx->id = id;
		ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
		KASSERT((short)ref >= 0, ("Negative ref"));
		/* Grant the backend read-only access to this mbuf's page. */
		mfn = virt_to_mfn(mtod(m, vm_offset_t));
		gnttab_grant_foreign_access_ref(ref, otherend_id,
		    mfn, GNTMAP_readonly);
		tx->gref = sc->grant_tx_ref[id] = ref;
		tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
		tx->flags = 0;
		if (m == m_head) {
			/*
			 * The first fragment has the entire packet
			 * size, subsequent fragments have just the
			 * fragment size. The backend works out the
			 * true size of the first fragment by
			 * subtracting the sizes of the other
			 * fragments.
			 */
			tx->size = m->m_pkthdr.len;

			/*
			 * The first fragment contains the checksum flags
			 * and is optionally followed by extra data for
			 * TSO etc.
			 */
			/**
			 * CSUM_TSO requires checksum offloading.
			 * Some versions of FreeBSD fail to
			 * set CSUM_TCP in the CSUM_TSO case,
			 * so we have to test for CSUM_TSO
			 * explicitly.
			 */
			if (m->m_pkthdr.csum_flags
			    & (CSUM_DELAY_DATA | CSUM_TSO)) {
				tx->flags |= (NETTXF_csum_blank
				    | NETTXF_data_validated);
			}
#if __FreeBSD_version >= 700000
			if (m->m_pkthdr.csum_flags & CSUM_TSO) {
				/* The GSO descriptor occupies the next ring slot. */
				struct netif_extra_info *gso =
					(struct netif_extra_info *)
					RING_GET_REQUEST(&sc->tx,
							 ++sc->tx.req_prod_pvt);

				tx->flags |= NETTXF_extra_info;

				gso->u.gso.size = m->m_pkthdr.tso_segsz;
				gso->u.gso.type =
					XEN_NETIF_GSO_TYPE_TCPV4;
				gso->u.gso.pad = 0;
				gso->u.gso.features = 0;

				gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
				gso->flags = 0;
			}
#endif
		} else {
			tx->size = m->m_len;
		}
		if (m->m_next)
			tx->flags |= NETTXF_more_data;

		sc->tx.req_prod_pvt++;
	}
	BPF_MTAP(ifp, m_head);

	sc->stats.tx_bytes += m_head->m_pkthdr.len;
	sc->stats.tx_packets++;

	return (0);
}

/*
 * Move packets from the interface send queue onto the tx ring and notify
 * the backend if required.  Does nothing while the carrier is down.
 * xn_start() calls this with the tx lock held.
 */
static void
xn_start_locked(struct ifnet *ifp)
{
	struct netfront_info *sc;
	struct mbuf *m_head;
	int notify;

	sc = ifp->if_softc;

	if (!netfront_carrier_ok(sc))
		return;

	/*
	 * While we have enough transmit slots available for at least one
	 * maximum-sized packet, pull mbufs off the queue and put them on
	 * the transmit ring.
*/ while (xn_tx_slot_available(sc)) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (xn_assemble_tx_request(sc, m_head) != 0) break; } RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); if (notify) notify_remote_via_irq(sc->irq); if (RING_FULL(&sc->tx)) { sc->tx_full = 1; #if 0 netif_stop_queue(dev); #endif } } static void xn_start(struct ifnet *ifp) { struct netfront_info *sc; sc = ifp->if_softc; XN_TX_LOCK(sc); xn_start_locked(ifp); XN_TX_UNLOCK(sc); } /* equivalent of network_open() in Linux */ static void xn_ifinit_locked(struct netfront_info *sc) { struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; xn_stop(sc); network_alloc_rx_buffers(sc); sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); } static void xn_ifinit(void *xsc) { struct netfront_info *sc = xsc; XN_LOCK(sc); xn_ifinit_locked(sc); XN_UNLOCK(sc); } static int xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct netfront_info *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif int mask, error = 0; switch(cmd) { case SIOCSIFADDR: case SIOCGIFADDR: #ifdef INET XN_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) xn_ifinit_locked(sc); arp_ifinit(ifp, ifa); XN_UNLOCK(sc); } else { XN_UNLOCK(sc); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFMTU: /* XXX can we alter the MTU on a VN ?*/ #ifdef notyet if (ifr->ifr_mtu > XN_JUMBO_MTU) error = EINVAL; else #endif { ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xn_ifinit(sc); } break; case SIOCSIFFLAGS: XN_LOCK(sc); if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use 
the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* No promiscuous mode with Xen */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->xn_if_flags & IFF_PROMISC)) { XN_SETBIT(sc, XN_RX_MODE, XN_RXMODE_RX_PROMISC); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->xn_if_flags & IFF_PROMISC) { XN_CLRBIT(sc, XN_RX_MODE, XN_RXMODE_RX_PROMISC); } else #endif xn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xn_stop(sc); } } sc->xn_if_flags = ifp->if_flags; XN_UNLOCK(sc); error = 0; break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); } } if (mask & IFCAP_RXCSUM) { ifp->if_capenable ^= IFCAP_RXCSUM; } #if __FreeBSD_version >= 700000 if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } else { IPRINTK("Xen requires tx checksum offload" " be enabled to use TSO\n"); error = EINVAL; } } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; } #endif error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet if (ifp->if_drv_flags & IFF_DRV_RUNNING) { XN_LOCK(sc); xn_setmulti(sc); XN_UNLOCK(sc); error = 0; } #endif /* FALLTHROUGH */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); } return (error); } static void xn_stop(struct netfront_info *sc) { 
struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; callout_stop(&sc->xn_stat_ch); xn_free_rx_ring(sc); xn_free_tx_ring(sc); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } /* START of Xenolinux helper functions adapted to FreeBSD */ int network_connect(struct netfront_info *np) { int i, requeue_idx, error; grant_ref_t ref; netif_rx_request_t *req; u_int feature_rx_copy, feature_rx_flip; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-copy", NULL, "%u", &feature_rx_copy); if (error) feature_rx_copy = 0; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-flip", NULL, "%u", &feature_rx_flip); if (error) feature_rx_flip = 1; /* * Copy packets on receive path if: * (a) This was requested by user, and the backend supports it; or * (b) Flipping was requested, but this is unsupported by the backend. */ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) || (MODPARM_rx_flip && !feature_rx_flip)); /* Recovery procedure: */ error = talk_to_backend(np->xbdev, np); if (error) return (error); /* Step 1: Reinitialise variables. */ xn_query_features(np); xn_configure_features(np); netif_release_tx_bufs(np); /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. 
*/ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { struct mbuf *m; u_long pfn; if (np->rx_mbufs[i] == NULL) continue; m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i); ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); req = RING_GET_REQUEST(&np->rx, requeue_idx); pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; if (!np->copying_receiver) { gnttab_grant_foreign_transfer_ref(ref, xenbus_get_otherend_id(np->xbdev), pfn); } else { gnttab_grant_foreign_access_ref(ref, xenbus_get_otherend_id(np->xbdev), PFNTOMFN(pfn), 0); } req->gref = ref; req->id = requeue_idx; requeue_idx++; } np->rx.req_prod_pvt = requeue_idx; /* Step 3: All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. */ netfront_carrier_on(np); notify_remote_via_irq(np->irq); XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); network_alloc_rx_buffers(np); return (0); } static void xn_query_features(struct netfront_info *np) { int val; device_printf(np->xbdev, "backend features:"); if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-sg", NULL, "%d", &val) < 0) val = 0; np->maxfrags = 1; if (val) { np->maxfrags = MAX_TX_REQ_FRAGS; printf(" feature-sg"); } if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-gso-tcpv4", NULL, "%d", &val) < 0) val = 0; np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); if (val) { np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; printf(" feature-gso-tcp4"); } printf("\n"); } static int xn_configure_features(struct netfront_info *np) { int err; err = 0; #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) if ((np->xn_ifp->if_capenable & IFCAP_LRO) != 0) tcp_lro_free(&np->xn_lro); #endif np->xn_ifp->if_capenable = np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4); np->xn_ifp->if_hwassist &= ~CSUM_TSO; #if __FreeBSD_version >= 700000 && (defined(INET) || 
defined(INET6)) if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) != 0) { err = tcp_lro_init(&np->xn_lro); if (err) { device_printf(np->xbdev, "LRO initialization failed\n"); } else { np->xn_lro.ifp = np->xn_ifp; np->xn_ifp->if_capenable |= IFCAP_LRO; } } if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) != 0) { np->xn_ifp->if_capenable |= IFCAP_TSO4; np->xn_ifp->if_hwassist |= CSUM_TSO; } #endif return (err); } /** Create a network device. * @param handle device handle */ int create_netdev(device_t dev) { int i; struct netfront_info *np; int err; struct ifnet *ifp; np = device_get_softc(dev); np->xbdev = dev; XN_LOCK_INIT(np, xennetif); ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); np->rx_target = RX_MIN_TARGET; np->rx_min_target = RX_MIN_TARGET; np->rx_max_target = RX_MAX_TARGET; /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ for (i = 0; i <= NET_TX_RING_SIZE; i++) { np->tx_mbufs[i] = (void *) ((u_long) i+1); np->grant_tx_ref[i] = GRANT_REF_INVALID; } np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0; for (i = 0; i <= NET_RX_RING_SIZE; i++) { np->rx_mbufs[i] = NULL; np->grant_rx_ref[i] = GRANT_REF_INVALID; } /* A grant for every tx ring slot */ if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, &np->gref_tx_head) != 0) { IPRINTK("#### netfront can't alloc tx grant refs\n"); err = ENOMEM; goto exit; } /* A grant for every rx ring slot */ if (gnttab_alloc_grant_references(RX_MAX_TARGET, &np->gref_rx_head) != 0) { WPRINTK("#### netfront can't alloc rx grant refs\n"); gnttab_free_grant_references(np->gref_tx_head); err = ENOMEM; goto exit; } err = xen_net_read_mac(dev, np->mac); if (err) goto out; /* Set up ifnet structure */ ifp = np->xn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = np; if_initname(ifp, "xn", device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = 
xn_ioctl; ifp->if_output = ether_output; ifp->if_start = xn_start; #ifdef notyet ifp->if_watchdog = xn_watchdog; #endif ifp->if_init = xn_ifinit; ifp->if_mtu = ETHERMTU; ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; ifp->if_hwassist = XN_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_hw_tsomax = NF_TSO_MAXBURST; ether_ifattach(ifp, np->mac); callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); netfront_carrier_off(np); return (0); exit: gnttab_free_grant_references(np->gref_tx_head); out: return (err); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once is this done, we can switch to Closed in * acknowledgement. */ #if 0 static void netfront_closing(device_t dev) { #if 0 struct netfront_info *info = dev->dev_driver_data; DPRINTK("netfront_closing: %s removed\n", dev->nodename); close_netdev(info); #endif xenbus_switch_state(dev, XenbusStateClosed); } #endif static int netfront_detach(device_t dev) { struct netfront_info *info = device_get_softc(dev); DPRINTK("%s\n", xenbus_get_node(dev)); netif_free(info); return 0; } static void netif_free(struct netfront_info *info) { XN_LOCK(info); xn_stop(info); XN_UNLOCK(info); callout_drain(&info->xn_stat_ch); netif_disconnect_backend(info); if (info->xn_ifp != NULL) { ether_ifdetach(info->xn_ifp); if_free(info->xn_ifp); info->xn_ifp = NULL; } ifmedia_removeall(&info->sc_media); } static void netif_disconnect_backend(struct netfront_info *info) { XN_RX_LOCK(info); XN_TX_LOCK(info); netfront_carrier_off(info); XN_TX_UNLOCK(info); XN_RX_UNLOCK(info); free_ring(&info->tx_ring_ref, &info->tx.sring); free_ring(&info->rx_ring_ref, &info->rx.sring); if (info->irq) unbind_from_irqhandler(info->irq); info->irq = 0; } static void free_ring(int *ref, void *ring_ptr_ref) { void **ring_ptr_ptr = ring_ptr_ref; if (*ref != GRANT_REF_INVALID) { /* This API frees the associated storage. 
*/ gnttab_end_foreign_access(*ref, *ring_ptr_ptr); *ref = GRANT_REF_INVALID; } *ring_ptr_ptr = NULL; } static int xn_ifmedia_upd(struct ifnet *ifp) { return (0); } static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /* ** Driver registration ** */ static device_method_t netfront_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netfront_probe), DEVMETHOD(device_attach, netfront_attach), DEVMETHOD(device_detach, netfront_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, netfront_suspend), DEVMETHOD(device_resume, netfront_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), DEVMETHOD_END }; static driver_t netfront_driver = { "xn", netfront_methods, sizeof(struct netfront_info), }; devclass_t netfront_devclass; DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, NULL); Index: stable/9/sys/dev =================================================================== --- stable/9/sys/dev (revision 273911) +++ stable/9/sys/dev (revision 273912) Property changes on: stable/9/sys/dev ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/dev:r263710,273377-273378,273423,273455 Index: stable/9/sys/fs/devfs/devfs_devs.c =================================================================== --- stable/9/sys/fs/devfs/devfs_devs.c (revision 273911) +++ stable/9/sys/fs/devfs/devfs_devs.c (revision 273912) @@ -1,709 +1,709 @@ /*- * Copyright (c) 2000,2004 * Poul-Henning Kamp. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vfsops.c 1.36 * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The one true (but secret) list of active devices in the system. * Locked by dev_lock()/devmtx */ struct cdev_priv_list cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list); struct unrhdr *devfs_inos; static MALLOC_DEFINE(M_DEVFS2, "DEVFS2", "DEVFS data 2"); static MALLOC_DEFINE(M_DEVFS3, "DEVFS3", "DEVFS data 3"); static MALLOC_DEFINE(M_CDEVP, "DEVFS1", "DEVFS cdev_priv storage"); static SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "DEVFS filesystem"); static unsigned devfs_generation; SYSCTL_UINT(_vfs_devfs, OID_AUTO, generation, CTLFLAG_RD, &devfs_generation, 0, "DEVFS generation number"); unsigned devfs_rule_depth = 1; SYSCTL_UINT(_vfs_devfs, OID_AUTO, rule_depth, CTLFLAG_RW, &devfs_rule_depth, 0, "Max depth of ruleset include"); /* * Helper sysctl for devname(3). 
We're given a dev_t and return the
 * name, if any, registered by the device driver.
 */
/*
 * The dev_t is compared with cdp_inode of every entry on cdevp_list.
 * Returns EINVAL for NODEV, ENOENT when nothing matches, and otherwise the
 * status of SYSCTL_IN() or SYSCTL_OUT().
 */
static int
sysctl_devname(SYSCTL_HANDLER_ARGS)
{
	int error;
	dev_t ud;
	struct cdev_priv *cdp;
	struct cdev *dev;

	error = SYSCTL_IN(req, &ud, sizeof (ud));
	if (error)
		return (error);
	if (ud == NODEV)
		return (EINVAL);
	dev = NULL;
	/*
	 * The reference is taken (dev_refl) before dev_lock is released and
	 * is dropped again by dev_rel() once the name has been copied out.
	 */
	dev_lock();
	TAILQ_FOREACH(cdp, &cdevp_list, cdp_list)
		if (cdp->cdp_inode == ud) {
			dev = &cdp->cdp_c;
			dev_refl(dev);
			break;
		}
	dev_unlock();
	if (dev == NULL)
		return (ENOENT);
	/* The length includes the terminating NUL. */
	error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1);
	dev_rel(dev);
	return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, devname,
    CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE,
    NULL, 0, sysctl_devname, "", "devname(3) handler");

SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD,
- 0, sizeof(struct cdev), "sizeof(struct cdev)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct cdev), "sizeof(struct cdev)");

SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD,
- 0, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)");

/*
 * Allocate a zeroed cdev_priv and return the struct cdev embedded in it
 * (cdp_c).  With MAKEDEV_NOWAIT set in flags the allocation uses M_NOWAIT
 * and NULL is returned when it fails; otherwise M_WAITOK is used.
 */
struct cdev *
devfs_alloc(int flags)
{
	struct cdev_priv *cdp;
	struct cdev *cdev;
	struct timespec ts;

	cdp = malloc(sizeof *cdp, M_CDEVP, M_USE_RESERVE | M_ZERO |
	    ((flags & MAKEDEV_NOWAIT) ? M_NOWAIT : M_WAITOK));
	if (cdp == NULL)
		return (NULL);

	/* The dirent array starts as the single embedded slot cdp_dirent0. */
	cdp->cdp_dirents = &cdp->cdp_dirent0;
	cdp->cdp_dirent0 = NULL;
	cdp->cdp_maxdirent = 0;
	cdp->cdp_inode = 0;

	cdev = &cdp->cdp_c;

	/* The name pointer refers to the buffer inside the cdev itself. */
	cdev->si_name = cdev->__si_namebuf;
	LIST_INIT(&cdev->si_children);
	/* All three timestamps start out with the same value. */
	vfs_timestamp(&ts);
	cdev->si_atime = cdev->si_mtime = cdev->si_ctime = ts;
	cdev->si_cred = NULL;

	return (cdev);
}

/*
 * Return 1 when name conflicts with an existing entry, 0 otherwise.
 * A conflict is reported when devfs_pathpath() returns non-zero for name
 * and the si_name of any CDP_ACTIVE device, in either argument order, or
 * when devfs_dir_find() returns non-zero for name.
 * devmtx must be held by the caller (asserted).
 */
int
devfs_dev_exists(const char *name)
{
	struct cdev_priv *cdp;

	mtx_assert(&devmtx, MA_OWNED);

	TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) {
		/* Inactive devices are ignored. */
		if ((cdp->cdp_flags & CDP_ACTIVE) == 0)
			continue;
		if (devfs_pathpath(cdp->cdp_c.si_name, name) != 0)
			return (1);
		if (devfs_pathpath(name, cdp->cdp_c.si_name) != 0)
			return (1);
	}
	if (devfs_dir_find(name) != 0)
		return (1);

	return (0);
}

/*
 * Release a cdev obtained from devfs_alloc(): drop its credential if one is
 * set, give the inode number back through devfs_free_cdp_inode(), free the
 * dirent array when it is no longer the embedded slot (cdp_maxdirent > 0),
 * and free the enclosing cdev_priv.
 */
void
devfs_free(struct cdev *cdev)
{
	struct cdev_priv *cdp;

	cdp = cdev2priv(cdev);
	if (cdev->si_cred != NULL)
		crfree(cdev->si_cred);
	devfs_free_cdp_inode(cdp->cdp_inode);
	if (cdp->cdp_maxdirent > 0)
		free(cdp->cdp_dirents, M_DEVFS2);
	free(cdp, M_CDEVP);
}

/*
 * Search directory dd for an entry whose name is exactly the namelen bytes
 * at name.  A type of 0 matches any entry type; any other value must equal
 * the d_type of the entry.  Returns the entry, or NULL when none matches.
 * Returning an entry flagged DE_DOOMED is treated as a bug (KASSERT).
 */
struct devfs_dirent *
devfs_find(struct devfs_dirent *dd, const char *name, int namelen, int type)
{
	struct devfs_dirent *de;

	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
		if (namelen != de->de_dirent->d_namlen)
			continue;
		if (type != 0 && type != de->de_dirent->d_type)
			continue;
		if (bcmp(name, de->de_dirent->d_name, namelen) != 0)
			continue;
		break;
	}
	KASSERT(de == NULL || (de->de_flags & DE_DOOMED) == 0,
	    ("devfs_find: returning a doomed entry"));
	return (de);
}

/*
 * Allocate a zeroed devfs_dirent together with its struct dirent, which is
 * placed directly behind it in the same M_WAITOK allocation.  namelen bytes
 * of name are copied and a NUL terminator is appended, so name itself need
 * not be terminated.  The entry starts with one link and one hold, and all
 * three timestamps set to the creation time.
 */
struct devfs_dirent *
devfs_newdirent(char *name, int namelen)
{
	int i;
	struct devfs_dirent *de;
	struct dirent d;

	/* d is a scratch dirent used only to evaluate GENERIC_DIRSIZ(). */
	d.d_namlen = namelen;
	i = sizeof (*de) + GENERIC_DIRSIZ(&d);
	de = malloc(i, M_DEVFS3, M_WAITOK | M_ZERO);
	de->de_dirent = (struct dirent *)(de + 1);
	de->de_dirent->d_namlen = namelen;
	de->de_dirent->d_reclen = GENERIC_DIRSIZ(&d);
	bcopy(name, de->de_dirent->d_name, namelen);
	de->de_dirent->d_name[namelen] = '\0';
	vfs_timestamp(&de->de_ctime);
	de->de_mtime = de->de_atime = de->de_ctime;
	de->de_links = 1;
	de->de_holdcnt = 1;
#ifdef MAC
	mac_devfs_init(de);
#endif
	return (de);
}

/*
 * Return the parent directory of de.
 * For a non-directory this is de->de_dir.  For an entry flagged DE_DOT or
 * DE_DOTDOT the result is NULL.  For a directory the second element of its
 * de_dlist is consulted and its de_dir returned; NULL is returned when the
 * list has fewer than two elements.
 */
struct devfs_dirent *
devfs_parent_dirent(struct devfs_dirent *de)
{

	if (de->de_dirent->d_type != DT_DIR)
		return (de->de_dir);

	if (de->de_flags & (DE_DOT | DE_DOTDOT))
		return (NULL);

	de = TAILQ_FIRST(&de->de_dlist);	/* "." */
	if (de == NULL)
		return (NULL);
	de = TAILQ_NEXT(de, de_list);		/* ".." */
	if (de == NULL)
		return (NULL);

	return (de->de_dir);
}

/*
 * Create a directory dirent named by the namelen bytes at name, including
 * its dot and dotdot entries.
 *
 * dmp    - mount the directory belongs to; its dm_lock must be exclusively
 *          held when dotdot is not NULL (asserted in that branch).
 * dotdot - parent directory.  When NULL, the dotdot entry refers back to
 *          the new directory itself and the directory is not linked into
 *          any parent list.
 * inode  - inode number to use; 0 requests a fresh one from devfs_inos.
 *
 * Returns the new directory dirent.
 */
struct devfs_dirent *
devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen,
    struct devfs_dirent *dotdot, u_int inode)
{
	struct devfs_dirent *dd;
	struct devfs_dirent *de;

	/* Create the new directory */
	dd = devfs_newdirent(name, namelen);
	TAILQ_INIT(&dd->de_dlist);
	dd->de_dirent->d_type = DT_DIR;
	dd->de_mode = 0555;
	dd->de_links = 2;
	dd->de_dir = dd;
	if (inode != 0)
		dd->de_inode = inode;
	else
		dd->de_inode = alloc_unr(devfs_inos);

	/*
	 * "." and ".." are always the two first entries in the
	 * de_dlist list.
	 *
	 * Create the "." entry in the new directory.
	 */
	de = devfs_newdirent(".", 1);
	de->de_dirent->d_type = DT_DIR;
	de->de_flags |= DE_DOT;
	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
	de->de_dir = dd;

	/* Create the ".." entry in the new directory. */
	de = devfs_newdirent("..", 2);
	de->de_dirent->d_type = DT_DIR;
	de->de_flags |= DE_DOTDOT;
	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
	if (dotdot == NULL) {
		de->de_dir = dd;
	} else {
		/* Link into the parent and account for the new link there. */
		de->de_dir = dotdot;
		sx_assert(&dmp->dm_lock, SX_XLOCKED);
		TAILQ_INSERT_TAIL(&dotdot->de_dlist, dd, de_list);
		dotdot->de_links++;
		devfs_rules_apply(dmp, dd);
	}

#ifdef MAC
	mac_devfs_create_directory(dmp->dm_mount, name, namelen, dd);
#endif
	return (dd);
}

/* Free a dirent allocated by devfs_newdirent() (malloc type M_DEVFS3). */
void
devfs_dirent_free(struct devfs_dirent *de)
{
	free(de, M_DEVFS3);
}

/*
 * Removes a directory if it is empty. Also empty parent directories are
 * removed recursively.
*/
static void
devfs_rmdir_empty(struct devfs_mount *dm, struct devfs_dirent *de)
{
	struct devfs_dirent *dd, *de_dot, *de_dotdot;

	sx_assert(&dm->dm_lock, SX_XLOCKED);

	/* Each iteration handles one directory, then moves on to its parent. */
	for (;;) {
		KASSERT(de->de_dirent->d_type == DT_DIR,
		    ("devfs_rmdir_empty: de is not a directory"));

		/* Stop at a directory already being deleted or at the root. */
		if ((de->de_flags & DE_DOOMED) != 0 || de == dm->dm_rootdir)
			return;

		de_dot = TAILQ_FIRST(&de->de_dlist);
		KASSERT(de_dot != NULL, ("devfs_rmdir_empty: . missing"));
		de_dotdot = TAILQ_NEXT(de_dot, de_list);
		KASSERT(de_dotdot != NULL,
		    ("devfs_rmdir_empty: .. missing"));
		/* Return if the directory is not empty. */
		if (TAILQ_NEXT(de_dotdot, de_list) != NULL)
			return;

		dd = devfs_parent_dirent(de);
		KASSERT(dd != NULL, ("devfs_rmdir_empty: NULL dd"));
		/* Unlink the two fixed entries and the directory itself. */
		TAILQ_REMOVE(&de->de_dlist, de_dot, de_list);
		TAILQ_REMOVE(&de->de_dlist, de_dotdot, de_list);
		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
		/*
		 * The parent is held across the three deletes.  If dropping
		 * that hold makes DEVFS_DE_DROP() return non-zero, the parent
		 * is freed here and the walk ends.
		 */
		DEVFS_DE_HOLD(dd);
		devfs_delete(dm, de, DEVFS_DEL_NORECURSE);
		devfs_delete(dm, de_dot, DEVFS_DEL_NORECURSE);
		devfs_delete(dm, de_dotdot, DEVFS_DEL_NORECURSE);
		if (DEVFS_DE_DROP(dd)) {
			devfs_dirent_free(dd);
			return;
		}

		de = dd;
	}
}

/*
 * The caller needs to hold the dm for the duration of the call since
 * dm->dm_lock may be temporary dropped.
*/
void
devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de, int flags)
{
	struct devfs_dirent *dd;
	struct vnode *vp;

	/* Deleting an entry twice is a bug; mark it doomed from here on. */
	KASSERT((de->de_flags & DE_DOOMED) == 0,
		("devfs_delete doomed dirent"));
	de->de_flags |= DE_DOOMED;

	/*
	 * Unless DEVFS_DEL_NORECURSE is given, hold the parent so that it
	 * can be examined at the end of this function.
	 */
	if ((flags & DEVFS_DEL_NORECURSE) == 0) {
		dd = devfs_parent_dirent(de);
		if (dd != NULL)
			DEVFS_DE_HOLD(dd);
		if (de->de_flags & DE_USER) {
			KASSERT(dd != NULL, ("devfs_delete: NULL dd"));
			devfs_dir_unref_de(dm, dd);
		}
	} else
		dd = NULL;

	mtx_lock(&devfs_de_interlock);
	vp = de->de_vnode;
	if (vp != NULL) {
		/*
		 * dm->dm_lock is released around vgone() and taken again
		 * exclusively afterwards.  The vnode is kept alive across
		 * that window by vholdl()/vdrop().
		 */
		VI_LOCK(vp);
		mtx_unlock(&devfs_de_interlock);
		vholdl(vp);
		sx_unlock(&dm->dm_lock);
		/*
		 * With DEVFS_DEL_VNLOCKED the vnode lock is neither taken
		 * nor released here; presumably the caller already holds
		 * it -- TODO confirm against the callers.
		 */
		if ((flags & DEVFS_DEL_VNLOCKED) == 0)
			vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
		else
			VI_UNLOCK(vp);
		vgone(vp);
		if ((flags & DEVFS_DEL_VNLOCKED) == 0)
			VOP_UNLOCK(vp, 0);
		vdrop(vp);
		sx_xlock(&dm->dm_lock);
	} else
		mtx_unlock(&devfs_de_interlock);
	if (de->de_symlink) {
		free(de->de_symlink, M_DEVFS);
		de->de_symlink = NULL;
	}
#ifdef MAC
	mac_devfs_destroy(de);
#endif
	/* Only inode numbers above DEVFS_ROOTINO are handed back. */
	if (de->de_inode > DEVFS_ROOTINO) {
		devfs_free_cdp_inode(de->de_inode);
		de->de_inode = 0;
	}
	if (DEVFS_DE_DROP(de))
		devfs_dirent_free(de);

	/*
	 * Drop the hold on the parent taken above.  If the parent survives,
	 * remove it as well when it has become empty.
	 */
	if (dd != NULL) {
		if (DEVFS_DE_DROP(dd))
			devfs_dirent_free(dd);
		else
			devfs_rmdir_empty(dm, dd);
	}
}

/*
 * Called on unmount.
 * Recursively removes the entire tree.
 * The caller needs to hold the dm for the duration of the call.
 */

static void
devfs_purge(struct devfs_mount *dm, struct devfs_dirent *dd)
{
	struct devfs_dirent *de;

	sx_assert(&dm->dm_lock, SX_XLOCKED);

	DEVFS_DE_HOLD(dd);
	for (;;) {
		/*
		 * Use TAILQ_LAST() to remove "." and ".." last.
		 * We might need ".." to resolve a path in
		 * devfs_dir_unref_de().
*/
		de = TAILQ_LAST(&dd->de_dlist, devfs_dlist_head);
		if (de == NULL)
			break;
		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
		if (de->de_flags & DE_USER)
			devfs_dir_unref_de(dm, dd);
		/* Sub-directories are purged recursively, the rest deleted. */
		if (de->de_flags & (DE_DOT | DE_DOTDOT))
			devfs_delete(dm, de, DEVFS_DEL_NORECURSE);
		else if (de->de_dirent->d_type == DT_DIR)
			devfs_purge(dm, de);
		else
			devfs_delete(dm, de, DEVFS_DEL_NORECURSE);
	}
	/*
	 * Release the hold taken on entry.  If dd survives and has not been
	 * doomed in the meantime, delete it now.
	 */
	if (DEVFS_DE_DROP(dd))
		devfs_dirent_free(dd);
	else if ((dd->de_flags & DE_DOOMED) == 0)
		devfs_delete(dm, dd, DEVFS_DEL_NORECURSE);
}

/*
 * Each cdev_priv has an array of pointers to devfs_dirent which is indexed
 * by the mount points dm_idx.
 * This function extends the array when necessary, taking into account that
 * the default array is 1 element and not malloc'ed.
 */
static void
devfs_metoo(struct cdev_priv *cdp, struct devfs_mount *dm)
{
	struct devfs_dirent **dep;
	int siz;

	/*
	 * The new array is allocated before dev_lock is taken; the check
	 * below handles the case where the array grew in the meantime.
	 */
	siz = (dm->dm_idx + 1) * sizeof *dep;
	dep = malloc(siz, M_DEVFS2, M_WAITOK | M_ZERO);
	dev_lock();
	if (dm->dm_idx <= cdp->cdp_maxdirent) {
		/* We got raced */
		dev_unlock();
		free(dep, M_DEVFS2);
		return;
	}
	memcpy(dep, cdp->cdp_dirents, (cdp->cdp_maxdirent + 1) * sizeof *dep);
	/* The old array is freed only when it was itself malloc-ed. */
	if (cdp->cdp_maxdirent > 0)
		free(cdp->cdp_dirents, M_DEVFS2);
	cdp->cdp_dirents = dep;
	/*
	 * XXX: if malloc told us how much we actually got this could
	 * XXX: be optimized.
	 */
	cdp->cdp_maxdirent = dm->dm_idx;
	dev_unlock();
}

/*
 * The caller needs to hold the dm for the duration of the call.
 *
 * One call performs at most one change for this mount: it removes the
 * dirent of a device that is gone (or of any device when cleanup is set),
 * releases an inactive cdev that is no longer in use, or creates the dirent
 * for one device that has none yet.  After such a change it returns 1 and
 * the caller is expected to call again; 0 is returned once a complete pass
 * over cdevp_list found nothing to do.  dev_lock is dropped before each
 * change is carried out, which is presumably why the list walk is restarted
 * instead of continued -- TODO confirm.
 */
static int
devfs_populate_loop(struct devfs_mount *dm, int cleanup)
{
	struct cdev_priv *cdp;
	struct devfs_dirent *de;
	struct devfs_dirent *dd;
	struct cdev *pdev;
	int de_flags, j;
	char *q, *s;

	sx_assert(&dm->dm_lock, SX_XLOCKED);
	dev_lock();
	TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) {

		KASSERT(cdp->cdp_dirents != NULL, ("NULL cdp_dirents"));

		/*
		 * If we are unmounting, or the device has been destroyed,
		 * clean up our dirent.
		 */
		if ((cleanup || !(cdp->cdp_flags & CDP_ACTIVE)) &&
		    dm->dm_idx <= cdp->cdp_maxdirent &&
		    cdp->cdp_dirents[dm->dm_idx] != NULL) {
			de = cdp->cdp_dirents[dm->dm_idx];
			cdp->cdp_dirents[dm->dm_idx] = NULL;
			KASSERT(cdp == de->de_cdp,
			    ("%s %d %s %p %p", __func__, __LINE__,
			    cdp->cdp_c.si_name, cdp, de->de_cdp));
			KASSERT(de->de_dir != NULL, ("Null de->de_dir"));
			dev_unlock();

			/*
			 * The inode number is cleared before devfs_delete()
			 * so that it is not handed back by that function.
			 */
			TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list);
			de->de_cdp = NULL;
			de->de_inode = 0;
			devfs_delete(dm, de, 0);
			dev_lock();
			cdp->cdp_inuse--;
			dev_unlock();
			return (1);
		}
		/*
		 * GC any lingering devices
		 */
		if (!(cdp->cdp_flags & CDP_ACTIVE)) {
			if (cdp->cdp_inuse > 0)
				continue;
			TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
			dev_unlock();
			dev_rel(&cdp->cdp_c);
			return (1);
		}
		/*
		 * Don't create any new dirents if we are unmounting
		 */
		if (cleanup)
			continue;
		KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!"));

		/* This mount already has a dirent for the device. */
		if (dm->dm_idx <= cdp->cdp_maxdirent &&
		    cdp->cdp_dirents[dm->dm_idx] != NULL) {
			de = cdp->cdp_dirents[dm->dm_idx];
			KASSERT(cdp == de->de_cdp, ("inconsistent cdp"));
			continue;
		}

		cdp->cdp_inuse++;
		dev_unlock();

		/* Make sure the per-mount array has a slot for dm_idx. */
		if (dm->dm_idx > cdp->cdp_maxdirent)
			devfs_metoo(cdp, dm);

		/*
		 * Walk si_name one path component at a time, creating any
		 * missing intermediate directory.  On exit from the loop
		 * s..q delimits the final component and dd is its directory.
		 */
		dd = dm->dm_rootdir;
		s = cdp->cdp_c.si_name;
		for (;;) {
			for (q = s; *q != '/' && *q != '\0'; q++)
				continue;
			if (*q != '/')
				break;
			de = devfs_find(dd, s, q - s, 0);
			if (de == NULL)
				de = devfs_vmkdir(dm, s, q - s, dd, 0);
			else if (de->de_dirent->d_type == DT_LNK) {
				/* A symlink has this name; use a directory. */
				de = devfs_find(dd, s, q - s, DT_DIR);
				if (de == NULL)
					de = devfs_vmkdir(dm, s, q - s, dd, 0);
				de->de_flags |= DE_COVERED;
			}
			s = q + 1;
			dd = de;
			KASSERT(dd->de_dirent->d_type == DT_DIR &&
			    (dd->de_flags & (DE_DOT | DE_DOTDOT)) == 0,
			    ("%s: invalid directory (si_name=%s)",
			    __func__, cdp->cdp_c.si_name));

		}
		/* An existing symlink of the same name marks us DE_COVERED. */
		de_flags = 0;
		de = devfs_find(dd, s, q - s, DT_LNK);
		if (de != NULL)
			de_flags |= DE_COVERED;

		de = devfs_newdirent(s, q - s);
		if (cdp->cdp_c.si_flags & SI_ALIAS) {
			/* Aliases become symlinks to the parent device name. */
			de->de_uid = 0;
			de->de_gid = 0;
			de->de_mode = 0755;
			de->de_dirent->d_type = DT_LNK;
			pdev = cdp->cdp_c.si_parent;
			j = strlen(pdev->si_name) + 1;
			de->de_symlink = malloc(j, M_DEVFS, M_WAITOK);
			bcopy(pdev->si_name, de->de_symlink, j);
		} else {
			de->de_uid = cdp->cdp_c.si_uid;
			de->de_gid = cdp->cdp_c.si_gid;
			de->de_mode = cdp->cdp_c.si_mode;
			de->de_dirent->d_type = DT_CHR;
		}
		de->de_flags |= de_flags;
		de->de_inode = cdp->cdp_inode;
		de->de_cdp = cdp;
#ifdef MAC
		mac_devfs_create_device(cdp->cdp_c.si_cred, dm->dm_mount,
		    &cdp->cdp_c, de);
#endif
		de->de_dir = dd;
		TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
		devfs_rules_apply(dm, de);
		dev_lock();
		/* XXX: could check that cdp is still active here */
		KASSERT(cdp->cdp_dirents[dm->dm_idx] == NULL,
		    ("%s %d\n", __func__, __LINE__));
		cdp->cdp_dirents[dm->dm_idx] = de;
		KASSERT(de->de_cdp != (void *)0xdeadc0de,
		    ("%s %d\n", __func__, __LINE__));
		dev_unlock();
		return (1);
	}
	dev_unlock();
	return (0);
}

/*
 * The caller needs to hold the dm for the duration of the call.
 *
 * Brings the mount in line with the global device list.  Nothing is done
 * when dm_generation already equals devfs_generation.  The generation is
 * sampled before the loop runs and stored in the mount afterwards.
 */
void
devfs_populate(struct devfs_mount *dm)
{
	unsigned gen;

	sx_assert(&dm->dm_lock, SX_XLOCKED);
	gen = devfs_generation;
	if (dm->dm_generation == gen)
		return;
	while (devfs_populate_loop(dm, 0))
		continue;
	dm->dm_generation = gen;
}

/*
 * The caller needs to hold the dm for the duration of the call.
 *
 * Runs the populate loop in cleanup mode until it reports no more work,
 * then purges what is left of the tree starting at the root directory.
 */
void
devfs_cleanup(struct devfs_mount *dm)
{

	sx_assert(&dm->dm_lock, SX_XLOCKED);
	while (devfs_populate_loop(dm, 1))
		continue;
	devfs_purge(dm, dm->dm_rootdir);
}

/*
 * devfs_create() and devfs_destroy() are called from kern_conf.c and
 * in both cases the devlock() mutex is held, so no further locking
 * is necesary and no sleeping allowed.
*/

void
devfs_create(struct cdev *dev)
{
	struct cdev_priv *cdp;

	mtx_assert(&devmtx, MA_OWNED);
	cdp = cdev2priv(dev);
	/*
	 * Activate the device: flag it CDP_ACTIVE, assign an inode number
	 * from devfs_inos, take a reference and append it to cdevp_list.
	 */
	cdp->cdp_flags |= CDP_ACTIVE;
	cdp->cdp_inode = alloc_unrl(devfs_inos);
	dev_refl(dev);
	TAILQ_INSERT_TAIL(&cdevp_list, cdp, cdp_list);
	/* Bumped so that devfs_populate() sees a changed generation. */
	devfs_generation++;
}

void
devfs_destroy(struct cdev *dev)
{
	struct cdev_priv *cdp;

	mtx_assert(&devmtx, MA_OWNED);
	cdp = cdev2priv(dev);
	/*
	 * Only the CDP_ACTIVE flag is cleared here; the entry is not taken
	 * off cdevp_list by this function.
	 */
	cdp->cdp_flags &= ~CDP_ACTIVE;
	devfs_generation++;
}

/* Allocate an inode number from the devfs_inos unit-number allocator. */
ino_t
devfs_alloc_cdp_inode(void)
{

	return (alloc_unr(devfs_inos));
}

/* Return an inode number to devfs_inos; a value of 0 is ignored. */
void
devfs_free_cdp_inode(ino_t ino)
{

	if (ino > 0)
		free_unr(devfs_inos, ino);
}

/*
 * SYSINIT hook: create the inode number allocator covering the range
 * DEVFS_ROOTINO + 1 up to INT_MAX, passing devmtx as its mutex.
 */
static void
devfs_devs_init(void *junk __unused)
{

	devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx);
}

SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL);
Index: stable/9/sys/fs/nfsserver/nfs_nfsdkrpc.c
===================================================================
--- stable/9/sys/fs/nfsserver/nfs_nfsdkrpc.c (revision 273911)
+++ stable/9/sys/fs/nfsserver/nfs_nfsdkrpc.c (revision 273912)
@@ -1,519 +1,522 @@
/*-
 * Copyright (c) 1989, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); +#include +#include + #include "opt_inet6.h" #include "opt_kgssapi.h" #include #include #include #include #include #include NFSDLOCKMUTEX; NFSV4ROOTLOCKMUTEX; struct nfsv4lock nfsd_suspend_lock; /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. 
*/
int newnfs_nfsv3_procid[NFS_V3NPROCS] = {
	NFSPROC_NULL,
	NFSPROC_GETATTR,
	NFSPROC_SETATTR,
	NFSPROC_NOOP,
	NFSPROC_LOOKUP,
	NFSPROC_READLINK,
	NFSPROC_READ,
	NFSPROC_NOOP,
	NFSPROC_WRITE,
	NFSPROC_CREATE,
	NFSPROC_REMOVE,
	NFSPROC_RENAME,
	NFSPROC_LINK,
	NFSPROC_SYMLINK,
	NFSPROC_MKDIR,
	NFSPROC_RMDIR,
	NFSPROC_READDIR,
	NFSPROC_FSSTAT,
	NFSPROC_NOOP,
	NFSPROC_NOOP,
	NFSPROC_NOOP,
	NFSPROC_NOOP,
};


SYSCTL_DECL(_vfs_nfsd);

SVCPOOL		*nfsrvd_pool;

static int	nfs_privport = 0;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, nfs_privport, CTLFLAG_RW,
    &nfs_privport, 0,
    "Only allow clients using a privileged port for NFSv2 and 3");

static int	nfs_minvers = NFS_VER2;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_nfsvers, CTLFLAG_RW,
    &nfs_minvers, 0, "The lowest version of NFS handled by the server");

static int	nfs_maxvers = NFS_VER4;
SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_nfsvers, CTLFLAG_RW,
    &nfs_maxvers, 0, "The highest version of NFS handled by the server");

static int nfs_proc(struct nfsrv_descript *, u_int32_t, SVCXPRT *xprt,
    struct nfsrvcache **);

extern u_long sb_max_adj;
extern int newnfs_numnfsd;
extern struct proc *nfsd_master_proc;

/*
 * NFS server system calls
 *
 * Service routine for one RPC request.  It validates the procedure number
 * for the request version, optionally enforces a privileged source port,
 * obtains the caller credentials, runs the request through nfs_proc() and
 * then sends, rejects or drops the reply.
 *
 * Every path calls svc_freereq() exactly once before reaching the out
 * label.  Once the request mbuf chain has been moved from rq_args into
 * nd_mrep it is freed on every path as well.
 */
static void
nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt)
{
	struct nfsrv_descript nd;
	struct nfsrvcache *rp = NULL;
	int cacherep, credflavor;

	memset(&nd, 0, sizeof(nd));
	/*
	 * NFSv2 procedure numbers are translated through
	 * newnfs_nfsv3_procid[], NFSv3 numbers are used unchanged, and for
	 * any other version only NULL and COMPOUND are accepted.
	 */
	if (rqst->rq_vers == NFS_VER2) {
		if (rqst->rq_proc > NFSV2PROC_STATFS) {
			svcerr_noproc(rqst);
			svc_freereq(rqst);
			goto out;
		}
		nd.nd_procnum = newnfs_nfsv3_procid[rqst->rq_proc];
		nd.nd_flag = ND_NFSV2;
	} else if (rqst->rq_vers == NFS_VER3) {
		if (rqst->rq_proc >= NFS_V3NPROCS) {
			svcerr_noproc(rqst);
			svc_freereq(rqst);
			goto out;
		}
		nd.nd_procnum = rqst->rq_proc;
		nd.nd_flag = ND_NFSV3;
	} else {
		if (rqst->rq_proc != NFSPROC_NULL &&
		    rqst->rq_proc != NFSV4PROC_COMPOUND) {
			svcerr_noproc(rqst);
			svc_freereq(rqst);
			goto out;
		}
		nd.nd_procnum = rqst->rq_proc;
		nd.nd_flag = ND_NFSV4;
	}

	/*
	 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 -
	 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP
	 * mounts.
	 */
	/* The request mbufs are moved out of the svc_req into nd. */
	nd.nd_mrep = rqst->rq_args;
	rqst->rq_args = NULL;
	newnfs_realign(&nd.nd_mrep, M_WAITOK);
	nd.nd_md = nd.nd_mrep;
	nd.nd_dpos = mtod(nd.nd_md, caddr_t);
	nd.nd_nam = svc_getrpccaller(rqst);
	nd.nd_nam2 = rqst->rq_addr;
	nd.nd_mreq = NULL;
	nd.nd_cred = NULL;

	/* The port check is skipped for NFSv4 and for the NULL procedure. */
	if (nfs_privport && (nd.nd_flag & ND_NFSV4) == 0) {
		/* Check if source port is privileged */
		u_short port;
		struct sockaddr *nam = nd.nd_nam;
		struct sockaddr_in *sin;

		sin = (struct sockaddr_in *)nam;
		/*
		 * INET/INET6 - same code:
		 *    sin_port and sin6_port are at same offset
		 */
		port = ntohs(sin->sin_port);
		if (port >= IPPORT_RESERVED &&
		    nd.nd_procnum != NFSPROC_NULL) {
#ifdef INET6
			char b6[INET6_ADDRSTRLEN];
#if defined(KLD_MODULE)
			/* Do not use ip6_sprintf: the nfs module should work without INET6. */
#define ip6_sprintf(buf, a) \
    (sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x", \
	(a)->s6_addr16[0], (a)->s6_addr16[1], \
	(a)->s6_addr16[2], (a)->s6_addr16[3], \
	(a)->s6_addr16[4], (a)->s6_addr16[5], \
	(a)->s6_addr16[6], (a)->s6_addr16[7]), \
    (buf))
#endif
#endif
			printf("NFS request from unprivileged port (%s:%d)\n",
#ifdef INET6
			    sin->sin_family == AF_INET6 ?
			    ip6_sprintf(b6, &satosin6(sin)->sin6_addr) :
#if defined(KLD_MODULE)
#undef ip6_sprintf
#endif
#endif
			    inet_ntoa(sin->sin_addr), port);
			svcerr_weakauth(rqst);
			svc_freereq(rqst);
			m_freem(nd.nd_mrep);
			goto out;
		}
	}

	if (nd.nd_procnum != NFSPROC_NULL) {
		if (!svc_getcred(rqst, &nd.nd_cred, &credflavor)) {
			svcerr_weakauth(rqst);
			svc_freereq(rqst);
			m_freem(nd.nd_mrep);
			goto out;
		}

		/* Set the flag based on credflavor */
		if (credflavor == RPCSEC_GSS_KRB5) {
			nd.nd_flag |= ND_GSS;
		} else if (credflavor == RPCSEC_GSS_KRB5I) {
			nd.nd_flag |= (ND_GSS | ND_GSSINTEGRITY);
		} else if (credflavor == RPCSEC_GSS_KRB5P) {
			nd.nd_flag |= (ND_GSS | ND_GSSPRIVACY);
		} else if (credflavor != AUTH_SYS) {
			/* Any other flavor is rejected. */
			svcerr_weakauth(rqst);
			svc_freereq(rqst);
			m_freem(nd.nd_mrep);
			goto out;
		}

#ifdef MAC
		mac_cred_associate_nfsd(nd.nd_cred);
#endif
		/*
		 * Get a refcnt (shared lock) on nfsd_suspend_lock.
		 * NFSSVC_SUSPENDNFSD will take an exclusive lock on
		 * nfsd_suspend_lock to suspend these threads.
		 * This must be done here, before the check of
		 * nfsv4root exports by nfsvno_v4rootexport().
		 */
		NFSLOCKV4ROOTMUTEX();
		nfsv4_getref(&nfsd_suspend_lock, NULL, NFSV4ROOTLOCKMUTEXPTR,
		    NULL);
		NFSUNLOCKV4ROOTMUTEX();

		if ((nd.nd_flag & ND_NFSV4) != 0) {
			nd.nd_repstat = nfsvno_v4rootexport(&nd);
			if (nd.nd_repstat != 0) {
				/* The reference is released before bailing. */
				NFSLOCKV4ROOTMUTEX();
				nfsv4_relref(&nfsd_suspend_lock);
				NFSUNLOCKV4ROOTMUTEX();
				svcerr_weakauth(rqst);
				svc_freereq(rqst);
				m_freem(nd.nd_mrep);
				goto out;
			}
		}

		cacherep = nfs_proc(&nd, rqst->rq_xid, xprt, &rp);
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsd_suspend_lock);
		NFSUNLOCKV4ROOTMUTEX();
	} else {
		/* NULL procedure: reply with an empty mbuf. */
		NFSMGET(nd.nd_mreq);
		nd.nd_mreq->m_len = 0;
		cacherep = RC_REPLY;
	}
	if (nd.nd_mrep != NULL)
		m_freem(nd.nd_mrep);

	if (nd.nd_cred != NULL)
		crfree(nd.nd_cred);

	/* A dropped request gets no reply at all. */
	if (cacherep == RC_DROPIT) {
		if (nd.nd_mreq != NULL)
			m_freem(nd.nd_mreq);
		svc_freereq(rqst);
		goto out;
	}

	if (nd.nd_mreq == NULL) {
		svcerr_decode(rqst);
		svc_freereq(rqst);
		goto out;
	}

	/*
	 * When NFSERR_AUTHERR is set in nd_repstat, the remaining bits are
	 * passed to svcerr_auth() and the reply mbuf is freed unsent.
	 */
	if (nd.nd_repstat & NFSERR_AUTHERR) {
		svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR);
		if (nd.nd_mreq != NULL)
			m_freem(nd.nd_mreq);
	} else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) {
		svcerr_systemerr(rqst);
	}
	/* Tell the reply cache about the entry nfs_proc() handed back. */
	if (rp != NULL) {
		nfsrvd_sentcache(rp, (rqst->rq_reply_seq != 0 ||
		    SVC_ACK(xprt, NULL)), rqst->rq_reply_seq);
	}
	svc_freereq(rqst);

out:
	NFSEXITCODE(0);
}

/*
 * Check the cache and, optionally, do the RPC.
 * Return the appropriate cache response.
 *
 * On return *rpp is NULL unless the RPC was actually performed, in which
 * case it holds the result of nfsrvd_updatecache().
 */
static int
nfs_proc(struct nfsrv_descript *nd, u_int32_t xid, SVCXPRT *xprt,
    struct nfsrvcache **rpp)
{
	struct thread *td = curthread;
	int cacherep = RC_DOIT, isdgram;
	uint32_t ack;

	*rpp = NULL;
	/* A NULL nd_nam2 marks a stream socket, anything else a datagram. */
	if (nd->nd_nam2 == NULL) {
		nd->nd_flag |= ND_STREAMSOCK;
		isdgram = 0;
	} else {
		isdgram = 1;
	}

	/*
	 * Two cases:
	 * 1 - For NFSv2 over UDP, if we are near our malloc/mget
	 *     limit, just drop the request. There is no
	 *     NFSERR_RESOURCE or NFSERR_DELAY for NFSv2 and the
	 *     client will timeout/retry over UDP in a little while.
* 2 - nd_repstat == 0 && nd_mreq == NULL, which
	 *     means a normal nfs rpc, so check the cache
	 */
	if ((nd->nd_flag & ND_NFSV2) && nd->nd_nam2 != NULL &&
	    nfsrv_mallocmget_limit()) {
		cacherep = RC_DROPIT;
	} else {
		/*
		 * For NFSv3, play it safe and assume that the client is
		 * doing retries on the same TCP connection.
		 */
		if ((nd->nd_flag & (ND_NFSV4 | ND_STREAMSOCK)) ==
		    ND_STREAMSOCK)
			nd->nd_flag |= ND_SAMETCPCONN;
		nd->nd_retxid = xid;
		nd->nd_tcpconntime = NFSD_MONOSEC;
		nd->nd_sockref = xprt->xp_sockref;
		cacherep = nfsrvd_getcache(nd);
		/* Trim the cache using the value reported by SVC_ACK(). */
		ack = 0;
		SVC_ACK(xprt, &ack);
		nfsrc_trimcache(xprt->xp_sockref, ack, 0);
	}

	/*
	 * Handle the request. There are three cases.
	 * RC_DOIT - do the RPC
	 * RC_REPLY - return the reply already created
	 * RC_DROPIT - just throw the request away
	 */
	if (cacherep == RC_DOIT) {
		nfsrvd_dorpc(nd, isdgram, td);
		if (nd->nd_repstat == NFSERR_DONTREPLY)
			cacherep = RC_DROPIT;
		else
			cacherep = RC_REPLY;
		*rpp = nfsrvd_updatecache(nd);
	}

	NFSEXITCODE2(0, nd);
	return (cacherep);
}

/*
 * Loss callback registered for stream sockets by nfsrvd_addsock(): trims
 * the reply cache for the transport, passing 1 as the last argument of
 * nfsrc_trimcache() where the request path passes 0.
 */
static void
nfssvc_loss(SVCXPRT *xprt)
{
	uint32_t ack;

	ack = 0;
	SVC_ACK(xprt, &ack);
	nfsrc_trimcache(xprt->xp_sockref, ack, 1);
}

/*
 * Adds a socket to the list for servicing by nfsds.
 *
 * Returns the error from soreserve(), or 0.
 * NOTE(review): when no transport could be created (xprt is NULL) error is
 * still 0 at that point, so the caller sees success -- confirm that this
 * is intended.
 */
int
nfsrvd_addsock(struct file *fp)
{
	int siz;
	struct socket *so;
	int error = 0;
	SVCXPRT *xprt;
	static u_int64_t sockref = 0;

	so = fp->f_data;

	siz = sb_max_adj;
	error = soreserve(so, siz, siz);
	if (error)
		goto out;

	/*
	 * Steal the socket from userland so that it doesn't close
	 * unexpectedly.
	 */
	if (so->so_type == SOCK_DGRAM)
		xprt = svc_dg_create(nfsrvd_pool, so, 0, 0);
	else
		xprt = svc_vc_create(nfsrvd_pool, so, 0, 0);
	if (xprt) {
		/* The file no longer refers to the socket from here on. */
		fp->f_ops = &badfileops;
		fp->f_data = NULL;
		xprt->xp_sockref = ++sockref;
		/* Register each NFS version allowed by the two sysctls. */
		if (nfs_minvers == NFS_VER2)
			svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program,
			    NULL);
		if (nfs_minvers <= NFS_VER3 && nfs_maxvers >= NFS_VER3)
			svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program,
			    NULL);
		if (nfs_maxvers >= NFS_VER4)
			svc_reg(xprt, NFS_PROG, NFS_VER4, nfssvc_program,
			    NULL);
		if (so->so_type == SOCK_STREAM)
			svc_loss_reg(xprt, nfssvc_loss);
		SVC_RELEASE(xprt);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
 * until it is killed by a signal.
 *
 * Returns the error from copyinstr() of the principal name, or 0.
 */
int
nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
{
	char principal[MAXHOSTNAMELEN + 5];
	int error = 0;
	bool_t ret2, ret3, ret4;

	error = copyinstr(args->principal, principal, sizeof (principal),
	    NULL);
	if (error)
		goto out;

	/*
	 * Only the first nfsd actually does any work. The RPC code
	 * adds threads to it as needed. Any extra processes offered
	 * by nfsd just exit. If nfsd is new enough, it will call us
	 * once with a structure that specifies how many threads to
	 * use.
	 */
	NFSD_LOCK();
	if (newnfs_numnfsd == 0) {
		newnfs_numnfsd++;

		NFSD_UNLOCK();

		/* An empty string implies AUTH_SYS only. */
		if (principal[0] != '\0') {
			ret2 = rpc_gss_set_svc_name_call(principal,
			    "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER2);
			ret3 = rpc_gss_set_svc_name_call(principal,
			    "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER3);
			ret4 = rpc_gss_set_svc_name_call(principal,
			    "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER4);

			/* A failure is only logged; startup continues. */
			if (!ret2 || !ret3 || !ret4)
				printf("nfsd: can't register svc name\n");
		}

		nfsrvd_pool->sp_minthreads = args->minthreads;
		nfsrvd_pool->sp_maxthreads = args->maxthreads;

		svc_run(nfsrvd_pool);

		/* Reached once svc_run() has returned. */
		if (principal[0] != '\0') {
			rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2);
			rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3);
			rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER4);
		}

		NFSD_LOCK();
		newnfs_numnfsd--;
		nfsrvd_init(1);
	}
	NFSD_UNLOCK();

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Initialize the data structures for the server.
 * Handshake with any new nfsds starting up to avoid any chance of
 * corruption.
 *
 * Entered with the NFSD lock held (asserted) and returns with it held.
 * The lock is released around destruction of the old pool, which happens
 * only when terminating is non-zero, and around creation of the new one.
 */
void
nfsrvd_init(int terminating)
{

	NFSD_LOCK_ASSERT();

	if (terminating) {
		nfsd_master_proc = NULL;
		NFSD_UNLOCK();
		svcpool_destroy(nfsrvd_pool);
		nfsrvd_pool = NULL;
		NFSD_LOCK();
	}

	NFSD_UNLOCK();

	/* A fresh pool is created on every call, terminating or not. */
	nfsrvd_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsd));
	nfsrvd_pool->sp_rcache = NULL;
	nfsrvd_pool->sp_assign = fhanew_assign;
	nfsrvd_pool->sp_done = fha_nd_complete;

	NFSD_LOCK();
}

Index: stable/9/sys/fs/ntfs
===================================================================
--- stable/9/sys/fs/ntfs (revision 273911)
+++ stable/9/sys/fs/ntfs (revision 273912)
Property changes on: stable/9/sys/fs/ntfs
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
 Merged /head/sys/fs/ntfs:r263710,273377-273378,273423,273455
Index: stable/9/sys/fs
===================================================================
--- stable/9/sys/fs (revision 273911)
+++ stable/9/sys/fs (revision 273912)
Property changes on: stable/9/sys/fs
___________________________________________________________________
Modified:
svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/fs:r263710,273377-273378,273423,273455 Index: stable/9/sys/geom/geom_kern.c =================================================================== --- stable/9/sys/geom/geom_kern.c (revision 273911) +++ stable/9/sys/geom/geom_kern.c (revision 273912) @@ -1,224 +1,224 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_GEOM, "GEOM", "Geom data structures"); struct sx topology_lock; static struct proc *g_proc; static struct thread *g_up_td; static struct thread *g_down_td; static struct thread *g_event_td; int g_debugflags; int g_collectstats = 1; int g_shutdown; /* * G_UP and G_DOWN are the two threads which push I/O through the * stack. * * Things are processed in a FIFO order, but these threads could be * part of I/O prioritization by deciding which bios/bioqs to service * in what order. * * We have only one thread in each direction; it is believed that until * a very non-trivial workload in the UP/DOWN path this will be enough, * but more than one can actually be run without problems. * * Holding the "mymutex" is a debugging feature: It prevents people * from sleeping in the UP/DOWN I/O path by mistake or design (doing * so almost invariably results in deadlocks since it stalls all I/O * processing in the given direction). 
*/ static void g_up_procbody(void *arg) { mtx_assert(&Giant, MA_NOTOWNED); thread_lock(g_up_td); sched_prio(g_up_td, PRIBIO); thread_unlock(g_up_td); for(;;) { g_io_schedule_up(g_up_td); } } static void g_down_procbody(void *arg) { mtx_assert(&Giant, MA_NOTOWNED); thread_lock(g_down_td); sched_prio(g_down_td, PRIBIO); thread_unlock(g_down_td); for(;;) { g_io_schedule_down(g_down_td); } } static void g_event_procbody(void *arg) { mtx_assert(&Giant, MA_NOTOWNED); thread_lock(g_event_td); sched_prio(g_event_td, PRIBIO); thread_unlock(g_event_td); g_run_events(); /* NOTREACHED */ } static void geom_shutdown(void *foo __unused) { g_shutdown = 1; } void g_init(void) { g_trace(G_T_TOPOLOGY, "g_ignition"); sx_init(&topology_lock, "GEOM topology"); g_io_init(); g_event_init(); g_ctl_init(); mtx_lock(&Giant); kproc_kthread_add(g_event_procbody, NULL, &g_proc, &g_event_td, RFHIGHPID, 0, "geom", "g_event"); kproc_kthread_add(g_up_procbody, NULL, &g_proc, &g_up_td, RFHIGHPID, 0, "geom", "g_up"); kproc_kthread_add(g_down_procbody, NULL, &g_proc, &g_down_td, RFHIGHPID, 0, "geom", "g_down"); mtx_unlock(&Giant); EVENTHANDLER_REGISTER(shutdown_pre_sync, geom_shutdown, NULL, SHUTDOWN_PRI_FIRST); } static int sysctl_kern_geom_conftxt(SYSCTL_HANDLER_ARGS) { int error; struct sbuf *sb; sb = sbuf_new_auto(); g_waitfor_event(g_conftxt, sb, M_WAITOK, NULL); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return error; } static int sysctl_kern_geom_confdot(SYSCTL_HANDLER_ARGS) { int error; struct sbuf *sb; sb = sbuf_new_auto(); g_waitfor_event(g_confdot, sb, M_WAITOK, NULL); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return error; } static int sysctl_kern_geom_confxml(SYSCTL_HANDLER_ARGS) { int error; struct sbuf *sb; sb = sbuf_new_auto(); g_waitfor_event(g_confxml, sb, M_WAITOK, NULL); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return error; } SYSCTL_NODE(_kern, OID_AUTO, geom, CTLFLAG_RW, 0, 
"GEOMetry management"); SYSCTL_PROC(_kern_geom, OID_AUTO, confxml, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_kern_geom_confxml, "", "Dump the GEOM config in XML"); SYSCTL_PROC(_kern_geom, OID_AUTO, confdot, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_kern_geom_confdot, "", "Dump the GEOM config in dot"); SYSCTL_PROC(_kern_geom, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_kern_geom_conftxt, "", "Dump the GEOM config in txt"); TUNABLE_INT("kern.geom.debugflags", &g_debugflags); SYSCTL_INT(_kern_geom, OID_AUTO, debugflags, CTLFLAG_RW, &g_debugflags, 0, "Set various trace levels for GEOM debugging"); SYSCTL_INT(_kern_geom, OID_AUTO, collectstats, CTLFLAG_RW, &g_collectstats, 0, "Control statistics collection on GEOM providers and consumers"); SYSCTL_INT(_debug_sizeof, OID_AUTO, g_class, CTLFLAG_RD, - 0, sizeof(struct g_class), "sizeof(struct g_class)"); + SYSCTL_NULL_INT_PTR, sizeof(struct g_class), "sizeof(struct g_class)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, g_geom, CTLFLAG_RD, - 0, sizeof(struct g_geom), "sizeof(struct g_geom)"); + SYSCTL_NULL_INT_PTR, sizeof(struct g_geom), "sizeof(struct g_geom)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, g_provider, CTLFLAG_RD, - 0, sizeof(struct g_provider), "sizeof(struct g_provider)"); + SYSCTL_NULL_INT_PTR, sizeof(struct g_provider), "sizeof(struct g_provider)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, g_consumer, CTLFLAG_RD, - 0, sizeof(struct g_consumer), "sizeof(struct g_consumer)"); + SYSCTL_NULL_INT_PTR, sizeof(struct g_consumer), "sizeof(struct g_consumer)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, g_bioq, CTLFLAG_RD, - 0, sizeof(struct g_bioq), "sizeof(struct g_bioq)"); + SYSCTL_NULL_INT_PTR, sizeof(struct g_bioq), "sizeof(struct g_bioq)"); Index: stable/9/sys/kern/kern_cpuset.c =================================================================== --- stable/9/sys/kern/kern_cpuset.c (revision 273911) +++ stable/9/sys/kern/kern_cpuset.c (revision 273912) @@ -1,1185 +1,1185 @@ /*- * Copyright (c) 2008, Jeffrey Roberson * All 
rights reserved. * * Copyright (c) 2008 Nokia Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* DDB */ /* * cpusets provide a mechanism for creating and manipulating sets of * processors for the purpose of constraining the scheduling of threads to * specific processors. * * Each process belongs to an identified set, by default this is set 1. Each * thread may further restrict the cpus it may run on to a subset of this * named set. 
This creates an anonymous set which other threads and processes * may not join by number. * * The named set is referred to herein as the 'base' set to avoid ambiguity. * This set is usually a child of a 'root' set while the anonymous set may * simply be referred to as a mask. In the syscall api these are referred to * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here. * * Threads inherit their set from their creator whether it be anonymous or * not. This means that anonymous sets are immutable because they may be * shared. To modify an anonymous set a new set is created with the desired * mask and the same parent as the existing anonymous set. This gives the * illusion of each thread having a private mask. * * Via the syscall apis a user may ask to retrieve or modify the root, base, * or mask that is discovered via a pid, tid, or setid. Modifying a set * modifies all numbered and anonymous child sets to comply with the new mask. * Modifying a pid or tid's mask applies only to that tid but must still * exist within the assigned parent set. * * A thread may not be assigned to a group separate from other threads in * the process. This is to remove ambiguity when the setid is queried with * a pid argument. There is no other technical limitation. * * This somewhat complex arrangement is intended to make it easy for * applications to query available processors and bind their threads to * specific processors while also allowing administrators to dynamically * reprovision by changing sets which apply to groups of processes. * * A simple application should not concern itself with sets at all and * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id * meaning 'curthread'. It may query available cpus for that tid with a * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...). 
*/ static uma_zone_t cpuset_zone; static struct mtx cpuset_lock; static struct setlist cpuset_ids; static struct unrhdr *cpuset_unr; static struct cpuset *cpuset_zero; /* Return the size of cpuset_t at the kernel level */ SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD, - 0, sizeof(cpuset_t), "sizeof(cpuset_t)"); + SYSCTL_NULL_INT_PTR, sizeof(cpuset_t), "sizeof(cpuset_t)"); cpuset_t *cpuset_root; /* * Acquire a reference to a cpuset, all pointers must be tracked with refs. */ struct cpuset * cpuset_ref(struct cpuset *set) { refcount_acquire(&set->cs_ref); return (set); } /* * Walks up the tree from 'set' to find the root. Returns the root * referenced. */ static struct cpuset * cpuset_refroot(struct cpuset *set) { for (; set->cs_parent != NULL; set = set->cs_parent) if (set->cs_flags & CPU_SET_ROOT) break; cpuset_ref(set); return (set); } /* * Find the first non-anonymous set starting from 'set'. Returns this set * referenced. May return the passed in set with an extra ref if it is * not anonymous. */ static struct cpuset * cpuset_refbase(struct cpuset *set) { if (set->cs_id == CPUSET_INVALID) set = set->cs_parent; cpuset_ref(set); return (set); } /* * Release a reference in a context where it is safe to allocate. */ void cpuset_rel(struct cpuset *set) { cpusetid_t id; if (refcount_release(&set->cs_ref) == 0) return; mtx_lock_spin(&cpuset_lock); LIST_REMOVE(set, cs_siblings); id = set->cs_id; if (id != CPUSET_INVALID) LIST_REMOVE(set, cs_link); mtx_unlock_spin(&cpuset_lock); cpuset_rel(set->cs_parent); uma_zfree(cpuset_zone, set); if (id != CPUSET_INVALID) free_unr(cpuset_unr, id); } /* * Deferred release must be used when in a context that is not safe to * allocate/free. This places any unreferenced sets on the list 'head'. 
*/ static void cpuset_rel_defer(struct setlist *head, struct cpuset *set) { if (refcount_release(&set->cs_ref) == 0) return; mtx_lock_spin(&cpuset_lock); LIST_REMOVE(set, cs_siblings); if (set->cs_id != CPUSET_INVALID) LIST_REMOVE(set, cs_link); LIST_INSERT_HEAD(head, set, cs_link); mtx_unlock_spin(&cpuset_lock); } /* * Complete a deferred release. Removes the set from the list provided to * cpuset_rel_defer. */ static void cpuset_rel_complete(struct cpuset *set) { LIST_REMOVE(set, cs_link); cpuset_rel(set->cs_parent); uma_zfree(cpuset_zone, set); } /* * Find a set based on an id. Returns it with a ref. */ static struct cpuset * cpuset_lookup(cpusetid_t setid, struct thread *td) { struct cpuset *set; if (setid == CPUSET_INVALID) return (NULL); mtx_lock_spin(&cpuset_lock); LIST_FOREACH(set, &cpuset_ids, cs_link) if (set->cs_id == setid) break; if (set) cpuset_ref(set); mtx_unlock_spin(&cpuset_lock); KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__)); if (set != NULL && jailed(td->td_ucred)) { struct cpuset *jset, *tset; jset = td->td_ucred->cr_prison->pr_cpuset; for (tset = set; tset != NULL; tset = tset->cs_parent) if (tset == jset) break; if (tset == NULL) { cpuset_rel(set); set = NULL; } } return (set); } /* * Create a set in the space provided in 'set' with the provided parameters. * The set is returned with a single ref. May return EDEADLK if the set * will have no valid cpu based on restrictions from the parent. 
*/ static int _cpuset_create(struct cpuset *set, struct cpuset *parent, const cpuset_t *mask, cpusetid_t id) { if (!CPU_OVERLAP(&parent->cs_mask, mask)) return (EDEADLK); CPU_COPY(mask, &set->cs_mask); LIST_INIT(&set->cs_children); refcount_init(&set->cs_ref, 1); set->cs_flags = 0; mtx_lock_spin(&cpuset_lock); CPU_AND(&set->cs_mask, &parent->cs_mask); set->cs_id = id; set->cs_parent = cpuset_ref(parent); LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings); if (set->cs_id != CPUSET_INVALID) LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); mtx_unlock_spin(&cpuset_lock); return (0); } /* * Create a new non-anonymous set with the requested parent and mask. May * return failures if the mask is invalid or a new number can not be * allocated. */ static int cpuset_create(struct cpuset **setp, struct cpuset *parent, const cpuset_t *mask) { struct cpuset *set; cpusetid_t id; int error; id = alloc_unr(cpuset_unr); if (id == -1) return (ENFILE); *setp = set = uma_zalloc(cpuset_zone, M_WAITOK); error = _cpuset_create(set, parent, mask, id); if (error == 0) return (0); free_unr(cpuset_unr, id); uma_zfree(cpuset_zone, set); return (error); } /* * Recursively check for errors that would occur from applying mask to * the tree of sets starting at 'set'. Checks for sets that would become * empty as well as RDONLY flags. */ static int cpuset_testupdate(struct cpuset *set, cpuset_t *mask, int check_mask) { struct cpuset *nset; cpuset_t newmask; int error; mtx_assert(&cpuset_lock, MA_OWNED); if (set->cs_flags & CPU_SET_RDONLY) return (EPERM); if (check_mask) { if (!CPU_OVERLAP(&set->cs_mask, mask)) return (EDEADLK); CPU_COPY(&set->cs_mask, &newmask); CPU_AND(&newmask, mask); } else CPU_COPY(mask, &newmask); error = 0; LIST_FOREACH(nset, &set->cs_children, cs_siblings) if ((error = cpuset_testupdate(nset, &newmask, 1)) != 0) break; return (error); } /* * Applies the mask 'mask' without checking for empty sets or permissions. 
*/
static void
cpuset_update(struct cpuset *set, cpuset_t *mask)
{
	struct cpuset *child;

	mtx_assert(&cpuset_lock, MA_OWNED);

	/* Narrow this set, then push the narrowed mask down to each child. */
	CPU_AND(&set->cs_mask, mask);
	LIST_FOREACH(child, &set->cs_children, cs_siblings) {
		cpuset_update(child, &set->cs_mask);
	}
}

/*
 * Replace the mask of 'set' with a copy of 'mask' and restrict every set
 * below it accordingly.  The change is validated with cpuset_testupdate()
 * before anything is modified.
 *
 * Returns 0 on success, the priv_check() error for an unprivileged caller,
 * EPERM when a jailed caller targets a root set, EINVAL when 'mask' is not
 * a subset of the parent's mask, or the cpuset_testupdate() error.
 */
static int
cpuset_modify(struct cpuset *set, cpuset_t *mask)
{
	struct cpuset *parent;
	int error;

	error = priv_check(curthread, PRIV_SCHED_CPUSET);
	if (error != 0)
		return (error);

	/*
	 * A jailed caller may not touch the jail's dedicated root
	 * cpuset, though child sets remain modifiable.
	 */
	if (jailed(curthread->td_ucred) &&
	    (set->cs_flags & CPU_SET_ROOT) != 0)
		return (EPERM);

	/* The new mask must stay within the cpus the parent offers. */
	parent = set->cs_parent;
	if (parent != NULL && !CPU_SUBSET(&parent->cs_mask, mask))
		return (EINVAL);

	mtx_lock_spin(&cpuset_lock);
	error = cpuset_testupdate(set, mask, 0);
	if (error == 0) {
		CPU_COPY(mask, &set->cs_mask);
		cpuset_update(set, mask);
	}
	mtx_unlock_spin(&cpuset_lock);

	return (error);
}

/*
 * Resolve the 'which' parameter of several cpuset apis.
 *
 * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid.  Also
 * checks for permission via p_cansched().
 *
 * For WHICH_SET returns a valid set with a new reference.
 *
 * -1 may be supplied for any argument to mean the current proc/thread or
 * the base set of the current thread.  May fail with ESRCH/EPERM.
*/
/*
 * Output contract, as implemented below:
 *  - all three out-pointers are cleared to NULL on entry;
 *  - CPU_WHICH_PID / CPU_WHICH_TID: on success *pp and *tdp are set and the
 *    proc is left locked for the caller to PROC_UNLOCK(); *setp stays NULL;
 *  - CPU_WHICH_CPUSET / CPU_WHICH_JAIL: on success only *setp is set, with
 *    a reference the caller must drop;
 *  - CPU_WHICH_IRQ: returns 0 with every out-pointer still NULL;
 *  - any other 'which' yields EINVAL.
 */
static int
cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp,
    struct cpuset **setp)
{
	struct cpuset *set;
	struct thread *td;
	struct proc *p;
	int error;

	*pp = p = NULL;
	*tdp = td = NULL;
	*setp = set = NULL;
	switch (which) {
	case CPU_WHICH_PID:
		if (id == -1) {
			PROC_LOCK(curproc);
			p = curproc;
			break;
		}
		/*
		 * NOTE(review): pfind() presumably returns the proc locked;
		 * the error path after the switch unlocks it.  Confirm.
		 */
		if ((p = pfind(id)) == NULL)
			return (ESRCH);
		break;
	case CPU_WHICH_TID:
		if (id == -1) {
			PROC_LOCK(curproc);
			p = curproc;
			td = curthread;
			break;
		}
		/*
		 * NOTE(review): tdfind() presumably returns with the owning
		 * proc locked, for the same reason as above.  Confirm.
		 */
		td = tdfind(id, -1);
		if (td == NULL)
			return (ESRCH);
		p = td->td_proc;
		break;
	case CPU_WHICH_CPUSET:
		if (id == -1) {
			/* -1 selects the base set of the calling thread. */
			thread_lock(curthread);
			set = cpuset_refbase(curthread->td_cpuset);
			thread_unlock(curthread);
		} else
			set = cpuset_lookup(id, curthread);
		if (set) {
			*setp = set;
			return (0);
		}
		return (ESRCH);
	case CPU_WHICH_JAIL:
	{
		/* Find `set' for prison with given id. */
		struct prison *pr;

		sx_slock(&allprison_lock);
		pr = prison_find_child(curthread->td_ucred->cr_prison, id);
		sx_sunlock(&allprison_lock);
		if (pr == NULL)
			return (ESRCH);
		cpuset_ref(pr->pr_cpuset);
		*setp = pr->pr_cpuset;
		/*
		 * NOTE(review): the prison mutex appears to be held on
		 * return from prison_find_child(); it is released here
		 * once the set reference has been taken.  Confirm.
		 */
		mtx_unlock(&pr->pr_mtx);
		return (0);
	}
	case CPU_WHICH_IRQ:
		/* Nothing to resolve here; callers handle irqs by id. */
		return (0);
	default:
		return (EINVAL);
	}
	/* Only the PID and TID cases reach this point, with 'p' locked. */
	error = p_cansched(curthread, p);
	if (error) {
		PROC_UNLOCK(p);
		return (error);
	}
	/* A PID lookup reports the first thread of the process. */
	if (td == NULL)
		td = FIRST_THREAD_IN_PROC(p);
	*pp = p;
	*tdp = td;
	return (0);
}

/*
 * Create an anonymous set with the provided mask in the space provided by
 * 'fset'.  If the passed in set is anonymous we use its parent otherwise
 * the new set is a child of 'set'.
 *
 * Returns EDEADLK when 'mask' is not a subset of the chosen parent's mask,
 * otherwise the result of _cpuset_create().
 */
static int
cpuset_shadow(struct cpuset *set, struct cpuset *fset, const cpuset_t *mask)
{
	struct cpuset *parent;

	if (set->cs_id == CPUSET_INVALID)
		parent = set->cs_parent;
	else
		parent = set;
	if (!CPU_SUBSET(&parent->cs_mask, mask))
		return (EDEADLK);
	return (_cpuset_create(fset, parent, mask, CPUSET_INVALID));
}

/*
 * Handle two cases for replacing the base set or mask of an entire process.
 *
 * 1) Set is non-null and mask is null.
This reparents all anonymous sets
 *    to the provided set and replaces all non-anonymous td_cpusets with the
 *    provided set.
 * 2) Mask is non-null and set is null.  This replaces or creates anonymous
 *    sets for every thread with the existing base as a parent.
 *
 * This is overly complicated because we can't allocate while holding a
 * spinlock and spinlocks must be held while changing and examining thread
 * state.
 */
static int
cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask)
{
	struct setlist freelist;	/* preallocated, not yet used sets */
	struct setlist droplist;	/* old sets awaiting final release */
	struct cpuset *tdset;
	struct cpuset *nset;
	struct thread *td;
	struct proc *p;
	int threads;
	int nfree;
	int error;

	/*
	 * The algorithm requires two passes due to locking considerations.
	 *
	 * 1) Lookup the process and acquire the locks in the required order.
	 * 2) If enough cpusets have not been allocated release the locks and
	 *    allocate them.  Loop.
	 */
	LIST_INIT(&freelist);
	LIST_INIT(&droplist);
	nfree = 0;
	for (;;) {
		error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset);
		if (error)
			goto out;
		/* One spare set per thread is enough; leave with p locked. */
		if (nfree >= p->p_numthreads)
			break;
		threads = p->p_numthreads;
		/* Drop the proc lock before the M_WAITOK allocations below. */
		PROC_UNLOCK(p);
		for (; nfree < threads; nfree++) {
			nset = uma_zalloc(cpuset_zone, M_WAITOK);
			LIST_INSERT_HEAD(&freelist, nset, cs_link);
		}
	}
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Now that the appropriate locks are held and we have enough cpusets,
	 * make sure the operation will succeed before applying changes.  The
	 * proc lock prevents td_cpuset from changing between calls.
	 */
	error = 0;
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		tdset = td->td_cpuset;
		/*
		 * Verify that a new mask doesn't specify cpus outside of
		 * the set the thread is a member of.
		 */
		if (mask) {
			if (tdset->cs_id == CPUSET_INVALID)
				tdset = tdset->cs_parent;
			if (!CPU_SUBSET(&tdset->cs_mask, mask))
				error = EDEADLK;
		/*
		 * Verify that a new set won't leave an existing thread
		 * mask without a cpu to run on.  It can, however, restrict
		 * the set.
		 */
		} else if (tdset->cs_id == CPUSET_INVALID) {
			if (!CPU_OVERLAP(&set->cs_mask, &tdset->cs_mask))
				error = EDEADLK;
		}
		thread_unlock(td);
		if (error)
			goto unlock_out;
	}
	/*
	 * Replace each thread's cpuset while using deferred release.  We
	 * must do this because the thread lock must be held while operating
	 * on the thread and this limits the type of operations allowed.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		/*
		 * If we presently have an anonymous set or are applying a
		 * mask we must create an anonymous shadow set.  That is
		 * either parented to our existing base or the supplied set.
		 *
		 * If we have a base set with no anonymous shadow we simply
		 * replace it outright.
		 */
		tdset = td->td_cpuset;
		if (tdset->cs_id == CPUSET_INVALID || mask) {
			/* Take one of the sets preallocated in the first pass. */
			nset = LIST_FIRST(&freelist);
			LIST_REMOVE(nset, cs_link);
			if (mask)
				error = cpuset_shadow(tdset, nset, mask);
			else
				error = _cpuset_create(nset, set,
				    &tdset->cs_mask, CPUSET_INVALID);
			if (error) {
				/* Put the unused set back so it is freed at 'out'. */
				LIST_INSERT_HEAD(&freelist, nset, cs_link);
				thread_unlock(td);
				break;
			}
		} else
			nset = cpuset_ref(set);
		cpuset_rel_defer(&droplist, tdset);
		td->td_cpuset = nset;
		sched_affinity(td);
		thread_unlock(td);
	}
unlock_out:
	PROC_UNLOCK(p);
out:
	/* With no locks held, finish the deferred releases and free spares. */
	while ((nset = LIST_FIRST(&droplist)) != NULL)
		cpuset_rel_complete(nset);
	while ((nset = LIST_FIRST(&freelist)) != NULL) {
		LIST_REMOVE(nset, cs_link);
		uma_zfree(cpuset_zone, nset);
	}
	return (error);
}

/*
 * Calculate the ffs() of the cpuset.
 *
 * Scans the words of 'set' from index 0 upward and returns the ffsl()
 * result of the first non-zero word plus that word's bit offset
 * (i * _NCPUBITS).  Returns 0 when every word is zero.
 */
int
cpusetobj_ffs(const cpuset_t *set)
{
	size_t i;
	int cbit;

	cbit = 0;
	for (i = 0; i < _NCPUWORDS; i++) {
		if (set->__bits[i] != 0) {
			cbit = ffsl(set->__bits[i]);
			cbit += i * _NCPUBITS;
			break;
		}
	}
	return (cbit);
}

/*
 * Return a string representing a valid layout for a cpuset_t object.
 * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
*/ char * cpusetobj_strprint(char *buf, const cpuset_t *set) { char *tbuf; size_t i, bytesp, bufsiz; tbuf = buf; bytesp = 0; bufsiz = CPUSETBUFSIZ; for (i = _NCPUWORDS - 1; i > 0; i--) { bytesp = snprintf(tbuf, bufsiz, "%lx, ", set->__bits[i]); bufsiz -= bytesp; tbuf += bytesp; } snprintf(tbuf, bufsiz, "%lx", set->__bits[0]); return (buf); } /* * Build a valid cpuset_t object from a string representation. * It expects an incoming buffer at least sized as CPUSETBUFSIZ. */ int cpusetobj_strscan(cpuset_t *set, const char *buf) { u_int nwords; int i, ret; if (strlen(buf) > CPUSETBUFSIZ - 1) return (-1); /* Allow to pass a shorter version of the mask when necessary. */ nwords = 1; for (i = 0; buf[i] != '\0'; i++) if (buf[i] == ',') nwords++; if (nwords > _NCPUWORDS) return (-1); CPU_ZERO(set); for (i = nwords - 1; i > 0; i--) { ret = sscanf(buf, "%lx, ", &set->__bits[i]); if (ret == 0 || ret == -1) return (-1); buf = strstr(buf, " "); if (buf == NULL) return (-1); buf++; } ret = sscanf(buf, "%lx", &set->__bits[0]); if (ret == 0 || ret == -1) return (-1); return (0); } /* * Apply an anonymous mask to a single thread. */ int cpuset_setthread(lwpid_t id, cpuset_t *mask) { struct cpuset *nset; struct cpuset *set; struct thread *td; struct proc *p; int error; nset = uma_zalloc(cpuset_zone, M_WAITOK); error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set); if (error) goto out; set = NULL; thread_lock(td); error = cpuset_shadow(td->td_cpuset, nset, mask); if (error == 0) { set = td->td_cpuset; td->td_cpuset = nset; sched_affinity(td); nset = NULL; } thread_unlock(td); PROC_UNLOCK(p); if (set) cpuset_rel(set); out: if (nset) uma_zfree(cpuset_zone, nset); return (error); } /* * Creates the cpuset for thread0. We make two sets: * * 0 - The root set which should represent all valid processors in the * system. It is initially created with a mask of all processors * because we don't know what processors are valid until cpuset_init() * runs. This set is immutable. 
* 1 - The default set which all processes are a member of until changed. * This allows an administrator to move all threads off of given cpus to * dedicate them to high priority tasks or save power etc. */ struct cpuset * cpuset_thread0(void) { struct cpuset *set; int error; cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE); /* * Create the root system set for the whole machine. Doesn't use * cpuset_create() due to NULL parent. */ set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); CPU_FILL(&set->cs_mask); LIST_INIT(&set->cs_children); LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); set->cs_ref = 1; set->cs_flags = CPU_SET_ROOT; cpuset_zero = set; cpuset_root = &set->cs_mask; /* * Now derive a default, modifiable set from that to give out. */ set = uma_zalloc(cpuset_zone, M_WAITOK); error = _cpuset_create(set, cpuset_zero, &cpuset_zero->cs_mask, 1); KASSERT(error == 0, ("Error creating default set: %d\n", error)); /* * Initialize the unit allocator. 0 and 1 are allocated above. */ cpuset_unr = new_unrhdr(2, INT_MAX, NULL); return (set); } /* * Create a cpuset, which would be cpuset_create() but * mark the new 'set' as root. * * We are not going to reparent the td to it. Use cpuset_setproc_update_set() * for that. * * In case of no error, returns the set in *setp locked with a reference. */ int cpuset_create_root(struct prison *pr, struct cpuset **setp) { struct cpuset *set; int error; KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__)); KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__)); error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask); if (error) return (error); KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data", __func__, __LINE__)); /* Mark the set as root. 
*/ set = *setp; set->cs_flags |= CPU_SET_ROOT; return (0); } int cpuset_setproc_update_set(struct proc *p, struct cpuset *set) { int error; KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__)); KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__)); cpuset_ref(set); error = cpuset_setproc(p->p_pid, set, NULL); if (error) return (error); cpuset_rel(set); return (0); } /* * This is called once the final set of system cpus is known. Modifies * the root set and all children and mark the root read-only. */ static void cpuset_init(void *arg) { cpuset_t mask; mask = all_cpus; if (cpuset_modify(cpuset_zero, &mask)) panic("Can't set initial cpuset mask.\n"); cpuset_zero->cs_flags |= CPU_SET_RDONLY; } SYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL); #ifndef _SYS_SYSPROTO_H_ struct cpuset_args { cpusetid_t *setid; }; #endif int sys_cpuset(struct thread *td, struct cpuset_args *uap) { struct cpuset *root; struct cpuset *set; int error; thread_lock(td); root = cpuset_refroot(td->td_cpuset); thread_unlock(td); error = cpuset_create(&set, root, &root->cs_mask); cpuset_rel(root); if (error) return (error); error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id)); if (error == 0) error = cpuset_setproc(-1, set, NULL); cpuset_rel(set); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_setid_args { cpuwhich_t which; id_t id; cpusetid_t setid; }; #endif int sys_cpuset_setid(struct thread *td, struct cpuset_setid_args *uap) { struct cpuset *set; int error; /* * Presently we only support per-process sets. 
*/ if (uap->which != CPU_WHICH_PID) return (EINVAL); set = cpuset_lookup(uap->setid, td); if (set == NULL) return (ESRCH); error = cpuset_setproc(uap->id, set, NULL); cpuset_rel(set); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_getid_args { cpulevel_t level; cpuwhich_t which; id_t id; cpusetid_t *setid; #endif int sys_cpuset_getid(struct thread *td, struct cpuset_getid_args *uap) { struct cpuset *nset; struct cpuset *set; struct thread *ttd; struct proc *p; cpusetid_t id; int error; if (uap->level == CPU_LEVEL_WHICH && uap->which != CPU_WHICH_CPUSET) return (EINVAL); error = cpuset_which(uap->which, uap->id, &p, &ttd, &set); if (error) return (error); switch (uap->which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_refbase(ttd->td_cpuset); thread_unlock(ttd); PROC_UNLOCK(p); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: return (EINVAL); } switch (uap->level) { case CPU_LEVEL_ROOT: nset = cpuset_refroot(set); cpuset_rel(set); set = nset; break; case CPU_LEVEL_CPUSET: break; case CPU_LEVEL_WHICH: break; } id = set->cs_id; cpuset_rel(set); if (error == 0) error = copyout(&id, uap->setid, sizeof(id)); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_getaffinity_args { cpulevel_t level; cpuwhich_t which; id_t id; size_t cpusetsize; cpuset_t *mask; }; #endif int sys_cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap) { struct thread *ttd; struct cpuset *nset; struct cpuset *set; struct proc *p; cpuset_t *mask; int error; size_t size; if (uap->cpusetsize < sizeof(cpuset_t) || uap->cpusetsize > CPU_MAXSIZE / NBBY) return (ERANGE); size = uap->cpusetsize; mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO); error = cpuset_which(uap->which, uap->id, &p, &ttd, &set); if (error) goto out; switch (uap->level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: switch (uap->which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_ref(ttd->td_cpuset); 
thread_unlock(ttd); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: error = EINVAL; goto out; } if (uap->level == CPU_LEVEL_ROOT) nset = cpuset_refroot(set); else nset = cpuset_refbase(set); CPU_COPY(&nset->cs_mask, mask); cpuset_rel(nset); break; case CPU_LEVEL_WHICH: switch (uap->which) { case CPU_WHICH_TID: thread_lock(ttd); CPU_COPY(&ttd->td_cpuset->cs_mask, mask); thread_unlock(ttd); break; case CPU_WHICH_PID: FOREACH_THREAD_IN_PROC(p, ttd) { thread_lock(ttd); CPU_OR(mask, &ttd->td_cpuset->cs_mask); thread_unlock(ttd); } break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: CPU_COPY(&set->cs_mask, mask); break; case CPU_WHICH_IRQ: error = intr_getaffinity(uap->id, mask); break; } break; default: error = EINVAL; break; } if (set) cpuset_rel(set); if (p) PROC_UNLOCK(p); if (error == 0) error = copyout(mask, uap->mask, size); out: free(mask, M_TEMP); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_setaffinity_args { cpulevel_t level; cpuwhich_t which; id_t id; size_t cpusetsize; const cpuset_t *mask; }; #endif int sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap) { struct cpuset *nset; struct cpuset *set; struct thread *ttd; struct proc *p; cpuset_t *mask; int error; if (uap->cpusetsize < sizeof(cpuset_t) || uap->cpusetsize > CPU_MAXSIZE / NBBY) return (ERANGE); mask = malloc(uap->cpusetsize, M_TEMP, M_WAITOK | M_ZERO); error = copyin(uap->mask, mask, uap->cpusetsize); if (error) goto out; /* * Verify that no high bits are set. 
*/ if (uap->cpusetsize > sizeof(cpuset_t)) { char *end; char *cp; end = cp = (char *)&mask->__bits; end += uap->cpusetsize; cp += sizeof(cpuset_t); while (cp != end) if (*cp++ != 0) { error = EINVAL; goto out; } } switch (uap->level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: error = cpuset_which(uap->which, uap->id, &p, &ttd, &set); if (error) break; switch (uap->which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_ref(ttd->td_cpuset); thread_unlock(ttd); PROC_UNLOCK(p); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: error = EINVAL; goto out; } if (uap->level == CPU_LEVEL_ROOT) nset = cpuset_refroot(set); else nset = cpuset_refbase(set); error = cpuset_modify(nset, mask); cpuset_rel(nset); cpuset_rel(set); break; case CPU_LEVEL_WHICH: switch (uap->which) { case CPU_WHICH_TID: error = cpuset_setthread(uap->id, mask); break; case CPU_WHICH_PID: error = cpuset_setproc(uap->id, NULL, mask); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: error = cpuset_which(uap->which, uap->id, &p, &ttd, &set); if (error == 0) { error = cpuset_modify(set, mask); cpuset_rel(set); } break; case CPU_WHICH_IRQ: error = intr_setaffinity(uap->id, mask); break; default: error = EINVAL; break; } break; default: error = EINVAL; break; } out: free(mask, M_TEMP); return (error); } #ifdef DDB void ddb_display_cpuset(const cpuset_t *set) { int cpu, once; for (once = 0, cpu = 0; cpu < CPU_SETSIZE; cpu++) { if (CPU_ISSET(cpu, set)) { if (once == 0) { db_printf("%d", cpu); once = 1; } else db_printf(",%d", cpu); } } if (once == 0) db_printf(""); } DB_SHOW_COMMAND(cpusets, db_show_cpusets) { struct cpuset *set; LIST_FOREACH(set, &cpuset_ids, cs_link) { db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n", set, set->cs_id, set->cs_ref, set->cs_flags, (set->cs_parent != NULL) ? 
set->cs_parent->cs_id : 0); db_printf(" mask="); ddb_display_cpuset(&set->cs_mask); db_printf("\n"); if (db_pager_quit) break; } } #endif /* DDB */ Index: stable/9/sys/kern/kern_mib.c =================================================================== --- stable/9/sys/kern/kern_mib.c (revision 273911) +++ stable/9/sys/kern/kern_mib.c (revision 273912) @@ -1,556 +1,557 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD * project, to make these variables more userfriendly. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_posix.h" #include "opt_config.h" #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic"); SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0, "High kernel, proc, limits &c"); SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0, "Virtual memory"); SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW, 0, "File system"); SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW, 0, "Network, (see socket.h)"); SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW, 0, "Debugging"); SYSCTL_NODE(_debug, OID_AUTO, sizeof, CTLFLAG_RW, 0, "Sizeof various things"); SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW, 0, "hardware"); SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW, 0, "machine dependent"); SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0, "user-level"); SYSCTL_NODE(, CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0, "p1003_1b, (see p1003_1b.h)"); SYSCTL_NODE(, OID_AUTO, compat, CTLFLAG_RW, 0, "Compatibility code"); SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW, 0, "Security"); #ifdef REGRESSION SYSCTL_NODE(, OID_AUTO, regression, CTLFLAG_RW, 0, "Regression test MIB"); #endif SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD|CTLFLAG_MPSAFE, kern_ident, 0, "Kernel identifier"); SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD|CTLFLAG_MPSAFE| CTLFLAG_CAPRD, osrelease, 0, "Operating system 
release"); SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD|CTLFLAG_CAPRD, - 0, BSD, "Operating system revision"); + SYSCTL_NULL_INT_PTR, BSD, "Operating system revision"); SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE, version, 0, "Kernel version"); SYSCTL_STRING(_kern, OID_AUTO, compiler_version, CTLFLAG_RD|CTLFLAG_MPSAFE, compiler_version, 0, "Version of compiler used to compile kernel"); SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE| CTLFLAG_CAPRD, ostype, 0, "Operating system type"); /* * NOTICE: The *userland* release date is available in * /usr/include/osreldate.h */ SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD|CTLFLAG_CAPRD, &osreldate, 0, "Kernel release date"); SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RDTUN, &maxproc, 0, "Maximum number of processes"); SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, &maxprocperuid, 0, "Maximum processes allowed per userid"); SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RDTUN, &maxusers, 0, "Hint for kernel tuning"); SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD|CTLFLAG_CAPRD, - 0, ARG_MAX, "Maximum bytes of argument to execve(2)"); + SYSCTL_NULL_INT_PTR, ARG_MAX, "Maximum bytes of argument to execve(2)"); SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD|CTLFLAG_CAPRD, - 0, _POSIX_VERSION, "Version of POSIX attempting to comply to"); + SYSCTL_NULL_INT_PTR, _POSIX_VERSION, "Version of POSIX attempting to comply to"); SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN|CTLFLAG_CAPRD, &ngroups_max, 0, "Maximum number of supplemental groups a user can belong to"); SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD|CTLFLAG_CAPRD, - 0, 1, "Whether job control is available"); + SYSCTL_NULL_INT_PTR, 1, "Whether job control is available"); #ifdef _POSIX_SAVED_IDS SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD, - 0, 1, "Whether saved set-group/user ID is available"); + SYSCTL_NULL_INT_PTR, 1, "Whether 
saved set-group/user ID is available"); #else SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD, - 0, 0, "Whether saved set-group/user ID is available"); + SYSCTL_NULL_INT_PTR, 0, "Whether saved set-group/user ID is available"); #endif char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */ SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW, kernelname, sizeof kernelname, "Name of kernel file booted"); SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_ncpus, 0, "Number of active CPUs"); SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD|CTLFLAG_CAPRD, - 0, BYTE_ORDER, "System byte order"); + SYSCTL_NULL_INT_PTR, BYTE_ORDER, "System byte order"); SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD|CTLFLAG_CAPRD, - 0, PAGE_SIZE, "System memory page size"); + SYSCTL_NULL_INT_PTR, PAGE_SIZE, "System memory page size"); static int sysctl_kern_arnd(SYSCTL_HANDLER_ARGS) { char buf[256]; size_t len; len = req->oldlen; if (len > sizeof(buf)) len = sizeof(buf); arc4rand(buf, len, 0); return (SYSCTL_OUT(req, buf, len)); } SYSCTL_PROC(_kern, KERN_ARND, arandom, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, NULL, 0, sysctl_kern_arnd, "", "arc4rand"); static int sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { u_long val; val = ctob(physmem); return (sysctl_handle_long(oidp, &val, 0, req)); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_ULONG | CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "LU", ""); static int sysctl_hw_realmem(SYSCTL_HANDLER_ARGS) { u_long val; val = ctob(realmem); return (sysctl_handle_long(oidp, &val, 0, req)); } SYSCTL_PROC(_hw, HW_REALMEM, realmem, CTLTYPE_ULONG | CTLFLAG_RD, 0, 0, sysctl_hw_realmem, "LU", ""); static int sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { u_long val; val = ctob(physmem - cnt.v_wire_count); return (sysctl_handle_long(oidp, &val, 0, req)); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_ULONG | CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "LU", ""); SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, 
&physmem, 0, ""); u_long pagesizes[MAXPAGESIZES] = { PAGE_SIZE }; static int sysctl_hw_pagesizes(SYSCTL_HANDLER_ARGS) { int error; #ifdef SCTL_MASK32 int i; uint32_t pagesizes32[MAXPAGESIZES]; if (req->flags & SCTL_MASK32) { /* * Recreate the "pagesizes" array with 32-bit elements. Truncate * any page size greater than UINT32_MAX to zero. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; error = SYSCTL_OUT(req, pagesizes32, sizeof(pagesizes32)); } else #endif error = SYSCTL_OUT(req, pagesizes, sizeof(pagesizes)); return (error); } SYSCTL_PROC(_hw, OID_AUTO, pagesizes, CTLTYPE_ULONG | CTLFLAG_RD, NULL, 0, sysctl_hw_pagesizes, "LU", "Supported page sizes"); #ifdef SCTL_MASK32 int adaptive_machine_arch = 1; SYSCTL_INT(_debug, OID_AUTO, adaptive_machine_arch, CTLFLAG_RW, &adaptive_machine_arch, 1, "Adapt reported machine architecture to the ABI of the binary"); #endif static int sysctl_hw_machine_arch(SYSCTL_HANDLER_ARGS) { int error; static const char machine_arch[] = MACHINE_ARCH; #ifdef SCTL_MASK32 static const char machine_arch32[] = MACHINE_ARCH32; if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch) error = SYSCTL_OUT(req, machine_arch32, sizeof(machine_arch32)); else #endif error = SYSCTL_OUT(req, machine_arch, sizeof(machine_arch)); return (error); } SYSCTL_PROC(_hw, HW_MACHINE_ARCH, machine_arch, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_hw_machine_arch, "A", "System architecture"); SYSCTL_STRING(_kern, OID_AUTO, supported_archs, CTLFLAG_RD | CTLFLAG_MPSAFE, #ifdef COMPAT_FREEBSD32 MACHINE_ARCH " " MACHINE_ARCH32, 0, "Supported architectures for binaries"); #else MACHINE_ARCH, 0, "Supported architectures for binaries"); #endif static int sysctl_hostname(SYSCTL_HANDLER_ARGS) { struct prison *pr, *cpr; size_t pr_offset; char tmpname[MAXHOSTNAMELEN]; int descend, error, len; /* * This function can set: hostname domainname hostuuid. * Keep that in mind when comments say "hostname". 
*/ pr_offset = (size_t)arg1; len = arg2; KASSERT(len <= sizeof(tmpname), ("length %d too long for %s", len, __func__)); pr = req->td->td_ucred->cr_prison; if (!(pr->pr_allow & PR_ALLOW_SET_HOSTNAME) && req->newptr) return (EPERM); /* * Make a local copy of hostname to get/set so we don't have to hold * the jail mutex during the sysctl copyin/copyout activities. */ mtx_lock(&pr->pr_mtx); bcopy((char *)pr + pr_offset, tmpname, len); mtx_unlock(&pr->pr_mtx); error = sysctl_handle_string(oidp, tmpname, len, req); if (req->newptr != NULL && error == 0) { /* * Copy the locally set hostname to all jails that share * this host info. */ sx_slock(&allprison_lock); while (!(pr->pr_flags & PR_HOST)) pr = pr->pr_parent; mtx_lock(&pr->pr_mtx); bcopy(tmpname, (char *)pr + pr_offset, len); FOREACH_PRISON_DESCENDANT_LOCKED(pr, cpr, descend) if (cpr->pr_flags & PR_HOST) descend = 0; else bcopy(tmpname, (char *)cpr + pr_offset, len); mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); } return (error); } SYSCTL_PROC(_kern, KERN_HOSTNAME, hostname, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, (void *)(offsetof(struct prison, pr_hostname)), MAXHOSTNAMELEN, sysctl_hostname, "A", "Hostname"); SYSCTL_PROC(_kern, KERN_NISDOMAINNAME, domainname, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, (void *)(offsetof(struct prison, pr_domainname)), MAXHOSTNAMELEN, sysctl_hostname, "A", "Name of the current YP/NIS domain"); SYSCTL_PROC(_kern, KERN_HOSTUUID, hostuuid, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, (void *)(offsetof(struct prison, pr_hostuuid)), HOSTUUIDLEN, sysctl_hostname, "A", "Host UUID"); static int regression_securelevel_nonmonotonic = 0; #ifdef REGRESSION SYSCTL_INT(_regression, OID_AUTO, securelevel_nonmonotonic, CTLFLAG_RW, ®ression_securelevel_nonmonotonic, 0, "securelevel may be lowered"); #endif static int sysctl_kern_securelvl(SYSCTL_HANDLER_ARGS) { struct prison *pr, *cpr; int descend, error, level; pr = 
req->td->td_ucred->cr_prison; /* * Reading the securelevel is easy, since the current jail's level * is known to be at least as secure as any higher levels. Perform * a lockless read since the securelevel is an integer. */ level = pr->pr_securelevel; error = sysctl_handle_int(oidp, &level, 0, req); if (error || !req->newptr) return (error); /* Permit update only if the new securelevel exceeds the old. */ sx_slock(&allprison_lock); mtx_lock(&pr->pr_mtx); if (!regression_securelevel_nonmonotonic && level < pr->pr_securelevel) { mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); return (EPERM); } pr->pr_securelevel = level; /* * Set all child jails to be at least this level, but do not lower * them (even if regression_securelevel_nonmonotonic). */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, cpr, descend) { if (cpr->pr_securelevel < level) cpr->pr_securelevel = level; } mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); return (error); } SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, sysctl_kern_securelvl, "I", "Current secure level"); #ifdef INCLUDE_CONFIG_FILE /* Actual kernel configuration options. */ extern char kernconfstring[]; static int sysctl_kern_config(SYSCTL_HANDLER_ARGS) { return (sysctl_handle_string(oidp, kernconfstring, strlen(kernconfstring), req)); } SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW, 0, 0, sysctl_kern_config, "", "Kernel configuration file"); #endif static int sysctl_hostid(SYSCTL_HANDLER_ARGS) { struct prison *pr, *cpr; u_long tmpid; int descend, error; /* * Like sysctl_hostname, except it operates on a u_long * instead of a string, and is used only for hostid. 
*/ pr = req->td->td_ucred->cr_prison; if (!(pr->pr_allow & PR_ALLOW_SET_HOSTNAME) && req->newptr) return (EPERM); tmpid = pr->pr_hostid; error = sysctl_handle_long(oidp, &tmpid, 0, req); if (req->newptr != NULL && error == 0) { sx_slock(&allprison_lock); while (!(pr->pr_flags & PR_HOST)) pr = pr->pr_parent; mtx_lock(&pr->pr_mtx); pr->pr_hostid = tmpid; FOREACH_PRISON_DESCENDANT_LOCKED(pr, cpr, descend) if (cpr->pr_flags & PR_HOST) descend = 0; else cpr->pr_hostid = tmpid; mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); } return (error); } SYSCTL_PROC(_kern, KERN_HOSTID, hostid, CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_hostid, "LU", "Host ID"); SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features"); #ifdef COMPAT_FREEBSD4 FEATURE(compat_freebsd4, "Compatible with FreeBSD 4"); #endif #ifdef COMPAT_FREEBSD5 FEATURE(compat_freebsd5, "Compatible with FreeBSD 5"); #endif #ifdef COMPAT_FREEBSD6 FEATURE(compat_freebsd6, "Compatible with FreeBSD 6"); #endif #ifdef COMPAT_FREEBSD7 FEATURE(compat_freebsd7, "Compatible with FreeBSD 7"); #endif /* * This is really cheating. These actually live in the libc, something * which I'm not quite sure is a good idea anyway, but in order for * getnext and friends to actually work, we define dummies here. * * XXXRW: These probably should be CTLFLAG_CAPRD. 
*/
SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD,
	"", 0, "PATH that finds all the standard utilities");
SYSCTL_INT(_user, USER_BC_BASE_MAX, bc_base_max, CTLFLAG_RD,
-	0, 0, "Max ibase/obase values in bc(1)");
+	SYSCTL_NULL_INT_PTR, 0, "Max ibase/obase values in bc(1)");
SYSCTL_INT(_user, USER_BC_DIM_MAX, bc_dim_max, CTLFLAG_RD,
-	0, 0, "Max array size in bc(1)");
+	SYSCTL_NULL_INT_PTR, 0, "Max array size in bc(1)");
SYSCTL_INT(_user, USER_BC_SCALE_MAX, bc_scale_max, CTLFLAG_RD,
-	0, 0, "Max scale value in bc(1)");
+	SYSCTL_NULL_INT_PTR, 0, "Max scale value in bc(1)");
SYSCTL_INT(_user, USER_BC_STRING_MAX, bc_string_max, CTLFLAG_RD,
-	0, 0, "Max string length in bc(1)");
+	SYSCTL_NULL_INT_PTR, 0, "Max string length in bc(1)");
SYSCTL_INT(_user, USER_COLL_WEIGHTS_MAX, coll_weights_max, CTLFLAG_RD,
-	0, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry");
-SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD, 0, 0, "");
+	SYSCTL_NULL_INT_PTR, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry");
+SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD,
+	SYSCTL_NULL_INT_PTR, 0, "");
SYSCTL_INT(_user, USER_LINE_MAX, line_max, CTLFLAG_RD,
-	0, 0, "Max length (bytes) of a text-processing utility's input line");
+	SYSCTL_NULL_INT_PTR, 0, "Max length (bytes) of a text-processing utility's input line");
SYSCTL_INT(_user, USER_RE_DUP_MAX, re_dup_max, CTLFLAG_RD,
-	0, 0, "Maximum number of repeats of a regexp permitted");
+	SYSCTL_NULL_INT_PTR, 0, "Maximum number of repeats of a regexp permitted");
SYSCTL_INT(_user, USER_POSIX2_VERSION, posix2_version, CTLFLAG_RD,
-	0, 0,
+	SYSCTL_NULL_INT_PTR, 0,
	"The version of POSIX 1003.2 with which the system attempts to comply");
SYSCTL_INT(_user, USER_POSIX2_C_BIND, posix2_c_bind, CTLFLAG_RD,
-	0, 0, "Whether C development supports the C bindings option");
+	SYSCTL_NULL_INT_PTR, 0, "Whether C development supports the C bindings option");
SYSCTL_INT(_user, USER_POSIX2_C_DEV, posix2_c_dev, CTLFLAG_RD,
-	0, 0, "Whether system supports the C development utilities option");
+	SYSCTL_NULL_INT_PTR, 0, "Whether system supports the C development utilities option");
SYSCTL_INT(_user, USER_POSIX2_CHAR_TERM, posix2_char_term, CTLFLAG_RD,
-	0, 0, "");
+	SYSCTL_NULL_INT_PTR, 0, "");
SYSCTL_INT(_user, USER_POSIX2_FORT_DEV, posix2_fort_dev, CTLFLAG_RD,
-	0, 0, "Whether system supports FORTRAN development utilities");
+	SYSCTL_NULL_INT_PTR, 0, "Whether system supports FORTRAN development utilities");
SYSCTL_INT(_user, USER_POSIX2_FORT_RUN, posix2_fort_run, CTLFLAG_RD,
-	0, 0, "Whether system supports FORTRAN runtime utilities");
+	SYSCTL_NULL_INT_PTR, 0, "Whether system supports FORTRAN runtime utilities");
SYSCTL_INT(_user, USER_POSIX2_LOCALEDEF, posix2_localedef, CTLFLAG_RD,
-	0, 0, "Whether system supports creation of locales");
+	SYSCTL_NULL_INT_PTR, 0, "Whether system supports creation of locales");
SYSCTL_INT(_user, USER_POSIX2_SW_DEV, posix2_sw_dev, CTLFLAG_RD,
-	0, 0, "Whether system supports software development utilities");
+	SYSCTL_NULL_INT_PTR, 0, "Whether system supports software development utilities");
SYSCTL_INT(_user, USER_POSIX2_UPE, posix2_upe, CTLFLAG_RD,
-	0, 0, "Whether system supports the user portability utilities");
+	SYSCTL_NULL_INT_PTR, 0, "Whether system supports the user portability utilities");
SYSCTL_INT(_user, USER_STREAM_MAX, stream_max, CTLFLAG_RD,
-	0, 0, "Min Maximum number of streams a process may have open at one time");
+	SYSCTL_NULL_INT_PTR, 0, "Min Maximum number of streams a process may have open at one time");
SYSCTL_INT(_user, USER_TZNAME_MAX, tzname_max, CTLFLAG_RD,
-	0, 0, "Min Maximum number of types supported for timezone names");
+	SYSCTL_NULL_INT_PTR, 0, "Min Maximum number of types supported for timezone names");

#include
SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD,
-	0, sizeof(struct vnode), "sizeof(struct vnode)");
+	SYSCTL_NULL_INT_PTR, sizeof(struct vnode), "sizeof(struct vnode)");

SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD,
-	0, sizeof(struct proc), "sizeof(struct proc)");
+	SYSCTL_NULL_INT_PTR, sizeof(struct proc), "sizeof(struct proc)");

/*
 * Handler for kern.pid_max.
 *
 * Reads report the current pid_max.  A write is accepted only when the new
 * value lies in [300, PID_MAX]; anything else yields EINVAL.  The range
 * check and the store are both done with proctree_lock and allproc_lock
 * held exclusively.
 */
static int
sysctl_kern_pid_max(SYSCTL_HANDLER_ARGS)
{
	int error, pm;

	pm = pid_max;
	error = sysctl_handle_int(oidp, &pm, 0, req);
	if (error || !req->newptr)
		return (error);
	sx_xlock(&proctree_lock);
	sx_xlock(&allproc_lock);

	/*
	 * Only permit the values less than PID_MAX.
	 * As a safety measure, do not allow to limit the pid_max too much.
	 */
	if (pm < 300 || pm > PID_MAX)
		error = EINVAL;
	else
		pid_max = pm;
	sx_xunlock(&allproc_lock);
	sx_xunlock(&proctree_lock);
	return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, pid_max, CTLTYPE_INT |
	CTLFLAG_RW | CTLFLAG_TUN | CTLFLAG_MPSAFE,
	0, 0, sysctl_kern_pid_max, "I",
	"Maximum allowed pid");

#include
#include
SYSCTL_INT(_debug_sizeof, OID_AUTO, bio, CTLFLAG_RD,
-	0, sizeof(struct bio), "sizeof(struct bio)");
+	SYSCTL_NULL_INT_PTR, sizeof(struct bio), "sizeof(struct bio)");
SYSCTL_INT(_debug_sizeof, OID_AUTO, buf, CTLFLAG_RD,
-	0, sizeof(struct buf), "sizeof(struct buf)");
+	SYSCTL_NULL_INT_PTR, sizeof(struct buf), "sizeof(struct buf)");

#include
SYSCTL_INT(_debug_sizeof, OID_AUTO, kinfo_proc, CTLFLAG_RD,
-	0, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)");
+	SYSCTL_NULL_INT_PTR, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)");

/* XXX compatibility, remove for 6.0 */
#include
#include
SYSCTL_INT(_kern, OID_AUTO, fallback_elf_brand, CTLFLAG_RW,
	&__elfN(fallback_brand), sizeof(__elfN(fallback_brand)),
	"compatibility for kern.fallback_elf_brand");
Index: stable/9/sys/kern/kern_synch.c
===================================================================
--- stable/9/sys/kern/kern_synch.c (revision 273911)
+++ stable/9/sys/kern/kern_synch.c (revision 273912)
@@ -1,623 +1,622 @@
/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 * The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95
 */

#include
__FBSDID("$FreeBSD$");

#include "opt_kdtrace.h"
#include "opt_ktrace.h"
#include "opt_sched.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef KTRACE
#include
#include
#endif

#include

#ifdef XEN
#include
#include
#include
#endif

/*
 * Map a thread's td_inhibitors bits to a state name for KTR scheduler
 * traces.  The tests are ordered, so the first matching bit wins;
 * "yielding" is the fallback when none of the tested bits is set.
 */
#define KTDSTATE(td) \
	(((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \
	((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \
	((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \
	((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \
	((td)->td_inhibitors & TDI_IWAIT) != 0 ? "iwait" : "yielding")

static void synch_setup(void *dummy);
SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup,
	NULL);

int hogticks;
/* Its address is the wait channel pause() sleeps on; the value is unused here. */
static int pause_wchan;

static struct callout loadav_callout;

struct loadavg averunnable =
	{ {0, 0, 0}, FSCALE }; /* load average, of runnable procs */
/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 */
static fixpt_t cexp[3] = {
	0.9200444146293232 * FSCALE, /* exp(-1/12) */
	0.9834714538216174 * FSCALE, /* exp(-1/60) */
	0.9944598480048967 * FSCALE, /* exp(-1/180) */
};

/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
-static int fscale __unused = FSCALE;
-SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");
+SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE, "");

static void loadav(void *arg);

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE(sched, , , preempt);

/*
 * These probes reference Solaris features that are not implemented in FreeBSD.
 * Create the probes anyway for compatibility with existing D scripts; they'll
 * just never fire.
*/ SDT_PROBE_DEFINE(sched, , , cpucaps__sleep); SDT_PROBE_DEFINE(sched, , , cpucaps__wakeup); SDT_PROBE_DEFINE(sched, , , schedctl__nopreempt); SDT_PROBE_DEFINE(sched, , , schedctl__preempt); SDT_PROBE_DEFINE(sched, , , schedctl__yield); void sleepinit(void) { hogticks = (hz / 10) * 2; /* Default only. */ init_sleepqueues(); } /* * General sleep call. Suspends the current thread until a wakeup is * performed on the specified identifier. The thread will then be made * runnable with the specified priority. Sleeps at most timo/hz seconds * (0 means no timeout). If pri includes PCATCH flag, signals are checked * before and after sleeping, else signals are not checked. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal needs to be delivered, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). * * The lock argument is unlocked before the caller is suspended, and * re-locked before _sleep() returns. If priority includes the PDROP * flag the lock is not re-locked before returning. 
*/
int
_sleep(void *ident, struct lock_object *lock, int priority,
	const char *wmesg, int timo)
{
	struct thread *td;
	struct proc *p;
	struct lock_class *class;
	int catch, flags, lock_state, pri, rval;
	WITNESS_SAVE_DECL(lock_witness);

	td = curthread;
	p = td->td_proc;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0, wmesg);
#endif
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
		"Sleeping on \"%s\"", wmesg);
	KASSERT(timo != 0 || mtx_owned(&Giant) || lock != NULL,
		("sleeping without a lock"));
	KASSERT(p != NULL, ("msleep1"));
	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
	if (priority & PDROP)
		KASSERT(lock != NULL && lock != &Giant.lock_object,
			("PDROP requires a non-Giant lock"));
	/* class stays NULL when the caller passed no interlock. */
	if (lock != NULL)
		class = LOCK_CLASS(lock);
	else
		class = NULL;

	if (cold || SCHEDULER_STOPPED()) {
		/*
		 * During autoconfiguration, just return;
		 * don't run any other threads or panic below,
		 * in case this is the idle thread and already asleep.
		 * XXX: this used to do "s = splhigh(); splx(safepri);
		 * splx(s);" to give interrupts a chance, but there is
		 * no way to give interrupts a chance now.
		 */
		/* PDROP is still honoured on this early-return path. */
		if (lock != NULL && priority & PDROP)
			class->lc_unlock(lock);
		return (0);
	}
	/* Split priority into the PCATCH flag and the numeric priority. */
	catch = priority & PCATCH;
	pri = priority & PRIMASK;

	/*
	 * If we are already on a sleep queue, then remove us from that
	 * sleep queue first. We have to do this to handle recursive
	 * sleeps.
	 */
	if (TD_ON_SLEEPQ(td))
		sleepq_remove(td, td->td_wchan);

	/* Sleeps on pause_wchan (see pause()) use SLEEPQ_PAUSE. */
	if (ident == &pause_wchan)
		flags = SLEEPQ_PAUSE;
	else
		flags = SLEEPQ_SLEEP;
	if (catch)
		flags |= SLEEPQ_INTERRUPTIBLE;
	if (priority & PBDRY)
		flags |= SLEEPQ_STOP_ON_BDRY;

	sleepq_lock(ident);
	CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)",
		td->td_tid, p->p_pid, td->td_name, wmesg, ident);

	if (lock == &Giant.lock_object)
		mtx_assert(&Giant, MA_OWNED);
	DROP_GIANT();
	/*
	 * Locks whose class lacks LC_SLEEPABLE are released here, while the
	 * sleep-queue lock is held; LC_SLEEPABLE locks are released further
	 * down, after the thread is on the sleep queue.
	 */
	if (lock != NULL && lock != &Giant.lock_object &&
		!(class->lc_flags & LC_SLEEPABLE)) {
		WITNESS_SAVE(lock, lock_witness);
		lock_state = class->lc_unlock(lock);
	} else
		/* GCC needs to follow the Yellow Brick Road */
		lock_state = -1;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling thread_suspend_check, as we could stop there,
	 * and a wakeup or a SIGCONT (or both) could occur while we were
	 * stopped without resuming us. Thus, we must be ready for sleep
	 * when cursig() is called. If the wakeup happens while we're
	 * stopped, then td will no longer be on a sleep queue upon
	 * return from cursig().
	 */
	sleepq_add(ident, lock, wmesg, flags, 0);
	if (timo)
		sleepq_set_timeout(ident, timo);
	/*
	 * For LC_SLEEPABLE locks the sleep-queue lock is released around the
	 * unlock call and retaken afterwards.
	 */
	if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
		sleepq_release(ident);
		WITNESS_SAVE(lock, lock_witness);
		lock_state = class->lc_unlock(lock);
		sleepq_lock(ident);
	}
	/* Pick the wait primitive matching the timeout/PCATCH combination. */
	if (timo && catch)
		rval = sleepq_timedwait_sig(ident, pri);
	else if (timo)
		rval = sleepq_timedwait(ident, pri);
	else if (catch)
		rval = sleepq_wait_sig(ident, pri);
	else {
		sleepq_wait(ident, pri);
		rval = 0;
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0, wmesg);
#endif
	PICKUP_GIANT();
	/* Retake the interlock with its saved state unless PDROP was given. */
	if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) {
		class->lc_lock(lock, lock_state);
		WITNESS_RESTORE(lock, lock_witness);
	}
	return (rval);
}

/*
 * Sleep on ident with a spin mutex as the interlock.
 *
 * mtx must be held and not recursed on entry (asserted).  It is released
 * before the thread is queued and reacquired before returning.  Returns 0
 * without sleeping while cold or when the scheduler is stopped; otherwise
 * returns the result of sleepq_timedwait() when timo is non-zero and 0 when
 * it is zero.
 */
int
msleep_spin(void *ident, struct mtx *mtx, const char *wmesg, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	WITNESS_SAVE_DECL(mtx);

	td = curthread;
	p = td->td_proc;
	KASSERT(mtx != NULL, ("sleeping without a mutex"));
	KASSERT(p != NULL, ("msleep1"));
	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));

	if (cold || SCHEDULER_STOPPED()) {
		/*
		 * During autoconfiguration, just return;
		 * don't run any other threads or panic below,
		 * in case this is the idle thread and already asleep.
		 * XXX: this used to do "s = splhigh(); splx(safepri);
		 * splx(s);" to give interrupts a chance, but there is
		 * no way to give interrupts a chance now.
		 */
		return (0);
	}

	sleepq_lock(ident);
	CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)",
		td->td_tid, p->p_pid, td->td_name, wmesg, ident);

	DROP_GIANT();
	mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
	WITNESS_SAVE(&mtx->lock_object, mtx);
	mtx_unlock_spin(mtx);

	/*
	 * We put ourselves on the sleep queue and start our timeout.
	 */
	sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
	if (timo)
		sleepq_set_timeout(ident, timo);

	/*
	 * Can't call ktrace with any spin locks held so it can lock the
	 * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold
	 * any spin lock. Thus, we have to drop the sleepq spin lock while
	 * we handle those requests. This is safe since we have placed our
	 * thread on the sleep queue already.
	 */
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW)) {
		sleepq_release(ident);
		ktrcsw(1, 0, wmesg);
		sleepq_lock(ident);
	}
#endif
#ifdef WITNESS
	sleepq_release(ident);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"",
		wmesg);
	sleepq_lock(ident);
#endif
	if (timo)
		rval = sleepq_timedwait(ident, 0);
	else {
		sleepq_wait(ident, 0);
		rval = 0;
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0, wmesg);
#endif
	PICKUP_GIANT();
	mtx_lock_spin(mtx);
	WITNESS_RESTORE(&mtx->lock_object, mtx);
	return (rval);
}

/*
 * pause() delays the calling thread by the given number of system ticks.
 * During cold bootup, pause() uses the DELAY() function instead of
 * the tsleep() function to do the waiting. The "timo" argument must be
 * greater than or equal to zero. A "timo" value of zero is equivalent
 * to a "timo" value of one.
 */
int
pause(const char *wmesg, int timo)
{
	KASSERT(timo >= 0, ("pause: timo must be >= 0"));

	/* silently convert invalid timeouts */
	if (timo < 1)
		timo = 1;

	if (cold) {
		/*
		 * We delay one HZ at a time to avoid overflowing the
		 * system specific DELAY() function(s):
		 */
		while (timo >= hz) {
			DELAY(1000000);
			timo -= hz;
		}
		/* Remaining ticks, scaled by the global `tick'. */
		if (timo > 0)
			DELAY(timo * tick);
		return (0);
	}
	/* Sleeping on pause_wchan makes _sleep() select SLEEPQ_PAUSE. */
	return (tsleep(&pause_wchan, 0, wmesg, timo));
}

/*
 * Make all threads sleeping on the specified identifier runnable.
 */
void
wakeup(void *ident)
{
	int wakeup_swapper;

	sleepq_lock(ident);
	wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(ident);
	/* kick_proc0() is called only after the sleep-queue lock is dropped. */
	if (wakeup_swapper) {
		KASSERT(ident != &proc0,
			("wakeup and wakeup_swapper and proc0"));
		kick_proc0();
	}
}

/*
 * Make a thread sleeping on the specified identifier runnable.
 * May wake more than one thread if a target thread is currently
 * swapped out.
*/
void
wakeup_one(void *ident)
{
	int wakeup_swapper;

	sleepq_lock(ident);
	wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(ident);
	/* kick_proc0() is called only after the sleep-queue lock is dropped. */
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Called by mi_switch() when kdb_active is set: drop curthread's thread
 * lock, print a backtrace and re-enter the debugger.  Panics if
 * kdb_reenter() returns.
 */
static void
kdb_switch(void)
{
	thread_unlock(curthread);
	kdb_backtrace();
	kdb_reenter();
	panic("%s: did not reenter debugger", __func__);
}

/*
 * The machine independent parts of context switching.
 */
void
mi_switch(int flags, struct thread *newtd)
{
	uint64_t runtime, new_switchtime;
	struct thread *td;
	struct proc *p;

	td = curthread; /* XXX */
	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
	p = td->td_proc; /* XXX */
	KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
#ifdef INVARIANTS
	if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
		mtx_assert(&Giant, MA_NOTOWNED);
#endif
	KASSERT(td->td_critnest == 1 || panicstr,
		("mi_switch: switch in a critical section"));
	KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
		("mi_switch: switch must be voluntary or involuntary"));
	KASSERT(newtd != curthread, ("mi_switch: preempting back to ourself"));

	/*
	 * Don't perform context switches from the debugger.
	 */
	if (kdb_active)
		kdb_switch();
	if (SCHEDULER_STOPPED())
		return;
	/*
	 * Account the switch as voluntary or involuntary; a voluntary switch
	 * also records the current tick, which should_yield() compares against.
	 */
	if (flags & SW_VOL) {
		td->td_ru.ru_nvcsw++;
		td->td_swvoltick = ticks;
	} else
		td->td_ru.ru_nivcsw++;
#ifdef SCHED_STATS
	SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
#endif
	/*
	 * Compute the amount of time during which the current
	 * thread was running, and add that to its total so far.
	 */
	new_switchtime = cpu_ticks();
	runtime = new_switchtime - PCPU_GET(switchtime);
	td->td_runtime += runtime;
	td->td_incruntime += runtime;
	PCPU_SET(switchtime, new_switchtime);
	td->td_generation++; /* bump preempt-detect counter */
	PCPU_INC(cnt.v_swtch);
	PCPU_SET(switchticks, ticks);
	CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)",
		td->td_tid, td->td_sched, p->p_pid, td->td_name);
#if (KTR_COMPILE & KTR_SCHED) != 0
	if (TD_IS_IDLETHREAD(td))
		KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
			"prio:%d", td->td_priority);
	else
		KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td),
			"prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg,
			"lockname:\"%s\"", td->td_lockname);
#endif
	SDT_PROBE0(sched, , , preempt);
#ifdef XEN
	PT_UPDATES_FLUSH();
#endif
	sched_switch(td, newtd, flags);
	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
		"prio:%d", td->td_priority);

	CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
		td->td_tid, td->td_sched, p->p_pid, td->td_name);

	/*
	 * If the last thread was exiting, finish cleaning it up.
	 */
	if ((td = PCPU_GET(deadthread))) {
		PCPU_SET(deadthread, NULL);
		thread_stash(td);
	}
}

/*
 * Change thread state to be runnable, placing it on the run queue if
 * it is in memory. If it is swapped out, return true so our caller
 * will know to awaken the swapper.
 */
int
setrunnable(struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(td->td_proc->p_state != PRS_ZOMBIE,
		("setrunnable: pid %d is a zombie", td->td_proc->p_pid));
	switch (td->td_state) {
	case TDS_RUNNING:
	case TDS_RUNQ:
		return (0);
	case TDS_INHIBITED:
		/*
		 * If we are only inhibited because we are swapped out
		 * then arrange to swap in this process. Otherwise just return.
		 */
		if (td->td_inhibitors != TDI_SWAPPED)
			return (0);
		/* FALLTHROUGH */
	case TDS_CAN_RUN:
		break;
	default:
		printf("state is 0x%x", td->td_state);
		panic("setrunnable(2)");
	}
	/*
	 * Not in memory: request a swap-in once and report 1; if a request
	 * is already pending, fall through and return 0.
	 */
	if ((td->td_flags & TDF_INMEM) == 0) {
		if ((td->td_flags & TDF_SWAPINREQ) == 0) {
			td->td_flags |= TDF_SWAPINREQ;
			return (1);
		}
	} else
		sched_wakeup(td);
	return (0);
}

/*
 * Compute a tenex style load average of a quantity on
 * 1, 5 and 15 minute intervals.
 */
static void
loadav(void *arg)
{
	int i, nrun;
	struct loadavg *avg;

	nrun = sched_load();
	avg = &averunnable;

	/* Fixed-point decay of each average toward nrun, scaled by FSCALE. */
	for (i = 0; i < 3; i++)
		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
			nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;

	/*
	 * Schedule the next update to occur after 5 seconds, but add a
	 * random variation to avoid synchronisation with processes that
	 * run at regular intervals.
	 */
	callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2 + 1)),
		loadav, NULL);
}

/* ARGSUSED */
static void
synch_setup(void *dummy)
{
	callout_init(&loadav_callout, CALLOUT_MPSAFE);

	/* Kick off timeout driven events by calling first time. */
	loadav(NULL);
}

/*
 * Return non-zero when at least hogticks ticks have passed since
 * curthread's td_swvoltick.  The subtraction is done on u_int values.
 */
int
should_yield(void)
{

	return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks);
}

/* Yield at user priority if should_yield() says so. */
void
maybe_yield(void)
{

	if (should_yield())
		kern_yield(PRI_USER);
}

/*
 * Voluntarily give up the CPU.
 *
 * prio == PRI_USER is replaced by the thread's td_user_pri.  The resulting
 * value is applied with sched_prio() only when it is >= 0; a negative prio
 * leaves the priority unchanged.  Giant is dropped across the switch and
 * picked up again afterwards.
 */
void
kern_yield(int prio)
{
	struct thread *td;

	td = curthread;
	DROP_GIANT();
	thread_lock(td);
	if (prio == PRI_USER)
		prio = td->td_user_pri;
	if (prio >= 0)
		sched_prio(td, prio);
	mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
	thread_unlock(td);
	PICKUP_GIANT();
}

/*
 * General purpose yield system call.
*/
/*
 * sys_yield: handler for the yield system call.
 *
 * With td's thread lock held, a thread whose base priority class is
 * PRI_TIMESHARE has its priority set to PRI_MAX_TIMESHARE, and then the
 * thread performs a voluntary context switch flagged as a relinquish.
 * Threads in any other priority class switch without a priority change.
 *
 * Parameters:
 *	td	- the thread being switched away from; callers presumably pass
 *		  the calling thread (TODO confirm against the syscall entry path).
 *	uap	- system call arguments; not referenced by this function.
 *
 * Returns 0 unconditionally; td->td_retval[0] is also set to 0.
 */
int
sys_yield(struct thread *td, struct yield_args *uap)
{
	/* The priority change and the switch both happen under td's thread lock. */
	thread_lock(td);
	/*
	 * NOTE(review): PRI_MAX_TIMESHARE is presumably the least favoured
	 * timeshare priority, so other timeshare threads get to run first --
	 * confirm against the priority definitions.
	 */
	if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
		sched_prio(td, PRI_MAX_TIMESHARE);
	/*
	 * SW_VOL marks the switch as voluntary; mi_switch() in this file
	 * counts such switches in td_ru.ru_nvcsw and records td_swvoltick.
	 * No successor thread is named (newtd == NULL).
	 */
	mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
	thread_unlock(td);
	/* Report success through the syscall return-value slot as well. */
	td->td_retval[0] = 0;
	return (0);
}
Index: stable/9/sys/kern/subr_devstat.c =================================================================== --- stable/9/sys/kern/subr_devstat.c (revision 273911) +++ stable/9/sys/kern/subr_devstat.c (revision 273912) @@ -1,577 +1,577 @@ /*- * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include SDT_PROVIDER_DEFINE(io); SDT_PROBE_DEFINE2(io, , , start, "struct bio *", "struct devstat *"); SDT_PROBE_DEFINE2(io, , , done, "struct bio *", "struct devstat *"); SDT_PROBE_DEFINE2(io, , , wait__start, "struct bio *", "struct devstat *"); SDT_PROBE_DEFINE2(io, , , wait__done, "struct bio *", "struct devstat *"); #define DTRACE_DEVSTAT_START() SDT_PROBE2(io, , , start, NULL, ds) #define DTRACE_DEVSTAT_BIO_START() SDT_PROBE2(io, , , start, bp, ds) #define DTRACE_DEVSTAT_DONE() SDT_PROBE2(io, , , done, NULL, ds) #define DTRACE_DEVSTAT_BIO_DONE() SDT_PROBE2(io, , , done, bp, ds) #define DTRACE_DEVSTAT_WAIT_START() SDT_PROBE2(io, , , wait__start, NULL, ds) #define DTRACE_DEVSTAT_WAIT_DONE() SDT_PROBE2(io, , , wait__done, NULL, ds) static int devstat_num_devs; static long devstat_generation = 1; static int devstat_version = DEVSTAT_VERSION; static int devstat_current_devnumber; static struct mtx devstat_mutex; MTX_SYSINIT(devstat_mutex, &devstat_mutex, "devstat", MTX_DEF); static struct devstatlist device_statq = STAILQ_HEAD_INITIALIZER(device_statq); static struct devstat *devstat_alloc(void); static void devstat_free(struct devstat *); static void devstat_add_entry(struct devstat *ds, const void *dev_name, int unit_number, uint32_t block_size, devstat_support_flags flags, devstat_type_flags device_type, devstat_priority priority); /* * Allocate a devstat and initialize it */ struct devstat * devstat_new_entry(const void *dev_name, int unit_number, uint32_t block_size, devstat_support_flags flags, devstat_type_flags device_type, devstat_priority priority) { struct devstat *ds; mtx_assert(&devstat_mutex, MA_NOTOWNED); ds = devstat_alloc(); mtx_lock(&devstat_mutex); if (unit_number == -1) { ds->id = dev_name; binuptime(&ds->creation_time); devstat_generation++; } else { 
devstat_add_entry(ds, dev_name, unit_number, block_size, flags, device_type, priority); } mtx_unlock(&devstat_mutex); return (ds); } /* * Take a malloced and zeroed devstat structure given to us, fill it in * and add it to the queue of devices. */ static void devstat_add_entry(struct devstat *ds, const void *dev_name, int unit_number, uint32_t block_size, devstat_support_flags flags, devstat_type_flags device_type, devstat_priority priority) { struct devstatlist *devstat_head; struct devstat *ds_tmp; mtx_assert(&devstat_mutex, MA_OWNED); devstat_num_devs++; devstat_head = &device_statq; /* * Priority sort. Each driver passes in its priority when it adds * its devstat entry. Drivers are sorted first by priority, and * then by probe order. * * For the first device, we just insert it, since the priority * doesn't really matter yet. Subsequent devices are inserted into * the list using the order outlined above. */ if (devstat_num_devs == 1) STAILQ_INSERT_TAIL(devstat_head, ds, dev_links); else { STAILQ_FOREACH(ds_tmp, devstat_head, dev_links) { struct devstat *ds_next; ds_next = STAILQ_NEXT(ds_tmp, dev_links); /* * If we find a break between higher and lower * priority items, and if this item fits in the * break, insert it. This also applies if the * "lower priority item" is the end of the list. */ if ((priority <= ds_tmp->priority) && ((ds_next == NULL) || (priority > ds_next->priority))) { STAILQ_INSERT_AFTER(devstat_head, ds_tmp, ds, dev_links); break; } else if (priority > ds_tmp->priority) { /* * If this is the case, we should be able * to insert ourselves at the head of the * list. If we can't, something is wrong. */ if (ds_tmp == STAILQ_FIRST(devstat_head)) { STAILQ_INSERT_HEAD(devstat_head, ds, dev_links); break; } else { STAILQ_INSERT_TAIL(devstat_head, ds, dev_links); printf("devstat_add_entry: HELP! 
" "sorting problem detected " "for name %p unit %d\n", dev_name, unit_number); break; } } } } ds->device_number = devstat_current_devnumber++; ds->unit_number = unit_number; strlcpy(ds->device_name, dev_name, DEVSTAT_NAME_LEN); ds->block_size = block_size; ds->flags = flags; ds->device_type = device_type; ds->priority = priority; binuptime(&ds->creation_time); devstat_generation++; } /* * Remove a devstat structure from the list of devices. */ void devstat_remove_entry(struct devstat *ds) { struct devstatlist *devstat_head; mtx_assert(&devstat_mutex, MA_NOTOWNED); if (ds == NULL) return; mtx_lock(&devstat_mutex); devstat_head = &device_statq; /* Remove this entry from the devstat queue */ atomic_add_acq_int(&ds->sequence1, 1); if (ds->id == NULL) { devstat_num_devs--; STAILQ_REMOVE(devstat_head, ds, devstat, dev_links); } devstat_free(ds); devstat_generation++; mtx_unlock(&devstat_mutex); } /* * Record a transaction start. * * See comments for devstat_end_transaction(). Ordering is very important * here. */ void devstat_start_transaction(struct devstat *ds, struct bintime *now) { mtx_assert(&devstat_mutex, MA_NOTOWNED); /* sanity check */ if (ds == NULL) return; atomic_add_acq_int(&ds->sequence1, 1); /* * We only want to set the start time when we are going from idle * to busy. The start time is really the start of the latest busy * period. */ if (ds->start_count == ds->end_count) { if (now != NULL) ds->busy_from = *now; else binuptime(&ds->busy_from); } ds->start_count++; atomic_add_rel_int(&ds->sequence0, 1); DTRACE_DEVSTAT_START(); } void devstat_start_transaction_bio(struct devstat *ds, struct bio *bp) { mtx_assert(&devstat_mutex, MA_NOTOWNED); /* sanity check */ if (ds == NULL) return; binuptime(&bp->bio_t0); devstat_start_transaction(ds, &bp->bio_t0); DTRACE_DEVSTAT_BIO_START(); } /* * Record the ending of a transaction, and incrment the various counters. * * Ordering in this function, and in devstat_start_transaction() is VERY * important. 
The idea here is to run without locks, so we are very * careful to only modify some fields on the way "down" (i.e. at * transaction start) and some fields on the way "up" (i.e. at transaction * completion). One exception is busy_from, which we only modify in * devstat_start_transaction() when there are no outstanding transactions, * and thus it can't be modified in devstat_end_transaction() * simultaneously. * * The sequence0 and sequence1 fields are provided to enable an application * spying on the structures with mmap(2) to tell when a structure is in a * consistent state or not. * * For this to work 100% reliably, it is important that the two fields * are at opposite ends of the structure and that they are incremented * in the opposite order of how a memcpy(3) in userland would copy them. * We assume that the copying happens front to back, but there is actually * no way short of writing your own memcpy(3) replacement to guarantee * this will be the case. * * In addition to this, being a kind of locks, they must be updated with * atomic instructions using appropriate memory barriers. */ void devstat_end_transaction(struct devstat *ds, uint32_t bytes, devstat_tag_type tag_type, devstat_trans_flags flags, struct bintime *now, struct bintime *then) { struct bintime dt, lnow; /* sanity check */ if (ds == NULL) return; if (now == NULL) { now = &lnow; binuptime(now); } atomic_add_acq_int(&ds->sequence1, 1); /* Update byte and operations counts */ ds->bytes[flags] += bytes; ds->operations[flags]++; /* * Keep a count of the various tag types sent. 
*/ if ((ds->flags & DEVSTAT_NO_ORDERED_TAGS) == 0 && tag_type != DEVSTAT_TAG_NONE) ds->tag_types[tag_type]++; if (then != NULL) { /* Update duration of operations */ dt = *now; bintime_sub(&dt, then); bintime_add(&ds->duration[flags], &dt); } /* Accumulate busy time */ dt = *now; bintime_sub(&dt, &ds->busy_from); bintime_add(&ds->busy_time, &dt); ds->busy_from = *now; ds->end_count++; atomic_add_rel_int(&ds->sequence0, 1); DTRACE_DEVSTAT_DONE(); } void devstat_end_transaction_bio(struct devstat *ds, struct bio *bp) { devstat_end_transaction_bio_bt(ds, bp, NULL); } void devstat_end_transaction_bio_bt(struct devstat *ds, struct bio *bp, struct bintime *now) { devstat_trans_flags flg; /* sanity check */ if (ds == NULL) return; if (bp->bio_cmd == BIO_DELETE) flg = DEVSTAT_FREE; else if (bp->bio_cmd == BIO_READ) flg = DEVSTAT_READ; else if (bp->bio_cmd == BIO_WRITE) flg = DEVSTAT_WRITE; else flg = DEVSTAT_NO_DATA; devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid, DEVSTAT_TAG_SIMPLE, flg, now, &bp->bio_t0); DTRACE_DEVSTAT_BIO_DONE(); } /* * This is the sysctl handler for the devstat package. The data pushed out * on the kern.devstat.all sysctl variable consists of the current devstat * generation number, and then an array of devstat structures, one for each * device in the system. * * This is more cryptic that obvious, but basically we neither can nor * want to hold the devstat_mutex for any amount of time, so we grab it * only when we need to and keep an eye on devstat_generation all the time. */ static int sysctl_devstat(SYSCTL_HANDLER_ARGS) { int error; long mygen; struct devstat *nds; mtx_assert(&devstat_mutex, MA_NOTOWNED); /* * XXX devstat_generation should really be "volatile" but that * XXX freaks out the sysctl macro below. The places where we * XXX change it and inspect it are bracketed in the mutex which * XXX guarantees us proper write barriers. 
I don't belive the * XXX compiler is allowed to optimize mygen away across calls * XXX to other functions, so the following is belived to be safe. */ mygen = devstat_generation; error = SYSCTL_OUT(req, &mygen, sizeof(mygen)); if (devstat_num_devs == 0) return(0); if (error != 0) return (error); mtx_lock(&devstat_mutex); nds = STAILQ_FIRST(&device_statq); if (mygen != devstat_generation) error = EBUSY; mtx_unlock(&devstat_mutex); if (error != 0) return (error); for (;nds != NULL;) { error = SYSCTL_OUT(req, nds, sizeof(struct devstat)); if (error != 0) return (error); mtx_lock(&devstat_mutex); if (mygen != devstat_generation) error = EBUSY; else nds = STAILQ_NEXT(nds, dev_links); mtx_unlock(&devstat_mutex); if (error != 0) return (error); } return(error); } /* * Sysctl entries for devstat. The first one is a node that all the rest * hang off of. */ static SYSCTL_NODE(_kern, OID_AUTO, devstat, CTLFLAG_RD, NULL, "Device Statistics"); SYSCTL_PROC(_kern_devstat, OID_AUTO, all, CTLFLAG_RD|CTLTYPE_OPAQUE, NULL, 0, sysctl_devstat, "S,devstat", "All devices in the devstat list"); /* * Export the number of devices in the system so that userland utilities * can determine how much memory to allocate to hold all the devices. */ SYSCTL_INT(_kern_devstat, OID_AUTO, numdevs, CTLFLAG_RD, &devstat_num_devs, 0, "Number of devices in the devstat list"); SYSCTL_LONG(_kern_devstat, OID_AUTO, generation, CTLFLAG_RD, &devstat_generation, 0, "Devstat list generation"); SYSCTL_INT(_kern_devstat, OID_AUTO, version, CTLFLAG_RD, &devstat_version, 0, "Devstat list version number"); /* * Allocator for struct devstat structures. We sub-allocate these from pages * which we get from malloc. 
These pages are exported for mmap(2)'ing through * a miniature device driver */ #define statsperpage (PAGE_SIZE / sizeof(struct devstat)) static d_mmap_t devstat_mmap; static struct cdevsw devstat_cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT, .d_mmap = devstat_mmap, .d_name = "devstat", }; struct statspage { TAILQ_ENTRY(statspage) list; struct devstat *stat; u_int nfree; }; static TAILQ_HEAD(, statspage) pagelist = TAILQ_HEAD_INITIALIZER(pagelist); static MALLOC_DEFINE(M_DEVSTAT, "devstat", "Device statistics"); static int devstat_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { struct statspage *spp; if (nprot != VM_PROT_READ) return (-1); TAILQ_FOREACH(spp, &pagelist, list) { if (offset == 0) { *paddr = vtophys(spp->stat); return (0); } offset -= PAGE_SIZE; } return (-1); } static struct devstat * devstat_alloc(void) { struct devstat *dsp; struct statspage *spp, *spp2; u_int u; static int once; mtx_assert(&devstat_mutex, MA_NOTOWNED); if (!once) { make_dev_credf(MAKEDEV_ETERNAL | MAKEDEV_CHECKNAME, &devstat_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0400, DEVSTAT_DEVICE_NAME); once = 1; } spp2 = NULL; mtx_lock(&devstat_mutex); for (;;) { TAILQ_FOREACH(spp, &pagelist, list) { if (spp->nfree > 0) break; } if (spp != NULL) break; mtx_unlock(&devstat_mutex); spp2 = malloc(sizeof *spp, M_DEVSTAT, M_ZERO | M_WAITOK); spp2->stat = malloc(PAGE_SIZE, M_DEVSTAT, M_ZERO | M_WAITOK); spp2->nfree = statsperpage; /* * If free statspages were added while the lock was released * just reuse them. */ mtx_lock(&devstat_mutex); TAILQ_FOREACH(spp, &pagelist, list) if (spp->nfree > 0) break; if (spp == NULL) { spp = spp2; /* * It would make more sense to add the new page at the * head but the order on the list determine the * sequence of the mapping so we can't do that. 
*/ TAILQ_INSERT_TAIL(&pagelist, spp, list); } else break; } dsp = spp->stat; for (u = 0; u < statsperpage; u++) { if (dsp->allocated == 0) break; dsp++; } spp->nfree--; dsp->allocated = 1; mtx_unlock(&devstat_mutex); if (spp2 != NULL && spp2 != spp) { free(spp2->stat, M_DEVSTAT); free(spp2, M_DEVSTAT); } return (dsp); } static void devstat_free(struct devstat *dsp) { struct statspage *spp; mtx_assert(&devstat_mutex, MA_OWNED); bzero(dsp, sizeof *dsp); TAILQ_FOREACH(spp, &pagelist, list) { if (dsp >= spp->stat && dsp < (spp->stat + statsperpage)) { spp->nfree++; return; } } } SYSCTL_INT(_debug_sizeof, OID_AUTO, devstat, CTLFLAG_RD, - NULL, sizeof(struct devstat), "sizeof(struct devstat)"); + SYSCTL_NULL_INT_PTR, sizeof(struct devstat), "sizeof(struct devstat)"); Index: stable/9/sys/kern/subr_kdb.c =================================================================== --- stable/9/sys/kern/subr_kdb.c (revision 273911) +++ stable/9/sys/kern/subr_kdb.c (revision 273912) @@ -1,672 +1,672 @@ /*- * Copyright (c) 2004 The FreeBSD Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif int kdb_active = 0; static void *kdb_jmpbufp = NULL; struct kdb_dbbe *kdb_dbbe = NULL; static struct pcb kdb_pcb; struct pcb *kdb_thrctx = NULL; struct thread *kdb_thread = NULL; struct trapframe *kdb_frame = NULL; #ifdef BREAK_TO_DEBUGGER #define KDB_BREAK_TO_DEBUGGER 1 #else #define KDB_BREAK_TO_DEBUGGER 0 #endif #ifdef ALT_BREAK_TO_DEBUGGER #define KDB_ALT_BREAK_TO_DEBUGGER 1 #else #define KDB_ALT_BREAK_TO_DEBUGGER 0 #endif static int kdb_break_to_debugger = KDB_BREAK_TO_DEBUGGER; static int kdb_alt_break_to_debugger = KDB_ALT_BREAK_TO_DEBUGGER; KDB_BACKEND(null, NULL, NULL, NULL, NULL); SET_DECLARE(kdb_dbbe_set, struct kdb_dbbe); static int kdb_sysctl_available(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_current(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_enter(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_panic(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_trap(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS); static SYSCTL_NODE(_debug, OID_AUTO, kdb, CTLFLAG_RW, NULL, "KDB nodes"); SYSCTL_PROC(_debug_kdb, OID_AUTO, available, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, kdb_sysctl_available, "A", "list of available KDB backends"); SYSCTL_PROC(_debug_kdb, OID_AUTO, current, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, 
kdb_sysctl_current, "A", "currently selected KDB backend"); SYSCTL_PROC(_debug_kdb, OID_AUTO, enter, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, kdb_sysctl_enter, "I", "set to enter the debugger"); SYSCTL_PROC(_debug_kdb, OID_AUTO, panic, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, kdb_sysctl_panic, "I", "set to panic the kernel"); SYSCTL_PROC(_debug_kdb, OID_AUTO, trap, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, kdb_sysctl_trap, "I", "set to cause a page fault via data access"); SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, kdb_sysctl_trap_code, "I", "set to cause a page fault via code access"); -SYSCTL_INT(_debug_kdb, OID_AUTO, break_to_debugger, CTLTYPE_INT | CTLFLAG_RW | +SYSCTL_INT(_debug_kdb, OID_AUTO, break_to_debugger, CTLFLAG_RW | CTLFLAG_TUN, &kdb_break_to_debugger, 0, "Enable break to debugger"); TUNABLE_INT("debug.kdb.break_to_debugger", &kdb_break_to_debugger); -SYSCTL_INT(_debug_kdb, OID_AUTO, alt_break_to_debugger, CTLTYPE_INT | +SYSCTL_INT(_debug_kdb, OID_AUTO, alt_break_to_debugger, CTLFLAG_RW | CTLFLAG_TUN, &kdb_alt_break_to_debugger, 0, "Enable alternative break to debugger"); TUNABLE_INT("debug.kdb.alt_break_to_debugger", &kdb_alt_break_to_debugger); /* * Flag to indicate to debuggers why the debugger was entered. 
*/ const char * volatile kdb_why = KDB_WHY_UNSET; static int kdb_sysctl_available(SYSCTL_HANDLER_ARGS) { struct kdb_dbbe **iter; struct sbuf sbuf; int error; sbuf_new_for_sysctl(&sbuf, NULL, 64, req); SET_FOREACH(iter, kdb_dbbe_set) { if ((*iter)->dbbe_active == 0) sbuf_printf(&sbuf, "%s ", (*iter)->dbbe_name); } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } static int kdb_sysctl_current(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; if (kdb_dbbe != NULL) strlcpy(buf, kdb_dbbe->dbbe_name, sizeof(buf)); else *buf = '\0'; error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); if (kdb_active) return (EBUSY); return (kdb_dbbe_select(buf)); } static int kdb_sysctl_enter(SYSCTL_HANDLER_ARGS) { int error, i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); if (kdb_active) return (EBUSY); kdb_enter(KDB_WHY_SYSCTL, "sysctl debug.kdb.enter"); return (0); } static int kdb_sysctl_panic(SYSCTL_HANDLER_ARGS) { int error, i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); panic("kdb_sysctl_panic"); return (0); } static int kdb_sysctl_trap(SYSCTL_HANDLER_ARGS) { int error, i; int *addr = (int *)0x10; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); return (*addr); } static int kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS) { int error, i; void (*fp)(u_int, u_int, u_int) = (void *)0xdeadc0de; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); (*fp)(0x11111111, 0x22222222, 0x33333333); return (0); } void 
kdb_panic(const char *msg) { printf("KDB: panic\n"); panic("%s", msg); } void kdb_reboot(void) { printf("KDB: reboot requested\n"); shutdown_nice(0); } /* * Solaris implements a new BREAK which is initiated by a character sequence * CR ~ ^b which is similar to a familiar pattern used on Sun servers by the * Remote Console. * * Note that this function may be called from almost anywhere, with interrupts * disabled and with unknown locks held, so it must not access data other than * its arguments. Its up to the caller to ensure that the state variable is * consistent. */ #define KEY_CR 13 /* CR '\r' */ #define KEY_TILDE 126 /* ~ */ #define KEY_CRTLB 2 /* ^B */ #define KEY_CRTLP 16 /* ^P */ #define KEY_CRTLR 18 /* ^R */ /* States of th KDB "alternate break sequence" detecting state machine. */ enum { KDB_ALT_BREAK_SEEN_NONE, KDB_ALT_BREAK_SEEN_CR, KDB_ALT_BREAK_SEEN_CR_TILDE, }; int kdb_break(void) { if (!kdb_break_to_debugger) return (0); kdb_enter(KDB_WHY_BREAK, "Break to debugger"); return (KDB_REQ_DEBUGGER); } static int kdb_alt_break_state(int key, int *state) { int brk; /* All states transition to KDB_ALT_BREAK_SEEN_CR on a CR. 
*/ if (key == KEY_CR) { *state = KDB_ALT_BREAK_SEEN_CR; return (0); } brk = 0; switch (*state) { case KDB_ALT_BREAK_SEEN_CR: *state = KDB_ALT_BREAK_SEEN_NONE; if (key == KEY_TILDE) *state = KDB_ALT_BREAK_SEEN_CR_TILDE; break; case KDB_ALT_BREAK_SEEN_CR_TILDE: *state = KDB_ALT_BREAK_SEEN_NONE; if (key == KEY_CRTLB) brk = KDB_REQ_DEBUGGER; else if (key == KEY_CRTLP) brk = KDB_REQ_PANIC; else if (key == KEY_CRTLR) brk = KDB_REQ_REBOOT; break; case KDB_ALT_BREAK_SEEN_NONE: default: *state = KDB_ALT_BREAK_SEEN_NONE; break; } return (brk); } static int kdb_alt_break_internal(int key, int *state, int force_gdb) { int brk; if (!kdb_alt_break_to_debugger) return (0); brk = kdb_alt_break_state(key, state); switch (brk) { case KDB_REQ_DEBUGGER: if (force_gdb) kdb_dbbe_select("gdb"); kdb_enter(KDB_WHY_BREAK, "Break to debugger"); break; case KDB_REQ_PANIC: if (force_gdb) kdb_dbbe_select("gdb"); kdb_panic("Panic sequence on console"); break; case KDB_REQ_REBOOT: kdb_reboot(); break; } return (0); } int kdb_alt_break(int key, int *state) { return (kdb_alt_break_internal(key, state, 0)); } /* * This variation on kdb_alt_break() is used only by dcons, which has its own * configuration flag to force GDB use regardless of the global KDB * configuration. */ int kdb_alt_break_gdb(int key, int *state) { return (kdb_alt_break_internal(key, state, 1)); } /* * Print a backtrace of the calling thread. The backtrace is generated by * the selected debugger, provided it supports backtraces. If no debugger * is selected or the current debugger does not support backtraces, this * function silently returns. 
*/ void kdb_backtrace(void) { if (kdb_dbbe != NULL && kdb_dbbe->dbbe_trace != NULL) { printf("KDB: stack backtrace:\n"); kdb_dbbe->dbbe_trace(); } #ifdef STACK else { struct stack st; printf("KDB: stack backtrace:\n"); stack_zero(&st); stack_save(&st); stack_print_ddb(&st); } #endif } /* * Similar to kdb_backtrace() except that it prints a backtrace of an * arbitrary thread rather than the calling thread. */ void kdb_backtrace_thread(struct thread *td) { if (kdb_dbbe != NULL && kdb_dbbe->dbbe_trace_thread != NULL) { printf("KDB: stack backtrace of thread %d:\n", td->td_tid); kdb_dbbe->dbbe_trace_thread(td); } #ifdef STACK else { struct stack st; printf("KDB: stack backtrace of thread %d:\n", td->td_tid); stack_zero(&st); stack_save_td(&st, td); stack_print_ddb(&st); } #endif } /* * Set/change the current backend. */ int kdb_dbbe_select(const char *name) { struct kdb_dbbe *be, **iter; SET_FOREACH(iter, kdb_dbbe_set) { be = *iter; if (be->dbbe_active == 0 && strcmp(be->dbbe_name, name) == 0) { kdb_dbbe = be; return (0); } } return (EINVAL); } /* * Enter the currently selected debugger. If a message has been provided, * it is printed first. If the debugger does not support the enter method, * it is entered by using breakpoint(), which enters the debugger through * kdb_trap(). The 'why' argument will contain a more mechanically usable * string than 'msg', and is relied upon by DDB scripting to identify the * reason for entering the debugger so that the right script can be run. */ void kdb_enter(const char *why, const char *msg) { if (kdb_dbbe != NULL && kdb_active == 0) { if (msg != NULL) printf("KDB: enter: %s\n", msg); kdb_why = why; breakpoint(); kdb_why = KDB_WHY_UNSET; } } /* * Initialize the kernel debugger interface. */ void kdb_init(void) { struct kdb_dbbe *be, **iter; int cur_pri, pri; kdb_active = 0; kdb_dbbe = NULL; cur_pri = -1; SET_FOREACH(iter, kdb_dbbe_set) { be = *iter; pri = (be->dbbe_init != NULL) ? be->dbbe_init() : -1; be->dbbe_active = (pri >= 0) ? 
0 : -1; if (pri > cur_pri) { cur_pri = pri; kdb_dbbe = be; } } if (kdb_dbbe != NULL) { printf("KDB: debugger backends:"); SET_FOREACH(iter, kdb_dbbe_set) { be = *iter; if (be->dbbe_active == 0) printf(" %s", be->dbbe_name); } printf("\n"); printf("KDB: current backend: %s\n", kdb_dbbe->dbbe_name); } } /* * Handle contexts. */ void * kdb_jmpbuf(jmp_buf new) { void *old; old = kdb_jmpbufp; kdb_jmpbufp = new; return (old); } void kdb_reenter(void) { if (!kdb_active || kdb_jmpbufp == NULL) return; printf("KDB: reentering\n"); kdb_backtrace(); longjmp(kdb_jmpbufp, 1); /* NOTREACHED */ } /* * Thread related support functions. */ struct pcb * kdb_thr_ctx(struct thread *thr) { #if defined(SMP) && defined(KDB_STOPPEDPCB) struct pcpu *pc; #endif if (thr == curthread) return (&kdb_pcb); #if defined(SMP) && defined(KDB_STOPPEDPCB) STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc->pc_curthread == thr && CPU_ISSET(pc->pc_cpuid, &stopped_cpus)) return (KDB_STOPPEDPCB(pc)); } #endif return (thr->td_pcb); } struct thread * kdb_thr_first(void) { struct proc *p; struct thread *thr; p = LIST_FIRST(&allproc); while (p != NULL) { if (p->p_flag & P_INMEM) { thr = FIRST_THREAD_IN_PROC(p); if (thr != NULL) return (thr); } p = LIST_NEXT(p, p_list); } return (NULL); } struct thread * kdb_thr_from_pid(pid_t pid) { struct proc *p; p = LIST_FIRST(&allproc); while (p != NULL) { if (p->p_flag & P_INMEM && p->p_pid == pid) return (FIRST_THREAD_IN_PROC(p)); p = LIST_NEXT(p, p_list); } return (NULL); } struct thread * kdb_thr_lookup(lwpid_t tid) { struct thread *thr; thr = kdb_thr_first(); while (thr != NULL && thr->td_tid != tid) thr = kdb_thr_next(thr); return (thr); } struct thread * kdb_thr_next(struct thread *thr) { struct proc *p; p = thr->td_proc; thr = TAILQ_NEXT(thr, td_plist); do { if (thr != NULL) return (thr); p = LIST_NEXT(p, p_list); if (p != NULL && (p->p_flag & P_INMEM)) thr = FIRST_THREAD_IN_PROC(p); } while (p != NULL); return (NULL); } int kdb_thr_select(struct thread *thr) { if 
(thr == NULL) return (EINVAL); kdb_thread = thr; kdb_thrctx = kdb_thr_ctx(thr); return (0); } /* * Enter the debugger due to a trap. */ int kdb_trap(int type, int code, struct trapframe *tf) { #ifdef SMP cpuset_t other_cpus; #endif struct kdb_dbbe *be; register_t intr; int handled; #ifdef SMP int did_stop_cpus; #endif be = kdb_dbbe; if (be == NULL || be->dbbe_trap == NULL) return (0); /* We reenter the debugger through kdb_reenter(). */ if (kdb_active) return (0); intr = intr_disable(); #ifdef SMP if (!SCHEDULER_STOPPED()) { other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); stop_cpus_hard(other_cpus); did_stop_cpus = 1; } else did_stop_cpus = 0; #endif kdb_active++; kdb_frame = tf; /* Let MD code do its thing first... */ kdb_cpu_trap(type, code); makectx(tf, &kdb_pcb); kdb_thr_select(curthread); cngrab(); for (;;) { handled = be->dbbe_trap(type, code); if (be == kdb_dbbe) break; be = kdb_dbbe; if (be == NULL || be->dbbe_trap == NULL) break; printf("Switching to %s back-end\n", be->dbbe_name); } cnungrab(); kdb_active--; #ifdef SMP if (did_stop_cpus) restart_cpus(stopped_cpus); #endif intr_restore(intr); return (handled); } Index: stable/9/sys/kern/subr_uio.c =================================================================== --- stable/9/sys/kern/subr_uio.c (revision 273911) +++ stable/9/sys/kern/subr_uio.c (revision 273912) @@ -1,606 +1,606 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_zero.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ZERO_COPY_SOCKETS #include #endif -SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, +SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; /* * Identify the physical page mapped at the given kernel virtual * address. Insert this physical page into the given address space at * the given virtual address, replacing the physical page, if any, * that already exists there. */ static int vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uobject; vm_map_entry_t entry; vm_pindex_t upindex; vm_prot_t prot; boolean_t wired; KASSERT((uaddr & PAGE_MASK) == 0, ("vm_pgmoveco: uaddr is not page aligned")); /* * Herein the physical page is validated and dirtied. It is * unwired in sf_buf_mext(). 
*/ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); kern_pg->valid = VM_PAGE_BITS_ALL; KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1, ("vm_pgmoveco: kern_pg is not correctly wired")); if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); retry: if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) goto retry; vm_page_lock(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); vm_page_unlock(user_pg); } else { /* * Even if a physical page does not exist in the * object chain's first object, a physical page from a * backing object may be mapped read only. */ if (uobject->backing_object != NULL) pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE); } vm_page_insert(kern_pg, uobject, upindex); vm_page_dirty(kern_pg); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int copyin_nofault(const void *udaddr, void *kaddr, size_t len) { int error, save; save = vm_fault_disable_pagefaults(); error = copyin(udaddr, kaddr, len); vm_fault_enable_pagefaults(save); return (error); } int copyout_nofault(const void *kaddr, void *udaddr, size_t len) { int error, save; save = vm_fault_disable_pagefaults(); error = copyout(kaddr, udaddr, len); vm_fault_enable_pagefaults(save); return (error); } #define PHYS_PAGE_COUNT(len) (howmany(len, PAGE_SIZE) + 1) int physcopyin(void *src, vm_paddr_t dst, size_t len) { vm_page_t m[PHYS_PAGE_COUNT(len)]; struct iovec iov[1]; struct uio uio; int i; iov[0].iov_base = src; iov[0].iov_len = len; uio.uio_iov = iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = len; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; for (i = 0; i < PHYS_PAGE_COUNT(len); i++, dst += PAGE_SIZE) m[i] = PHYS_TO_VM_PAGE(dst); return (uiomove_fromphys(m, dst & PAGE_MASK, len, &uio)); } int physcopyout(vm_paddr_t src, void *dst, 
size_t len) { vm_page_t m[PHYS_PAGE_COUNT(len)]; struct iovec iov[1]; struct uio uio; int i; iov[0].iov_base = dst; iov[0].iov_len = len; uio.uio_iov = iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = len; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; for (i = 0; i < PHYS_PAGE_COUNT(len); i++, src += PAGE_SIZE) m[i] = PHYS_TO_VM_PAGE(src); return (uiomove_fromphys(m, src & PAGE_MASK, len, &uio)); } #undef PHYS_PAGE_COUNT int uiomove(void *cp, int n, struct uio *uio) { return (uiomove_faultflag(cp, n, uio, 0)); } int uiomove_nofault(void *cp, int n, struct uio *uio) { return (uiomove_faultflag(cp, n, uio, 1)); } static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault) { struct thread *td; struct iovec *iov; size_t cnt; int error, newflags, save; td = curthread; error = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td, ("uiomove proc")); if (!nofault) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */ newflags = TDP_DEADLKTREAT; if (uio->uio_segflg == UIO_USERSPACE && nofault) { /* * Fail if a non-spurious page fault occurs. 
*/ newflags |= TDP_NOFAULTING | TDP_RESETSPUR; } save = curthread_pflags_set(newflags); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: maybe_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: curthread_pflags_restore(save); return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * assertion failure instead. 
*/ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { size_t offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > IOSIZE_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt, struct uio *uio, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * disposeable pages, since it gives the * kernel page to the userland process. */ error = vm_pgmoveco(&curproc->p_vmspace->vm_map, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. 
*/ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->iov_base, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: maybe_yield(); error = userspaceco(cp, cnt, uio, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. 
*/
int
ureadc(int c, struct uio *uio)
{
	struct iovec *iov;
	char *iov_base;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "Calling ureadc()");

again:
	/* Running out of iovecs or residual count is treated as fatal. */
	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
		panic("ureadc");
	iov = uio->uio_iov;
	if (iov->iov_len == 0) {
		/* Skip an empty segment and look at the next one. */
		uio->uio_iovcnt--;
		uio->uio_iov++;
		goto again;
	}
	switch (uio->uio_segflg) {

	case UIO_USERSPACE:
		if (subyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;

	case UIO_SYSSPACE:
		iov_base = iov->iov_base;
		*iov_base = c;
		iov->iov_base = iov_base;
		break;

	case UIO_NOCOPY:
		break;
	}
	/* Account for the one byte delivered. */
	iov->iov_base = (char *)iov->iov_base + 1;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (0);
}

/*
 * Copy len bytes from src to dst, where seg says whether src is a user
 * (UIO_USERSPACE, copyin()) or kernel (UIO_SYSSPACE, bcopy()) address.
 * Returns the copyin() result, or 0 for a kernel copy; any other seg
 * value panics.
 */
int
copyinfrom(const void * __restrict src, void * __restrict dst, size_t len,
    int seg)
{
	int error = 0;

	switch (seg) {
	case UIO_USERSPACE:
		error = copyin(src, dst, len);
		break;
	case UIO_SYSSPACE:
		bcopy(src, dst, len);
		break;
	default:
		panic("copyinfrom: bad seg %d\n", seg);
	}
	return (error);
}

/*
 * String counterpart of copyinfrom(): dispatches to copyinstr() for
 * UIO_USERSPACE and copystr() for UIO_SYSSPACE, passing len and copied
 * through unchanged.  Any other seg value panics.
 */
int
copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len,
    size_t * __restrict copied, int seg)
{
	int error = 0;

	switch (seg) {
	case UIO_USERSPACE:
		error = copyinstr(src, dst, len, copied);
		break;
	case UIO_SYSSPACE:
		error = copystr(src, dst, len, copied);
		break;
	default:
		panic("copyinstrfrom: bad seg %d\n", seg);
	}
	return (error);
}

/*
 * Copy an array of iovcnt iovecs in from iovp into a freshly allocated
 * M_IOV buffer returned through *iov.  If iovcnt exceeds UIO_MAXIOV the
 * caller-supplied error value is returned and *iov is NULL.  If copyin()
 * fails the buffer is freed, *iov is NULL and the copyin() error is
 * returned.  On success the caller owns *iov.
 */
int
copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
{
	u_int iovlen;

	*iov = NULL;
	if (iovcnt > UIO_MAXIOV)
		return (error);
	iovlen = iovcnt * sizeof (struct iovec);
	*iov = malloc(iovlen, M_IOV, M_WAITOK);
	error = copyin(iovp, *iov, iovlen);
	if (error) {
		free(*iov, M_IOV);
		*iov = NULL;
	}
	return (error);
}

/*
 * Build a struct uio describing iovcnt iovecs copied in from iovp.  The
 * uio and its iovec array live in one M_IOV allocation (the iovecs follow
 * the uio), so a single free() releases both.  Returns EINVAL if iovcnt
 * exceeds UIO_MAXIOV or the summed lengths would exceed IOSIZE_MAX, or the
 * copyin() error; in those cases nothing is left allocated and *uiop is
 * NULL.  On success *uiop has uio_segflg UIO_USERSPACE, uio_offset -1 and
 * uio_resid equal to the total length; other fields are not set here.
 */
int
copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop)
{
	struct iovec *iov;
	struct uio *uio;
	u_int iovlen;
	int error, i;

	*uiop = NULL;
	if (iovcnt > UIO_MAXIOV)
		return (EINVAL);
	iovlen = iovcnt * sizeof (struct iovec);
	uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
	iov = (struct iovec *)(uio + 1);
	error = copyin(iovp, iov, iovlen);
	if (error) {
		free(uio, M_IOV);
		return (error);
	}
	uio->uio_iov = iov;
	uio->uio_iovcnt = iovcnt;
	uio->uio_segflg = UIO_USERSPACE;
	uio->uio_offset = -1;
	uio->uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		/* Reject before adding so uio_resid never passes IOSIZE_MAX. */
		if (iov->iov_len > IOSIZE_MAX - uio->uio_resid) {
			free(uio, M_IOV);
			return (EINVAL);
		}
		uio->uio_resid += iov->iov_len;
		iov++;
	}
	*uiop = uio;
	return (0);
}

/*
 * Return a copy of uiop, with its iovec array duplicated into the same
 * M_IOV allocation directly after the new struct uio.
 */
struct uio *
cloneuio(struct uio *uiop)
{
	struct uio *uio;
	int iovlen;

	iovlen = uiop->uio_iovcnt * sizeof (struct iovec);
	uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
	*uio = *uiop;
	/* Re-point at the private copy of the iovecs. */
	uio->uio_iov = (struct iovec *)(uio + 1);
	bcopy(uiop->uio_iov, uio->uio_iov, iovlen);
	return (uio);
}

/*
 * Map some anonymous memory in user space of size sz, rounded up to the page
 * boundary.
 *
 * *addr is set to a hint computed from the data segment start plus the
 * RLIMIT_DATA maximum before being handed to vm_mmap(); the vm_mmap()
 * result is returned.
 */
int
copyout_map(struct thread *td, vm_offset_t *addr, size_t sz)
{
	struct vmspace *vms;
	int error;
	vm_size_t size;

	vms = td->td_proc->p_vmspace;

	/*
	 * Map somewhere after heap in process memory.
	 */
	PROC_LOCK(td->td_proc);
	*addr = round_page((vm_offset_t)vms->vm_daddr +
	    lim_max(td->td_proc, RLIMIT_DATA));
	PROC_UNLOCK(td->td_proc);

	/* round size up to page boundary */
	size = (vm_size_t)round_page(sz);

	error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE,
	    VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0);

	return (error);
}

/*
 * Unmap memory in user space.
 *
 * sz is rounded up to a page multiple.  A zero sz is a no-op returning 0;
 * a vm_map_remove() failure is reported as EINVAL.
 */
int
copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz)
{
	vm_map_t map;
	vm_size_t size;

	if (sz == 0)
		return (0);

	map = &td->td_proc->p_vmspace->vm_map;
	size = (vm_size_t)round_page(sz);

	if (vm_map_remove(map, addr, addr + size) != KERN_SUCCESS)
		return (EINVAL);

	return (0);
}
Index: stable/9/sys/kern/vfs_cache.c
===================================================================
--- stable/9/sys/kern/vfs_cache.c (revision 273911)
+++ stable/9/sys/kern/vfs_cache.c (revision 273912)
@@ -1,1533 +1,1533 @@
/*-
 * Copyright (c) 1989, 1993, 1995
 * The Regents of the University of California. All rights reserved.
* * This code is derived from software contributed to Berkeley by * Poul-Henning Kamp of the FreeBSD Project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdtrace.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include SDT_PROVIDER_DECLARE(vfs); SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int", "struct vnode *", "char *"); SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative, "struct vnode *", "char *"); SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", "char *"); /* * This structure describes the elements in the cache of recent * names looked up by namei. 
*/

struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to */
	u_char nc_flag;			/* flag bits */
	u_char nc_nlen;			/* length of name */
	char nc_name[0];		/* segment name + nul */
};

/*
 * struct namecache_ts repeats struct namecache layout up to the
 * nc_nlen member.
 * struct namecache_ts is used in place of struct namecache when time(s) need
 * to be stored.  The nc_dotdottime field is used when a cache entry is mapping
 * both a non-dotdot directory name plus dotdot for the directory's
 * parent.
 *
 * Because the name follows the extra time fields here, nc_name sits at a
 * different offset than in struct namecache; code holding a
 * struct namecache pointer must check NCF_TS before touching the name.
 */
struct namecache_ts {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to */
	u_char nc_flag;			/* flag bits */
	u_char nc_nlen;			/* length of name */
	struct timespec nc_time;	/* timespec provided by fs */
	struct timespec nc_dotdottime;	/* dotdot timespec provided by fs */
	int nc_ticks;			/* ticks value when entry was added */
	char nc_name[0];		/* segment name + nul */
};

/*
 * Flags in namecache.nc_flag
 */
#define	NCF_WHITE	0x01	/* negative entry entered with ISWHITEOUT set */
#define	NCF_ISDOTDOT	0x02	/* entry was entered for the name ".." */
#define	NCF_TS		0x04	/* entry is really a struct namecache_ts */
#define	NCF_DTS		0x08	/* nc_dotdottime has been filled in */

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  Cache is indexed by hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry, (i.e. for a name that is known NOT to
 * exist) the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
*/

/*
 * Structures associated with name caching.
 */
/* Map a hash value to its chain head; nchash is applied as a bit mask. */
#define	NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU queue of negative entries */
static u_long nchash;			/* hash mask: highest index into nchashtbl */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
    "Size of namecache hash table");
static u_long ncnegfactor = 16;		/* ratio of negative entries */
SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
    "Ratio of negative namecache entries");
static u_long numneg;			/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
    "Number of negative entries in namecache");
static u_long numcache;			/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
    "Number of namecache entries");
static u_long numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0,
    "Number of namecache entries with vnodes held");
/* New entries are refused once numcache reaches desiredvnodes * ncsizefactor. */
static u_int ncsizefactor = 2;
SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
    "Size factor for namecache");

struct nchstats nchstats;		/* cache effectiveness statistics */

/* Reader/writer lock used around the hash chains, ncneg and the counters. */
static struct rwlock cache_lock;
RW_SYSINIT(vfscache, &cache_lock, "Name Cache");

#define	CACHE_UPGRADE_LOCK()	rw_try_upgrade(&cache_lock)
#define	CACHE_RLOCK()		rw_rlock(&cache_lock)
#define	CACHE_RUNLOCK()		rw_runlock(&cache_lock)
#define	CACHE_WLOCK()		rw_wlock(&cache_lock)
#define	CACHE_WUNLOCK()		rw_wunlock(&cache_lock)

/*
 * UMA zones for the VFS cache.
 *
 * The small cache is used for entries with short names, which are the
 * most common.  The large cache is used for entries which are too big to
 * fit in the small cache.
*/
static uma_zone_t cache_zone_small;
static uma_zone_t cache_zone_small_ts;
static uma_zone_t cache_zone_large;
static uma_zone_t cache_zone_large_ts;

/* Longest name length that is still served from the "small" zones. */
#define	CACHE_PATH_CUTOFF	35

/*
 * Allocate a namecache entry for a name of length len.  Names longer than
 * CACHE_PATH_CUTOFF come from a "large" zone, others from a "small" one;
 * a non-zero ts selects the corresponding "_ts" zone.  The allocation uses
 * M_WAITOK.  No field of the returned entry is initialised here.
 */
static struct namecache *
cache_alloc(int len, int ts)
{

	if (len > CACHE_PATH_CUTOFF) {
		if (ts)
			return (uma_zalloc(cache_zone_large_ts, M_WAITOK));
		else
			return (uma_zalloc(cache_zone_large, M_WAITOK));
	}
	if (ts)
		return (uma_zalloc(cache_zone_small_ts, M_WAITOK));
	else
		return (uma_zalloc(cache_zone_small, M_WAITOK));
}

/*
 * Return an entry to the zone cache_alloc() would have picked for it,
 * judged by its nc_nlen and NCF_TS flag.  A NULL ncp is ignored.
 */
static void
cache_free(struct namecache *ncp)
{
	int ts;

	if (ncp == NULL)
		return;
	ts = ncp->nc_flag & NCF_TS;
	if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) {
		if (ts)
			uma_zfree(cache_zone_small_ts, ncp);
		else
			uma_zfree(cache_zone_small, ncp);
	} else if (ts)
		uma_zfree(cache_zone_large_ts, ncp);
	else
		uma_zfree(cache_zone_large, ncp);
}

/*
 * Return the name buffer of an entry, using the struct namecache_ts view
 * when NCF_TS is set.
 */
static char *
nc_get_name(struct namecache *ncp)
{
	struct namecache_ts *ncp_ts;

	if ((ncp->nc_flag & NCF_TS) == 0)
		return (ncp->nc_name);
	ncp_ts = (struct namecache_ts *)ncp;
	return (ncp_ts->nc_name);
}

/*
 * Copy the stored nc_time and nc_ticks of an entry to *tsp and *ticksp;
 * either pointer may be NULL to skip that value.  The assertion requires
 * that both are NULL when the entry lacks NCF_TS.
 */
static void
cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
{

	KASSERT((ncp->nc_flag & NCF_TS) != 0 ||
	    (tsp == NULL && ticksp == NULL),
	    ("No NCF_TS"));

	if (tsp != NULL)
		*tsp = ((struct namecache_ts *)ncp)->nc_time;
	if (ticksp != NULL)
		*ticksp = ((struct namecache_ts *)ncp)->nc_ticks;
}

static int doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0,
    "VFS namecache enabled");

/* Export size information to userland */
-SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0,
+SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
    sizeof(struct namecache), "sizeof(struct namecache)");

/*
 * The new name cache statistics
 */
static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0,
    "Name cache statistics");
/* Shorthand for exporting one u_long counter under vfs.cache. */
#define STATNODE(mode, name, var, descr) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr);
STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries");
STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries");
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls,
    "Number of cache lookups");
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits,
    "Number of '.' hits");
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits,
    "Number of '..' hits");
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks,
    "Number of checks in lookup");
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss,
    "Number of cache misses");
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap,
    "Number of cache misses we do not want to cache");
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps,
    "Number of cache hits (positive) we do not want to cache");
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits,
    "Number of cache hits (positive)");
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps,
    "Number of cache hits (negative) we do not want to cache");
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits,
    "Number of cache hits (negative)");
static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades,
    "Number of updates of the cache after lookup (write lock + retry)");

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
    &nchstats, sizeof(nchstats), "LU",
    "VFS cache effectiveness statistics");



static void cache_zap(struct namecache *ncp);
static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
    u_int *buflen);
static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
    char *buf, char **retbuf, u_int buflen);

static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

#ifdef DIAGNOSTIC
/*
 * Grab an atomic snapshot of the name cache hash chain lengths
 *
 * NOTE(review): the read lock is taken and dropped once per chain, so each
 * chain length is consistent in itself but the set of lengths is not a
 * single point-in-time snapshot.
 */
static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL,
    "hash table stats");

/*
 * Sysctl handler: emit one int per hash chain holding that chain's length.
 * With no output buffer it only reports the space required.
 */
static int
sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count;

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		CACHE_RLOCK();
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		CACHE_RUNLOCK();
		error = SYSCTL_OUT(req, &count, sizeof(count));
		if (error)
			return (error);
	}
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
    CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
    "nchash chain lengths");

/*
 * Sysctl handler: emit four ints summarising the hash table, in this order:
 * number of chains, number of non-empty chains, longest chain length, and
 * the share of non-empty chains in hundredths of a percent.
 */
static int
sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count, maxlength, used, pct;

	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, 4 * sizeof(int));

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	used = 0;
	maxlength = 0;

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		CACHE_RLOCK();
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		CACHE_RUNLOCK();
		if (count)
			used++;
		if (maxlength < count)
			maxlength = count;
	}
	/* The loop counted n_nchash down to zero; recompute it for output. */
	n_nchash = nchash + 1;
	pct = (used * 100 * 100) / n_nchash;
	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &used, sizeof(used));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &pct, sizeof(pct));
	if (error)
		return (error);
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
    CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
    "nchash chain lengths");
#endif

/*
 * cache_zap():
 *
 * Removes a namecache entry from cache, whether it contains an actual
 * pointer to a vnode or if it is just a negative cache entry.
*/
/*
 * The caller must hold cache_lock write-locked (asserted below).  The entry
 * is unlinked from its hash chain, from the parent's source list (or the
 * parent's v_cache_dd slot for a ".." entry), and from either the target
 * vnode's destination list or the negative-entry queue; the counters are
 * adjusted and the entry is freed.  If this removed the last entry from the
 * parent's source list, the parent vnode is vdrop()ed once the entry is gone.
 */
static void
cache_zap(struct namecache *ncp)
{
	struct vnode *vp;

	rw_assert(&cache_lock, RA_WLOCKED);
	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
#ifdef KDTRACE_HOOKS
	if (ncp->nc_vp != NULL) {
		SDT_PROBE(vfs, namecache, zap, done, ncp->nc_dvp,
		    nc_get_name(ncp), ncp->nc_vp, 0, 0);
	} else {
		SDT_PROBE(vfs, namecache, zap_negative, done, ncp->nc_dvp,
		    nc_get_name(ncp), 0, 0, 0);
	}
#endif
	vp = NULL;
	LIST_REMOVE(ncp, nc_hash);
	if (ncp->nc_flag & NCF_ISDOTDOT) {
		/* ".." entries are not on nc_src; only v_cache_dd points at them. */
		if (ncp == ncp->nc_dvp->v_cache_dd)
			ncp->nc_dvp->v_cache_dd = NULL;
	} else {
		LIST_REMOVE(ncp, nc_src);
		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
			/* Last name under this directory: drop it after the free. */
			vp = ncp->nc_dvp;
			numcachehv--;
		}
	}
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
		if (ncp == ncp->nc_vp->v_cache_dd)
			ncp->nc_vp->v_cache_dd = NULL;
	} else {
		/* Negative entries live on the ncneg queue instead. */
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	cache_free(ncp);
	if (vp)
		vdrop(vp);
}

/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.  If the directory vnode is
 * recycled out from under us due to a forced unmount, a status of
 * ENOENT is returned.
 *
 * vpp is locked and ref'd on return.  If we're looking up DOTDOT, dvp is
 * unlocked.  If we're looking up . an extra ref is taken, but the lock is
 * not recursively acquired.
*/
/*
 * When tsp and/or ticksp are non-NULL they receive the timestamp and ticks
 * value stored with the matching entry ("." lookups clear *tsp and report
 * the current ticks).  When the cache is disabled, MAKEENTRY is cleared in
 * cnp->cn_flags and 0 is returned.
 *
 * The lookup starts under the read lock.  Paths that must modify the cache
 * try to upgrade in place; if that fails they jump to "wlock", which takes
 * the write lock and restarts the search from "retry_wlocked".
 */
int
cache_lookup_times(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct timespec *tsp, int *ticksp)
{
	struct namecache *ncp;
	uint32_t hash;
	int error, ltype, wlocked;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}
retry:
	CACHE_RLOCK();
	wlocked = 0;
	numcalls++;
	error = 0;

retry_wlocked:
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
			    dvp, cnp->cn_nameptr);
			dothits++;
			SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".",
			    *vpp, 0, 0);
			if (tsp != NULL)
				timespecclear(tsp);
			if (ticksp != NULL)
				*ticksp = ticks;
			goto success;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_cache_dd == NULL) {
				SDT_PROBE(vfs, namecache, lookup, miss, dvp,
				    "..", NULL, 0, 0);
				goto unlock;
			}
			if ((cnp->cn_flags & MAKEENTRY) == 0) {
				/* Caller wants no entry: drop the ".." link. */
				if (!wlocked && !CACHE_UPGRADE_LOCK())
					goto wlock;
				if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
					cache_zap(dvp->v_cache_dd);
				dvp->v_cache_dd = NULL;
				CACHE_WUNLOCK();
				return (0);
			}
			ncp = dvp->v_cache_dd;
			/*
			 * v_cache_dd is either a dedicated ".." entry, whose
			 * target is the parent, or the entry naming dvp
			 * itself, whose directory is the parent.
			 */
			if (ncp->nc_flag & NCF_ISDOTDOT)
				*vpp = ncp->nc_vp;
			else
				*vpp = ncp->nc_dvp;
			/* Return failure if negative entry was found. */
			if (*vpp == NULL)
				goto negative_success;
			CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
			    dvp, cnp->cn_nameptr, *vpp);
			SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..",
			    *vpp, 0, 0);
			cache_out_ts(ncp, tsp, ticksp);
			/* A shared (non-"..") entry keeps its dotdot time apart. */
			if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
			    NCF_DTS && tsp != NULL)
				*tsp = ((struct namecache_ts *)ncp)->
				    nc_dotdottime;
			goto success;
		}
	}

	/* The hash covers the name bytes and then the directory pointer. */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == NULL) {
		SDT_PROBE(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
		    NULL, 0, 0);
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		goto unlock;
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		if (!wlocked && !CACHE_UPGRADE_LOCK())
			goto wlock;
		cache_zap(ncp);
		CACHE_WUNLOCK();
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
		    dvp, cnp->cn_nameptr, *vpp, ncp);
		SDT_PROBE(vfs, namecache, lookup, hit, dvp, nc_get_name(ncp),
		    *vpp, 0, 0);
		cache_out_ts(ncp, tsp, ticksp);
		goto success;
	}

negative_success:
	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		if (!wlocked && !CACHE_UPGRADE_LOCK())
			goto wlock;
		cache_zap(ncp);
		CACHE_WUNLOCK();
		return (0);
	}

	if (!wlocked && !CACHE_UPGRADE_LOCK())
		goto wlock;
	numneghits++;
	/*
	 * We found a "negative" match, so we shift it to the end of
	 * the "negative" cache entries queue to satisfy LRU.  Also,
	 * check to see if the entry is a whiteout; indicate this to
	 * the componentname, if so.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	SDT_PROBE(vfs, namecache, lookup, hit__negative, dvp, nc_get_name(ncp),
	    0, 0, 0);
	cache_out_ts(ncp, tsp, ticksp);
	CACHE_WUNLOCK();
	return (ENOENT);

wlock:
	/*
	 * We need to update the cache after our lookup, so upgrade to
	 * a write lock and retry the operation.
	 */
	CACHE_RUNLOCK();
	CACHE_WLOCK();
	numupgrades++;
	wlocked = 1;
	goto retry_wlocked;

success:
	/*
	 * On success we return a locked and ref'd vnode as per the lookup
	 * protocol.
	 */
	if (dvp == *vpp) {   /* lookup on "." */
		VREF(*vpp);
		if (wlocked)
			CACHE_WUNLOCK();
		else
			CACHE_RUNLOCK();
		/*
		 * When we lookup "." we still can be asked to lock it
		 * differently...
		 */
		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(*vpp)) {
			if (ltype == LK_EXCLUSIVE) {
				vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
				if ((*vpp)->v_iflag & VI_DOOMED) {
					/* forced unmount */
					vrele(*vpp);
					*vpp = NULL;
					return (ENOENT);
				}
			} else
				vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
		}
		return (-1);
	}
	ltype = 0;	/* silence gcc warning */
	if (cnp->cn_flags & ISDOTDOT) {
		/* Remember how dvp was locked so it can be re-locked below. */
		ltype = VOP_ISLOCKED(dvp);
		VOP_UNLOCK(dvp, 0);
	}
	/* Take the vnode interlock before letting go of the cache lock. */
	VI_LOCK(*vpp);
	if (wlocked)
		CACHE_WUNLOCK();
	else
		CACHE_RUNLOCK();
	error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
	if (cnp->cn_flags & ISDOTDOT) {
		vn_lock(dvp, ltype | LK_RETRY);
		if (dvp->v_iflag & VI_DOOMED) {
			if (error == 0)
				vput(*vpp);
			*vpp = NULL;
			return (ENOENT);
		}
	}
	if (error) {
		/* vget() failed: start the whole lookup again. */
		*vpp = NULL;
		goto retry;
	}
	if ((cnp->cn_flags & ISLASTCN) &&
	    (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
	}
	return (-1);

unlock:
	if (wlocked)
		CACHE_WUNLOCK();
	else
		CACHE_RUNLOCK();
	return (0);
}

/*
 * Add an entry to the cache.
*/ void cache_enter_time(dvp, vp, cnp, tsp, dtsp) struct vnode *dvp; struct vnode *vp; struct componentname *cnp; struct timespec *tsp; struct timespec *dtsp; { struct namecache *ncp, *n2; struct namecache_ts *n3; struct nchashhead *ncpp; uint32_t hash; int flag; int hold; int zap; int len; CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp, ("cache_enter: Adding a doomed vnode")); VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp, ("cache_enter: Doomed vnode used as src")); if (!doingcache) return; /* * Avoid blowout in namecache entries. */ if (numcache >= desiredvnodes * ncsizefactor) return; flag = 0; if (cnp->cn_nameptr[0] == '.') { if (cnp->cn_namelen == 1) return; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { CACHE_WLOCK(); /* * If dotdot entry already exists, just retarget it * to new parent vnode, otherwise continue with new * namecache entry allocation. */ if ((ncp = dvp->v_cache_dd) != NULL && ncp->nc_flag & NCF_ISDOTDOT) { KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent")); if (ncp->nc_vp != NULL) { TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); } else { TAILQ_REMOVE(&ncneg, ncp, nc_dst); numneg--; } if (vp != NULL) { TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); } else { TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); numneg++; } ncp->nc_vp = vp; CACHE_WUNLOCK(); return; } dvp->v_cache_dd = NULL; SDT_PROBE(vfs, namecache, enter, done, dvp, "..", vp, 0, 0); CACHE_WUNLOCK(); flag = NCF_ISDOTDOT; } } hold = 0; zap = 0; /* * Calculate the hash key and setup as much of the new * namecache entry as possible before acquiring the lock. 
*/ ncp = cache_alloc(cnp->cn_namelen, tsp != NULL); ncp->nc_vp = vp; ncp->nc_dvp = dvp; ncp->nc_flag = flag; if (tsp != NULL) { n3 = (struct namecache_ts *)ncp; n3->nc_time = *tsp; n3->nc_ticks = ticks; n3->nc_flag |= NCF_TS; if (dtsp != NULL) { n3->nc_dotdottime = *dtsp; n3->nc_flag |= NCF_DTS; } } len = ncp->nc_nlen = cnp->cn_namelen; hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1); hash = fnv_32_buf(&dvp, sizeof(dvp), hash); CACHE_WLOCK(); /* * See if this vnode or negative entry is already in the cache * with this name. This can happen with concurrent lookups of * the same path name. */ ncpp = NCHHASH(hash); LIST_FOREACH(n2, ncpp, nc_hash) { if (n2->nc_dvp == dvp && n2->nc_nlen == cnp->cn_namelen && !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) { if (tsp != NULL) { KASSERT((n2->nc_flag & NCF_TS) != 0, ("no NCF_TS")); n3 = (struct namecache_ts *)n2; n3->nc_time = ((struct namecache_ts *)ncp)->nc_time; n3->nc_ticks = ((struct namecache_ts *)ncp)->nc_ticks; if (dtsp != NULL) { n3->nc_dotdottime = ((struct namecache_ts *)ncp)-> nc_dotdottime; n3->nc_flag |= NCF_DTS; } } CACHE_WUNLOCK(); cache_free(ncp); return; } } if (flag == NCF_ISDOTDOT) { /* * See if we are trying to add .. entry, but some other lookup * has populated v_cache_dd pointer already. */ if (dvp->v_cache_dd != NULL) { CACHE_WUNLOCK(); cache_free(ncp); return; } KASSERT(vp == NULL || vp->v_type == VDIR, ("wrong vnode type %p", vp)); dvp->v_cache_dd = ncp; } numcache++; if (!vp) { numneg++; if (cnp->cn_flags & ISWHITEOUT) ncp->nc_flag |= NCF_WHITE; } else if (vp->v_type == VDIR) { if (flag != NCF_ISDOTDOT) { /* * For this case, the cache entry maps both the * directory name in it and the name ".." for the * directory's parent. 
*/ if ((n2 = vp->v_cache_dd) != NULL && (n2->nc_flag & NCF_ISDOTDOT) != 0) cache_zap(n2); vp->v_cache_dd = ncp; } } else { vp->v_cache_dd = NULL; } /* * Insert the new namecache entry into the appropriate chain * within the cache entries table. */ LIST_INSERT_HEAD(ncpp, ncp, nc_hash); if (flag != NCF_ISDOTDOT) { if (LIST_EMPTY(&dvp->v_cache_src)) { hold = 1; numcachehv++; } LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); } /* * If the entry is "negative", we place it into the * "negative" cache queue, otherwise, we place it into the * destination vnode's cache entries queue. */ if (vp) { TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); SDT_PROBE(vfs, namecache, enter, done, dvp, nc_get_name(ncp), vp, 0, 0); } else { TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); SDT_PROBE(vfs, namecache, enter_negative, done, dvp, nc_get_name(ncp), 0, 0, 0); } if (numneg * ncnegfactor > numcache) { ncp = TAILQ_FIRST(&ncneg); KASSERT(ncp->nc_vp == NULL, ("ncp %p vp %p on ncneg", ncp, ncp->nc_vp)); zap = 1; } if (hold) vhold(dvp); if (zap) cache_zap(ncp); CACHE_WUNLOCK(); } /* * Name cache initialization, from vfs_init() when we are booting */ static void nchinit(void *dummy __unused) { TAILQ_INIT(&ncneg); cache_zone_small = uma_zcreate("S VFS Cache", sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cache_zone_small_ts = uma_zcreate("STS VFS Cache", sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cache_zone_large = uma_zcreate("L VFS Cache", sizeof(struct namecache) + NAME_MAX + 1, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cache_zone_large_ts = uma_zcreate("LTS VFS Cache", sizeof(struct namecache_ts) + NAME_MAX + 1, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL); /* * Invalidate all entries to a particular vnode. 
*/ void cache_purge(vp) struct vnode *vp; { CTR1(KTR_VFS, "cache_purge(%p)", vp); SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0); CACHE_WLOCK(); while (!LIST_EMPTY(&vp->v_cache_src)) cache_zap(LIST_FIRST(&vp->v_cache_src)); while (!TAILQ_EMPTY(&vp->v_cache_dst)) cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); if (vp->v_cache_dd != NULL) { KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT, ("lost dotdot link")); cache_zap(vp->v_cache_dd); } KASSERT(vp->v_cache_dd == NULL, ("incomplete purge")); CACHE_WUNLOCK(); } /* * Invalidate all negative entries for a particular directory vnode. */ void cache_purge_negative(vp) struct vnode *vp; { struct namecache *cp, *ncp; CTR1(KTR_VFS, "cache_purge_negative(%p)", vp); SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0); CACHE_WLOCK(); LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) { if (cp->nc_vp == NULL) cache_zap(cp); } CACHE_WUNLOCK(); } /* * Flush all entries referencing a particular filesystem. */ void cache_purgevfs(mp) struct mount *mp; { struct nchashhead *ncpp; struct namecache *ncp, *nnp; /* Scan hash tables for applicable entries */ SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0); CACHE_WLOCK(); for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) { if (ncp->nc_dvp->v_mount == mp) cache_zap(ncp); } } CACHE_WUNLOCK(); } /* * Perform canonical checks and cache lookup and pass on to filesystem * through the vop_cachedlookup only if needed. 
*/ int vfs_cache_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { struct vnode *dvp; int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; struct thread *td = cnp->cn_thread; *vpp = NULL; dvp = ap->a_dvp; if (dvp->v_type != VDIR) return (ENOTDIR); if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); error = VOP_ACCESS(dvp, VEXEC, cred, td); if (error) return (error); error = cache_lookup(dvp, vpp, cnp); if (error == 0) return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); if (error == -1) return (0); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __getcwd_args { u_char *buf; u_int buflen; }; #endif /* * XXX All of these sysctls would probably be more productive dead. */ static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "Disable the getcwd syscall"); /* Implementation of the getcwd syscall. 
*/ int sys___getcwd(td, uap) struct thread *td; struct __getcwd_args *uap; { return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); } int kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) { char *bp, *tmpbuf; struct filedesc *fdp; struct vnode *cdir, *rdir; int error, vfslocked; if (disablecwd) return (ENODEV); if (buflen < 2) return (EINVAL); if (buflen > MAXPATHLEN) buflen = MAXPATHLEN; tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); cdir = fdp->fd_cdir; VREF(cdir); rdir = fdp->fd_rdir; VREF(rdir); FILEDESC_SUNLOCK(fdp); error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen); vfslocked = VFS_LOCK_GIANT(rdir->v_mount); vrele(rdir); VFS_UNLOCK_GIANT(vfslocked); vfslocked = VFS_LOCK_GIANT(cdir->v_mount); vrele(cdir); VFS_UNLOCK_GIANT(vfslocked); if (!error) { if (bufseg == UIO_SYSSPACE) bcopy(bp, buf, strlen(bp) + 1); else error = copyout(bp, buf, strlen(bp) + 1); #ifdef KTRACE if (KTRPOINT(curthread, KTR_NAMEI)) ktrnamei(bp); #endif } free(tmpbuf, M_TEMP); return (error); } /* * Thus begins the fullpath magic. */ #undef STATNODE #define STATNODE(name, descr) \ static u_int name; \ SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr) static int disablefullpath; SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, "Disable the vn_fullpath function"); /* These count for kern___getcwd(), too. 
*/ STATNODE(numfullpathcalls, "Number of fullpath search calls"); STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)"); STATNODE(numfullpathfail2, "Number of fullpath search errors (VOP_VPTOCNP failures)"); STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)"); STATNODE(numfullpathfound, "Number of successful fullpath calls"); /* * Retrieve the full filesystem path that correspond to a vnode from the name * cache (if available) */ int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { char *buf; struct filedesc *fdp; struct vnode *rdir; int error, vfslocked; if (disablefullpath) return (ENODEV); if (vn == NULL) return (EINVAL); buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); rdir = fdp->fd_rdir; VREF(rdir); FILEDESC_SUNLOCK(fdp); error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN); vfslocked = VFS_LOCK_GIANT(rdir->v_mount); vrele(rdir); VFS_UNLOCK_GIANT(vfslocked); if (!error) *freebuf = buf; else free(buf, M_TEMP); return (error); } /* * This function is similar to vn_fullpath, but it attempts to lookup the * pathname relative to the global root mount point. This is required for the * auditing sub-system, as audited pathnames must be absolute, relative to the * global root mount point. 
*/ int vn_fullpath_global(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { char *buf; int error; if (disablefullpath) return (ENODEV); if (vn == NULL) return (EINVAL); buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN); if (!error) *freebuf = buf; else free(buf, M_TEMP); return (error); } int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) { int error; CACHE_RLOCK(); error = vn_vptocnp_locked(vp, cred, buf, buflen); if (error == 0) CACHE_RUNLOCK(); return (error); } static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) { struct vnode *dvp; struct namecache *ncp; int error, vfslocked; TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) { if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; } if (ncp != NULL) { if (*buflen < ncp->nc_nlen) { CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT((*vp)->v_mount); vrele(*vp); VFS_UNLOCK_GIANT(vfslocked); numfullpathfail4++; error = ENOMEM; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); return (error); } *buflen -= ncp->nc_nlen; memcpy(buf + *buflen, nc_get_name(ncp), ncp->nc_nlen); SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp, nc_get_name(ncp), vp, 0, 0); dvp = *vp; *vp = ncp->nc_dvp; vref(*vp); CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(dvp->v_mount); vrele(dvp); VFS_UNLOCK_GIANT(vfslocked); CACHE_RLOCK(); return (0); } SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0); CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT((*vp)->v_mount); vn_lock(*vp, LK_SHARED | LK_RETRY); error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen); vput(*vp); VFS_UNLOCK_GIANT(vfslocked); if (error) { numfullpathfail2++; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); return (error); } *vp = dvp; CACHE_RLOCK(); if (dvp->v_iflag & VI_DOOMED) { /* forced unmount */ CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(dvp->v_mount); vrele(dvp); VFS_UNLOCK_GIANT(vfslocked); error = 
ENOENT; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); return (error); } /* * *vp has its use count incremented still. */ return (0); } /* * The magic behind kern___getcwd() and vn_fullpath(). */ static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, u_int buflen) { int error, slash_prefixed, vfslocked; #ifdef KDTRACE_HOOKS struct vnode *startvp = vp; #endif struct vnode *vp1; buflen--; buf[buflen] = '\0'; error = 0; slash_prefixed = 0; SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0); numfullpathcalls++; vref(vp); CACHE_RLOCK(); if (vp->v_type != VDIR) { error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); if (error) return (error); if (buflen == 0) { CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (ENOMEM); } buf[--buflen] = '/'; slash_prefixed = 1; } while (vp != rdir && vp != rootvnode) { if (vp->v_vflag & VV_ROOT) { if (vp->v_iflag & VI_DOOMED) { /* forced unmount */ CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); error = ENOENT; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); break; } vp1 = vp->v_mount->mnt_vnodecovered; vref(vp1); CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); vp = vp1; CACHE_RLOCK(); continue; } if (vp->v_type != VDIR) { CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); numfullpathfail1++; error = ENOTDIR; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); break; } error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); if (error) break; if (buflen == 0) { CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); error = ENOMEM; SDT_PROBE(vfs, namecache, fullpath, return, error, startvp, NULL, 0, 0); break; } buf[--buflen] = '/'; slash_prefixed = 1; } if (error) return 
(error); if (!slash_prefixed) { if (buflen == 0) { CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); numfullpathfail4++; SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM, startvp, NULL, 0, 0); return (ENOMEM); } buf[--buflen] = '/'; } numfullpathfound++; CACHE_RUNLOCK(); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen, 0, 0); *retbuf = buf + buflen; return (0); } struct vnode * vn_dir_dd_ino(struct vnode *vp) { struct namecache *ncp; struct vnode *ddvp; ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino"); CACHE_RLOCK(); TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) { if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) continue; ddvp = ncp->nc_dvp; VI_LOCK(ddvp); CACHE_RUNLOCK(); if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread)) return (NULL); return (ddvp); } CACHE_RUNLOCK(); return (NULL); } int vn_commname(struct vnode *vp, char *buf, u_int buflen) { struct namecache *ncp; int l; CACHE_RLOCK(); TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; if (ncp == NULL) { CACHE_RUNLOCK(); return (ENOENT); } l = min(ncp->nc_nlen, buflen - 1); memcpy(buf, nc_get_name(ncp), l); CACHE_RUNLOCK(); buf[l] = '\0'; return (0); } /* ABI compat shims for old kernel modules. */ #undef cache_enter #undef cache_lookup void cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp); int cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp); void cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { cache_enter_time(dvp, vp, cnp, NULL, NULL); } int cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { return (cache_lookup_times(dvp, vpp, cnp, NULL, NULL)); } /* * This function updates path string to vnode's full global path * and checks the size of the new path string against the pathlen argument. 
* * Requires a locked, referenced vnode and GIANT lock held. * Vnode is re-locked on success or ENODEV, otherwise unlocked. * * If sysctl debug.disablefullpath is set, ENODEV is returned, * vnode is left locked and path remain untouched. * * If vp is a directory, the call to vn_fullpath_global() always succeeds * because it falls back to the ".." lookup if the namecache lookup fails. */ int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen) { struct nameidata nd; struct vnode *vp1; char *rpath, *fbuf; int error, vfslocked; VFS_ASSERT_GIANT(vp->v_mount); ASSERT_VOP_ELOCKED(vp, __func__); /* Return ENODEV if sysctl debug.disablefullpath==1 */ if (disablefullpath) return (ENODEV); /* Construct global filesystem path from vp. */ VOP_UNLOCK(vp, 0); error = vn_fullpath_global(td, vp, &rpath, &fbuf); if (error != 0) { vrele(vp); return (error); } if (strlen(rpath) >= pathlen) { vrele(vp); error = ENAMETOOLONG; goto out; } /* * Re-lookup the vnode by path to detect a possible rename. * As a side effect, the vnode is relocked. * If vnode was renamed, return ENOENT. */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, UIO_SYSSPACE, path, td); error = namei(&nd); if (error != 0) { vrele(vp); goto out; } vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp1 = nd.ni_vp; vrele(vp); if (vp1 == vp) strcpy(path, rpath); else { vput(vp1); error = ENOENT; } VFS_UNLOCK_GIANT(vfslocked); out: free(fbuf, M_TEMP); return (error); } Index: stable/9/sys/mips/mips/busdma_machdep.c =================================================================== --- stable/9/sys/mips/mips/busdma_machdep.c (revision 273911) +++ stable/9/sys/mips/mips/busdma_machdep.c (revision 273912) @@ -1,1416 +1,1416 @@ /*- * Copyright (c) 2006 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 *  From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred
 */

#include
__FBSDID("$FreeBSD$");

/*
 * MIPS bus dma support routines
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define MAX_BPAGES 64
#define BUS_DMA_COULD_BOUNCE	BUS_DMA_BUS3
#define BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4

struct bounce_zone;

/*
 * A DMA tag: the constraints a device places on its DMA transfers, as
 * supplied to bus_dma_tag_create().
 */
struct bus_dma_tag {
	bus_dma_tag_t		parent;
	bus_size_t		alignment;
	bus_size_t		boundary;
	bus_addr_t		lowaddr;
	bus_addr_t		highaddr;
	bus_dma_filter_t	*filter;
	void			*filterarg;
	bus_size_t		maxsize;
	u_int			nsegments;
	bus_size_t		maxsegsz;
	int			flags;
	int			ref_count;	/* starts at 1 for the tag itself */
	int			map_count;	/* tag destroy fails with EBUSY while != 0 */
	bus_dma_lock_t		*lockfunc;	/* dflt_lock when the driver passes NULL */
	void			*lockfuncarg;
	bus_dma_segment_t	*segments;	/* nsegments entries, allocated on first map */
	struct bounce_zone *bounce_zone;
};

struct bounce_page {
	vm_offset_t	vaddr;		/* kva of bounce buffer */
	vm_offset_t	vaddr_nocache;	/* kva of bounce buffer uncached */
	bus_addr_t	busaddr;	/* Physical address */
	vm_offset_t	datavaddr;	/* kva of client data */
	bus_addr_t	dataaddr;	/* client physical address */
	bus_size_t	datacount;	/* client data count */
	STAILQ_ENTRY(bounce_page) links;
};

struct sync_list {
	vm_offset_t	vaddr;		/* kva of bounce buffer */
	bus_addr_t	busaddr;	/* Physical address */
	bus_size_t	datacount;	/* client data count */
};

int busdma_swi_pending;

/* A pool of bounce pages, linked on bounce_zone_list. */
struct bounce_zone {
	STAILQ_ENTRY(bounce_zone) links;
	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
	int		total_bpages;
	int		free_bpages;
	int		reserved_bpages;
	int		active_bpages;
	int		total_bounced;
	int		total_deferred;
	int		map_count;
	bus_size_t	alignment;
	bus_addr_t	lowaddr;
	char		zoneid[8];
	char		lowaddrid[20];
	struct sysctl_ctx_list sysctl_tree;
	struct sysctl_oid *sysctl_tree_top;
};

static struct mtx bounce_lock;
static int total_bpages;
static int busdma_zonecount;
static STAILQ_HEAD(, bounce_zone) bounce_zone_list;

static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
	   "Total bounce pages");

/* Values for bus_dmamap.flags. */
#define DMAMAP_UNCACHEABLE	0x8	/* buffer is reached through an uncached mapping */
#define DMAMAP_ALLOCATED	0x10	/* map was malloc'ed, not taken from map_pool */
#define DMAMAP_MALLOCUSED	0x20	/* buffer came from malloc, not contigmalloc */

struct bus_dmamap {
	struct bp_list	bpages;
	int		pagesneeded;
	int		pagesreserved;
	bus_dma_tag_t	dmat;
	struct memdesc	mem;
	int		flags;		/* DMAMAP_* */
	void		*origbuffer;	/* buffer as returned by the allocator */
	void		*allocbuffer;	/* uncached alias handed to the caller, or NULL */
	TAILQ_ENTRY(bus_dmamap)	freelist;
	STAILQ_ENTRY(bus_dmamap) links;
	bus_dmamap_callback_t *callback;
	void		*callback_arg;
	int		sync_count;
	struct sync_list *slist;	/* dmat->nsegments entries */
};

static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;

/* Free maps from the static pool below, protected by busdma_mtx. */
static TAILQ_HEAD(,bus_dmamap) dmamap_freelist =
	TAILQ_HEAD_INITIALIZER(dmamap_freelist);

#define BUSDMA_STATIC_MAPS	128
static struct bus_dmamap map_pool[BUSDMA_STATIC_MAPS];

static struct mtx busdma_mtx;

MTX_SYSINIT(busdma_mtx, &busdma_mtx, "busdma lock", MTX_DEF);

static void init_bounce_pages(void *dummy);
static int alloc_bounce_zone(bus_dma_tag_t dmat);
static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
				int commit);
static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
				  vm_offset_t vaddr, bus_addr_t addr,
				  bus_size_t size);
static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);

/* Default tag, as most drivers provide no parent tag. */
bus_dma_tag_t mips_root_dma_tag;

/*
 * Return true if a match is made.
 *
 * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'.
 *
 * If paddr is within the bounds of the dma tag then call the filter callback
 * to check for a match, if there is no filter callback then assume a match.
*/ static int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || ((paddr & (dmat->alignment - 1)) != 0)) && (dmat->filter == NULL || (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } static void mips_dmamap_freelist_init(void *dummy) { int i; for (i = 0; i < BUSDMA_STATIC_MAPS; i++) TAILQ_INSERT_HEAD(&dmamap_freelist, &map_pool[i], freelist); } SYSINIT(busdma, SI_SUB_VM, SI_ORDER_ANY, mips_dmamap_freelist_init, NULL); /* * Check to see if the specified page is in an allowed DMA range. */ static __inline int _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr > phys_avail[i])) return (1); } return (0); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. 
*/ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { #ifdef INVARIANTS panic("driver error: busdma dflt_lock called"); #else printf("DRIVER_ERROR: busdma dflt_lock called\n"); #endif } static __inline bus_dmamap_t _busdma_alloc_dmamap(bus_dma_tag_t dmat) { struct sync_list *slist; bus_dmamap_t map; slist = malloc(sizeof(*slist) * dmat->nsegments, M_DEVBUF, M_NOWAIT); if (slist == NULL) return (NULL); mtx_lock(&busdma_mtx); map = TAILQ_FIRST(&dmamap_freelist); if (map) TAILQ_REMOVE(&dmamap_freelist, map, freelist); mtx_unlock(&busdma_mtx); if (!map) { map = malloc(sizeof(*map), M_DEVBUF, M_NOWAIT | M_ZERO); if (map) map->flags = DMAMAP_ALLOCATED; } else map->flags = 0; if (map != NULL) { STAILQ_INIT(&map->bpages); map->slist = slist; } else free(slist, M_DEVBUF); return (map); } static __inline void _busdma_free_dmamap(bus_dmamap_t map) { free(map->slist, M_DEVBUF); if (map->flags & DMAMAP_ALLOCATED) free(map, M_DEVBUF); else { mtx_lock(&busdma_mtx); TAILQ_INSERT_HEAD(&dmamap_freelist, map, freelist); mtx_unlock(&busdma_mtx); } } /* * Allocate a device specific dma_tag. 
*/ #define SEG_NB 1024 int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_size_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Return a NULL tag on failure */ *dmat = NULL; if (!parent) parent = mips_root_dma_tag; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; if (cpuinfo.cache_coherent_dma) newtag->flags |= BUS_DMA_COHERENT; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } newtag->segments = NULL; /* * Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = min(parent->lowaddr, newtag->lowaddr); newtag->highaddr = max(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = min(parent->boundary, newtag->boundary); if ((newtag->filter != NULL) || ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter 
= parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) || newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { #ifdef KTR bus_dma_tag_t dmat_copy = dmat; #endif if (dmat != NULL) { if (dmat->map_count != 0) return (EBUSY); while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } CTR2(KTR_BUSDMA, "%s tag %p", __func__, dmat_copy); return (0); } #include /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. 
*/ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { bus_dmamap_t newmap; int error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } newmap = _busdma_alloc_dmamap(dmat); if (newmap == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } *mapp = newmap; newmap->dmat = dmat; newmap->allocbuffer = NULL; newmap->sync_count = 0; dmat->map_count++; /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { _busdma_free_dmamap(newmap); *mapp = NULL; return (error); } } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ maxpages = MAX_BPAGES; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } else { error = 0; } } bz->map_count++; } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. 
*/ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; dmat->map_count--; _busdma_free_dmamap(map); CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { bus_dmamap_t newmap = NULL; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; newmap = _busdma_alloc_dmamap(dmat); if (newmap == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } dmat->map_count++; *mapp = newmap; newmap->dmat = dmat; newmap->sync_count = 0; /* * If all the memory is coherent with DMA then we don't need to * do anything special for a coherent mapping request. */ if (dmat->flags & BUS_DMA_COHERENT) flags &= ~BUS_DMA_COHERENT; /* * Allocate uncacheable memory if all else fails. 
*/ if (flags & BUS_DMA_COHERENT) newmap->flags |= DMAMAP_UNCACHEABLE; if (dmat->maxsize <= PAGE_SIZE && (dmat->alignment < dmat->maxsize) && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr) && !(newmap->flags & DMAMAP_UNCACHEABLE)) { *vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags); newmap->flags |= DMAMAP_MALLOCUSED; } else { /* * XXX Use Contigmalloc until it is merged into this facility * and handles multi-seg allocations. Nobody is doing * multi-seg allocations yet though. */ *vaddr = contigmalloc(dmat->maxsize, M_DEVBUF, mflags, 0ul, dmat->lowaddr, dmat->alignment? dmat->alignment : 1ul, dmat->boundary); } if (*vaddr == NULL) { if (newmap != NULL) { _busdma_free_dmamap(newmap); dmat->map_count--; } *mapp = NULL; return (ENOMEM); } if (newmap->flags & DMAMAP_UNCACHEABLE) { void *tmpaddr = (void *)*vaddr; if (tmpaddr) { tmpaddr = (void *)pmap_mapdev(vtophys(tmpaddr), dmat->maxsize); newmap->origbuffer = *vaddr; newmap->allocbuffer = tmpaddr; mips_dcache_wbinv_range((vm_offset_t)*vaddr, dmat->maxsize); *vaddr = tmpaddr; } else newmap->origbuffer = newmap->allocbuffer = NULL; } else newmap->origbuffer = newmap->allocbuffer = NULL; return (0); } /* * Free a piece of memory and it's allocated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. 
*/ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { if (map->allocbuffer) { KASSERT(map->allocbuffer == vaddr, ("Trying to freeing the wrong DMA buffer")); vaddr = map->origbuffer; } if (map->flags & DMAMAP_UNCACHEABLE) pmap_unmapdev((vm_offset_t)map->allocbuffer, dmat->maxsize); if (map->flags & DMAMAP_MALLOCUSED) free(vaddr, M_DEVBUF); else contigfree(vaddr, dmat->maxsize, M_DEVBUF); dmat->map_count--; _busdma_free_dmamap(map); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if ((map->pagesneeded == 0)) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if ((map->pagesneeded == 0)) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { bus_size_t sg_len; KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); paddr = pmap_kextract(vaddr); if 
(((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && run_filter(dmat, paddr) != 0) { sg_len = roundup2(sg_len, dmat->alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * the previous segment if possible. */ seg = *segp; if (seg >= 0 && curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) { segs[seg].ds_len += sgsize; } else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. 
*/ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_size_t sgsize; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { _bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. * first indicates if this is the first invocation of this function. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; struct sync_list *sl; vm_offset_t vaddr = (vm_offset_t)buf; int error = 0; if (segs == NULL) segs = dmat->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, " "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); while (buflen > 0) { /* * Get the physical address for this segment. 
* * XXX Don't support checking for coherent mappings * XXX in user address space. */ KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); curaddr = pmap_kextract(vaddr); /* * Compute the segment size, and adjust counts. */ sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { sl = &map->slist[map->sync_count - 1]; if (map->sync_count == 0 || vaddr != sl->vaddr + sl->datacount) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = vaddr; sl->datacount = sgsize; sl->busaddr = curaddr; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { _bus_dmamap_unload(dmat, map); error = EFBIG; /* XXX better return value here? */ } return (error); } void __bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { KASSERT(dmat != NULL, ("dmatag is NULL")); KASSERT(map != NULL, ("dmamap is NULL")); map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. 
*/ void _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } map->sync_count = 0; return; } static void bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op) { char tmp_cl[mips_pdcache_linesize], tmp_clend[mips_pdcache_linesize]; vm_offset_t buf_cl, buf_clend; vm_size_t size_cl, size_clend; int cache_linesize_mask = mips_pdcache_linesize - 1; /* * dcache invalidation operates on cache line aligned addresses * and could modify areas of memory that share the same cache line * at the beginning and the ending of the buffer. In order to * prevent a data loss we save these chunks in temporary buffer * before invalidation and restore them afer it */ buf_cl = buf & ~cache_linesize_mask; size_cl = buf & cache_linesize_mask; buf_clend = buf + len; size_clend = (mips_pdcache_linesize - (buf_clend & cache_linesize_mask)) & cache_linesize_mask; switch (op) { case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: case BUS_DMASYNC_POSTREAD: /* * Save buffers that might be modified by invalidation */ if (size_cl) memcpy (tmp_cl, (void*)buf_cl, size_cl); if (size_clend) memcpy (tmp_clend, (void*)buf_clend, size_clend); mips_dcache_inv_range(buf, len); /* * Restore them */ if (size_cl) memcpy ((void*)buf_cl, tmp_cl, size_cl); if (size_clend) memcpy ((void*)buf_clend, tmp_clend, size_clend); /* * Copies above have brought corresponding memory * cache lines back into dirty state. Write them back * out and invalidate affected cache lines again if * necessary. 
*/ if (size_cl) mips_dcache_wbinv_range(buf_cl, size_cl); if (size_clend && (size_cl == 0 || buf_clend - buf_cl > mips_pdcache_linesize)) mips_dcache_wbinv_range(buf_clend, size_clend); break; case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE: mips_dcache_wbinv_range(buf_cl, len); break; case BUS_DMASYNC_PREREAD: /* * Save buffers that might be modified by invalidation */ if (size_cl) memcpy (tmp_cl, (void *)buf_cl, size_cl); if (size_clend) memcpy (tmp_clend, (void *)buf_clend, size_clend); mips_dcache_inv_range(buf, len); /* * Restore them */ if (size_cl) memcpy ((void *)buf_cl, tmp_cl, size_cl); if (size_clend) memcpy ((void *)buf_clend, tmp_clend, size_clend); /* * Copies above have brought corresponding memory * cache lines back into dirty state. Write them back * out and invalidate affected cache lines again if * necessary. */ if (size_cl) mips_dcache_wbinv_range(buf_cl, size_cl); if (size_clend && (size_cl == 0 || buf_clend - buf_cl > mips_pdcache_linesize)) mips_dcache_wbinv_range(buf_clend, size_clend); break; case BUS_DMASYNC_PREWRITE: mips_dcache_wb_range(buf, len); break; } } static void _bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; STAILQ_FOREACH(bpage, &map->bpages, links) { if (op & BUS_DMASYNC_PREWRITE) { if (bpage->datavaddr != 0) bcopy((void *)bpage->datavaddr, (void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->datacount); else physcopyout(bpage->dataaddr, (void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->datacount); if (bpage->vaddr_nocache == 0) { mips_dcache_wb_range(bpage->vaddr, bpage->datacount); } dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { if (bpage->vaddr_nocache == 0) { mips_dcache_inv_range(bpage->vaddr, bpage->datacount); } if (bpage->datavaddr != 0) bcopy((void *)(bpage->vaddr_nocache != 0 ? 
bpage->vaddr_nocache : bpage->vaddr), (void *)bpage->datavaddr, bpage->datacount); else physcopyin((void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->dataaddr, bpage->datacount); dmat->bounce_zone->total_bounced++; } } } void _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct sync_list *sl, *end; if (op == BUS_DMASYNC_POSTWRITE) return; if (STAILQ_FIRST(&map->bpages)) _bus_dmamap_sync_bp(dmat, map, op); if (dmat->flags & BUS_DMA_COHERENT) return; if (map->flags & DMAMAP_UNCACHEABLE) return; CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags); if (map->sync_count) { end = &map->slist[map->sync_count]; for (sl = &map->slist[0]; sl != end; sl++) bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; 
snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "alignment", CTLFLAG_RD, &bz->alignment, 0, ""); + "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while 
(numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); bpage->vaddr_nocache = (vm_offset_t)pmap_mapdev(bpage->busaddr, PAGE_SIZE); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) 
{ /* Page offset needs to be preserved. */ bpage->vaddr |= vaddr & PAGE_MASK; bpage->busaddr |= vaddr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->dataaddr = addr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: stable/9/sys/modules/ixgbe =================================================================== --- stable/9/sys/modules/ixgbe (revision 273911) +++ stable/9/sys/modules/ixgbe (revision 273912) Property changes on: stable/9/sys/modules/ixgbe 
___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/modules/ixgbe:r263710,273377-273378,273423,273455 Index: stable/9/sys/modules/svr4 =================================================================== --- stable/9/sys/modules/svr4 (revision 273911) +++ stable/9/sys/modules/svr4 (revision 273912) Property changes on: stable/9/sys/modules/svr4 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/modules/svr4:r263710,273377-273378,273423,273455 Index: stable/9/sys/modules =================================================================== --- stable/9/sys/modules (revision 273911) +++ stable/9/sys/modules (revision 273912) Property changes on: stable/9/sys/modules ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/modules:r263710,273377-273378,273423,273455 Index: stable/9/sys/net/if_lagg.c =================================================================== --- stable/9/sys/net/if_lagg.c (revision 273911) +++ stable/9/sys/net/if_lagg.c (revision 273912) @@ -1,1952 +1,1952 @@ /* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */ /* * Copyright (c) 2005, 2006 Reyk Floeter * Copyright (c) 2007 Andrew Thompson * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #endif #ifdef INET #include #include #endif #ifdef INET6 #include #include #include #endif #include #include #include /* Special flags we should propagate to the lagg ports. */ static struct { int flag; int (*func)(struct ifnet *, int); } lagg_pflags[] = { {IFF_PROMISC, ifpromisc}, {IFF_ALLMULTI, if_allmulti}, {0, NULL} }; SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */ static struct mtx lagg_list_mtx; eventhandler_tag lagg_detach_cookie = NULL; static int lagg_clone_create(struct if_clone *, int, caddr_t); static void lagg_clone_destroy(struct ifnet *); static void lagg_lladdr(struct lagg_softc *, uint8_t *); static void lagg_capabilities(struct lagg_softc *); static void lagg_port_lladdr(struct lagg_port *, uint8_t *); static void lagg_port_setlladdr(void *, int); static int lagg_port_create(struct lagg_softc *, struct ifnet *); static int lagg_port_destroy(struct lagg_port *, int); static struct mbuf *lagg_input(struct ifnet *, struct mbuf *); static void lagg_linkstate(struct lagg_softc *); static void lagg_port_state(struct ifnet *, int); static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t); static int lagg_port_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct route *); static void lagg_port_ifdetach(void *arg __unused, struct ifnet 
*); #ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *); #endif static void lagg_port2req(struct lagg_port *, struct lagg_reqport *); static void lagg_init(void *); static void lagg_stop(struct lagg_softc *); static int lagg_ioctl(struct ifnet *, u_long, caddr_t); static int lagg_ether_setmulti(struct lagg_softc *); static int lagg_ether_cmdmulti(struct lagg_port *, int); static int lagg_setflag(struct lagg_port *, int, int, int (*func)(struct ifnet *, int)); static int lagg_setflags(struct lagg_port *, int status); static int lagg_transmit(struct ifnet *, struct mbuf *); static void lagg_qflush(struct ifnet *); static int lagg_media_change(struct ifnet *); static void lagg_media_status(struct ifnet *, struct ifmediareq *); static struct lagg_port *lagg_link_active(struct lagg_softc *, struct lagg_port *); static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *); IFC_SIMPLE_DECLARE(lagg, 0); /* Simple round robin */ static int lagg_rr_attach(struct lagg_softc *); static int lagg_rr_detach(struct lagg_softc *); static int lagg_rr_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Active failover */ static int lagg_fail_attach(struct lagg_softc *); static int lagg_fail_detach(struct lagg_softc *); static int lagg_fail_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Loadbalancing */ static int lagg_lb_attach(struct lagg_softc *); static int lagg_lb_detach(struct lagg_softc *); static int lagg_lb_port_create(struct lagg_port *); static void lagg_lb_port_destroy(struct lagg_port *); static int lagg_lb_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *); /* 802.3ad LACP */ static int lagg_lacp_attach(struct 
lagg_softc *); static int lagg_lacp_detach(struct lagg_softc *); static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); static void lagg_lacp_lladdr(struct lagg_softc *); /* lagg protocol table */ static const struct { int ti_proto; int (*ti_attach)(struct lagg_softc *); } lagg_protos[] = { { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach }, { LAGG_PROTO_FAILOVER, lagg_fail_attach }, { LAGG_PROTO_LOADBALANCE, lagg_lb_attach }, { LAGG_PROTO_ETHERCHANNEL, lagg_lb_attach }, { LAGG_PROTO_LACP, lagg_lacp_attach }, { LAGG_PROTO_NONE, NULL } }; SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); static int lagg_failover_rx_all = 0; /* Allow input on any failover links */ SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW, &lagg_failover_rx_all, 0, "Accept input from any interface in a failover lagg"); static int def_use_flowid = 1; /* Default value for using M_FLOWID */ TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid); SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW, &def_use_flowid, 0, "Default setting for using flow id for load sharing"); static int lagg_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF); SLIST_INIT(&lagg_list); if_clone_attach(&lagg_cloner); lagg_input_p = lagg_input; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( ifnet_departure_event, lagg_port_ifdetach, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); if_clone_detach(&lagg_cloner); lagg_input_p = NULL; lagg_linkstate_p = NULL; mtx_destroy(&lagg_list_mtx); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t lagg_mod = { "if_lagg", lagg_modevent, 0 }; DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, 
SI_ORDER_ANY); MODULE_VERSION(if_lagg, 1); #if __FreeBSD_version >= 800000 /* * This routine is run via an vlan * config EVENT */ static void lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct lagg_softc *sc = ifp->if_softc; struct lagg_port *lp; if (ifp->if_softc != arg) /* Not our event */ return; LAGG_RLOCK(sc); if (!SLIST_EMPTY(&sc->sc_ports)) { SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag); } LAGG_RUNLOCK(sc); } /* * This routine is run via an vlan * unconfig EVENT */ static void lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) { struct lagg_softc *sc = ifp->if_softc; struct lagg_port *lp; if (ifp->if_softc != arg) /* Not our event */ return; LAGG_RLOCK(sc); if (!SLIST_EMPTY(&sc->sc_ports)) { SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag); } LAGG_RUNLOCK(sc); } #endif static int lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct lagg_softc *sc; struct ifnet *ifp; int i, error = 0; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ struct sysctl_oid *oid; char num[14]; /* sufficient for 32 bits */ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { free(sc, M_DEVBUF); return (ENOSPC); } sysctl_ctx_init(&sc->ctx); snprintf(num, sizeof(num), "%u", unit); sc->use_flowid = def_use_flowid; oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg), OID_AUTO, num, CTLFLAG_RD, NULL, ""); SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid, + "use_flowid", CTLFLAG_RW, &sc->use_flowid, sc->use_flowid, "Use flow id for load sharing"); SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count, + "count", CTLFLAG_RD, &sc->sc_count, sc->sc_count, "Total number of ports"); /* Hash all layers by default 
*/ sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; sc->sc_proto = LAGG_PROTO_NONE; for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) { sc->sc_proto = lagg_protos[i].ti_proto; if ((error = lagg_protos[i].ti_attach(sc)) != 0) { if_free_type(ifp, IFT_ETHER); free(sc, M_DEVBUF); return (error); } break; } } LAGG_LOCK_INIT(sc); SLIST_INIT(&sc->sc_ports); TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc); /* Initialise pseudo media types */ ifmedia_init(&sc->sc_media, 0, lagg_media_change, lagg_media_status); ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); if_initname(ifp, ifc->ifc_name, unit); ifp->if_type = IFT_ETHER; ifp->if_softc = sc; ifp->if_transmit = lagg_transmit; ifp->if_qflush = lagg_qflush; ifp->if_init = lagg_init; ifp->if_ioctl = lagg_ioctl; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; /* * Attach as an ordinary ethernet device, childs will be attached * as special device IFT_IEEE8023ADLAG. 
*/ ether_ifattach(ifp, eaddr); #if __FreeBSD_version >= 800000 sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); #endif /* Insert into the global list of laggs */ mtx_lock(&lagg_list_mtx); SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries); mtx_unlock(&lagg_list_mtx); return (0); } static void lagg_clone_destroy(struct ifnet *ifp) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_port *lp; LAGG_WLOCK(sc); lagg_stop(sc); ifp->if_flags &= ~IFF_UP; #if __FreeBSD_version >= 800000 EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); #endif /* Shutdown and remove lagg ports */ while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL) lagg_port_destroy(lp, 1); /* Unhook the aggregation protocol */ if (sc->sc_detach != NULL) (*sc->sc_detach)(sc); LAGG_WUNLOCK(sc); sysctl_ctx_free(&sc->ctx); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free_type(ifp, IFT_ETHER); mtx_lock(&lagg_list_mtx); SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries); mtx_unlock(&lagg_list_mtx); taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task); LAGG_LOCK_DESTROY(sc); free(sc, M_DEVBUF); } static void lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr) { struct ifnet *ifp = sc->sc_ifp; if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) return; bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN); /* Let the protocol know the MAC has changed */ if (sc->sc_lladdr != NULL) (*sc->sc_lladdr)(sc); EVENTHANDLER_INVOKE(iflladdr_event, ifp); } static void lagg_capabilities(struct lagg_softc *sc) { struct lagg_port *lp; int cap = ~0, ena = ~0; u_long hwa = ~0UL; #if defined(INET) || defined(INET6) u_int hw_tsomax = IP_MAXPACKET; /* Initialize to the maximum value. */ #else u_int hw_tsomax = ~0; /* if_hw_tsomax is only for INET/INET6, but.. 
*/ #endif LAGG_WLOCK_ASSERT(sc); /* Get capabilities from the lagg ports */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { cap &= lp->lp_ifp->if_capabilities; ena &= lp->lp_ifp->if_capenable; hwa &= lp->lp_ifp->if_hwassist; /* Set to the minimum value of the lagg ports. */ if (lp->lp_ifp->if_hw_tsomax < hw_tsomax && lp->lp_ifp->if_hw_tsomax > 0) hw_tsomax = lp->lp_ifp->if_hw_tsomax; } cap = (cap == ~0 ? 0 : cap); ena = (ena == ~0 ? 0 : ena); hwa = (hwa == ~0 ? 0 : hwa); if (sc->sc_ifp->if_capabilities != cap || sc->sc_ifp->if_capenable != ena || sc->sc_ifp->if_hwassist != hwa || sc->sc_ifp->if_hw_tsomax != hw_tsomax) { sc->sc_ifp->if_capabilities = cap; sc->sc_ifp->if_capenable = ena; sc->sc_ifp->if_hwassist = hwa; sc->sc_ifp->if_hw_tsomax = hw_tsomax; getmicrotime(&sc->sc_ifp->if_lastchange); if (sc->sc_ifflags & IFF_DEBUG) if_printf(sc->sc_ifp, "capabilities 0x%08x enabled 0x%08x\n", cap, ena); } } static void lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr) { struct lagg_softc *sc = lp->lp_softc; struct ifnet *ifp = lp->lp_ifp; struct lagg_llq *llq; int pending = 0; LAGG_WLOCK_ASSERT(sc); if (lp->lp_detaching || memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) return; /* Check to make sure its not already queued to be changed */ SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) { if (llq->llq_ifp == ifp) { pending = 1; break; } } if (!pending) { llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT); if (llq == NULL) /* XXX what to do */ return; } /* Update the lladdr even if pending, it may have changed */ llq->llq_ifp = ifp; bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN); if (!pending) SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries); taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task); } /* * Set the interface MAC address from a taskqueue to avoid a LOR. 
*/ static void lagg_port_setlladdr(void *arg, int pending) { struct lagg_softc *sc = (struct lagg_softc *)arg; struct lagg_llq *llq, *head; struct ifnet *ifp; int error; /* Grab a local reference of the queue and remove it from the softc */ LAGG_WLOCK(sc); head = SLIST_FIRST(&sc->sc_llq_head); SLIST_FIRST(&sc->sc_llq_head) = NULL; LAGG_WUNLOCK(sc); /* * Traverse the queue and set the lladdr on each ifp. It is safe to do * unlocked as we have the only reference to it. */ for (llq = head; llq != NULL; llq = head) { ifp = llq->llq_ifp; /* Set the link layer address */ CURVNET_SET(ifp->if_vnet); error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN); CURVNET_RESTORE(); if (error) printf("%s: setlladdr failed on %s\n", __func__, ifp->if_xname); head = SLIST_NEXT(llq, llq_entries); free(llq, M_DEVBUF); } } static int lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) { struct lagg_softc *sc_ptr; struct lagg_port *lp, *tlp; int error = 0; LAGG_WLOCK_ASSERT(sc); /* Limit the maximal number of lagg ports */ if (sc->sc_count >= LAGG_MAX_PORTS) return (ENOSPC); /* Check if port has already been associated to a lagg */ if (ifp->if_lagg != NULL) { /* Port is already in the current lagg? */ lp = (struct lagg_port *)ifp->if_lagg; if (lp->lp_softc == sc) return (EEXIST); return (EBUSY); } /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ if (ifp->if_type != IFT_ETHER) return (EPROTONOSUPPORT); #ifdef INET6 /* * The member interface should not have inet6 address because * two interfaces with a valid link-local scope zone must not be * merged in any form. This restriction is needed to * prevent violation of link-local scope zone. Attempts to * add a member interface which has inet6 addresses triggers * removal of all inet6 addresses on the member interface. 
*/ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (in6ifa_llaonifp(lp->lp_ifp)) { in6_ifdetach(lp->lp_ifp); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", lp->lp_ifp->if_xname); } } if (in6ifa_llaonifp(ifp)) { in6_ifdetach(ifp); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " "IPv6 address scope violation.\n", ifp->if_xname); } #endif /* Allow the first Ethernet member to define the MTU */ if (SLIST_EMPTY(&sc->sc_ports)) sc->sc_ifp->if_mtu = ifp->if_mtu; else if (sc->sc_ifp->if_mtu != ifp->if_mtu) { if_printf(sc->sc_ifp, "invalid MTU for %s\n", ifp->if_xname); return (EINVAL); } if ((lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) return (ENOMEM); /* Check if port is a stacked lagg */ mtx_lock(&lagg_list_mtx); SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) { if (ifp == sc_ptr->sc_ifp) { mtx_unlock(&lagg_list_mtx); free(lp, M_DEVBUF); return (EINVAL); /* XXX disable stacking for the moment, its untested */ #ifdef LAGG_PORT_STACKING lp->lp_flags |= LAGG_PORT_STACK; if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { mtx_unlock(&lagg_list_mtx); free(lp, M_DEVBUF); return (E2BIG); } #endif } } mtx_unlock(&lagg_list_mtx); /* Change the interface type */ lp->lp_iftype = ifp->if_type; ifp->if_type = IFT_IEEE8023ADLAG; ifp->if_lagg = lp; lp->lp_ioctl = ifp->if_ioctl; ifp->if_ioctl = lagg_port_ioctl; lp->lp_output = ifp->if_output; ifp->if_output = lagg_port_output; lp->lp_ifp = ifp; lp->lp_softc = sc; /* Save port link layer address */ bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN); if (SLIST_EMPTY(&sc->sc_ports)) { sc->sc_primary = lp; lagg_lladdr(sc, IF_LLADDR(ifp)); } else { /* Update link layer address for this port */ lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp)); } /* Insert into the list of ports. Keep ports sorted by if_index. 
*/ SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) { if (tlp->lp_ifp->if_index < ifp->if_index && ( SLIST_NEXT(tlp, lp_entries) == NULL || SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index < ifp->if_index)) break; } if (tlp != NULL) SLIST_INSERT_AFTER(tlp, lp, lp_entries); else SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries); sc->sc_count++; /* Update lagg capabilities */ lagg_capabilities(sc); lagg_linkstate(sc); /* Add multicast addresses and interface flags to this port */ lagg_ether_cmdmulti(lp, 1); lagg_setflags(lp, 1); if (sc->sc_port_create != NULL) error = (*sc->sc_port_create)(lp); if (error) { /* remove the port again, without calling sc_port_destroy */ lagg_port_destroy(lp, 0); return (error); } return (error); } #ifdef LAGG_PORT_STACKING static int lagg_port_checkstacking(struct lagg_softc *sc) { struct lagg_softc *sc_ptr; struct lagg_port *lp; int m = 0; LAGG_WLOCK_ASSERT(sc); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (lp->lp_flags & LAGG_PORT_STACK) { sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc; m = MAX(m, lagg_port_checkstacking(sc_ptr)); } } return (m + 1); } #endif static int lagg_port_destroy(struct lagg_port *lp, int runpd) { struct lagg_softc *sc = lp->lp_softc; struct lagg_port *lp_ptr; struct lagg_llq *llq; struct ifnet *ifp = lp->lp_ifp; LAGG_WLOCK_ASSERT(sc); if (runpd && sc->sc_port_destroy != NULL) (*sc->sc_port_destroy)(lp); /* * Remove multicast addresses and interface flags from this port and * reset the MAC address, skip if the interface is being detached. 
*/ if (!lp->lp_detaching) { lagg_ether_cmdmulti(lp, 0); lagg_setflags(lp, 0); lagg_port_lladdr(lp, lp->lp_lladdr); } /* Restore interface */ ifp->if_type = lp->lp_iftype; ifp->if_ioctl = lp->lp_ioctl; ifp->if_output = lp->lp_output; ifp->if_lagg = NULL; /* Finally, remove the port from the lagg */ SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries); sc->sc_count--; /* Update the primary interface */ if (lp == sc->sc_primary) { uint8_t lladdr[ETHER_ADDR_LEN]; if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) { bzero(&lladdr, ETHER_ADDR_LEN); } else { bcopy(lp_ptr->lp_lladdr, lladdr, ETHER_ADDR_LEN); } lagg_lladdr(sc, lladdr); sc->sc_primary = lp_ptr; /* Update link layer address for each port */ SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) lagg_port_lladdr(lp_ptr, lladdr); } /* Remove any pending lladdr changes from the queue */ if (lp->lp_detaching) { SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) { if (llq->llq_ifp == ifp) { SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq, llq_entries); free(llq, M_DEVBUF); break; /* Only appears once */ } } } if (lp->lp_ifflags) if_printf(ifp, "%s: lp_ifflags unclean\n", __func__); free(lp, M_DEVBUF); /* Update lagg capabilities */ lagg_capabilities(sc); lagg_linkstate(sc); return (0); } static int lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct lagg_reqport *rp = (struct lagg_reqport *)data; struct lagg_softc *sc; struct lagg_port *lp = NULL; int error = 0; /* Should be checked by the caller */ if (ifp->if_type != IFT_IEEE8023ADLAG || (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) goto fallback; switch (cmd) { case SIOCGLAGGPORT: if (rp->rp_portname[0] == '\0' || ifunit(rp->rp_portname) != ifp) { error = EINVAL; break; } LAGG_RLOCK(sc); if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; LAGG_RUNLOCK(sc); break; } lagg_port2req(lp, rp); LAGG_RUNLOCK(sc); break; case SIOCSIFCAP: if (lp->lp_ioctl == NULL) { error = EINVAL; break; } error = (*lp->lp_ioctl)(ifp, cmd, 
data); if (error) break; /* Update lagg interface capabilities */ LAGG_WLOCK(sc); lagg_capabilities(sc); LAGG_WUNLOCK(sc); break; case SIOCSIFMTU: /* Do not allow the MTU to be changed once joined */ error = EINVAL; break; default: goto fallback; } return (error); fallback: if (lp->lp_ioctl != NULL) return ((*lp->lp_ioctl)(ifp, cmd, data)); return (EINVAL); } /* * For direct output to child ports. */ static int lagg_port_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route *ro) { struct lagg_port *lp = ifp->if_lagg; switch (dst->sa_family) { case pseudo_AF_HDRCMPLT: case AF_UNSPEC: return ((*lp->lp_output)(ifp, m, dst, ro)); } /* drop any other frames */ m_freem(m); return (ENETDOWN); } static void lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp) { struct lagg_port *lp; struct lagg_softc *sc; if ((lp = ifp->if_lagg) == NULL) return; /* If the ifnet is just being renamed, don't do anything. */ if (ifp->if_flags & IFF_RENAMING) return; sc = lp->lp_softc; LAGG_WLOCK(sc); lp->lp_detaching = 1; lagg_port_destroy(lp, 1); LAGG_WUNLOCK(sc); } static void lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp) { struct lagg_softc *sc = lp->lp_softc; strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname)); strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname)); rp->rp_prio = lp->lp_prio; rp->rp_flags = lp->lp_flags; if (sc->sc_portreq != NULL) (*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc); /* Add protocol specific flags */ switch (sc->sc_proto) { case LAGG_PROTO_FAILOVER: if (lp == sc->sc_primary) rp->rp_flags |= LAGG_PORT_MASTER; if (lp == lagg_link_active(sc, sc->sc_primary)) rp->rp_flags |= LAGG_PORT_ACTIVE; break; case LAGG_PROTO_ROUNDROBIN: case LAGG_PROTO_LOADBALANCE: case LAGG_PROTO_ETHERCHANNEL: if (LAGG_PORTACTIVE(lp)) rp->rp_flags |= LAGG_PORT_ACTIVE; break; case LAGG_PROTO_LACP: /* LACP has a different definition of active */ if (lacp_isactive(lp)) rp->rp_flags |= LAGG_PORT_ACTIVE; if 
(lacp_iscollecting(lp)) rp->rp_flags |= LAGG_PORT_COLLECTING; if (lacp_isdistributing(lp)) rp->rp_flags |= LAGG_PORT_DISTRIBUTING; break; } } static void lagg_init(void *xsc) { struct lagg_softc *sc = (struct lagg_softc *)xsc; struct lagg_port *lp; struct ifnet *ifp = sc->sc_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; LAGG_WLOCK(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; /* Update the port lladdrs */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lagg_port_lladdr(lp, IF_LLADDR(ifp)); if (sc->sc_init != NULL) (*sc->sc_init)(sc); LAGG_WUNLOCK(sc); } static void lagg_stop(struct lagg_softc *sc) { struct ifnet *ifp = sc->sc_ifp; LAGG_WLOCK_ASSERT(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if (sc->sc_stop != NULL) (*sc->sc_stop)(sc); } static int lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_reqall *ra = (struct lagg_reqall *)data; struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf; struct lagg_reqflags *rf = (struct lagg_reqflags *)data; struct ifreq *ifr = (struct ifreq *)data; struct lagg_port *lp; struct ifnet *tpif; struct thread *td = curthread; char *buf, *outbuf; int count, buflen, len, error = 0; bzero(&rpbuf, sizeof(rpbuf)); switch (cmd) { case SIOCGLAGG: LAGG_RLOCK(sc); count = 0; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) count++; buflen = count * sizeof(struct lagg_reqport); LAGG_RUNLOCK(sc); outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); LAGG_RLOCK(sc); ra->ra_proto = sc->sc_proto; if (sc->sc_req != NULL) (*sc->sc_req)(sc, (caddr_t)&ra->ra_psc); count = 0; buf = outbuf; len = min(ra->ra_size, buflen); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (len < sizeof(rpbuf)) break; lagg_port2req(lp, &rpbuf); memcpy(buf, &rpbuf, sizeof(rpbuf)); count++; buf += sizeof(rpbuf); len -= sizeof(rpbuf); } LAGG_RUNLOCK(sc); ra->ra_ports = count; ra->ra_size = count * sizeof(rpbuf); error = copyout(outbuf, 
ra->ra_port, ra->ra_size); free(outbuf, M_TEMP); break; case SIOCSLAGG: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if (ra->ra_proto >= LAGG_PROTO_MAX) { error = EPROTONOSUPPORT; break; } LAGG_WLOCK(sc); if (sc->sc_proto != LAGG_PROTO_NONE) { /* Reset protocol first in case detach unlocks */ sc->sc_proto = LAGG_PROTO_NONE; error = sc->sc_detach(sc); sc->sc_detach = NULL; sc->sc_start = NULL; sc->sc_input = NULL; sc->sc_port_create = NULL; sc->sc_port_destroy = NULL; sc->sc_linkstate = NULL; sc->sc_init = NULL; sc->sc_stop = NULL; sc->sc_lladdr = NULL; sc->sc_req = NULL; sc->sc_portreq = NULL; } else if (sc->sc_input != NULL) { /* Still detaching */ error = EBUSY; } if (error != 0) { LAGG_WUNLOCK(sc); break; } for (int i = 0; i < (sizeof(lagg_protos) / sizeof(lagg_protos[0])); i++) { if (lagg_protos[i].ti_proto == ra->ra_proto) { if (sc->sc_ifflags & IFF_DEBUG) printf("%s: using proto %u\n", sc->sc_ifname, lagg_protos[i].ti_proto); sc->sc_proto = lagg_protos[i].ti_proto; if (sc->sc_proto != LAGG_PROTO_NONE) error = lagg_protos[i].ti_attach(sc); LAGG_WUNLOCK(sc); return (error); } } LAGG_WUNLOCK(sc); error = EPROTONOSUPPORT; break; case SIOCGLAGGFLAGS: rf->rf_flags = sc->sc_flags; break; case SIOCSLAGGHASH: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) { error = EINVAL; break; } LAGG_WLOCK(sc); sc->sc_flags &= ~LAGG_F_HASHMASK; sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK; LAGG_WUNLOCK(sc); break; case SIOCGLAGGPORT: if (rp->rp_portname[0] == '\0' || (tpif = ifunit(rp->rp_portname)) == NULL) { error = EINVAL; break; } LAGG_RLOCK(sc); if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; LAGG_RUNLOCK(sc); break; } lagg_port2req(lp, rp); LAGG_RUNLOCK(sc); break; case SIOCSLAGGPORT: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if (rp->rp_portname[0] == '\0' || (tpif = ifunit(rp->rp_portname)) == NULL) { error = EINVAL; break; } LAGG_WLOCK(sc); error = 
lagg_port_create(sc, tpif); LAGG_WUNLOCK(sc); break; case SIOCSLAGGDELPORT: error = priv_check(td, PRIV_NET_LAGG); if (error) break; if (rp->rp_portname[0] == '\0' || (tpif = ifunit(rp->rp_portname)) == NULL) { error = EINVAL; break; } LAGG_WLOCK(sc); if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || lp->lp_softc != sc) { error = ENOENT; LAGG_WUNLOCK(sc); break; } error = lagg_port_destroy(lp, 1); LAGG_WUNLOCK(sc); break; case SIOCSIFFLAGS: /* Set flags on ports too */ LAGG_WLOCK(sc); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { lagg_setflags(lp, 1); } LAGG_WUNLOCK(sc); if (!(ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked down and it is running, * then stop and disable it. */ LAGG_WLOCK(sc); lagg_stop(sc); LAGG_WUNLOCK(sc); } else if ((ifp->if_flags & IFF_UP) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* * If interface is marked up and it is stopped, then * start it. */ (*ifp->if_init)(sc); } break; case SIOCADDMULTI: case SIOCDELMULTI: LAGG_WLOCK(sc); error = lagg_ether_setmulti(sc); LAGG_WUNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; case SIOCSIFCAP: case SIOCSIFMTU: /* Do not allow the MTU or caps to be directly changed */ error = EINVAL; break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static int lagg_ether_setmulti(struct lagg_softc *sc) { struct lagg_port *lp; LAGG_WLOCK_ASSERT(sc); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { /* First, remove any existing filter entries. 
*/ lagg_ether_cmdmulti(lp, 0); /* copy all addresses from the lagg interface to the port */ lagg_ether_cmdmulti(lp, 1); } return (0); } static int lagg_ether_cmdmulti(struct lagg_port *lp, int set) { struct lagg_softc *sc = lp->lp_softc; struct ifnet *ifp = lp->lp_ifp; struct ifnet *scifp = sc->sc_ifp; struct lagg_mc *mc; struct ifmultiaddr *ifma, *rifma = NULL; struct sockaddr_dl sdl; int error; LAGG_WLOCK_ASSERT(sc); bzero((char *)&sdl, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); sdl.sdl_family = AF_LINK; sdl.sdl_type = IFT_ETHER; sdl.sdl_alen = ETHER_ADDR_LEN; sdl.sdl_index = ifp->if_index; if (set) { TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), LLADDR(&sdl), ETHER_ADDR_LEN); error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma); if (error) return (error); mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT); if (mc == NULL) return (ENOMEM); mc->mc_ifma = rifma; SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries); } } else { while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) { SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries); if_delmulti_ifma(mc->mc_ifma); free(mc, M_DEVBUF); } } return (0); } /* Handle a ref counted flag that should be set on the lagg port as well */ static int lagg_setflag(struct lagg_port *lp, int flag, int status, int (*func)(struct ifnet *, int)) { struct lagg_softc *sc = lp->lp_softc; struct ifnet *scifp = sc->sc_ifp; struct ifnet *ifp = lp->lp_ifp; int error; LAGG_WLOCK_ASSERT(sc); status = status ? (scifp->if_flags & flag) : 0; /* Now "status" contains the flag value or 0 */ /* * See if recorded ports status is different from what * we want it to be. If it is, flip it. We record ports * status in lp_ifflags so that we won't clear ports flag * we haven't set. In fact, we don't clear or set ports * flags directly, but get or release references to them. 
* That's why we can be sure that recorded flags still are * in accord with actual ports flags. */ if (status != (lp->lp_ifflags & flag)) { error = (*func)(ifp, status); if (error) return (error); lp->lp_ifflags &= ~flag; lp->lp_ifflags |= status; } return (0); } /* * Handle IFF_* flags that require certain changes on the lagg port * if "status" is true, update ports flags respective to the lagg * if "status" is false, forcedly clear the flags set on port. */ static int lagg_setflags(struct lagg_port *lp, int status) { int error, i; for (i = 0; lagg_pflags[i].flag; i++) { error = lagg_setflag(lp, lagg_pflags[i].flag, status, lagg_pflags[i].func); if (error) return (error); } return (0); } static int lagg_transmit(struct ifnet *ifp, struct mbuf *m) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; int error, len, mcast; len = m->m_pkthdr.len; mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; LAGG_RLOCK(sc); /* We need a Tx algorithm and at least one port */ if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) { LAGG_RUNLOCK(sc); m_freem(m); ifp->if_oerrors++; return (ENXIO); } ETHER_BPF_MTAP(ifp, m); error = (*sc->sc_start)(sc, m); LAGG_RUNLOCK(sc); if (error == 0) { ifp->if_opackets++; ifp->if_omcasts += mcast; ifp->if_obytes += len; } else ifp->if_oerrors++; return (error); } /* * The ifp->if_qflush entry point for lagg(4) is no-op. 
*/ static void lagg_qflush(struct ifnet *ifp __unused) { } static struct mbuf * lagg_input(struct ifnet *ifp, struct mbuf *m) { struct lagg_port *lp = ifp->if_lagg; struct lagg_softc *sc = lp->lp_softc; struct ifnet *scifp = sc->sc_ifp; LAGG_RLOCK(sc); if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (lp->lp_flags & LAGG_PORT_DISABLED) || sc->sc_proto == LAGG_PROTO_NONE) { LAGG_RUNLOCK(sc); m_freem(m); return (NULL); } ETHER_BPF_MTAP(scifp, m); m = (*sc->sc_input)(sc, lp, m); if (m != NULL) { scifp->if_ipackets++; scifp->if_ibytes += m->m_pkthdr.len; if (scifp->if_flags & IFF_MONITOR) { m_freem(m); m = NULL; } } LAGG_RUNLOCK(sc); return (m); } static int lagg_media_change(struct ifnet *ifp) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; if (sc->sc_ifflags & IFF_DEBUG) printf("%s\n", __func__); /* Ignore */ return (0); } static void lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_port *lp; imr->ifm_status = IFM_AVALID; imr->ifm_active = IFM_ETHER | IFM_AUTO; LAGG_RLOCK(sc); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (LAGG_PORTACTIVE(lp)) imr->ifm_status |= IFM_ACTIVE; } LAGG_RUNLOCK(sc); } static void lagg_linkstate(struct lagg_softc *sc) { struct lagg_port *lp; int new_link = LINK_STATE_DOWN; uint64_t speed; /* Our link is considered up if at least one of our ports is active */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (lp->lp_link_state == LINK_STATE_UP) { new_link = LINK_STATE_UP; break; } } if_link_state_change(sc->sc_ifp, new_link); /* Update if_baudrate to reflect the max possible speed */ switch (sc->sc_proto) { case LAGG_PROTO_FAILOVER: sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ? 
sc->sc_primary->lp_ifp->if_baudrate : 0; break; case LAGG_PROTO_ROUNDROBIN: case LAGG_PROTO_LOADBALANCE: case LAGG_PROTO_ETHERCHANNEL: speed = 0; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) speed += lp->lp_ifp->if_baudrate; sc->sc_ifp->if_baudrate = speed; break; case LAGG_PROTO_LACP: /* LACP updates if_baudrate itself */ break; } } static void lagg_port_state(struct ifnet *ifp, int state) { struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg; struct lagg_softc *sc = NULL; if (lp != NULL) sc = lp->lp_softc; if (sc == NULL) return; LAGG_WLOCK(sc); lagg_linkstate(sc); if (sc->sc_linkstate != NULL) (*sc->sc_linkstate)(lp); LAGG_WUNLOCK(sc); } struct lagg_port * lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) { struct lagg_port *lp_next, *rval = NULL; // int new_link = LINK_STATE_DOWN; LAGG_RLOCK_ASSERT(sc); /* * Search a port which reports an active link state. */ if (lp == NULL) goto search; if (LAGG_PORTACTIVE(lp)) { rval = lp; goto found; } if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL && LAGG_PORTACTIVE(lp_next)) { rval = lp_next; goto found; } search: SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (LAGG_PORTACTIVE(lp_next)) { rval = lp_next; goto found; } } found: if (rval != NULL) { /* * The IEEE 802.1D standard assumes that a lagg with * multiple ports is always full duplex. This is valid * for load sharing laggs and if at least two links * are active. Unfortunately, checking the latter would * be too expensive at this point. 
XXX if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) && (sc->sc_count > 1)) new_link = LINK_STATE_FULL_DUPLEX; else new_link = rval->lp_link_state; */ } return (rval); } static const void * lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf) { if (m->m_pkthdr.len < (off + len)) { return (NULL); } else if (m->m_len < (off + len)) { m_copydata(m, off, len, buf); return (buf); } return (mtod(m, char *) + off); } uint32_t lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key) { uint16_t etype; uint32_t p = key; int off; struct ether_header *eh; const struct ether_vlan_header *vlan; #ifdef INET const struct ip *ip; const uint32_t *ports; int iphlen; #endif #ifdef INET6 const struct ip6_hdr *ip6; uint32_t flow; #endif union { #ifdef INET struct ip ip; #endif #ifdef INET6 struct ip6_hdr ip6; #endif struct ether_vlan_header vlan; uint32_t port; } buf; off = sizeof(*eh); if (m->m_len < off) goto out; eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); if (sc->sc_flags & LAGG_F_HASHL2) { p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p); p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p); } /* Special handling for encapsulating VLAN frames */ if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) { p = hash32_buf(&m->m_pkthdr.ether_vtag, sizeof(m->m_pkthdr.ether_vtag), p); } else if (etype == ETHERTYPE_VLAN) { vlan = lagg_gethdr(m, off, sizeof(*vlan), &buf); if (vlan == NULL) goto out; if (sc->sc_flags & LAGG_F_HASHL2) p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p); etype = ntohs(vlan->evl_proto); off += sizeof(*vlan) - sizeof(*eh); } switch (etype) { #ifdef INET case ETHERTYPE_IP: ip = lagg_gethdr(m, off, sizeof(*ip), &buf); if (ip == NULL) goto out; if (sc->sc_flags & LAGG_F_HASHL3) { p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p); p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p); } if (!(sc->sc_flags & LAGG_F_HASHL4)) break; switch (ip->ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: 
iphlen = ip->ip_hl << 2; if (iphlen < sizeof(*ip)) break; off += iphlen; ports = lagg_gethdr(m, off, sizeof(*ports), &buf); if (ports == NULL) break; p = hash32_buf(ports, sizeof(*ports), p); break; } break; #endif #ifdef INET6 case ETHERTYPE_IPV6: if (!(sc->sc_flags & LAGG_F_HASHL3)) break; ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf); if (ip6 == NULL) goto out; p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p); p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p); flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK; p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */ break; #endif } out: return (p); } int lagg_enqueue(struct ifnet *ifp, struct mbuf *m) { return (ifp->if_transmit)(ifp, m); } /* * Simple round robin aggregation */ static int lagg_rr_attach(struct lagg_softc *sc) { sc->sc_detach = lagg_rr_detach; sc->sc_start = lagg_rr_start; sc->sc_input = lagg_rr_input; sc->sc_port_create = NULL; sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX; sc->sc_seq = 0; return (0); } static int lagg_rr_detach(struct lagg_softc *sc) { return (0); } static int lagg_rr_start(struct lagg_softc *sc, struct mbuf *m) { struct lagg_port *lp; uint32_t p; p = atomic_fetchadd_32(&sc->sc_seq, 1); p %= sc->sc_count; lp = SLIST_FIRST(&sc->sc_ports); while (p--) lp = SLIST_NEXT(lp, lp_entries); /* * Check the port's link state. This will return the next active * port if the link is down or the port is NULL. 
*/ if ((lp = lagg_link_active(sc, lp)) == NULL) { m_freem(m); return (ENETDOWN); } /* Send mbuf */ return (lagg_enqueue(lp->lp_ifp, m)); } static struct mbuf * lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; /* Just pass in the packet to our lagg device */ m->m_pkthdr.rcvif = ifp; return (m); } /* * Active failover */ static int lagg_fail_attach(struct lagg_softc *sc) { sc->sc_detach = lagg_fail_detach; sc->sc_start = lagg_fail_start; sc->sc_input = lagg_fail_input; sc->sc_port_create = NULL; sc->sc_port_destroy = NULL; return (0); } static int lagg_fail_detach(struct lagg_softc *sc) { return (0); } static int lagg_fail_start(struct lagg_softc *sc, struct mbuf *m) { struct lagg_port *lp; /* Use the master port if active or the next available port */ if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) { m_freem(m); return (ENETDOWN); } /* Send mbuf */ return (lagg_enqueue(lp->lp_ifp, m)); } static struct mbuf * lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; struct lagg_port *tmp_tp; if (lp == sc->sc_primary || lagg_failover_rx_all) { m->m_pkthdr.rcvif = ifp; return (m); } if (!LAGG_PORTACTIVE(sc->sc_primary)) { tmp_tp = lagg_link_active(sc, sc->sc_primary); /* * If tmp_tp is null, we've recieved a packet when all * our links are down. Weird, but process it anyways. 
*/ if ((tmp_tp == NULL || tmp_tp == lp)) { m->m_pkthdr.rcvif = ifp; return (m); } } m_freem(m); return (NULL); } /* * Loadbalancing */ static int lagg_lb_attach(struct lagg_softc *sc) { struct lagg_port *lp; struct lagg_lb *lb; if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) return (ENOMEM); sc->sc_detach = lagg_lb_detach; sc->sc_start = lagg_lb_start; sc->sc_input = lagg_lb_input; sc->sc_port_create = lagg_lb_port_create; sc->sc_port_destroy = lagg_lb_port_destroy; sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX; lb->lb_key = arc4random(); sc->sc_psc = (caddr_t)lb; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lagg_lb_port_create(lp); return (0); } static int lagg_lb_detach(struct lagg_softc *sc) { struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; if (lb != NULL) free(lb, M_DEVBUF); return (0); } static int lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) { struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; struct lagg_port *lp_next; int i = 0; bzero(&lb->lb_ports, sizeof(lb->lb_ports)); SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (lp_next == lp) continue; if (i >= LAGG_MAX_PORTS) return (EINVAL); if (sc->sc_ifflags & IFF_DEBUG) printf("%s: port %s at index %d\n", sc->sc_ifname, lp_next->lp_ifname, i); lb->lb_ports[i++] = lp_next; } return (0); } static int lagg_lb_port_create(struct lagg_port *lp) { struct lagg_softc *sc = lp->lp_softc; return (lagg_lb_porttable(sc, NULL)); } static void lagg_lb_port_destroy(struct lagg_port *lp) { struct lagg_softc *sc = lp->lp_softc; lagg_lb_porttable(sc, lp); } static int lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) { struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; struct lagg_port *lp = NULL; uint32_t p = 0; if (sc->use_flowid && (m->m_flags & M_FLOWID)) p = m->m_pkthdr.flowid; else p = lagg_hashmbuf(sc, m, lb->lb_key); p %= sc->sc_count; lp = lb->lb_ports[p]; /* * Check the port's link state. 
This will return the next active * port if the link is down or the port is NULL. */ if ((lp = lagg_link_active(sc, lp)) == NULL) { m_freem(m); return (ENETDOWN); } /* Send mbuf */ return (lagg_enqueue(lp->lp_ifp, m)); } static struct mbuf * lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; /* Just pass in the packet to our lagg device */ m->m_pkthdr.rcvif = ifp; return (m); } /* * 802.3ad LACP */ static int lagg_lacp_attach(struct lagg_softc *sc) { struct lagg_port *lp; int error; sc->sc_detach = lagg_lacp_detach; sc->sc_port_create = lacp_port_create; sc->sc_port_destroy = lacp_port_destroy; sc->sc_linkstate = lacp_linkstate; sc->sc_start = lagg_lacp_start; sc->sc_input = lagg_lacp_input; sc->sc_init = lacp_init; sc->sc_stop = lacp_stop; sc->sc_lladdr = lagg_lacp_lladdr; sc->sc_req = lacp_req; sc->sc_portreq = lacp_portreq; error = lacp_attach(sc); if (error) return (error); SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_create(lp); return (error); } static int lagg_lacp_detach(struct lagg_softc *sc) { struct lagg_port *lp; int error; SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_destroy(lp); /* unlocking is safe here */ LAGG_WUNLOCK(sc); error = lacp_detach(sc); LAGG_WLOCK(sc); return (error); } static void lagg_lacp_lladdr(struct lagg_softc *sc) { struct lagg_port *lp; /* purge all the lacp ports */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_destroy(lp); /* add them back in */ SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) lacp_port_create(lp); } static int lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m) { struct lagg_port *lp; lp = lacp_select_tx_port(sc, m); if (lp == NULL) { m_freem(m); return (ENETDOWN); } /* Send mbuf */ return (lagg_enqueue(lp->lp_ifp, m)); } static struct mbuf * lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) { struct ifnet *ifp = sc->sc_ifp; struct ether_header *eh; u_short etype; eh = mtod(m, struct ether_header *); 
etype = ntohs(eh->ether_type); /* Tap off LACP control messages */ if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) { m = lacp_input(lp, m); if (m == NULL) return (NULL); } /* * If the port is not collecting or not in the active aggregator then * free and return. */ if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) { m_freem(m); return (NULL); } m->m_pkthdr.rcvif = ifp; return (m); } Index: stable/9/sys/net =================================================================== --- stable/9/sys/net (revision 273911) +++ stable/9/sys/net (revision 273912) Property changes on: stable/9/sys/net ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/net:r263710,273377-273378,273423,273455 Index: stable/9/sys/net80211/ieee80211_ht.c =================================================================== --- stable/9/sys/net80211/ieee80211_ht.c (revision 273911) +++ stable/9/sys/net80211/ieee80211_ht.c (revision 273912) @@ -1,2782 +1,2782 @@ /*- * Copyright (c) 2007-2008 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NOTE(review): the bare #include directives in this section have no
 * header operand in this copy of the file; restore the header names
 * from the upstream source before compiling.
 */
#include
#ifdef __FreeBSD__
__FBSDID("$FreeBSD$");
#endif

/*
 * IEEE 802.11n protocol support.
 */

#include "opt_inet.h"
#include "opt_wlan.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* define here, used throughout file */
/*
 * MS(v, f) masks v with field mask f and shifts the result right by
 * f##_S; SM(v, f) shifts v left by f##_S and then masks with f.
 */
#define	MS(_v, _f)		(((_v) & _f) >> _f##_S)
#define	SM(_v, _f)		(((_v) << _f##_S) & _f)

/*
 * Per-MCS rate table, indexed by MCS number (0..76).  ht_rateprint()
 * prints each value halved with an optional ".5", i.e. entries are
 * in units of 0.5 Mbps.
 * NOTE(review): column order is presumably ht20/800ns, ht20/400ns,
 * ht40/800ns, ht40/400ns -- confirm against struct ieee80211_mcs_rates.
 */
const struct ieee80211_mcs_rates ieee80211_htrates[IEEE80211_HTRATE_MAXSIZE] = {
	{  13,  14,   27,   30 },	/* MCS 0 */
	{  26,  29,   54,   60 },	/* MCS 1 */
	{  39,  43,   81,   90 },	/* MCS 2 */
	{  52,  58,  108,  120 },	/* MCS 3 */
	{  78,  87,  162,  180 },	/* MCS 4 */
	{ 104, 116,  216,  240 },	/* MCS 5 */
	{ 117, 130,  243,  270 },	/* MCS 6 */
	{ 130, 144,  270,  300 },	/* MCS 7 */
	{  26,  29,   54,   60 },	/* MCS 8 */
	{  52,  58,  108,  120 },	/* MCS 9 */
	{  78,  87,  162,  180 },	/* MCS 10 */
	{ 104, 116,  216,  240 },	/* MCS 11 */
	{ 156, 173,  324,  360 },	/* MCS 12 */
	{ 208, 231,  432,  480 },	/* MCS 13 */
	{ 234, 260,  486,  540 },	/* MCS 14 */
	{ 260, 289,  540,  600 },	/* MCS 15 */
	{  39,  43,   81,   90 },	/* MCS 16 */
	{  78,  87,  162,  180 },	/* MCS 17 */
	{ 117, 130,  243,  270 },	/* MCS 18 */
	{ 156, 173,  324,  360 },	/* MCS 19 */
	{ 234, 260,  486,  540 },	/* MCS 20 */
	{ 312, 347,  648,  720 },	/* MCS 21 */
	{ 351, 390,  729,  810 },	/* MCS 22 */
	{ 390, 433,  810,  900 },	/* MCS 23 */
	{  52,  58,  108,  120 },	/* MCS 24 */
	{ 104, 116,  216,  240 },	/* MCS 25 */
	{ 156, 173,  324,  360 },	/* MCS 26 */
	{ 208, 231,  432,  480 },	/* MCS 27 */
	{ 312, 347,  648,  720 },	/* MCS 28 */
	{ 416, 462,  864,  960 },	/* MCS 29 */
	{ 468, 520,  972, 1080 },	/* MCS 30 */
	{ 520, 578, 1080, 1200 },	/* MCS 31 */
	{   0,   0,   12,   13 },	/* MCS 32 */
	{  78,  87,  162,  180 },	/* MCS 33 */
	{ 104, 116,  216,  240 },	/* MCS 34 */
	{ 130, 144,  270,  300 },	/* MCS 35 */
	{ 117, 130,  243,  270 },	/* MCS 36 */
	{ 156, 173,  324,  360 },	/* MCS 37 */
	{ 195, 217,  405,  450 },	/* MCS 38 */
	{ 104, 116,  216,  240 },	/* MCS 39 */
	{ 130, 144,  270,  300 },	/* MCS 40 */
	{ 130, 144,  270,  300 },	/* MCS 41 */
	{ 156, 173,  324,  360 },	/* MCS 42 */
	{ 182, 202,  378,  420 },	/* MCS 43 */
	{ 182, 202,  378,  420 },	/* MCS 44 */
	{ 208, 231,  432,  480 },	/* MCS 45 */
	{ 156, 173,  324,  360 },	/* MCS 46 */
	{ 195, 217,  405,  450 },	/* MCS 47 */
	{ 195, 217,  405,  450 },	/* MCS 48 */
	{ 234, 260,  486,  540 },	/* MCS 49 */
	{ 273, 303,  567,  630 },	/* MCS 50 */
	{ 273, 303,  567,  630 },	/* MCS 51 */
	{ 312, 347,  648,  720 },	/* MCS 52 */
	{ 130, 144,  270,  300 },	/* MCS 53 */
	{ 156, 173,  324,  360 },	/* MCS 54 */
	{ 182, 202,  378,  420 },	/* MCS 55 */
	{ 156, 173,  324,  360 },	/* MCS 56 */
	{ 182, 202,  378,  420 },	/* MCS 57 */
	{ 208, 231,  432,  480 },	/* MCS 58 */
	{ 234, 260,  486,  540 },	/* MCS 59 */
	{ 208, 231,  432,  480 },	/* MCS 60 */
	{ 234, 260,  486,  540 },	/* MCS 61 */
	{ 260, 289,  540,  600 },	/* MCS 62 */
	{ 260, 289,  540,  600 },	/* MCS 63 */
	{ 286, 318,  594,  660 },	/* MCS 64 */
	{ 195, 217,  405,  450 },	/* MCS 65 */
	{ 234, 260,  486,  540 },	/* MCS 66 */
	{ 273, 303,  567,  630 },	/* MCS 67 */
	{ 234, 260,  486,  540 },	/* MCS 68 */
	{ 273, 303,  567,  630 },	/* MCS 69 */
	{ 312, 347,  648,  720 },	/* MCS 70 */
	{ 351, 390,  729,  810 },	/* MCS 71 */
	{ 312, 347,  648,  720 },	/* MCS 72 */
	{ 351, 390,  729,  810 },	/* MCS 73 */
	{ 390, 433,  810,  900 },	/* MCS 74 */
	{ 390, 433,  810,  900 },	/* MCS 75 */
	{ 429, 477,  891,  990 },	/* MCS 76 */
};

/*
 * Tunables.  Those initialised to -1 are given their real values
 * (converted with msecs_to_ticks()) by ieee80211_ht_init() below.
 */
#ifdef IEEE80211_AMPDU_AGE
static	int ieee80211_ampdu_age = -1;	/* threshold for ampdu reorder q (ms) */
SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age, CTLTYPE_INT | CTLFLAG_RW,
	&ieee80211_ampdu_age, 0, ieee80211_sysctl_msecs_ticks, "I",
	"AMPDU max reorder age (ms)");
#endif

static	int ieee80211_recv_bar_ena = 1;
SYSCTL_INT(_net_wlan, OID_AUTO, recv_bar, CTLFLAG_RW, &ieee80211_recv_bar_ena,
	    0, "BAR frame processing (ena/dis)");

static	int ieee80211_addba_timeout = -1;/* timeout for ADDBA response */
SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout, CTLTYPE_INT | CTLFLAG_RW,
	&ieee80211_addba_timeout, 0, ieee80211_sysctl_msecs_ticks, "I",
	"ADDBA request timeout (ms)");
static	int ieee80211_addba_backoff = -1;/* backoff after max ADDBA requests */
SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff, CTLTYPE_INT | CTLFLAG_RW,
	&ieee80211_addba_backoff, 0, ieee80211_sysctl_msecs_ticks, "I",
	"ADDBA request backoff (ms)");
static	int ieee80211_addba_maxtries = 3;/* max ADDBA requests before backoff */
/*
 * NOTE(review): the next two lines carry unified-diff hunk markers.
 * The '-' line is the old form and the '+' line the new form of the
 * same SYSCTL_INT invocation (the new form drops CTLTYPE_INT from the
 * flags argument); they are not two separate declarations.
 */
-SYSCTL_INT(_net_wlan, OID_AUTO, addba_maxtries, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_INT(_net_wlan, OID_AUTO, addba_maxtries, CTLFLAG_RW,
	&ieee80211_addba_maxtries, 0, "max ADDBA requests sent before backoff");

static	int ieee80211_bar_timeout = -1;	/* timeout waiting for BAR response */
static	int ieee80211_bar_maxtries = 50;/* max BAR requests before DELBA */

/* Action frame handlers registered by ieee80211_ht_init(). */
static	ieee80211_recv_action_func ht_recv_action_ba_addba_request;
static	ieee80211_recv_action_func ht_recv_action_ba_addba_response;
static	ieee80211_recv_action_func ht_recv_action_ba_delba;
static	ieee80211_recv_action_func ht_recv_action_ht_mimopwrsave;
static	ieee80211_recv_action_func ht_recv_action_ht_txchwidth;

static	ieee80211_send_action_func ht_send_action_ba_addba;
static	ieee80211_send_action_func ht_send_action_ba_delba;
static	ieee80211_send_action_func ht_send_action_ht_txchwidth;

/*
 * One-time module setup, run through the SYSINIT below: convert the
 * millisecond defaults to ticks and register the BA/HT action frame
 * receive and send handlers.
 */
static void
ieee80211_ht_init(void)
{
	/*
	 * Setup HT parameters that depends on the clock frequency.
	 */
#ifdef IEEE80211_AMPDU_AGE
	ieee80211_ampdu_age = msecs_to_ticks(500);
#endif
	ieee80211_addba_timeout = msecs_to_ticks(250);
	ieee80211_addba_backoff = msecs_to_ticks(10*1000);
	ieee80211_bar_timeout = msecs_to_ticks(250);
	/*
	 * Register action frame handlers.
	 */
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA,
	    IEEE80211_ACTION_BA_ADDBA_REQUEST, ht_recv_action_ba_addba_request);
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA,
	    IEEE80211_ACTION_BA_ADDBA_RESPONSE, ht_recv_action_ba_addba_response);
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_BA,
	    IEEE80211_ACTION_BA_DELBA, ht_recv_action_ba_delba);
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_HT,
	    IEEE80211_ACTION_HT_MIMOPWRSAVE, ht_recv_action_ht_mimopwrsave);
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_HT,
	    IEEE80211_ACTION_HT_TXCHWIDTH, ht_recv_action_ht_txchwidth);

	/* NB: ADDBA request and response share one send handler */
	ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA,
	    IEEE80211_ACTION_BA_ADDBA_REQUEST, ht_send_action_ba_addba);
	ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA,
	    IEEE80211_ACTION_BA_ADDBA_RESPONSE, ht_send_action_ba_addba);
	ieee80211_send_action_register(IEEE80211_ACTION_CAT_BA,
	    IEEE80211_ACTION_BA_DELBA, ht_send_action_ba_delba);
	ieee80211_send_action_register(IEEE80211_ACTION_CAT_HT,
	    IEEE80211_ACTION_HT_TXCHWIDTH, ht_send_action_ht_txchwidth);
}
SYSINIT(wlan_ht, SI_SUB_DRIVERS, SI_ORDER_FIRST, ieee80211_ht_init, NULL);

/* Default aggregation methods installed by ieee80211_ht_attach(). */
static int ieee80211_ampdu_enable(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap);
static int ieee80211_addba_request(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap,
	int dialogtoken, int baparamset, int batimeout);
static int ieee80211_addba_response(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap,
	int code, int baparamset, int batimeout);
static void ieee80211_addba_stop(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap);
static void null_addba_response_timeout(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap);

static void ieee80211_bar_response(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap, int status);
static void ampdu_tx_stop(struct ieee80211_tx_ampdu *tap);
static void bar_stop_timer(struct ieee80211_tx_ampdu *tap);
static int ampdu_rx_start(struct ieee80211_node *, struct ieee80211_rx_ampdu *,
	int baparamset, int batimeout, int baseqctl);
static void ampdu_rx_stop(struct ieee80211_node *, struct ieee80211_rx_ampdu *);

/*
 * Per-device attach: point the ic method table at the default
 * aggregation implementations in this file and set the initial HT
 * protection modes.
 */
void
ieee80211_ht_attach(struct ieee80211com *ic)
{
	/* setup default aggregation policy */
	ic->ic_recv_action = ieee80211_recv_action;
	ic->ic_send_action = ieee80211_send_action;
	ic->ic_ampdu_enable = ieee80211_ampdu_enable;
	ic->ic_addba_request = ieee80211_addba_request;
	ic->ic_addba_response = ieee80211_addba_response;
	ic->ic_addba_response_timeout = null_addba_response_timeout;
	ic->ic_addba_stop = ieee80211_addba_stop;
	ic->ic_bar_response = ieee80211_bar_response;
	ic->ic_ampdu_rx_start = ampdu_rx_start;
	ic->ic_ampdu_rx_stop = ampdu_rx_stop;

	ic->ic_htprotmode = IEEE80211_PROT_RTSCTS;
	ic->ic_curhtprotmode = IEEE80211_HTINFO_OPMODE_PURE;
}

/* Per-device detach; nothing to undo. */
void
ieee80211_ht_detach(struct ieee80211com *ic)
{
}

/*
 * Per-vap attach: set the default A-MPDU/A-MSDU limits and tx
 * traffic thresholds, then derive the vap's HT feature flags
 * (iv_flags_ht) from its capability bits (iv_htcaps).
 */
void
ieee80211_ht_vattach(struct ieee80211vap *vap)
{

	/* driver can override defaults */
	vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_8K;
	vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_NA;
	vap->iv_ampdu_limit = vap->iv_ampdu_rxmax;
	vap->iv_amsdu_limit = vap->iv_htcaps & IEEE80211_HTCAP_MAXAMSDU;
	/* tx aggregation traffic thresholds */
	vap->iv_ampdu_mintraffic[WME_AC_BK] = 128;
	vap->iv_ampdu_mintraffic[WME_AC_BE] = 64;
	vap->iv_ampdu_mintraffic[WME_AC_VO] = 32;
	vap->iv_ampdu_mintraffic[WME_AC_VI] = 32;

	if (vap->iv_htcaps & IEEE80211_HTC_HT) {
		/*
		 * Device is HT capable; enable all HT-related
		 * facilities by default.
		 * XXX these choices may be too aggressive.
		 */
		vap->iv_flags_ht |= IEEE80211_FHT_HT
				 |  IEEE80211_FHT_HTCOMPAT
				 ;
		if (vap->iv_htcaps & IEEE80211_HTCAP_SHORTGI20)
			vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI20;
		/* XXX infer from channel list? */
		if (vap->iv_htcaps & IEEE80211_HTCAP_CHWIDTH40) {
			vap->iv_flags_ht |= IEEE80211_FHT_USEHT40;
			if (vap->iv_htcaps & IEEE80211_HTCAP_SHORTGI40)
				vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI40;
		}
		/* enable RIFS if capable */
		if (vap->iv_htcaps & IEEE80211_HTC_RIFS)
			vap->iv_flags_ht |= IEEE80211_FHT_RIFS;

		/* NB: A-MPDU and A-MSDU rx are mandated, these are tx only */
		vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_RX;
		if (vap->iv_htcaps & IEEE80211_HTC_AMPDU)
			vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_TX;
		vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_RX;
		if (vap->iv_htcaps & IEEE80211_HTC_AMSDU)
			vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_TX;
	}
	/* NB: disable default legacy WDS, too many issues right now */
	if (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY)
		vap->iv_flags_ht &= ~IEEE80211_FHT_HT;
}

/* Per-vap detach; nothing to undo. */
void
ieee80211_ht_vdetach(struct ieee80211vap *vap)
{
}

/*
 * Return the ieee80211_htrates[] entry for MCS `index'.  `ratetype'
 * picks the column: 0 = ht20/800ns, 1 = ht20/400ns, 2 = ht40/800ns,
 * anything else = ht40/400ns.  Returns 0 when ieee80211_rate2media()
 * does not report an MCS media subtype for the index in `mode'.
 */
static int
ht_getrate(struct ieee80211com *ic, int index, enum ieee80211_phymode mode,
    int ratetype)
{
	int mword, rate;

	mword = ieee80211_rate2media(ic, index | IEEE80211_RATE_MCS, mode);
	if (IFM_SUBTYPE(mword) != IFM_IEEE80211_MCS)
		return (0);
	switch (ratetype) {
	case 0:
		rate = ieee80211_htrates[index].ht20_rate_800ns;
		break;
	case 1:
		rate = ieee80211_htrates[index].ht20_rate_400ns;
		break;
	case 2:
		rate = ieee80211_htrates[index].ht40_rate_800ns;
		break;
	default:
		rate = ieee80211_htrates[index].ht40_rate_400ns;
		break;
	}
	return (rate);
}

/*
 * MCS ranges announced by ht_rateprint().  A range is printed only
 * when the device has at least `txstream' tx streams, the requested
 * rate type is at least `ratetype', and (if non-zero) one of the
 * `htcapflags' bits is set in ic_htcaps.  maxmcs == 0 marks a
 * single-MCS entry; the table ends at the entry with txstream == 0.
 */
static struct printranges {
	int	minmcs;
	int	maxmcs;
	int	txstream;
	int	ratetype;
	int	htcapflags;
} ranges[] = {
	{  0,  7, 1, 0, 0 },
	{  8, 15, 2, 0, 0 },
	{ 16, 23, 3, 0, 0 },
	{ 24, 31, 4, 0, 0 },
	{ 32,  0, 1, 2, IEEE80211_HTC_TXMCS32 },
	{ 33, 38, 2, 0, IEEE80211_HTC_TXUNEQUAL },
	{ 39, 52, 3, 0, IEEE80211_HTC_TXUNEQUAL },
	{ 53, 76, 4, 0, IEEE80211_HTC_TXUNEQUAL },
	{  0,  0, 0, 0, 0 },
};

/*
 * Print, for every applicable entry of ranges[], the lowest and
 * highest rate of that MCS range for the given mode and rate type.
 * Table values are halved for display, with ".5" appended when odd.
 */
static void
ht_rateprint(struct ieee80211com *ic, enum ieee80211_phymode mode, int ratetype)
{
	struct ifnet *ifp = ic->ic_ifp;
	int minrate, maxrate;
	struct printranges *range;

	for (range = ranges; range->txstream != 0; range++) {
		if (ic->ic_txstream < range->txstream)
			continue;
		if (range->htcapflags &&
		    (ic->ic_htcaps & range->htcapflags) == 0)
			continue;
		if (ratetype < range->ratetype)
			continue;
		minrate = ht_getrate(ic, range->minmcs, mode, ratetype);
		maxrate = ht_getrate(ic, range->maxmcs, mode, ratetype);
		if (range->maxmcs) {
			if_printf(ifp, "MCS %d-%d: %d%sMbps - %d%sMbps\n",
			    range->minmcs, range->maxmcs,
			    minrate/2, ((minrate & 0x1) != 0 ? ".5" : ""),
			    maxrate/2, ((maxrate & 0x1) != 0 ? ".5" : ""));
		} else {
			if_printf(ifp, "MCS %d: %d%sMbps\n", range->minmcs,
			    minrate/2, ((minrate & 0x1) != 0 ? ".5" : ""));
		}
	}
}

/*
 * Announce the MCS rates of one phy mode: 20MHz always, 20MHz short
 * GI, 40MHz and 40MHz short GI only when the matching ic_htcaps bits
 * are set.
 */
static void
ht_announce(struct ieee80211com *ic, enum ieee80211_phymode mode)
{
	struct ifnet *ifp = ic->ic_ifp;
	const char *modestr = ieee80211_phymode_name[mode];

	if_printf(ifp, "%s MCS 20MHz\n", modestr);
	ht_rateprint(ic, mode, 0);
	if (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20) {
		if_printf(ifp, "%s MCS 20MHz SGI\n", modestr);
		ht_rateprint(ic, mode, 1);
	}
	if (ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) {
		if_printf(ifp, "%s MCS 40MHz:\n", modestr);
		ht_rateprint(ic, mode, 2);
	}
	if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) &&
	    (ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40)) {
		if_printf(ifp, "%s MCS 40MHz SGI:\n", modestr);
		ht_rateprint(ic, mode, 3);
	}
}

/*
 * Print the tx/rx stream counts and the MCS rates of every 11n mode
 * (11NA, 11NG) present in ic_modecaps.
 */
void
ieee80211_ht_announce(struct ieee80211com *ic)
{
	struct ifnet *ifp = ic->ic_ifp;

	if (isset(ic->ic_modecaps, IEEE80211_MODE_11NA) ||
	    isset(ic->ic_modecaps, IEEE80211_MODE_11NG))
		if_printf(ifp, "%dT%dR\n", ic->ic_txstream, ic->ic_rxstream);
	if (isset(ic->ic_modecaps, IEEE80211_MODE_11NA))
		ht_announce(ic, IEEE80211_MODE_11NA);
	if (isset(ic->ic_modecaps, IEEE80211_MODE_11NG))
		ht_announce(ic, IEEE80211_MODE_11NG);
}

/* Scratch rate set rebuilt and returned by ieee80211_get_suphtrates(). */
static struct ieee80211_htrateset htrateset;

/*
 * Build the set of MCS indices the device can transmit: 8 per tx
 * stream, MCS 32 when both CHWIDTH40 and TXMCS32 are set, and the
 * 33..76 groups when TXUNEQUAL is set and enough streams exist.
 * The result lives in the file-static htrateset, which is cleared
 * and refilled on every call, so the returned pointer is only valid
 * until the next call.  The channel argument is not consulted.
 */
const struct ieee80211_htrateset *
ieee80211_get_suphtrates(struct ieee80211com *ic,
    const struct ieee80211_channel *c)
{
#define	ADDRATE(x)	do {						\
	htrateset.rs_rates[htrateset.rs_nrates] = x;			\
	htrateset.rs_nrates++;						\
} while (0)
	int i;

	memset(&htrateset, 0, sizeof(struct ieee80211_htrateset));
	for (i = 0; i < ic->ic_txstream * 8; i++)
		ADDRATE(i);
	if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) &&
	    (ic->ic_htcaps & IEEE80211_HTC_TXMCS32))
		ADDRATE(32);
	if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) {
		if (ic->ic_txstream >= 2) {
			 for (i = 33; i <= 38; i++)
				ADDRATE(i);
		}
		if (ic->ic_txstream >= 3) {
			for (i = 39; i <= 52; i++)
				ADDRATE(i);
		}
		if (ic->ic_txstream == 4) {
			for (i = 53; i <= 76; i++)
				ADDRATE(i);
		}
	}
	return &htrateset;
#undef	ADDRATE
}

/*
 * Receive processing.
 */

/*
 * Decap the encapsulated A-MSDU frames and dispatch all but
 * the last for delivery.  The last frame is returned for
 * delivery via the normal path.
 *
 * Returns NULL when decapsulation or splitting fails; on the split
 * failure path the remaining mbuf is freed here.
 */
struct mbuf *
ieee80211_decap_amsdu(struct ieee80211_node *ni, struct mbuf *m)
{
	struct ieee80211vap *vap = ni->ni_vap;
	int framelen;
	struct mbuf *n;

	/* discard 802.3 header inserted by ieee80211_decap */
	m_adj(m, sizeof(struct ether_header));

	vap->iv_stats.is_amsdu_decap++;

	for (;;) {
		/*
		 * Decap the first frame, bust it apart from the
		 * remainder and deliver.  We leave the last frame
		 * delivery to the caller (for consistency with other
		 * code paths, could also do it here).
		 */
		m = ieee80211_decap1(m, &framelen);
		if (m == NULL) {
			IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
			    ni->ni_macaddr, "a-msdu", "%s", "decap failed");
			vap->iv_stats.is_amsdu_tooshort++;
			return NULL;
		}
		/* whole packet is this one frame: it is the last */
		if (m->m_pkthdr.len == framelen)
			break;
		n = m_split(m, framelen, M_NOWAIT);
		if (n == NULL) {
			IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
			    ni->ni_macaddr, "a-msdu",
			    "%s", "unable to split encapsulated frames");
			vap->iv_stats.is_amsdu_split++;
			m_freem(m);			/* NB: must reclaim */
			return NULL;
		}
		vap->iv_deliver_data(vap, ni, m);

		/*
		 * Remove frame contents; each intermediate frame
		 * is required to be aligned to a 4-byte boundary.
		 */
		m = n;
		m_adj(m, roundup2(framelen, 4) - framelen);	/* padding */
	}
	return m;				/* last delivered by caller */
}

/*
 * Purge all frames in the A-MPDU re-order queue.
*/ static void ampdu_rx_purge(struct ieee80211_rx_ampdu *rap) { struct mbuf *m; int i; for (i = 0; i < rap->rxa_wnd; i++) { m = rap->rxa_m[i]; if (m != NULL) { rap->rxa_m[i] = NULL; rap->rxa_qbytes -= m->m_pkthdr.len; m_freem(m); if (--rap->rxa_qframes == 0) break; } } KASSERT(rap->rxa_qbytes == 0 && rap->rxa_qframes == 0, ("lost %u data, %u frames on ampdu rx q", rap->rxa_qbytes, rap->rxa_qframes)); } /* * Start A-MPDU rx/re-order processing for the specified TID. */ static int ampdu_rx_start(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap, int baparamset, int batimeout, int baseqctl) { int bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); if (rap->rxa_flags & IEEE80211_AGGR_RUNNING) { /* * AMPDU previously setup and not terminated with a DELBA, * flush the reorder q's in case anything remains. */ ampdu_rx_purge(rap); } memset(rap, 0, sizeof(*rap)); rap->rxa_wnd = (bufsiz == 0) ? IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX); rap->rxa_start = MS(baseqctl, IEEE80211_BASEQ_START); rap->rxa_flags |= IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND; return 0; } /* * Stop A-MPDU rx processing for the specified TID. */ static void ampdu_rx_stop(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) { ampdu_rx_purge(rap); rap->rxa_flags &= ~(IEEE80211_AGGR_RUNNING | IEEE80211_AGGR_XCHGPEND); } /* * Dispatch a frame from the A-MPDU reorder queue. The * frame is fed back into ieee80211_input marked with an * M_AMPDU_MPDU flag so it doesn't come back to us (it also * permits ieee80211_input to optimize re-processing). */ static __inline void ampdu_dispatch(struct ieee80211_node *ni, struct mbuf *m) { m->m_flags |= M_AMPDU_MPDU; /* bypass normal processing */ /* NB: rssi and noise are ignored w/ M_AMPDU_MPDU set */ (void) ieee80211_input(ni, m, 0, 0); } /* * Dispatch as many frames as possible from the re-order queue. * Frames will always be "at the front"; we process all frames * up to the first empty slot in the window. 
On completion we * cleanup state if there are still pending frames in the current * BA window. We assume the frame at slot 0 is already handled * by the caller; we always start at slot 1. */ static void ampdu_rx_dispatch(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct mbuf *m; int i; /* flush run of frames */ for (i = 1; i < rap->rxa_wnd; i++) { m = rap->rxa_m[i]; if (m == NULL) break; rap->rxa_m[i] = NULL; rap->rxa_qbytes -= m->m_pkthdr.len; rap->rxa_qframes--; ampdu_dispatch(ni, m); } /* * If frames remain, copy the mbuf pointers down so * they correspond to the offsets in the new window. */ if (rap->rxa_qframes != 0) { int n = rap->rxa_qframes, j; for (j = i+1; j < rap->rxa_wnd; j++) { if (rap->rxa_m[j] != NULL) { rap->rxa_m[j-i] = rap->rxa_m[j]; rap->rxa_m[j] = NULL; if (--n == 0) break; } } KASSERT(n == 0, ("lost %d frames", n)); vap->iv_stats.is_ampdu_rx_copy += rap->rxa_qframes; } /* * Adjust the start of the BA window to * reflect the frames just dispatched. */ rap->rxa_start = IEEE80211_SEQ_ADD(rap->rxa_start, i); vap->iv_stats.is_ampdu_rx_oor += i; } #ifdef IEEE80211_AMPDU_AGE /* * Dispatch all frames in the A-MPDU re-order queue. */ static void ampdu_rx_flush(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) { struct ieee80211vap *vap = ni->ni_vap; struct mbuf *m; int i; for (i = 0; i < rap->rxa_wnd; i++) { m = rap->rxa_m[i]; if (m == NULL) continue; rap->rxa_m[i] = NULL; rap->rxa_qbytes -= m->m_pkthdr.len; rap->rxa_qframes--; vap->iv_stats.is_ampdu_rx_oor++; ampdu_dispatch(ni, m); if (rap->rxa_qframes == 0) break; } } #endif /* IEEE80211_AMPDU_AGE */ /* * Dispatch all frames in the A-MPDU re-order queue * preceding the specified sequence number. This logic * handles window moves due to a received MSDU or BAR. 
*/
/*
 * ni       - station owning the queue; dispatched frames are fed to
 *            ampdu_dispatch() on its behalf
 * rap      - per-TID receive re-order state to flush
 * winstart - sequence number the window is being moved to
 *
 * The scan walks the window from slot 0, dispatching every queued
 * frame, and stops at the first empty slot whose sequence number is
 * not before winstart (or at the end of the window).  On return
 * rap->rxa_start is the sequence number at which the scan stopped
 * and any remaining frames have been shifted down to match.
 */
static void
ampdu_rx_flush_upto(struct ieee80211_node *ni,
	struct ieee80211_rx_ampdu *rap, ieee80211_seq winstart)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct mbuf *m;
	ieee80211_seq seqno;
	int i;

	/*
	 * Flush any complete MSDU's with a sequence number lower
	 * than winstart.  Gaps may exist.  Note that we may actually
	 * dispatch frames past winstart if a run continues; this is
	 * an optimization that avoids having to do a separate pass
	 * to dispatch frames after moving the BA window start.
	 */
	seqno = rap->rxa_start;
	for (i = 0; i < rap->rxa_wnd; i++) {
		m = rap->rxa_m[i];
		if (m != NULL) {
			rap->rxa_m[i] = NULL;
			rap->rxa_qbytes -= m->m_pkthdr.len;
			rap->rxa_qframes--;
			vap->iv_stats.is_ampdu_rx_oor++;

			ampdu_dispatch(ni, m);
		} else {
			/* empty slot: only keep going while before winstart */
			if (!IEEE80211_SEQ_BA_BEFORE(seqno, winstart))
				break;
		}
		/* seqno tracks the sequence number of slot i+1 */
		seqno = IEEE80211_SEQ_INC(seqno);
	}
	/*
	 * If frames remain, copy the mbuf pointers down so
	 * they correspond to the offsets in the new window.
	 */
	if (rap->rxa_qframes != 0) {
		int n = rap->rxa_qframes, j;

		/* NB: this loop assumes i > 0 and/or rxa_m[0] is NULL */
		KASSERT(rap->rxa_m[0] == NULL,
		    ("%s: BA window slot 0 occupied", __func__));
		/* slot i is the gap the scan stopped on, so start at i+1 */
		for (j = i+1; j < rap->rxa_wnd; j++) {
			if (rap->rxa_m[j] != NULL) {
				rap->rxa_m[j-i] = rap->rxa_m[j];
				rap->rxa_m[j] = NULL;
				if (--n == 0)
					break;
			}
		}
		KASSERT(n == 0, ("%s: lost %d frames, qframes %d off %d "
		    "BA win <%d:%d> winstart %d",
		    __func__, n, rap->rxa_qframes, i, rap->rxa_start,
		    IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1),
		    winstart));
		vap->iv_stats.is_ampdu_rx_copy += rap->rxa_qframes;
	}
	/*
	 * Move the start of the BA window; we use the
	 * sequence number of the last MSDU that was
	 * passed up the stack+1 or winstart if stopped on
	 * a gap in the reorder buffer.
	 */
	rap->rxa_start = seqno;
}

/*
 * Process a received QoS data frame for an HT station.  Handle
 * A-MPDU reordering: if this frame is received out of order
 * and falls within the BA window hold onto it.  Otherwise if
 * this frame completes a run, flush any pending frames.
We
 * return 1 if the frame is consumed.  A 0 is returned if
 * the frame should be processed normally by the caller.
 */
/*
 * ni - HT station the frame was received from (asserted below)
 * m  - received frame; must carry M_AMPDU and not M_AMPDU_MPDU
 *
 * When CONSUMED (1) is returned the mbuf has been queued in the
 * re-order buffer, dispatched via ampdu_dispatch(), or freed; when
 * PROCESS (0) is returned the mbuf is untouched and still owned by
 * the caller.
 */
int
ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m)
{
#define	IEEE80211_FC0_QOSDATA \
	(IEEE80211_FC0_TYPE_DATA|IEEE80211_FC0_SUBTYPE_QOS|IEEE80211_FC0_VERSION_0)
#define	PROCESS		0	/* caller should process frame */
#define	CONSUMED	1	/* frame consumed, caller does nothing */
	struct ieee80211vap *vap = ni->ni_vap;
	struct ieee80211_qosframe *wh;
	struct ieee80211_rx_ampdu *rap;
	ieee80211_seq rxseq;
	uint8_t tid;
	int off;

	KASSERT((m->m_flags & (M_AMPDU | M_AMPDU_MPDU)) == M_AMPDU,
	    ("!a-mpdu or already re-ordered, flags 0x%x", m->m_flags));
	KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT sta"));

	/* NB: m_len known to be sufficient */
	wh = mtod(m, struct ieee80211_qosframe *);
	if (wh->i_fc[0] != IEEE80211_FC0_QOSDATA) {
		/*
		 * Not QoS data, shouldn't get here but just
		 * return it to the caller for processing.
		 */
		return PROCESS;
	}
	/* the QoS control field is read via the 4-address layout for DS-to-DS frames */
	if (IEEE80211_IS_DSTODS(wh))
		tid = ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0];
	else
		tid = wh->i_qos[0];
	tid &= IEEE80211_QOS_TID;
	rap = &ni->ni_rx_ampdu[tid];
	if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) {
		/*
		 * No ADDBA request yet, don't touch.
		 */
		return PROCESS;
	}
	rxseq = le16toh(*(uint16_t *)wh->i_seq);
	if ((rxseq & IEEE80211_SEQ_FRAG_MASK) != 0) {
		/*
		 * Fragments are not allowed; toss.
		 */
		IEEE80211_DISCARD_MAC(vap,
		    IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr,
		    "A-MPDU", "fragment, rxseq 0x%x tid %u%s", rxseq, tid,
		    wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : "");
		vap->iv_stats.is_ampdu_rx_drop++;
		IEEE80211_NODE_STAT(ni, rx_drop);
		m_freem(m);
		return CONSUMED;
	}
	/* drop the fragment bits; rxseq is now the bare sequence number */
	rxseq >>= IEEE80211_SEQ_SEQ_SHIFT;
	rap->rxa_nframes++;
	/* re-entered from the window-move case below once the window has moved */
again:
	if (rxseq == rap->rxa_start) {
		/*
		 * First frame in window.
		 */
		if (rap->rxa_qframes != 0) {
			/*
			 * Dispatch as many packets as we can.
			 */
			KASSERT(rap->rxa_m[0] == NULL, ("unexpected dup"));
			ampdu_dispatch(ni, m);
			ampdu_rx_dispatch(rap, ni);
			return CONSUMED;
		} else {
			/*
			 * In order; advance window and notify
			 * caller to dispatch directly.
			 */
			rap->rxa_start = IEEE80211_SEQ_INC(rxseq);
			return PROCESS;
		}
	}
	/*
	 * Frame is out of order; store if in the BA window.
	 */
	/* calculate offset in BA window */
	off = IEEE80211_SEQ_SUB(rxseq, rap->rxa_start);
	if (off < rap->rxa_wnd) {
		/*
		 * Common case (hopefully): in the BA window.
		 * Sec 9.10.7.6.2 a) (p.137)
		 */
#ifdef IEEE80211_AMPDU_AGE
		/* 
		 * Check for frames sitting too long in the reorder queue.
		 * This should only ever happen if frames are not delivered
		 * without the sender otherwise notifying us (e.g. with a
		 * BAR to move the window).  Typically this happens because
		 * of vendor bugs that cause the sequence number to jump.
		 * When this happens we get a gap in the reorder queue that
		 * leaves frame sitting on the queue until they get pushed
		 * out due to window moves.  When the vendor does not send
		 * BAR this move only happens due to explicit packet sends
		 *
		 * NB: we only track the time of the oldest frame in the
		 * reorder q; this means that if we flush we might push
		 * frames that still "new"; if this happens then subsequent
		 * frames will result in BA window moves which cost something
		 * but is still better than a big throughput dip.
		 */
		if (rap->rxa_qframes != 0) {
			/* XXX honor batimeout? */
			if (ticks - rap->rxa_age > ieee80211_ampdu_age) {
				/*
				 * Too long since we received the first
				 * frame; flush the reorder buffer.
				 */
				if (rap->rxa_qframes != 0) {
					vap->iv_stats.is_ampdu_rx_age +=
					    rap->rxa_qframes;
					ampdu_rx_flush(ni, rap);
				}
				/* restart the window just past this frame */
				rap->rxa_start = IEEE80211_SEQ_INC(rxseq);
				return PROCESS;
			}
		} else {
			/*
			 * First frame, start aging timer.
			 */
			rap->rxa_age = ticks;
		}
#endif /* IEEE80211_AMPDU_AGE */
		/* save packet */
		if (rap->rxa_m[off] == NULL) {
			rap->rxa_m[off] = m;
			rap->rxa_qframes++;
			rap->rxa_qbytes += m->m_pkthdr.len;
			vap->iv_stats.is_ampdu_rx_reorder++;
		} else {
			/* slot already holds a frame with this offset: duplicate */
			IEEE80211_DISCARD_MAC(vap,
			    IEEE80211_MSG_INPUT | IEEE80211_MSG_11N,
			    ni->ni_macaddr, "a-mpdu duplicate",
			    "seqno %u tid %u BA win <%u:%u>",
			    rxseq, tid, rap->rxa_start,
			    IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1));
			vap->iv_stats.is_rx_dup++;
			IEEE80211_NODE_STAT(ni, rx_dup);
			m_freem(m);
		}
		return CONSUMED;
	}
	if (off < IEEE80211_SEQ_BA_RANGE) {
		/*
		 * Outside the BA window, but within range;
		 * flush the reorder q and move the window.
		 * Sec 9.10.7.6.2 b) (p.138)
		 */
		IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni,
		    "move BA win <%u:%u> (%u frames) rxseq %u tid %u",
		    rap->rxa_start,
		    IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1),
		    rap->rxa_qframes, rxseq, tid);
		vap->iv_stats.is_ampdu_rx_move++;

		/*
		 * The spec says to flush frames up to but not including:
		 * 	WinStart_B = rxseq - rap->rxa_wnd + 1
		 * Then insert the frame or notify the caller to process
		 * it immediately.  We can safely do this by just starting
		 * over again because we know the frame will now be within
		 * the BA window.
		 */
		/* NB: rxa_wnd known to be >0 */
		ampdu_rx_flush_upto(ni, rap,
		    IEEE80211_SEQ_SUB(rxseq, rap->rxa_wnd-1));
		goto again;
	} else {
		/*
		 * Outside the BA window and out of range; toss.
		 * Sec 9.10.7.6.2 c) (p.138)
		 */
		IEEE80211_DISCARD_MAC(vap,
		    IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr,
		    "MPDU", "BA win <%u:%u> (%u frames) rxseq %u tid %u%s",
		    rap->rxa_start,
		    IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1),
		    rap->rxa_qframes, rxseq, tid,
		    wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : "");
		vap->iv_stats.is_ampdu_rx_drop++;
		IEEE80211_NODE_STAT(ni, rx_drop);
		m_freem(m);
		return CONSUMED;
	}
#undef CONSUMED
#undef PROCESS
#undef IEEE80211_FC0_QOSDATA
}

/*
 * Process a BAR ctl frame.  Dispatch all frames up to
 * the sequence number of the frame.
If this frame is
 * out of range it's discarded.
 */
/*
 * ni - station the BAR was received from
 * m0 - the BAR frame; it is only read here, never freed or queued
 */
void
ieee80211_recv_bar(struct ieee80211_node *ni, struct mbuf *m0)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct ieee80211_frame_bar *wh;
	struct ieee80211_rx_ampdu *rap;
	ieee80211_seq rxseq;
	int tid, off;

	/* BAR handling can be switched off through ieee80211_recv_bar_ena */
	if (!ieee80211_recv_bar_ena) {
#if 0
		IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_11N,
		    ni->ni_macaddr, "BAR", "%s", "processing disabled");
#endif
		vap->iv_stats.is_ampdu_bar_bad++;
		return;
	}
	wh = mtod(m0, struct ieee80211_frame_bar *);
	/* XXX check basic BAR */
	tid = MS(le16toh(wh->i_ctl), IEEE80211_BAR_TID);
	rap = &ni->ni_rx_ampdu[tid];
	if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) {
		/*
		 * No ADDBA request yet, don't touch.
		 */
		IEEE80211_DISCARD_MAC(vap,
		    IEEE80211_MSG_INPUT | IEEE80211_MSG_11N,
		    ni->ni_macaddr, "BAR", "no BA stream, tid %u", tid);
		vap->iv_stats.is_ampdu_bar_bad++;
		return;
	}
	vap->iv_stats.is_ampdu_bar_rx++;
	rxseq = le16toh(wh->i_seq) >> IEEE80211_SEQ_SEQ_SHIFT;
	/* window already starts at the requested sequence number */
	if (rxseq == rap->rxa_start)
		return;
	/* calculate offset in BA window */
	off = IEEE80211_SEQ_SUB(rxseq, rap->rxa_start);
	if (off < IEEE80211_SEQ_BA_RANGE) {
		/*
		 * Flush the reorder q up to rxseq and move the window.
		 * Sec 9.10.7.6.3 a) (p.138)
		 */
		IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni,
		    "BAR moves BA win <%u:%u> (%u frames) rxseq %u tid %u",
		    rap->rxa_start,
		    IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1),
		    rap->rxa_qframes, rxseq, tid);
		vap->iv_stats.is_ampdu_bar_move++;

		ampdu_rx_flush_upto(ni, rap, rxseq);
		if (off >= rap->rxa_wnd) {
			/*
			 * BAR specifies a window start to the right of BA
			 * window; we must move it explicitly since
			 * ampdu_rx_flush_upto will not.
			 */
			rap->rxa_start = rxseq;
		}
	} else {
		/*
		 * Out of range; toss.
		 * Sec 9.10.7.6.3 b) (p.138)
		 */
		IEEE80211_DISCARD_MAC(vap,
		    IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, ni->ni_macaddr,
		    "BAR", "BA win <%u:%u> (%u frames) rxseq %u tid %u%s",
		    rap->rxa_start,
		    IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1),
		    rap->rxa_qframes, rxseq, tid,
		    wh->i_fc[1] & IEEE80211_FC1_RETRY ? " (retransmit)" : "");
		vap->iv_stats.is_ampdu_bar_oow++;
		IEEE80211_NODE_STAT(ni, rx_drop);
	}
}

/*
 * Setup HT-specific state in a node.  Called only
 * when HT use is negotiated so we don't do extra
 * work for temporary and/or legacy sta's.
 */
void
ieee80211_ht_node_init(struct ieee80211_node *ni)
{
	struct ieee80211_tx_ampdu *tap;
	int ac;

	if (ni->ni_flags & IEEE80211_NODE_HT) {
		/*
		 * Clean AMPDU state on re-associate.  This handles the case
		 * where a station leaves w/o notifying us and then returns
		 * before node is reaped for inactivity.
		 */
		ieee80211_ht_node_cleanup(ni);
	}
	/* label each per-AC tx aggregation slot with its AC and owning node */
	for (ac = 0; ac < WME_NUM_AC; ac++) {
		tap = &ni->ni_tx_ampdu[ac];
		tap->txa_ac = ac;
		tap->txa_ni = ni;
		/* NB: further initialization deferred */
	}
	ni->ni_flags |= IEEE80211_NODE_HT | IEEE80211_NODE_AMPDU;
}

/*
 * Cleanup HT-specific state in a node.  Called only
 * when HT use has been marked.
 *
 * Stops every tx A-MPDU session that is set up, calls the driver's
 * ic_ampdu_rx_stop method for every TID, and clears the node's HT
 * capabilities and HT flags.
 */
void
ieee80211_ht_node_cleanup(struct ieee80211_node *ni)
{
	struct ieee80211com *ic = ni->ni_ic;
	int i;

	KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT node"));

	/* XXX optimize this */
	for (i = 0; i < WME_NUM_AC; i++) {
		struct ieee80211_tx_ampdu *tap = &ni->ni_tx_ampdu[i];
		if (tap->txa_flags & IEEE80211_AGGR_SETUP)
			ampdu_tx_stop(tap);
	}
	for (i = 0; i < WME_NUM_TID; i++)
		ic->ic_ampdu_rx_stop(ni, &ni->ni_rx_ampdu[i]);

	ni->ni_htcap = 0;
	ni->ni_flags &= ~IEEE80211_NODE_HT_ALL;
}

/*
 * Age out HT resources for a station.
 *
 * Without IEEE80211_AMPDU_AGE this only asserts that the node is HT.
 * With it, every TID whose re-order queue is in use and non-empty is
 * flushed once its oldest frame has waited longer than
 * ieee80211_ampdu_age ticks.
 */
void
ieee80211_ht_node_age(struct ieee80211_node *ni)
{
#ifdef IEEE80211_AMPDU_AGE
	struct ieee80211vap *vap = ni->ni_vap;
	uint8_t tid;
#endif

	KASSERT(ni->ni_flags & IEEE80211_NODE_HT, ("not an HT sta"));

#ifdef IEEE80211_AMPDU_AGE
	for (tid = 0; tid < WME_NUM_TID; tid++) {
		struct ieee80211_rx_ampdu *rap;

		rap = &ni->ni_rx_ampdu[tid];
		if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0)
			continue;
		if (rap->rxa_qframes == 0)
			continue;
		/* 
		 * Check for frames sitting too long in the reorder queue.
		 * See above for more details on what's happening here.
		 */
		/* XXX honor batimeout? */
		if (ticks - rap->rxa_age > ieee80211_ampdu_age) {
			/*
			 * Too long since we received the first
			 * frame; flush the reorder buffer.
			 */
			vap->iv_stats.is_ampdu_rx_age += rap->rxa_qframes;
			ampdu_rx_flush(ni, rap);
		}
	}
#endif /* IEEE80211_AMPDU_AGE */
}

/*
 * Look up the channel on c's frequency whose flags are c's flags
 * with the HT bits replaced by htflags.  Returns whatever
 * ieee80211_find_channel() returns; callers in this file treat NULL
 * as "no such channel".
 */
static struct ieee80211_channel *
findhtchan(struct ieee80211com *ic, struct ieee80211_channel *c, int htflags)
{
	return ieee80211_find_channel(ic, c->ic_freq,
	    (c->ic_flags &~ IEEE80211_CHAN_HT) | htflags);
}

/*
 * Adjust a channel to be HT/non-HT according to the vap's configuration.
 *
 * flags holds IEEE80211_FHT_* bits.  Returns the adjusted channel,
 * or chan itself when no matching channel can be found.
 */
struct ieee80211_channel *
ieee80211_ht_adjust_channel(struct ieee80211com *ic,
	struct ieee80211_channel *chan, int flags)
{
	struct ieee80211_channel *c;

	if (flags & IEEE80211_FHT_HT) {
		/* promote to HT if possible */
		if (flags & IEEE80211_FHT_USEHT40) {
			if (!IEEE80211_IS_CHAN_HT40(chan)) {
				/* NB: arbitrarily pick ht40+ over ht40- */
				c = findhtchan(ic, chan, IEEE80211_CHAN_HT40U);
				if (c == NULL)
					c = findhtchan(ic, chan,
						IEEE80211_CHAN_HT40D);
				/* no HT40 variant; fall back to HT20 */
				if (c == NULL)
					c = findhtchan(ic, chan,
						IEEE80211_CHAN_HT20);
				if (c != NULL)
					chan = c;
			}
		} else if (!IEEE80211_IS_CHAN_HT20(chan)) {
			c = findhtchan(ic, chan, IEEE80211_CHAN_HT20);
			if (c != NULL)
				chan = c;
		}
	} else if (IEEE80211_IS_CHAN_HT(chan)) {
		/* demote to legacy, HT use is disabled */
		c = ieee80211_find_channel(ic, chan->ic_freq,
		    chan->ic_flags &~ IEEE80211_CHAN_HT);
		if (c != NULL)
			chan = c;
	}
	return chan;
}

/*
 * Setup HT-specific state for a legacy WDS peer.
 */
void
ieee80211_ht_wds_init(struct ieee80211_node *ni)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct ieee80211_tx_ampdu *tap;
	int ac;

	KASSERT(vap->iv_flags_ht & IEEE80211_FHT_HT, ("no HT requested"));

	/* XXX check scan cache in case peer has an ap and we have info */
	/*
	 * If setup with a legacy channel; locate an HT channel.
	 * Otherwise if the inherited channel (from a companion
	 * AP) is suitable use it so we use the same location
	 * for the extension channel).
	 */
	ni->ni_chan = ieee80211_ht_adjust_channel(ni->ni_ic,
	    ni->ni_chan, ieee80211_htchanflags(ni->ni_chan));

	/* derive the peer's HT capabilities from our own config and the channel */
	ni->ni_htcap = 0;
	if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20)
		ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI20;
	if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) {
		ni->ni_htcap |= IEEE80211_HTCAP_CHWIDTH40;
		ni->ni_chw = 40;
		if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan))
			ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_ABOVE;
		else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan))
			ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_BELOW;
		if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40)
			ni->ni_htcap |= IEEE80211_HTCAP_SHORTGI40;
	} else {
		ni->ni_chw = 20;
		ni->ni_ht2ndchan = IEEE80211_HTINFO_2NDCHAN_NONE;
	}
	ni->ni_htctlchan = ni->ni_chan->ic_ieee;
	if (vap->iv_flags_ht & IEEE80211_FHT_RIFS)
		ni->ni_flags |= IEEE80211_NODE_RIFS;
	/* XXX does it make sense to enable SMPS? */

	ni->ni_htopmode = 0;		/* XXX need protection state */
	ni->ni_htstbc = 0;		/* XXX need info */

	/*
	 * NOTE(review): unlike ieee80211_ht_node_init() this loop does
	 * not set tap->txa_ni -- confirm that is intentional.
	 */
	for (ac = 0; ac < WME_NUM_AC; ac++) {
		tap = &ni->ni_tx_ampdu[ac];
		tap->txa_ac = ac;
	}
	/* NB: AMPDU tx/rx governed by IEEE80211_FHT_AMPDU_{TX,RX} */
	ni->ni_flags |= IEEE80211_NODE_HT | IEEE80211_NODE_AMPDU;
}

/*
 * Notify hostap vaps of a change in the HTINFO ie.
 *
 * Only hostap vaps that are in RUN state on an HT channel are
 * notified; the occupancy summary is logged once, against the first
 * such vap.  The com lock must be held (asserted).
 */
static void
htinfo_notify(struct ieee80211com *ic)
{
	struct ieee80211vap *vap;
	int first = 1;

	IEEE80211_LOCK_ASSERT(ic);

	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
		if (vap->iv_opmode != IEEE80211_M_HOSTAP)
			continue;
		if (vap->iv_state != IEEE80211_S_RUN ||
		    !IEEE80211_IS_CHAN_HT(vap->iv_bss->ni_chan))
			continue;
		if (first) {
			IEEE80211_NOTE(vap,
			    IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N,
			    vap->iv_bss,
			    "HT bss occupancy change: %d sta, %d ht, "
			    "%d ht40%s, HT protmode now 0x%x"
			    , ic->ic_sta_assoc
			    , ic->ic_ht_sta_assoc
			    , ic->ic_ht40_sta_assoc
			    , (ic->ic_flags_ht & IEEE80211_FHT_NONHT_PR) ?
				 ", non-HT sta present" : ""
			    , ic->ic_curhtprotmode);
			first = 0;
		}
		ieee80211_beacon_notify(vap, IEEE80211_BEACON_HTINFO);
	}
}

/*
 * Calculate HT protection mode from current
 * state and handle updates.
*/ static void htinfo_update(struct ieee80211com *ic) { uint8_t protmode; if (ic->ic_sta_assoc != ic->ic_ht_sta_assoc) { protmode = IEEE80211_HTINFO_OPMODE_MIXED | IEEE80211_HTINFO_NONHT_PRESENT; } else if (ic->ic_flags_ht & IEEE80211_FHT_NONHT_PR) { protmode = IEEE80211_HTINFO_OPMODE_PROTOPT | IEEE80211_HTINFO_NONHT_PRESENT; } else if (ic->ic_bsschan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_HT40(ic->ic_bsschan) && ic->ic_sta_assoc != ic->ic_ht40_sta_assoc) { protmode = IEEE80211_HTINFO_OPMODE_HT20PR; } else { protmode = IEEE80211_HTINFO_OPMODE_PURE; } if (protmode != ic->ic_curhtprotmode) { ic->ic_curhtprotmode = protmode; htinfo_notify(ic); } } /* * Handle an HT station joining a BSS. */ void ieee80211_ht_node_join(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; IEEE80211_LOCK_ASSERT(ic); if (ni->ni_flags & IEEE80211_NODE_HT) { ic->ic_ht_sta_assoc++; if (ni->ni_chw == 40) ic->ic_ht40_sta_assoc++; } htinfo_update(ic); } /* * Handle an HT station leaving a BSS. */ void ieee80211_ht_node_leave(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; IEEE80211_LOCK_ASSERT(ic); if (ni->ni_flags & IEEE80211_NODE_HT) { ic->ic_ht_sta_assoc--; if (ni->ni_chw == 40) ic->ic_ht40_sta_assoc--; } htinfo_update(ic); } /* * Public version of htinfo_update; used for processing * beacon frames from overlapping bss. * * Caller can specify either IEEE80211_HTINFO_OPMODE_MIXED * (on receipt of a beacon that advertises MIXED) or * IEEE80211_HTINFO_OPMODE_PROTOPT (on receipt of a beacon * from an overlapping legacy bss). We treat MIXED with * a higher precedence than PROTOPT (i.e. we will not change * change PROTOPT -> MIXED; only MIXED -> PROTOPT). This * corresponds to how we handle things in htinfo_update. 
*/
void
ieee80211_htprot_update(struct ieee80211com *ic, int protmode)
{
/* extract the operating-mode field from a protection mode value */
#define	OPMODE(x)	SM(x, IEEE80211_HTINFO_OPMODE)
	IEEE80211_LOCK(ic);

	/* track non-HT station presence */
	KASSERT(protmode & IEEE80211_HTINFO_NONHT_PRESENT,
	    ("protmode 0x%x", protmode));
	ic->ic_flags_ht |= IEEE80211_FHT_NONHT_PR;
	ic->ic_lastnonht = ticks;

	/*
	 * Only push a change when the current mode is not MIXED,
	 * or when the requested mode is PROTOPT.
	 */
	if (protmode != ic->ic_curhtprotmode &&
	    (OPMODE(ic->ic_curhtprotmode) != IEEE80211_HTINFO_OPMODE_MIXED ||
	     OPMODE(protmode) == IEEE80211_HTINFO_OPMODE_PROTOPT)) {
		/* push beacon update */
		ic->ic_curhtprotmode = protmode;
		htinfo_notify(ic);
	}
	IEEE80211_UNLOCK(ic);
#undef OPMODE
}

/*
 * Time out presence of an overlapping bss with non-HT
 * stations.  When operating in hostap mode we listen for
 * beacons from other stations and if we identify a non-HT
 * station is present we update the opmode field of the
 * HTINFO ie.  To identify when all non-HT stations are
 * gone we time out this condition.
 */
void
ieee80211_ht_timeout(struct ieee80211com *ic)
{
	IEEE80211_LOCK_ASSERT(ic);

	/* ic_lastnonht is refreshed by ieee80211_htprot_update */
	if ((ic->ic_flags_ht & IEEE80211_FHT_NONHT_PR) &&
	    time_after(ticks, ic->ic_lastnonht + IEEE80211_NONHT_PRESENT_AGE)) {
#if 0
		IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni,
		    "%s", "time out non-HT STA present on channel");
#endif
		ic->ic_flags_ht &= ~IEEE80211_FHT_NONHT_PR;
		htinfo_update(ic);
	}
}

/* unaligned little endian access */
#define LE_READ_2(p)					\
	((uint16_t)					\
	 ((((const uint8_t *)(p))[0]      ) |		\
	  (((const uint8_t *)(p))[1] <<  8)))

/*
 * Process an 802.11n HT capabilities ie.
 *
 * Records the capabilities word and the A-MPDU parameters byte
 * in the node and notes whether the vendor-OUI form was used.
 */
void
ieee80211_parse_htcap(struct ieee80211_node *ni, const uint8_t *ie)
{
	if (ie[0] == IEEE80211_ELEMID_VENDOR) {
		/*
		 * Station used Vendor OUI ie to associate;
		 * mark the node so when we respond we'll use
		 * the Vendor OUI's and not the standard ie's.
		 */
		ni->ni_flags |= IEEE80211_NODE_HTCOMPAT;
		/* step over 4 bytes so ie lines up with the standard layout */
		ie += 4;
	} else
		ni->ni_flags &= ~IEEE80211_NODE_HTCOMPAT;

	ni->ni_htcap = LE_READ_2(ie +
		__offsetof(struct ieee80211_ie_htcap, hc_cap));
	ni->ni_htparam = ie[__offsetof(struct ieee80211_ie_htcap, hc_param)];
}

/*
 * Copy the control channel, secondary channel offset, operating
 * mode and basic STBC MCS fields of an HT info ie into the node.
 */
static void
htinfo_parse(struct ieee80211_node *ni,
	const struct ieee80211_ie_htinfo *htinfo)
{
	uint16_t w;

	ni->ni_htctlchan = htinfo->hi_ctrlchannel;
	ni->ni_ht2ndchan = SM(htinfo->hi_byte1, IEEE80211_HTINFO_2NDCHAN);
	w = LE_READ_2(&htinfo->hi_byte2);
	ni->ni_htopmode = SM(w, IEEE80211_HTINFO_OPMODE);
	w = LE_READ_2(&htinfo->hi_byte45);
	ni->ni_htstbc = SM(w, IEEE80211_HTINFO_BASIC_STBCMCS);
}

/*
 * Parse an 802.11n HT info ie and save useful information
 * to the node state.  Note this does not effect any state
 * changes such as for channel width change.
 */
void
ieee80211_parse_htinfo(struct ieee80211_node *ni, const uint8_t *ie)
{
	/* vendor-OUI form: skip 4 bytes to reach the standard layout */
	if (ie[0] == IEEE80211_ELEMID_VENDOR)
		ie += 4;
	htinfo_parse(ni, (const struct ieee80211_ie_htinfo *) ie);
}

/*
 * Handle 11n channel switch.  Use the received HT ie's to
 * identify the right channel to use.  If we cannot locate it
 * in the channel table then fallback to legacy operation.
 * Note that we use this information to identify the node's
 * channel only; the caller is responsible for ensuring any
 * required channel change is done (e.g. in sta mode when
 * parsing the contents of a beacon frame).
 */
static void
htinfo_update_chw(struct ieee80211_node *ni, int htflags)
{
	struct ieee80211com *ic = ni->ni_ic;
	struct ieee80211_channel *c;
	int chanflags;

	/* desired flags: current channel with its HT bits replaced */
	chanflags = (ni->ni_chan->ic_flags &~ IEEE80211_CHAN_HT) | htflags;
	if (chanflags != ni->ni_chan->ic_flags) {
		/* XXX not right for ht40- */
		c = ieee80211_find_channel(ic, ni->ni_chan->ic_freq, chanflags);
		if (c == NULL && (htflags & IEEE80211_CHAN_HT40)) {
			/*
			 * No HT40 channel entry in our table; fall back
			 * to HT20 operation.  This should not happen.
			 */
			c = findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT20);
#if 0
			IEEE80211_NOTE(ni->ni_vap,
			    IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni,
			    "no HT40 channel (freq %u), falling back to HT20",
			    ni->ni_chan->ic_freq);
#endif
			/* XXX stat */
		}
		if (c != NULL && c != ni->ni_chan) {
			IEEE80211_NOTE(ni->ni_vap,
			    IEEE80211_MSG_ASSOC | IEEE80211_MSG_11N, ni,
			    "switch station to HT%d channel %u/0x%x",
			    IEEE80211_IS_CHAN_HT40(c) ? 40 : 20,
			    c->ic_freq, c->ic_flags);
			ni->ni_chan = c;
		}
		/* NB: caller responsible for forcing any channel change */
	}
	/* update node's tx channel width */
	ni->ni_chw = IEEE80211_IS_CHAN_HT40(ni->ni_chan)? 40 : 20;
}

/*
 * Update 11n MIMO PS state according to received htcap.
 *
 * Returns the XOR of the old and new node flags.
 * NOTE(review): oflags is 16 bits wide; if ni_flags is wider the
 * returned value also carries the upper ni_flags bits -- confirm
 * before relying on the result (callers in view ignore it).
 */
static __inline int
htcap_update_mimo_ps(struct ieee80211_node *ni)
{
	uint16_t oflags = ni->ni_flags;

	switch (ni->ni_htcap & IEEE80211_HTCAP_SMPS) {
	case IEEE80211_HTCAP_SMPS_DYNAMIC:
		ni->ni_flags |= IEEE80211_NODE_MIMO_PS;
		ni->ni_flags |= IEEE80211_NODE_MIMO_RTS;
		break;
	case IEEE80211_HTCAP_SMPS_ENA:
		ni->ni_flags |= IEEE80211_NODE_MIMO_PS;
		ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS;
		break;
	case IEEE80211_HTCAP_SMPS_OFF:
	default:		/* disable on rx of reserved value */
		ni->ni_flags &= ~IEEE80211_NODE_MIMO_PS;
		ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS;
		break;
	}
	return (oflags ^ ni->ni_flags);
}

/*
 * Update short GI state according to received htcap
 * and local settings.
 */
static __inline void
htcap_update_shortgi(struct ieee80211_node *ni)
{
	struct ieee80211vap *vap = ni->ni_vap;

	/* start clean; a flag is set only if both peer and vap allow it */
	ni->ni_flags &= ~(IEEE80211_NODE_SGI20|IEEE80211_NODE_SGI40);
	if ((ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) &&
	    (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20))
		ni->ni_flags |= IEEE80211_NODE_SGI20;
	if ((ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) &&
	    (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40))
		ni->ni_flags |= IEEE80211_NODE_SGI40;
}

/*
 * Parse and update HT-related state extracted from
 * the HT cap and info ie's.
*/ void ieee80211_ht_updateparams(struct ieee80211_node *ni, const uint8_t *htcapie, const uint8_t *htinfoie) { struct ieee80211vap *vap = ni->ni_vap; const struct ieee80211_ie_htinfo *htinfo; int htflags; ieee80211_parse_htcap(ni, htcapie); if (vap->iv_htcaps & IEEE80211_HTCAP_SMPS) htcap_update_mimo_ps(ni); htcap_update_shortgi(ni); if (htinfoie[0] == IEEE80211_ELEMID_VENDOR) htinfoie += 4; htinfo = (const struct ieee80211_ie_htinfo *) htinfoie; htinfo_parse(ni, htinfo); htflags = (vap->iv_flags_ht & IEEE80211_FHT_HT) ? IEEE80211_CHAN_HT20 : 0; /* NB: honor operating mode constraint */ if ((htinfo->hi_byte1 & IEEE80211_HTINFO_TXWIDTH_2040) && (vap->iv_flags_ht & IEEE80211_FHT_USEHT40)) { if (ni->ni_ht2ndchan == IEEE80211_HTINFO_2NDCHAN_ABOVE) htflags = IEEE80211_CHAN_HT40U; else if (ni->ni_ht2ndchan == IEEE80211_HTINFO_2NDCHAN_BELOW) htflags = IEEE80211_CHAN_HT40D; } htinfo_update_chw(ni, htflags); if ((htinfo->hi_byte1 & IEEE80211_HTINFO_RIFSMODE_PERM) && (vap->iv_flags_ht & IEEE80211_FHT_RIFS)) ni->ni_flags |= IEEE80211_NODE_RIFS; else ni->ni_flags &= ~IEEE80211_NODE_RIFS; } /* * Parse and update HT-related state extracted from the HT cap ie * for a station joining an HT BSS. */ void ieee80211_ht_updatehtcap(struct ieee80211_node *ni, const uint8_t *htcapie) { struct ieee80211vap *vap = ni->ni_vap; int htflags; ieee80211_parse_htcap(ni, htcapie); if (vap->iv_htcaps & IEEE80211_HTCAP_SMPS) htcap_update_mimo_ps(ni); htcap_update_shortgi(ni); /* NB: honor operating mode constraint */ /* XXX 40 MHz intolerant */ htflags = (vap->iv_flags_ht & IEEE80211_FHT_HT) ? IEEE80211_CHAN_HT20 : 0; if ((ni->ni_htcap & IEEE80211_HTCAP_CHWIDTH40) && (vap->iv_flags_ht & IEEE80211_FHT_USEHT40)) { if (IEEE80211_IS_CHAN_HT40U(vap->iv_bss->ni_chan)) htflags = IEEE80211_CHAN_HT40U; else if (IEEE80211_IS_CHAN_HT40D(vap->iv_bss->ni_chan)) htflags = IEEE80211_CHAN_HT40D; } htinfo_update_chw(ni, htflags); } /* * Install received HT rate set by parsing the HT cap ie. 
*/
int
ieee80211_setup_htrates(struct ieee80211_node *ni, const uint8_t *ie, int flags)
{
	struct ieee80211com *ic = ni->ni_ic;
	struct ieee80211vap *vap = ni->ni_vap;
	const struct ieee80211_ie_htcap *htcap;
	struct ieee80211_htrateset *rs;
	int i, maxequalmcs, maxunequalmcs;

	/* highest equal-modulation MCS index for our tx stream count */
	maxequalmcs = ic->ic_txstream * 8 - 1;
	/*
	 * Default to "no unequal-modulation MCS".  This also covers a
	 * device that advertises IEEE80211_HTC_TXUNEQUAL with fewer
	 * than 2 tx streams, which previously left the limit
	 * uninitialized.
	 */
	maxunequalmcs = 0;
	if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) {
		if (ic->ic_txstream >= 2)
			maxunequalmcs = 38;
		if (ic->ic_txstream >= 3)
			maxunequalmcs = 52;
		if (ic->ic_txstream >= 4)
			maxunequalmcs = 76;
	}

	rs = &ni->ni_htrates;
	memset(rs, 0, sizeof(*rs));
	if (ie != NULL) {
		/* vendor-OUI form: skip 4 bytes to the standard layout */
		if (ie[0] == IEEE80211_ELEMID_VENDOR)
			ie += 4;
		htcap = (const struct ieee80211_ie_htcap *) ie;
		for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) {
			if (isclr(htcap->hc_mcsset, i))
				continue;
			if (rs->rs_nrates == IEEE80211_HTRATE_MAXSIZE) {
				IEEE80211_NOTE(vap,
				    IEEE80211_MSG_XRATE | IEEE80211_MSG_11N, ni,
				    "WARNING, HT rate set too large; only "
				    "using %u rates", IEEE80211_HTRATE_MAXSIZE);
				vap->iv_stats.is_rx_rstoobig++;
				break;
			}
			/* drop MCS indices we cannot transmit ourselves */
			if (i <= 31 && i > maxequalmcs)
				continue;
			if (i == 32 &&
			    (ic->ic_htcaps & IEEE80211_HTC_TXMCS32) == 0)
				continue;
			if (i > 32 && i > maxunequalmcs)
				continue;
			rs->rs_rates[rs->rs_nrates++] = i;
		}
	}
	return ieee80211_fix_rate(ni, (struct ieee80211_rateset *) rs, flags);
}

/*
 * Mark rates in a node's HT rate set as basic according
 * to the information in the supplied HT info ie.
*/
void
ieee80211_setup_basic_htrates(struct ieee80211_node *ni, const uint8_t *ie)
{
	const struct ieee80211_ie_htinfo *htinfo;
	struct ieee80211_htrateset *rs;
	int i, j;

	/* vendor-OUI form: skip 4 bytes to reach the standard layout */
	if (ie[0] == IEEE80211_ELEMID_VENDOR)
		ie += 4;
	htinfo = (const struct ieee80211_ie_htinfo *) ie;
	rs = &ni->ni_htrates;
	if (rs->rs_nrates == 0) {
		IEEE80211_NOTE(ni->ni_vap,
		    IEEE80211_MSG_XRATE | IEEE80211_MSG_11N, ni,
		    "%s", "WARNING, empty HT rate set");
		return;
	}
	/* for each MCS set in the basic set, flag matching node rates */
	for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) {
		if (isclr(htinfo->hi_basicmcsset, i))
			continue;
		for (j = 0; j < rs->rs_nrates; j++)
			if ((rs->rs_rates[j] & IEEE80211_RATE_VAL) == i)
				rs->rs_rates[j] |= IEEE80211_RATE_BASIC;
	}
}

/*
 * Deferred setup of a tx aggregation state block: initialize
 * its timer and mark the block as set up.
 */
static void
ampdu_tx_setup(struct ieee80211_tx_ampdu *tap)
{
	callout_init(&tap->txa_timer, CALLOUT_MPSAFE);
	tap->txa_flags |= IEEE80211_AGGR_SETUP;
}

/*
 * Tear down a tx aggregation state block previously prepared by
 * ampdu_tx_setup.  Relies on tap->txa_ni being valid.
 */
static void
ampdu_tx_stop(struct ieee80211_tx_ampdu *tap)
{
	struct ieee80211_node *ni = tap->txa_ni;
	struct ieee80211com *ic = ni->ni_ic;

	KASSERT(tap->txa_flags & IEEE80211_AGGR_SETUP,
	    ("txa_flags 0x%x ac %d", tap->txa_flags, tap->txa_ac));

	/*
	 * Stop BA stream if setup so driver has a chance
	 * to reclaim any resources it might have allocated.
	 */
	ic->ic_addba_stop(ni, tap);
	/*
	 * Stop any pending BAR transmit.
	 */
	bar_stop_timer(tap);

	tap->txa_lastsample = 0;
	tap->txa_avgpps = 0;
	/* NB: clearing NAK means we may re-send ADDBA */
	tap->txa_flags &= ~(IEEE80211_AGGR_SETUP | IEEE80211_AGGR_NAK);
}

/*
 * ADDBA response timeout.
 *
 * If software aggregation and per-TID queue management was done here,
 * that queue would be unpaused after the ADDBA timeout occurs.
 */
static void
addba_timeout(void *arg)
{
	struct ieee80211_tx_ampdu *tap = arg;
	struct ieee80211_node *ni = tap->txa_ni;
	struct ieee80211com *ic = ni->ni_ic;

	/* XXX ? */
	tap->txa_flags &= ~IEEE80211_AGGR_XCHGPEND;
	tap->txa_attempts++;
	ic->ic_addba_response_timeout(ni, tap);
}

/*
 * Arm the ADDBA response timer, mark an exchange as pending and
 * record the earliest time for a follow-up request.
 */
static void
addba_start_timeout(struct ieee80211_tx_ampdu *tap)
{
	/* XXX use CALLOUT_PENDING instead? */
	callout_reset(&tap->txa_timer, ieee80211_addba_timeout,
	    addba_timeout, tap);
	tap->txa_flags |= IEEE80211_AGGR_XCHGPEND;
	tap->txa_nextrequest = ticks + ieee80211_addba_timeout;
}

/*
 * Cancel the ADDBA response timer if an exchange is pending.
 */
static void
addba_stop_timeout(struct ieee80211_tx_ampdu *tap)
{
	/* XXX use CALLOUT_PENDING instead? */
	if (tap->txa_flags & IEEE80211_AGGR_XCHGPEND) {
		callout_stop(&tap->txa_timer);
		tap->txa_flags &= ~IEEE80211_AGGR_XCHGPEND;
	}
}

/*
 * ADDBA response timeout handler that does nothing.
 */
static void
null_addba_response_timeout(struct ieee80211_node *ni,
    struct ieee80211_tx_ampdu *tap)
{
}

/*
 * Default method for requesting A-MPDU tx aggregation.
 * We setup the specified state block and start a timer
 * to wait for an ADDBA response frame.
 *
 * Always returns 1; ieee80211_ampdu_request treats a zero
 * return from this hook as "could not set up".
 */
static int
ieee80211_addba_request(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap,
	int dialogtoken, int baparamset, int batimeout)
{
	int bufsiz;

	/* XXX locking */
	tap->txa_token = dialogtoken;
	tap->txa_flags |= IEEE80211_AGGR_IMMEDIATE;
	bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ);
	/* a zero buffer size selects the maximum window */
	tap->txa_wnd = (bufsiz == 0) ?
	    IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX);
	addba_start_timeout(tap);
	return 1;
}

/*
 * Default method for processing an A-MPDU tx aggregation
 * response.  We shutdown any pending timer and update the
 * state block according to the reply.
 *
 * Always returns 1.
 */
static int
ieee80211_addba_response(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap,
	int status, int baparamset, int batimeout)
{
	int bufsiz, tid;

	/* XXX locking */
	addba_stop_timeout(tap);
	if (status == IEEE80211_STATUS_SUCCESS) {
		bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ);
		/* XXX override our request? */
		tap->txa_wnd = (bufsiz == 0) ?
		    IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX);
		/* XXX AC/TID */
		/* NOTE(review): tid is extracted but not used below */
		tid = MS(baparamset, IEEE80211_BAPS_TID);
		tap->txa_flags |= IEEE80211_AGGR_RUNNING;
		tap->txa_attempts = 0;
	} else {
		/* mark tid so we don't try again */
		tap->txa_flags |= IEEE80211_AGGR_NAK;
	}
	return 1;
}

/*
 * Default method for stopping A-MPDU tx aggregation.
 * Any timer is cleared and we drain any pending frames.
*/ static void ieee80211_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { /* XXX locking */ addba_stop_timeout(tap); if (tap->txa_flags & IEEE80211_AGGR_RUNNING) { /* XXX clear aggregation queue */ tap->txa_flags &= ~IEEE80211_AGGR_RUNNING; } tap->txa_attempts = 0; } /* * Process a received action frame using the default aggregation * policy. We intercept ADDBA-related frames and use them to * update our aggregation state. All other frames are passed up * for processing by ieee80211_recv_action. */ static int ht_recv_action_ba_addba_request(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_rx_ampdu *rap; uint8_t dialogtoken; uint16_t baparamset, batimeout, baseqctl; uint16_t args[5]; int tid; dialogtoken = frm[2]; baparamset = LE_READ_2(frm+3); batimeout = LE_READ_2(frm+5); baseqctl = LE_READ_2(frm+7); tid = MS(baparamset, IEEE80211_BAPS_TID); IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv ADDBA request: dialogtoken %u baparamset 0x%x " "(tid %d bufsiz %d) batimeout %d baseqctl %d:%d", dialogtoken, baparamset, tid, MS(baparamset, IEEE80211_BAPS_BUFSIZ), batimeout, MS(baseqctl, IEEE80211_BASEQ_START), MS(baseqctl, IEEE80211_BASEQ_FRAG)); rap = &ni->ni_rx_ampdu[tid]; /* Send ADDBA response */ args[0] = dialogtoken; /* * NB: We ack only if the sta associated with HT and * the ap is configured to do AMPDU rx (the latter * violates the 11n spec and is mostly for testing). */ if ((ni->ni_flags & IEEE80211_NODE_AMPDU_RX) && (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_RX)) { /* XXX handle ampdu_rx_start failure */ ic->ic_ampdu_rx_start(ni, rap, baparamset, batimeout, baseqctl); args[1] = IEEE80211_STATUS_SUCCESS; } else { IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "reject ADDBA request: %s", ni->ni_flags & IEEE80211_NODE_AMPDU_RX ? 
"administratively disabled" : "not negotiated for station"); vap->iv_stats.is_addba_reject++; args[1] = IEEE80211_STATUS_UNSPECIFIED; } /* XXX honor rap flags? */ args[2] = IEEE80211_BAPS_POLICY_IMMEDIATE | SM(tid, IEEE80211_BAPS_TID) | SM(rap->rxa_wnd, IEEE80211_BAPS_BUFSIZ) ; args[3] = 0; args[4] = 0; ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_RESPONSE, args); return 0; } static int ht_recv_action_ba_addba_response(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_tx_ampdu *tap; uint8_t dialogtoken, policy; uint16_t baparamset, batimeout, code; int tid, ac, bufsiz; dialogtoken = frm[2]; code = LE_READ_2(frm+3); baparamset = LE_READ_2(frm+5); tid = MS(baparamset, IEEE80211_BAPS_TID); bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); policy = MS(baparamset, IEEE80211_BAPS_POLICY); batimeout = LE_READ_2(frm+7); ac = TID_TO_WME_AC(tid); tap = &ni->ni_tx_ampdu[ac]; if ((tap->txa_flags & IEEE80211_AGGR_XCHGPEND) == 0) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "no pending ADDBA, tid %d dialogtoken %u " "code %d", tid, dialogtoken, code); vap->iv_stats.is_addba_norequest++; return 0; } if (dialogtoken != tap->txa_token) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "dialogtoken mismatch: waiting for %d, " "received %d, tid %d code %d", tap->txa_token, dialogtoken, tid, code); vap->iv_stats.is_addba_badtoken++; return 0; } /* NB: assumes IEEE80211_AGGR_IMMEDIATE is 1 */ if (policy != (tap->txa_flags & IEEE80211_AGGR_IMMEDIATE)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "policy mismatch: expecting %s, " "received %s, tid %d code %d", tap->txa_flags & IEEE80211_AGGR_IMMEDIATE, policy, tid, code); 
vap->iv_stats.is_addba_badpolicy++; return 0; } #if 0 /* XXX we take MIN in ieee80211_addba_response */ if (bufsiz > IEEE80211_AGGR_BAWMAX) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni->ni_macaddr, "ADDBA response", "BA window too large: max %d, " "received %d, tid %d code %d", bufsiz, IEEE80211_AGGR_BAWMAX, tid, code); vap->iv_stats.is_addba_badbawinsize++; return 0; } #endif IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv ADDBA response: dialogtoken %u code %d " "baparamset 0x%x (tid %d bufsiz %d) batimeout %d", dialogtoken, code, baparamset, tid, bufsiz, batimeout); ic->ic_addba_response(ni, tap, code, baparamset, batimeout); return 0; } static int ht_recv_action_ba_delba(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211_rx_ampdu *rap; struct ieee80211_tx_ampdu *tap; uint16_t baparamset, code; int tid, ac; baparamset = LE_READ_2(frm+2); code = LE_READ_2(frm+4); tid = MS(baparamset, IEEE80211_DELBAPS_TID); IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "recv DELBA: baparamset 0x%x (tid %d initiator %d) " "code %d", baparamset, tid, MS(baparamset, IEEE80211_DELBAPS_INIT), code); if ((baparamset & IEEE80211_DELBAPS_INIT) == 0) { ac = TID_TO_WME_AC(tid); tap = &ni->ni_tx_ampdu[ac]; ic->ic_addba_stop(ni, tap); } else { rap = &ni->ni_rx_ampdu[tid]; ic->ic_ampdu_rx_stop(ni, rap); } return 0; } static int ht_recv_action_ht_txchwidth(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { int chw; chw = (frm[2] == IEEE80211_A_HT_TXCHWIDTH_2040) ? 40 : 20; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: HT txchwidth, width %d%s", __func__, chw, ni->ni_chw != chw ? 
"*" : ""); if (chw != ni->ni_chw) { ni->ni_chw = chw; /* XXX notify on change */ } return 0; } static int ht_recv_action_ht_mimopwrsave(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { const struct ieee80211_action_ht_mimopowersave *mps = (const struct ieee80211_action_ht_mimopowersave *) frm; /* XXX check iv_htcaps */ if (mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_ENA) ni->ni_flags |= IEEE80211_NODE_MIMO_PS; else ni->ni_flags &= ~IEEE80211_NODE_MIMO_PS; if (mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_MODE) ni->ni_flags |= IEEE80211_NODE_MIMO_RTS; else ni->ni_flags &= ~IEEE80211_NODE_MIMO_RTS; /* XXX notify on change */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "%s: HT MIMO PS (%s%s)", __func__, (ni->ni_flags & IEEE80211_NODE_MIMO_PS) ? "on" : "off", (ni->ni_flags & IEEE80211_NODE_MIMO_RTS) ? "+rts" : "" ); return 0; } /* * Transmit processing. */ /* * Check if A-MPDU should be requested/enabled for a stream. * We require a traffic rate above a per-AC threshold and we * also handle backoff from previous failed attempts. * * Drivers may override this method to bring in information * such as link state conditions in making the decision. */ static int ieee80211_ampdu_enable(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct ieee80211vap *vap = ni->ni_vap; if (tap->txa_avgpps < vap->iv_ampdu_mintraffic[tap->txa_ac]) return 0; /* XXX check rssi? */ if (tap->txa_attempts >= ieee80211_addba_maxtries && ticks < tap->txa_nextrequest) { /* * Don't retry too often; txa_nextrequest is set * to the minimum interval we'll retry after * ieee80211_addba_maxtries failed attempts are made. */ return 0; } IEEE80211_NOTE(vap, IEEE80211_MSG_11N, ni, "enable AMPDU on %s, avgpps %d pkts %d", ieee80211_wme_acnames[tap->txa_ac], tap->txa_avgpps, tap->txa_pkts); return 1; } /* * Request A-MPDU tx aggregation. Setup local state and * issue an ADDBA request. 
BA use will only happen after * the other end replies with ADDBA response. */ int ieee80211_ampdu_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct ieee80211com *ic = ni->ni_ic; uint16_t args[5]; int tid, dialogtoken; static int tokens = 0; /* XXX */ /* XXX locking */ if ((tap->txa_flags & IEEE80211_AGGR_SETUP) == 0) { /* do deferred setup of state */ ampdu_tx_setup(tap); } /* XXX hack for not doing proper locking */ tap->txa_flags &= ~IEEE80211_AGGR_NAK; dialogtoken = (tokens+1) % 63; /* XXX */ tid = WME_AC_TO_TID(tap->txa_ac); tap->txa_start = ni->ni_txseqs[tid]; args[0] = dialogtoken; args[1] = 0; /* NB: status code not used */ args[2] = IEEE80211_BAPS_POLICY_IMMEDIATE | SM(tid, IEEE80211_BAPS_TID) | SM(IEEE80211_AGGR_BAWMAX, IEEE80211_BAPS_BUFSIZ) ; args[3] = 0; /* batimeout */ /* NB: do first so there's no race against reply */ if (!ic->ic_addba_request(ni, tap, dialogtoken, args[2], args[3])) { /* unable to setup state, don't make request */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, "%s: could not setup BA stream for AC %d", __func__, tap->txa_ac); /* defer next try so we don't slam the driver with requests */ tap->txa_attempts = ieee80211_addba_maxtries; /* NB: check in case driver wants to override */ if (tap->txa_nextrequest <= ticks) tap->txa_nextrequest = ticks + ieee80211_addba_backoff; return 0; } tokens = dialogtoken; /* allocate token */ /* NB: after calling ic_addba_request so driver can set txa_start */ args[4] = SM(tap->txa_start, IEEE80211_BASEQ_START) | SM(0, IEEE80211_BASEQ_FRAG) ; return ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA, IEEE80211_ACTION_BA_ADDBA_REQUEST, args); } /* * Terminate an AMPDU tx stream. State is reclaimed * and the peer notified with a DelBA Action frame. 
*/
void
ieee80211_ampdu_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
	int reason)
{
	struct ieee80211com *ic = ni->ni_ic;
	struct ieee80211vap *vap = ni->ni_vap;
	uint16_t args[4];

	/* XXX locking */
	tap->txa_flags &= ~IEEE80211_AGGR_BARPEND;
	if (IEEE80211_AMPDU_RUNNING(tap)) {
		IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N,
		    ni, "%s: stop BA stream for AC %d (reason %d)",
		    __func__, tap->txa_ac, reason);
		vap->iv_stats.is_ampdu_stop++;

		ic->ic_addba_stop(ni, tap);
		/* DELBA arguments: tid, initiator flag, reason */
		args[0] = WME_AC_TO_TID(tap->txa_ac);
		args[1] = IEEE80211_DELBAPS_INIT;
		args[2] = reason;			/* XXX reason code */
		ic->ic_send_action(ni, IEEE80211_ACTION_CAT_BA,
			IEEE80211_ACTION_BA_DELBA, args);
	} else {
		IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N,
		    ni, "%s: BA stream for AC %d not running (reason %d)",
		    __func__, tap->txa_ac, reason);
		vap->iv_stats.is_ampdu_stop_failed++;
	}
}

/*
 * BAR retransmit timer.  If the BAR is still pending, either
 * resend it or, once ieee80211_bar_maxtries attempts have been
 * made, tear down the BA stream.
 */
static void
bar_timeout(void *arg)
{
	struct ieee80211_tx_ampdu *tap = arg;
	struct ieee80211_node *ni = tap->txa_ni;

	KASSERT((tap->txa_flags & IEEE80211_AGGR_XCHGPEND) == 0,
	    ("bar/addba collision, flags 0x%x", tap->txa_flags));

	IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N,
	    ni, "%s: tid %u flags 0x%x attempts %d", __func__,
	    tap->txa_ac, tap->txa_flags, tap->txa_attempts);

	/* guard against race with bar_tx_complete */
	if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) == 0)
		return;
	/* XXX ? */
	if (tap->txa_attempts >= ieee80211_bar_maxtries)
		ieee80211_ampdu_stop(ni, tap, IEEE80211_REASON_TIMEOUT);
	else
		ieee80211_send_bar(ni, tap, tap->txa_seqpending);
}

/*
 * Arm the BAR retransmit timer.
 */
static void
bar_start_timer(struct ieee80211_tx_ampdu *tap)
{
	callout_reset(&tap->txa_timer, ieee80211_bar_timeout, bar_timeout, tap);
}

/*
 * Cancel the BAR retransmit timer.
 */
static void
bar_stop_timer(struct ieee80211_tx_ampdu *tap)
{
	callout_stop(&tap->txa_timer);
}

/*
 * Transmit-complete callback attached to BAR frames by
 * ieee80211_send_bar; arg is the tx aggregation state block.
 */
static void
bar_tx_complete(struct ieee80211_node *ni, void *arg, int status)
{
	struct ieee80211_tx_ampdu *tap = arg;

	IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N,
	    ni, "%s: tid %u flags 0x%x pending %d status %d",
	    __func__, tap->txa_ac, tap->txa_flags,
	    callout_pending(&tap->txa_timer), status);

	/* XXX locking */
	if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) &&
	    callout_pending(&tap->txa_timer)) {
		struct ieee80211com *ic = ni->ni_ic;

		if (status == 0)		/* ACK'd */
			bar_stop_timer(tap);
		ic->ic_bar_response(ni, tap, status);
		/* NB: just let timer expire so we pace requests */
	}
}

/*
 * Default BAR response handling: on an ACK (status 0) move the
 * start of the BA window to the pending sequence number and
 * clear the BAR-pending state.  Other statuses are ignored here.
 */
static void
ieee80211_bar_response(struct ieee80211_node *ni,
	struct ieee80211_tx_ampdu *tap, int status)
{

	if (status == 0) {		/* got ACK */
		IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N,
		    ni, "BAR moves BA win <%u:%u> (%u frames) txseq %u tid %u",
		    tap->txa_start,
		    IEEE80211_SEQ_ADD(tap->txa_start, tap->txa_wnd-1),
		    tap->txa_qframes, tap->txa_seqpending,
		    WME_AC_TO_TID(tap->txa_ac));

		/* NB: timer already stopped in bar_tx_complete */
		tap->txa_start = tap->txa_seqpending;
		tap->txa_flags &= ~IEEE80211_AGGR_BARPEND;
	}
}

/*
 * Transmit a BAR frame to the specified node.  The
 * BAR contents are drawn from the supplied aggregation
 * state associated with the node.
 *
 * NB: we only handle immediate ACK w/ compressed bitmap.
*/ int ieee80211_send_bar(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, ieee80211_seq seq) { #define senderr(_x, _v) do { vap->iv_stats._v++; ret = _x; goto bad; } while (0) struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_frame_bar *bar; struct mbuf *m; uint16_t barctl, barseqctl; uint8_t *frm; int tid, ret; if ((tap->txa_flags & IEEE80211_AGGR_RUNNING) == 0) { /* no ADDBA response, should not happen */ /* XXX stat+msg */ return EINVAL; } /* XXX locking */ bar_stop_timer(tap); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom, sizeof(*bar)); if (m == NULL) senderr(ENOMEM, is_tx_nobuf); if (!ieee80211_add_callback(m, bar_tx_complete, tap)) { m_freem(m); senderr(ENOMEM, is_tx_nobuf); /* XXX */ /* NOTREACHED */ } bar = mtod(m, struct ieee80211_frame_bar *); bar->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_CTL | IEEE80211_FC0_SUBTYPE_BAR; bar->i_fc[1] = 0; IEEE80211_ADDR_COPY(bar->i_ra, ni->ni_macaddr); IEEE80211_ADDR_COPY(bar->i_ta, vap->iv_myaddr); tid = WME_AC_TO_TID(tap->txa_ac); barctl = (tap->txa_flags & IEEE80211_AGGR_IMMEDIATE ? 0 : IEEE80211_BAR_NOACK) | IEEE80211_BAR_COMP | SM(tid, IEEE80211_BAR_TID) ; barseqctl = SM(seq, IEEE80211_BAR_SEQ_START); /* NB: known to have proper alignment */ bar->i_ctl = htole16(barctl); bar->i_seq = htole16(barseqctl); m->m_pkthdr.len = m->m_len = sizeof(struct ieee80211_frame_bar); M_WME_SETAC(m, WME_AC_VO); IEEE80211_NODE_STAT(ni, tx_mgmt); /* XXX tx_ctl? */ /* XXX locking */ /* init/bump attempts counter */ if ((tap->txa_flags & IEEE80211_AGGR_BARPEND) == 0) tap->txa_attempts = 1; else tap->txa_attempts++; tap->txa_seqpending = seq; tap->txa_flags |= IEEE80211_AGGR_BARPEND; IEEE80211_NOTE(vap, IEEE80211_MSG_DEBUG | IEEE80211_MSG_11N, ni, "send BAR: tid %u ctl 0x%x start %u (attempt %d)", tid, barctl, seq, tap->txa_attempts); /* * ic_raw_xmit will free the node reference * regardless of queue/TX success or failure. 
*/ ret = ic->ic_raw_xmit(ni, m, NULL); if (ret != 0) { /* xmit failed, clear state flag */ tap->txa_flags &= ~IEEE80211_AGGR_BARPEND; return ret; } /* XXX hack against tx complete happening before timer is started */ if (tap->txa_flags & IEEE80211_AGGR_BARPEND) bar_start_timer(tap); return 0; bad: ieee80211_free_node(ni); return ret; #undef senderr } static int ht_action_output(struct ieee80211_node *ni, struct mbuf *m) { struct ieee80211_bpf_params params; memset(¶ms, 0, sizeof(params)); params.ibp_pri = WME_AC_VO; params.ibp_rate0 = ni->ni_txparms->mgmtrate; /* NB: we know all frames are unicast */ params.ibp_try0 = ni->ni_txparms->maxretry; params.ibp_power = ni->ni_txpower; return ieee80211_mgmt_output(ni, m, IEEE80211_FC0_SUBTYPE_ACTION, ¶ms); } #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) /* * Send an action management frame. The arguments are stuff * into a frame without inspection; the caller is assumed to * prepare them carefully (e.g. based on the aggregation state). */ static int ht_send_action_ba_addba(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = arg0; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send ADDBA %s: dialogtoken %d status %d " "baparamset 0x%x (tid %d) batimeout 0x%x baseqctl 0x%x", (action == IEEE80211_ACTION_BA_ADDBA_REQUEST) ? 
"request" : "response", args[0], args[1], args[2], MS(args[2], IEEE80211_BAPS_TID), args[3], args[4]); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; *frm++ = args[0]; /* dialog token */ if (action == IEEE80211_ACTION_BA_ADDBA_RESPONSE) ADDSHORT(frm, args[1]); /* status code */ ADDSHORT(frm, args[2]); /* baparamset */ ADDSHORT(frm, args[3]); /* batimeout */ if (action == IEEE80211_ACTION_BA_ADDBA_REQUEST) ADDSHORT(frm, args[4]); /* baseqctl */ m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int ht_send_action_ba_delba(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = arg0; struct mbuf *m; uint16_t baparamset; uint8_t *frm; baparamset = SM(args[0], IEEE80211_DELBAPS_TID) | args[1] ; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send DELBA action: tid %d, initiator %d reason %d", args[0], args[1], args[2]); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; ADDSHORT(frm, baparamset); ADDSHORT(frm, args[2]); /* 
reason code */ m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int ht_send_action_ht_txchwidth(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_11N, ni, "send HT txchwidth: width %d", IEEE80211_IS_CHAN_HT40(ni->ni_chan) ? 40 : 20); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ /* XXX may action payload */ + sizeof(struct ieee80211_action_ba_addbaresponse) ); if (m != NULL) { *frm++ = category; *frm++ = action; *frm++ = IEEE80211_IS_CHAN_HT40(ni->ni_chan) ? IEEE80211_A_HT_TXCHWIDTH_2040 : IEEE80211_A_HT_TXCHWIDTH_20; m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return ht_action_output(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } #undef ADDSHORT /* * Construct the MCS bit mask for inclusion in an HT capabilities * information element. 
*/ static void ieee80211_set_mcsset(struct ieee80211com *ic, uint8_t *frm) { int i; uint8_t txparams; KASSERT((ic->ic_rxstream > 0 && ic->ic_rxstream <= 4), ("ic_rxstream %d out of range", ic->ic_rxstream)); KASSERT((ic->ic_txstream > 0 && ic->ic_txstream <= 4), ("ic_txstream %d out of range", ic->ic_txstream)); for (i = 0; i < ic->ic_rxstream * 8; i++) setbit(frm, i); if ((ic->ic_htcaps & IEEE80211_HTCAP_CHWIDTH40) && (ic->ic_htcaps & IEEE80211_HTC_RXMCS32)) setbit(frm, 32); if (ic->ic_htcaps & IEEE80211_HTC_RXUNEQUAL) { if (ic->ic_rxstream >= 2) { for (i = 33; i <= 38; i++) setbit(frm, i); } if (ic->ic_rxstream >= 3) { for (i = 39; i <= 52; i++) setbit(frm, i); } if (ic->ic_txstream >= 4) { for (i = 53; i <= 76; i++) setbit(frm, i); } } if (ic->ic_rxstream != ic->ic_txstream) { txparams = 0x1; /* TX MCS set defined */ txparams |= 0x2; /* TX RX MCS not equal */ txparams |= (ic->ic_txstream - 1) << 2; /* num TX streams */ if (ic->ic_htcaps & IEEE80211_HTC_TXUNEQUAL) txparams |= 0x16; /* TX unequal modulation sup */ } else txparams = 0; frm[12] = txparams; } /* * Add body of an HTCAP information element. */ static uint8_t * ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni) { #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; uint16_t caps, extcaps; int rxmax, density; /* HT capabilities */ caps = vap->iv_htcaps & 0xffff; /* * Note channel width depends on whether we are operating as * a sta or not. When operating as a sta we are generating * a request based on our desired configuration. Otherwise * we are operational and the channel attributes identify * how we've been setup (which might be different if a fixed * channel is specified). 
*/ if (vap->iv_opmode == IEEE80211_M_STA) { /* override 20/40 use based on config */ if (vap->iv_flags_ht & IEEE80211_FHT_USEHT40) caps |= IEEE80211_HTCAP_CHWIDTH40; else caps &= ~IEEE80211_HTCAP_CHWIDTH40; /* use advertised setting (XXX locally constraint) */ rxmax = MS(ni->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU); density = MS(ni->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY); /* * NB: Hardware might support HT40 on some but not all * channels. We can't determine this earlier because only * after association the channel is upgraded to HT based * on the negotiated capabilities. */ if (ni->ni_chan != IEEE80211_CHAN_ANYC && findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40U) == NULL && findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40D) == NULL) caps &= ~IEEE80211_HTCAP_CHWIDTH40; } else { /* override 20/40 use based on current channel */ if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) caps |= IEEE80211_HTCAP_CHWIDTH40; else caps &= ~IEEE80211_HTCAP_CHWIDTH40; rxmax = vap->iv_ampdu_rxmax; density = vap->iv_ampdu_density; } /* adjust short GI based on channel and config */ if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0) caps &= ~IEEE80211_HTCAP_SHORTGI20; if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) == 0 || (caps & IEEE80211_HTCAP_CHWIDTH40) == 0) caps &= ~IEEE80211_HTCAP_SHORTGI40; ADDSHORT(frm, caps); /* HT parameters */ *frm = SM(rxmax, IEEE80211_HTCAP_MAXRXAMPDU) | SM(density, IEEE80211_HTCAP_MPDUDENSITY) ; frm++; /* pre-zero remainder of ie */ memset(frm, 0, sizeof(struct ieee80211_ie_htcap) - __offsetof(struct ieee80211_ie_htcap, hc_mcsset)); /* supported MCS set */ /* * XXX: For sta mode the rate set should be restricted based * on the AP's capabilities, but ni_htrates isn't setup when * we're called to form an AssocReq frame so for now we're * restricted to the device capabilities. 
*/ ieee80211_set_mcsset(ni->ni_ic, frm); frm += __offsetof(struct ieee80211_ie_htcap, hc_extcap) - __offsetof(struct ieee80211_ie_htcap, hc_mcsset); /* HT extended capabilities */ extcaps = vap->iv_htextcaps & 0xffff; ADDSHORT(frm, extcaps); frm += sizeof(struct ieee80211_ie_htcap) - __offsetof(struct ieee80211_ie_htcap, hc_txbf); return frm; #undef ADDSHORT } /* * Add 802.11n HT capabilities information element */ uint8_t * ieee80211_add_htcap(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_HTCAP; frm[1] = sizeof(struct ieee80211_ie_htcap) - 2; return ieee80211_add_htcap_body(frm + 2, ni); } /* * Add Broadcom OUI wrapped standard HTCAP ie; this is * used for compatibility w/ pre-draft implementations. */ uint8_t * ieee80211_add_htcap_vendor(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_VENDOR; frm[1] = 4 + sizeof(struct ieee80211_ie_htcap) - 2; frm[2] = (BCM_OUI >> 0) & 0xff; frm[3] = (BCM_OUI >> 8) & 0xff; frm[4] = (BCM_OUI >> 16) & 0xff; frm[5] = BCM_OUI_HTCAP; return ieee80211_add_htcap_body(frm + 6, ni); } /* * Construct the MCS bit mask of basic rates * for inclusion in an HT information element. */ static void ieee80211_set_basic_htrates(uint8_t *frm, const struct ieee80211_htrateset *rs) { int i; for (i = 0; i < rs->rs_nrates; i++) { int r = rs->rs_rates[i] & IEEE80211_RATE_VAL; if ((rs->rs_rates[i] & IEEE80211_RATE_BASIC) && r < IEEE80211_HTRATE_MAXSIZE) { /* NB: this assumes a particular implementation */ setbit(frm, r); } } } /* * Update the HTINFO ie for a beacon frame. 
*/
void
ieee80211_ht_update_beacon(struct ieee80211vap *vap,
	struct ieee80211_beacon_offsets *bo)
{
	struct ieee80211_ie_htinfo *htinfo =
	    (struct ieee80211_ie_htinfo *) bo->bo_htinfo;
	const struct ieee80211_channel *chan = vap->iv_bss->ni_chan;
	struct ieee80211com *ic = vap->iv_ic;

	/* XXX only update on channel change */
	htinfo->hi_ctrlchannel = ieee80211_chan2ieee(ic, chan);

	/* byte 1 is rebuilt from scratch: RIFS mode first ... */
	htinfo->hi_byte1 = (vap->iv_flags_ht & IEEE80211_FHT_RIFS) ?
	    IEEE80211_HTINFO_RIFSMODE_PERM : IEEE80211_HTINFO_RIFSMODE_PROH;

	/* ... then where the secondary channel sits, if there is one ... */
	if (IEEE80211_IS_CHAN_HT40U(chan)) {
		htinfo->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_ABOVE;
	} else if (IEEE80211_IS_CHAN_HT40D(chan)) {
		htinfo->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_BELOW;
	} else {
		htinfo->hi_byte1 |= IEEE80211_HTINFO_2NDCHAN_NONE;
	}

	/* ... and finally the 20/40 transmit width */
	if (IEEE80211_IS_CHAN_HT40(chan)) {
		htinfo->hi_byte1 |= IEEE80211_HTINFO_TXWIDTH_2040;
	}

	/*
	 * protection mode: replace only the opmode and non-HT-present
	 * bits of byte 2, keeping whatever else is already there
	 */
	htinfo->hi_byte2 = (htinfo->hi_byte2 &
	    ~(IEEE80211_HTINFO_OPMODE|IEEE80211_HTINFO_NONHT_PRESENT)) |
	    ic->ic_curhtprotmode;

	/* XXX propagate to vendor ie's */
}

/*
 * Add body of an HTINFO information element.
 *
 * NB: We don't use struct ieee80211_ie_htinfo because we can
 *     be called to fillin both a standard ie and a compat ie that
 *     has a vendor OUI at the front.
*/ static uint8_t * ieee80211_add_htinfo_body(uint8_t *frm, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; /* pre-zero remainder of ie */ memset(frm, 0, sizeof(struct ieee80211_ie_htinfo) - 2); /* primary/control channel center */ *frm++ = ieee80211_chan2ieee(ic, ni->ni_chan); if (vap->iv_flags_ht & IEEE80211_FHT_RIFS) frm[0] = IEEE80211_HTINFO_RIFSMODE_PERM; else frm[0] = IEEE80211_HTINFO_RIFSMODE_PROH; if (IEEE80211_IS_CHAN_HT40U(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_2NDCHAN_ABOVE; else if (IEEE80211_IS_CHAN_HT40D(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_2NDCHAN_BELOW; else frm[0] |= IEEE80211_HTINFO_2NDCHAN_NONE; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) frm[0] |= IEEE80211_HTINFO_TXWIDTH_2040; frm[1] = ic->ic_curhtprotmode; frm += 5; /* basic MCS set */ ieee80211_set_basic_htrates(frm, &ni->ni_htrates); frm += sizeof(struct ieee80211_ie_htinfo) - __offsetof(struct ieee80211_ie_htinfo, hi_basicmcsset); return frm; } /* * Add 802.11n HT information information element. */ uint8_t * ieee80211_add_htinfo(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_HTINFO; frm[1] = sizeof(struct ieee80211_ie_htinfo) - 2; return ieee80211_add_htinfo_body(frm + 2, ni); } /* * Add Broadcom OUI wrapped standard HTINFO ie; this is * used for compatibility w/ pre-draft implementations. 
*/ uint8_t * ieee80211_add_htinfo_vendor(uint8_t *frm, struct ieee80211_node *ni) { frm[0] = IEEE80211_ELEMID_VENDOR; frm[1] = 4 + sizeof(struct ieee80211_ie_htinfo) - 2; frm[2] = (BCM_OUI >> 0) & 0xff; frm[3] = (BCM_OUI >> 8) & 0xff; frm[4] = (BCM_OUI >> 16) & 0xff; frm[5] = BCM_OUI_HTINFO; return ieee80211_add_htinfo_body(frm + 6, ni); } Index: stable/9/sys/net80211/ieee80211_hwmp.c =================================================================== --- stable/9/sys/net80211/ieee80211_hwmp.c (revision 273911) +++ stable/9/sys/net80211/ieee80211_hwmp.c (revision 273912) @@ -1,1438 +1,1438 @@ /*- * Copyright (c) 2009 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #ifdef __FreeBSD__ __FBSDID("$FreeBSD$"); #endif /* * IEEE 802.11s Hybrid Wireless Mesh Protocol, HWMP. * * Based on March 2009, D3.0 802.11s draft spec. */ #include "opt_inet.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void hwmp_vattach(struct ieee80211vap *); static void hwmp_vdetach(struct ieee80211vap *); static int hwmp_newstate(struct ieee80211vap *, enum ieee80211_state, int); static int hwmp_send_action(struct ieee80211_node *, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN], uint8_t *, size_t); static uint8_t * hwmp_add_meshpreq(uint8_t *, const struct ieee80211_meshpreq_ie *); static uint8_t * hwmp_add_meshprep(uint8_t *, const struct ieee80211_meshprep_ie *); static uint8_t * hwmp_add_meshperr(uint8_t *, const struct ieee80211_meshperr_ie *); static uint8_t * hwmp_add_meshrann(uint8_t *, const struct ieee80211_meshrann_ie *); static void hwmp_rootmode_setup(struct ieee80211vap *); static void hwmp_rootmode_cb(void *); static void hwmp_rootmode_rann_cb(void *); static void hwmp_recv_preq(struct ieee80211vap *, struct ieee80211_node *, const struct ieee80211_frame *, const struct ieee80211_meshpreq_ie *); static int hwmp_send_preq(struct ieee80211_node *, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN], struct 
ieee80211_meshpreq_ie *); static void hwmp_recv_prep(struct ieee80211vap *, struct ieee80211_node *, const struct ieee80211_frame *, const struct ieee80211_meshprep_ie *); static int hwmp_send_prep(struct ieee80211_node *, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN], struct ieee80211_meshprep_ie *); static void hwmp_recv_perr(struct ieee80211vap *, struct ieee80211_node *, const struct ieee80211_frame *, const struct ieee80211_meshperr_ie *); static int hwmp_send_perr(struct ieee80211_node *, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN], struct ieee80211_meshperr_ie *); static void hwmp_recv_rann(struct ieee80211vap *, struct ieee80211_node *, const struct ieee80211_frame *, const struct ieee80211_meshrann_ie *); static int hwmp_send_rann(struct ieee80211_node *, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN], struct ieee80211_meshrann_ie *); static struct ieee80211_node * hwmp_discover(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN], struct mbuf *); static void hwmp_peerdown(struct ieee80211_node *); static struct timeval ieee80211_hwmp_preqminint = { 0, 100000 }; static struct timeval ieee80211_hwmp_perrminint = { 0, 100000 }; /* unalligned little endian access */ #define LE_WRITE_2(p, v) do { \ ((uint8_t *)(p))[0] = (v) & 0xff; \ ((uint8_t *)(p))[1] = ((v) >> 8) & 0xff; \ } while (0) #define LE_WRITE_4(p, v) do { \ ((uint8_t *)(p))[0] = (v) & 0xff; \ ((uint8_t *)(p))[1] = ((v) >> 8) & 0xff; \ ((uint8_t *)(p))[2] = ((v) >> 16) & 0xff; \ ((uint8_t *)(p))[3] = ((v) >> 24) & 0xff; \ } while (0) /* NB: the Target Address set in a Proactive PREQ is the broadcast address. 
*/ static const uint8_t broadcastaddr[IEEE80211_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; typedef uint32_t ieee80211_hwmp_seq; #define HWMP_SEQ_LT(a, b) ((int32_t)((a)-(b)) < 0) #define HWMP_SEQ_LEQ(a, b) ((int32_t)((a)-(b)) <= 0) #define HWMP_SEQ_GT(a, b) ((int32_t)((a)-(b)) > 0) #define HWMP_SEQ_GEQ(a, b) ((int32_t)((a)-(b)) >= 0) /* * Private extension of ieee80211_mesh_route. */ struct ieee80211_hwmp_route { ieee80211_hwmp_seq hr_seq; /* last HWMP seq seen from dst*/ ieee80211_hwmp_seq hr_preqid; /* last PREQ ID seen from dst */ ieee80211_hwmp_seq hr_origseq; /* seq. no. on our latest PREQ*/ int hr_preqretries; }; struct ieee80211_hwmp_state { ieee80211_hwmp_seq hs_seq; /* next seq to be used */ ieee80211_hwmp_seq hs_preqid; /* next PREQ ID to be used */ struct timeval hs_lastpreq; /* last time we sent a PREQ */ struct timeval hs_lastperr; /* last time we sent a PERR */ int hs_rootmode; /* proactive HWMP */ struct callout hs_roottimer; uint8_t hs_maxhops; /* max hop count */ }; static SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD, 0, "IEEE 802.11s HWMP parameters"); static int ieee80211_hwmp_targetonly = 0; -SYSCTL_INT(_net_wlan_hwmp, OID_AUTO, targetonly, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_INT(_net_wlan_hwmp, OID_AUTO, targetonly, CTLFLAG_RW, &ieee80211_hwmp_targetonly, 0, "Set TO bit on generated PREQs"); static int ieee80211_hwmp_replyforward = 1; -SYSCTL_INT(_net_wlan_hwmp, OID_AUTO, replyforward, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_INT(_net_wlan_hwmp, OID_AUTO, replyforward, CTLFLAG_RW, &ieee80211_hwmp_replyforward, 0, "Set RF bit on generated PREQs"); static int ieee80211_hwmp_pathtimeout = -1; SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, pathlifetime, CTLTYPE_INT | CTLFLAG_RW, &ieee80211_hwmp_pathtimeout, 0, ieee80211_sysctl_msecs_ticks, "I", "path entry lifetime (ms)"); static int ieee80211_hwmp_roottimeout = -1; SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, roottimeout, CTLTYPE_INT | CTLFLAG_RW, &ieee80211_hwmp_roottimeout, 0, ieee80211_sysctl_msecs_ticks, 
"I", "root PREQ timeout (ms)"); static int ieee80211_hwmp_rootint = -1; SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootint, CTLTYPE_INT | CTLFLAG_RW, &ieee80211_hwmp_rootint, 0, ieee80211_sysctl_msecs_ticks, "I", "root interval (ms)"); static int ieee80211_hwmp_rannint = -1; SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rannint, CTLTYPE_INT | CTLFLAG_RW, &ieee80211_hwmp_rannint, 0, ieee80211_sysctl_msecs_ticks, "I", "root announcement interval (ms)"); #define IEEE80211_HWMP_DEFAULT_MAXHOPS 31 static ieee80211_recv_action_func hwmp_recv_action_meshpath; static struct ieee80211_mesh_proto_path mesh_proto_hwmp = { .mpp_descr = "HWMP", .mpp_ie = IEEE80211_MESHCONF_PATH_HWMP, .mpp_discover = hwmp_discover, .mpp_peerdown = hwmp_peerdown, .mpp_vattach = hwmp_vattach, .mpp_vdetach = hwmp_vdetach, .mpp_newstate = hwmp_newstate, .mpp_privlen = sizeof(struct ieee80211_hwmp_route), }; SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact, CTLTYPE_INT | CTLFLAG_RW, &mesh_proto_hwmp.mpp_inact, 0, ieee80211_sysctl_msecs_ticks, "I", "mesh route inactivity timeout (ms)"); static void ieee80211_hwmp_init(void) { ieee80211_hwmp_pathtimeout = msecs_to_ticks(5*1000); ieee80211_hwmp_roottimeout = msecs_to_ticks(5*1000); ieee80211_hwmp_rootint = msecs_to_ticks(2*1000); ieee80211_hwmp_rannint = msecs_to_ticks(1*1000); /* * Register action frame handler. */ ieee80211_recv_action_register(IEEE80211_ACTION_CAT_MESHPATH, IEEE80211_ACTION_MESHPATH_SEL, hwmp_recv_action_meshpath); /* NB: default is 5 secs per spec */ mesh_proto_hwmp.mpp_inact = msecs_to_ticks(5*1000); /* * Register HWMP. 
*/ ieee80211_mesh_register_proto_path(&mesh_proto_hwmp); } SYSINIT(wlan_hwmp, SI_SUB_DRIVERS, SI_ORDER_SECOND, ieee80211_hwmp_init, NULL); void hwmp_vattach(struct ieee80211vap *vap) { struct ieee80211_hwmp_state *hs; KASSERT(vap->iv_opmode == IEEE80211_M_MBSS, ("not a mesh vap, opmode %d", vap->iv_opmode)); hs = malloc(sizeof(struct ieee80211_hwmp_state), M_80211_VAP, M_NOWAIT | M_ZERO); if (hs == NULL) { printf("%s: couldn't alloc HWMP state\n", __func__); return; } hs->hs_maxhops = IEEE80211_HWMP_DEFAULT_MAXHOPS; callout_init(&hs->hs_roottimer, CALLOUT_MPSAFE); vap->iv_hwmp = hs; } void hwmp_vdetach(struct ieee80211vap *vap) { struct ieee80211_hwmp_state *hs = vap->iv_hwmp; callout_drain(&hs->hs_roottimer); free(vap->iv_hwmp, M_80211_VAP); vap->iv_hwmp = NULL; } int hwmp_newstate(struct ieee80211vap *vap, enum ieee80211_state ostate, int arg) { enum ieee80211_state nstate = vap->iv_state; struct ieee80211_hwmp_state *hs = vap->iv_hwmp; IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: %s -> %s (%d)\n", __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate], arg); if (nstate != IEEE80211_S_RUN && ostate == IEEE80211_S_RUN) callout_drain(&hs->hs_roottimer); if (nstate == IEEE80211_S_RUN) hwmp_rootmode_setup(vap); return 0; } static int hwmp_recv_action_meshpath(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_meshpreq_ie preq; struct ieee80211_meshprep_ie prep; struct ieee80211_meshperr_ie perr; struct ieee80211_meshrann_ie rann; const uint8_t *iefrm = frm + 2; /* action + code */ int found = 0; while (efrm - iefrm > 1) { IEEE80211_VERIFY_LENGTH(efrm - iefrm, iefrm[1] + 2, return 0); switch (*iefrm) { case IEEE80211_ELEMID_MESHPREQ: { const struct ieee80211_meshpreq_ie *mpreq = (const struct ieee80211_meshpreq_ie *) iefrm; /* XXX > 1 target */ if (mpreq->preq_len != sizeof(struct ieee80211_meshpreq_ie) - 2) { IEEE80211_DISCARD(vap, 
IEEE80211_MSG_ACTION | IEEE80211_MSG_HWMP, wh, NULL, "%s", "PREQ with wrong len"); vap->iv_stats.is_rx_mgtdiscard++; break; } memcpy(&preq, mpreq, sizeof(preq)); preq.preq_id = LE_READ_4(&mpreq->preq_id); preq.preq_origseq = LE_READ_4(&mpreq->preq_origseq); preq.preq_lifetime = LE_READ_4(&mpreq->preq_lifetime); preq.preq_metric = LE_READ_4(&mpreq->preq_metric); preq.preq_targets[0].target_seq = LE_READ_4(&mpreq->preq_targets[0].target_seq); hwmp_recv_preq(vap, ni, wh, &preq); found++; break; } case IEEE80211_ELEMID_MESHPREP: { const struct ieee80211_meshprep_ie *mprep = (const struct ieee80211_meshprep_ie *) iefrm; if (mprep->prep_len != sizeof(struct ieee80211_meshprep_ie) - 2) { IEEE80211_DISCARD(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_HWMP, wh, NULL, "%s", "PREP with wrong len"); vap->iv_stats.is_rx_mgtdiscard++; break; } memcpy(&prep, mprep, sizeof(prep)); prep.prep_targetseq = LE_READ_4(&mprep->prep_targetseq); prep.prep_lifetime = LE_READ_4(&mprep->prep_lifetime); prep.prep_metric = LE_READ_4(&mprep->prep_metric); prep.prep_origseq = LE_READ_4(&mprep->prep_origseq); hwmp_recv_prep(vap, ni, wh, &prep); found++; break; } case IEEE80211_ELEMID_MESHPERR: { const struct ieee80211_meshperr_ie *mperr = (const struct ieee80211_meshperr_ie *) iefrm; /* XXX > 1 target */ if (mperr->perr_len != sizeof(struct ieee80211_meshperr_ie) - 2) { IEEE80211_DISCARD(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_HWMP, wh, NULL, "%s", "PERR with wrong len"); vap->iv_stats.is_rx_mgtdiscard++; break; } memcpy(&perr, mperr, sizeof(perr)); perr.perr_dests[0].dest_seq = LE_READ_4(&mperr->perr_dests[0].dest_seq); hwmp_recv_perr(vap, ni, wh, &perr); found++; break; } case IEEE80211_ELEMID_MESHRANN: { const struct ieee80211_meshrann_ie *mrann = (const struct ieee80211_meshrann_ie *) iefrm; if (mrann->rann_len != sizeof(struct ieee80211_meshrann_ie) - 2) { IEEE80211_DISCARD(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_HWMP, wh, NULL, "%s", "RAN with wrong len"); vap->iv_stats.is_rx_mgtdiscard++; 
return 1; } memcpy(&rann, mrann, sizeof(rann)); rann.rann_seq = LE_READ_4(&mrann->rann_seq); rann.rann_metric = LE_READ_4(&mrann->rann_metric); hwmp_recv_rann(vap, ni, wh, &rann); found++; break; } } iefrm += iefrm[1] + 2; } if (!found) { IEEE80211_DISCARD(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_HWMP, wh, NULL, "%s", "PATH SEL action without IE"); vap->iv_stats.is_rx_mgtdiscard++; } return 0; } static int hwmp_send_action(struct ieee80211_node *ni, const uint8_t sa[IEEE80211_ADDR_LEN], const uint8_t da[IEEE80211_ADDR_LEN], uint8_t *ie, size_t len) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_bpf_params params; struct mbuf *m; uint8_t *frm; if (vap->iv_state == IEEE80211_S_CAC) { IEEE80211_NOTE(vap, IEEE80211_MSG_OUTPUT, ni, "block %s frame in CAC state", "HWMP action"); vap->iv_stats.is_tx_badstate++; return EIO; /* XXX */ } KASSERT(ni != NULL, ("null node")); /* * Hold a reference on the node so it doesn't go away until after * the xmit is complete all the way in the driver. On error we * will remove our reference. 
*/ #ifdef IEEE80211_DEBUG_REFCNT IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); #endif ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(struct ieee80211_action) + len ); if (m == NULL) { ieee80211_free_node(ni); vap->iv_stats.is_tx_nobuf++; return ENOMEM; } *frm++ = IEEE80211_ACTION_CAT_MESHPATH; *frm++ = IEEE80211_ACTION_MESHPATH_SEL; switch (*ie) { case IEEE80211_ELEMID_MESHPREQ: frm = hwmp_add_meshpreq(frm, (struct ieee80211_meshpreq_ie *)ie); break; case IEEE80211_ELEMID_MESHPREP: frm = hwmp_add_meshprep(frm, (struct ieee80211_meshprep_ie *)ie); break; case IEEE80211_ELEMID_MESHPERR: frm = hwmp_add_meshperr(frm, (struct ieee80211_meshperr_ie *)ie); break; case IEEE80211_ELEMID_MESHRANN: frm = hwmp_add_meshrann(frm, (struct ieee80211_meshrann_ie *)ie); break; } m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); M_PREPEND(m, sizeof(struct ieee80211_frame), M_DONTWAIT); if (m == NULL) { ieee80211_free_node(ni); vap->iv_stats.is_tx_nobuf++; return ENOMEM; } ieee80211_send_setup(ni, m, IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_ACTION, IEEE80211_NONQOS_TID, sa, da, sa); m->m_flags |= M_ENCAP; /* mark encapsulated */ IEEE80211_NODE_STAT(ni, tx_mgmt); memset(¶ms, 0, sizeof(params)); params.ibp_pri = WME_AC_VO; params.ibp_rate0 = ni->ni_txparms->mgmtrate; if (IEEE80211_IS_MULTICAST(da)) params.ibp_try0 = 1; else params.ibp_try0 = ni->ni_txparms->maxretry; params.ibp_power = ni->ni_txpower; return ic->ic_raw_xmit(ni, m, ¶ms); } #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) #define ADDWORD(frm, v) do { \ LE_WRITE_4(frm, v); \ frm += 4; \ } while (0) /* * Add a Mesh Path Request IE to a frame. 
*/ static uint8_t * hwmp_add_meshpreq(uint8_t *frm, const struct ieee80211_meshpreq_ie *preq) { int i; *frm++ = IEEE80211_ELEMID_MESHPREQ; *frm++ = sizeof(struct ieee80211_meshpreq_ie) - 2 + (preq->preq_tcount - 1) * sizeof(*preq->preq_targets); *frm++ = preq->preq_flags; *frm++ = preq->preq_hopcount; *frm++ = preq->preq_ttl; ADDWORD(frm, preq->preq_id); IEEE80211_ADDR_COPY(frm, preq->preq_origaddr); frm += 6; ADDWORD(frm, preq->preq_origseq); ADDWORD(frm, preq->preq_lifetime); ADDWORD(frm, preq->preq_metric); *frm++ = preq->preq_tcount; for (i = 0; i < preq->preq_tcount; i++) { *frm++ = preq->preq_targets[i].target_flags; IEEE80211_ADDR_COPY(frm, preq->preq_targets[i].target_addr); frm += 6; ADDWORD(frm, preq->preq_targets[i].target_seq); } return frm; } /* * Add a Mesh Path Reply IE to a frame. */ static uint8_t * hwmp_add_meshprep(uint8_t *frm, const struct ieee80211_meshprep_ie *prep) { *frm++ = IEEE80211_ELEMID_MESHPREP; *frm++ = sizeof(struct ieee80211_meshprep_ie) - 2; *frm++ = prep->prep_flags; *frm++ = prep->prep_hopcount; *frm++ = prep->prep_ttl; IEEE80211_ADDR_COPY(frm, prep->prep_targetaddr); frm += 6; ADDWORD(frm, prep->prep_targetseq); ADDWORD(frm, prep->prep_lifetime); ADDWORD(frm, prep->prep_metric); IEEE80211_ADDR_COPY(frm, prep->prep_origaddr); frm += 6; ADDWORD(frm, prep->prep_origseq); return frm; } /* * Add a Mesh Path Error IE to a frame. */ static uint8_t * hwmp_add_meshperr(uint8_t *frm, const struct ieee80211_meshperr_ie *perr) { int i; *frm++ = IEEE80211_ELEMID_MESHPERR; *frm++ = sizeof(struct ieee80211_meshperr_ie) - 2 + (perr->perr_ndests - 1) * sizeof(*perr->perr_dests); *frm++ = perr->perr_ttl; *frm++ = perr->perr_ndests; for (i = 0; i < perr->perr_ndests; i++) { *frm++ = perr->perr_dests[i].dest_flags; IEEE80211_ADDR_COPY(frm, perr->perr_dests[i].dest_addr); frm += 6; ADDWORD(frm, perr->perr_dests[i].dest_seq); ADDSHORT(frm, perr->perr_dests[i].dest_rcode); } return frm; } /* * Add a Root Annoucement IE to a frame. 
*/ static uint8_t * hwmp_add_meshrann(uint8_t *frm, const struct ieee80211_meshrann_ie *rann) { *frm++ = IEEE80211_ELEMID_MESHRANN; *frm++ = sizeof(struct ieee80211_meshrann_ie) - 2; *frm++ = rann->rann_flags; *frm++ = rann->rann_hopcount; *frm++ = rann->rann_ttl; IEEE80211_ADDR_COPY(frm, rann->rann_addr); frm += 6; ADDWORD(frm, rann->rann_seq); ADDWORD(frm, rann->rann_metric); return frm; } static void hwmp_rootmode_setup(struct ieee80211vap *vap) { struct ieee80211_hwmp_state *hs = vap->iv_hwmp; switch (hs->hs_rootmode) { case IEEE80211_HWMP_ROOTMODE_DISABLED: callout_drain(&hs->hs_roottimer); break; case IEEE80211_HWMP_ROOTMODE_NORMAL: case IEEE80211_HWMP_ROOTMODE_PROACTIVE: callout_reset(&hs->hs_roottimer, ieee80211_hwmp_rootint, hwmp_rootmode_cb, vap); break; case IEEE80211_HWMP_ROOTMODE_RANN: callout_reset(&hs->hs_roottimer, ieee80211_hwmp_rannint, hwmp_rootmode_rann_cb, vap); break; } } /* * Send a broadcast Path Request to find all nodes on the mesh. We are * called when the vap is configured as a HWMP root node. 
*/ #define PREQ_TFLAGS(n) preq.preq_targets[n].target_flags #define PREQ_TADDR(n) preq.preq_targets[n].target_addr #define PREQ_TSEQ(n) preq.preq_targets[n].target_seq static void hwmp_rootmode_cb(void *arg) { struct ieee80211vap *vap = (struct ieee80211vap *)arg; struct ieee80211_hwmp_state *hs = vap->iv_hwmp; struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_meshpreq_ie preq; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, vap->iv_bss, "%s", "send broadcast PREQ"); preq.preq_flags = IEEE80211_MESHPREQ_FLAGS_AM; if (ms->ms_flags & IEEE80211_MESHFLAGS_PORTAL) preq.preq_flags |= IEEE80211_MESHPREQ_FLAGS_PR; if (hs->hs_rootmode == IEEE80211_HWMP_ROOTMODE_PROACTIVE) preq.preq_flags |= IEEE80211_MESHPREQ_FLAGS_PP; preq.preq_hopcount = 0; preq.preq_ttl = ms->ms_ttl; preq.preq_id = ++hs->hs_preqid; IEEE80211_ADDR_COPY(preq.preq_origaddr, vap->iv_myaddr); preq.preq_origseq = ++hs->hs_seq; preq.preq_lifetime = ticks_to_msecs(ieee80211_hwmp_roottimeout); preq.preq_metric = IEEE80211_MESHLMETRIC_INITIALVAL; preq.preq_tcount = 1; IEEE80211_ADDR_COPY(PREQ_TADDR(0), broadcastaddr); PREQ_TFLAGS(0) = IEEE80211_MESHPREQ_TFLAGS_TO | IEEE80211_MESHPREQ_TFLAGS_RF; PREQ_TSEQ(0) = 0; vap->iv_stats.is_hwmp_rootreqs++; hwmp_send_preq(vap->iv_bss, vap->iv_myaddr, broadcastaddr, &preq); hwmp_rootmode_setup(vap); } #undef PREQ_TFLAGS #undef PREQ_TADDR #undef PREQ_TSEQ /* * Send a Root Annoucement (RANN) to find all the nodes on the mesh. We are * called when the vap is configured as a HWMP RANN root node. 
*/ static void hwmp_rootmode_rann_cb(void *arg) { struct ieee80211vap *vap = (struct ieee80211vap *)arg; struct ieee80211_hwmp_state *hs = vap->iv_hwmp; struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_meshrann_ie rann; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, vap->iv_bss, "%s", "send broadcast RANN"); rann.rann_flags = 0; if (ms->ms_flags & IEEE80211_MESHFLAGS_PORTAL) rann.rann_flags |= IEEE80211_MESHRANN_FLAGS_PR; rann.rann_hopcount = 0; rann.rann_ttl = ms->ms_ttl; IEEE80211_ADDR_COPY(rann.rann_addr, vap->iv_myaddr); rann.rann_seq = ++hs->hs_seq; rann.rann_metric = IEEE80211_MESHLMETRIC_INITIALVAL; vap->iv_stats.is_hwmp_rootrann++; hwmp_send_rann(vap->iv_bss, vap->iv_myaddr, broadcastaddr, &rann); hwmp_rootmode_setup(vap); } #define PREQ_TFLAGS(n) preq->preq_targets[n].target_flags #define PREQ_TADDR(n) preq->preq_targets[n].target_addr #define PREQ_TSEQ(n) preq->preq_targets[n].target_seq static void hwmp_recv_preq(struct ieee80211vap *vap, struct ieee80211_node *ni, const struct ieee80211_frame *wh, const struct ieee80211_meshpreq_ie *preq) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_mesh_route *rt = NULL; struct ieee80211_mesh_route *rtorig = NULL; struct ieee80211_hwmp_route *hrorig; struct ieee80211_hwmp_state *hs = vap->iv_hwmp; struct ieee80211_meshprep_ie prep; if (ni == vap->iv_bss || ni->ni_mlstate != IEEE80211_NODE_MESH_ESTABLISHED) return; /* * Ignore PREQs from us. Could happen because someone forward it * back to us. */ if (IEEE80211_ADDR_EQ(vap->iv_myaddr, preq->preq_origaddr)) return; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "received PREQ, source %s", ether_sprintf(preq->preq_origaddr)); /* * Acceptance criteria: if the PREQ is not for us and * forwarding is disabled, discard this PREQ. 
*/ if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, PREQ_TADDR(0)) && !(ms->ms_flags & IEEE80211_MESHFLAGS_FWD)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_HWMP, preq->preq_origaddr, NULL, "%s", "not accepting PREQ"); return; } rtorig = ieee80211_mesh_rt_find(vap, preq->preq_origaddr); if (rtorig == NULL) rtorig = ieee80211_mesh_rt_add(vap, preq->preq_origaddr); if (rtorig == NULL) { /* XXX stat */ return; } hrorig = IEEE80211_MESH_ROUTE_PRIV(rtorig, struct ieee80211_hwmp_route); /* * Sequence number validation. */ if (HWMP_SEQ_LEQ(preq->preq_id, hrorig->hr_preqid) && HWMP_SEQ_LEQ(preq->preq_origseq, hrorig->hr_seq)) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "discard PREQ from %s, old seq no %u <= %u", ether_sprintf(preq->preq_origaddr), preq->preq_origseq, hrorig->hr_seq); return; } hrorig->hr_preqid = preq->preq_id; hrorig->hr_seq = preq->preq_origseq; /* * Check if the PREQ is addressed to us. */ if (IEEE80211_ADDR_EQ(vap->iv_myaddr, PREQ_TADDR(0))) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "reply to %s", ether_sprintf(preq->preq_origaddr)); /* * Build and send a PREP frame. */ prep.prep_flags = 0; prep.prep_hopcount = 0; prep.prep_ttl = ms->ms_ttl; IEEE80211_ADDR_COPY(prep.prep_targetaddr, vap->iv_myaddr); prep.prep_targetseq = ++hs->hs_seq; prep.prep_lifetime = preq->preq_lifetime; prep.prep_metric = IEEE80211_MESHLMETRIC_INITIALVAL; IEEE80211_ADDR_COPY(prep.prep_origaddr, preq->preq_origaddr); prep.prep_origseq = preq->preq_origseq; hwmp_send_prep(ni, vap->iv_myaddr, wh->i_addr2, &prep); /* * Build the reverse path, if we don't have it already. */ rt = ieee80211_mesh_rt_find(vap, preq->preq_origaddr); if (rt == NULL) hwmp_discover(vap, preq->preq_origaddr, NULL); else if ((rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID) == 0) hwmp_discover(vap, rt->rt_dest, NULL); return; } /* * Proactive PREQ: reply with a proactive PREP to the * root STA if requested. 
*/ if (IEEE80211_ADDR_EQ(PREQ_TADDR(0), broadcastaddr) && (PREQ_TFLAGS(0) & ((IEEE80211_MESHPREQ_TFLAGS_TO|IEEE80211_MESHPREQ_TFLAGS_RF) == (IEEE80211_MESHPREQ_TFLAGS_TO|IEEE80211_MESHPREQ_TFLAGS_RF)))) { uint8_t rootmac[IEEE80211_ADDR_LEN]; IEEE80211_ADDR_COPY(rootmac, preq->preq_origaddr); rt = ieee80211_mesh_rt_find(vap, rootmac); if (rt == NULL) { rt = ieee80211_mesh_rt_add(vap, rootmac); if (rt == NULL) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "unable to add root mesh path to %s", ether_sprintf(rootmac)); vap->iv_stats.is_mesh_rtaddfailed++; return; } } IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "root mesh station @ %s", ether_sprintf(rootmac)); /* * Reply with a PREP if we don't have a path to the root * or if the root sent us a proactive PREQ. */ if ((rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID) == 0 || (preq->preq_flags & IEEE80211_MESHPREQ_FLAGS_PP)) { prep.prep_flags = 0; prep.prep_hopcount = 0; prep.prep_ttl = ms->ms_ttl; IEEE80211_ADDR_COPY(prep.prep_origaddr, rootmac); prep.prep_origseq = preq->preq_origseq; prep.prep_lifetime = preq->preq_lifetime; prep.prep_metric = IEEE80211_MESHLMETRIC_INITIALVAL; IEEE80211_ADDR_COPY(prep.prep_targetaddr, vap->iv_myaddr); prep.prep_targetseq = ++hs->hs_seq; hwmp_send_prep(vap->iv_bss, vap->iv_myaddr, broadcastaddr, &prep); } hwmp_discover(vap, rootmac, NULL); return; } rt = ieee80211_mesh_rt_find(vap, PREQ_TADDR(0)); /* * Forwarding and Intermediate reply for PREQs with 1 target. */ if (preq->preq_tcount == 1) { struct ieee80211_meshpreq_ie ppreq; /* propagated PREQ */ memcpy(&ppreq, preq, sizeof(ppreq)); /* * We have a valid route to this node. */ if (rt != NULL && (rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID)) { if (preq->preq_ttl > 1 && preq->preq_hopcount < hs->hs_maxhops) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "forward PREQ from %s", ether_sprintf(preq->preq_origaddr)); /* * Propagate the original PREQ. 
*/ ppreq.preq_hopcount += 1; ppreq.preq_ttl -= 1; ppreq.preq_metric += ms->ms_pmetric->mpm_metric(ni); /* * Set TO and unset RF bits because we are going * to send a PREP next. */ ppreq.preq_targets[0].target_flags |= IEEE80211_MESHPREQ_TFLAGS_TO; ppreq.preq_targets[0].target_flags &= ~IEEE80211_MESHPREQ_TFLAGS_RF; hwmp_send_preq(ni, vap->iv_myaddr, broadcastaddr, &ppreq); } /* * Check if we can send an intermediate Path Reply, * i.e., Target Only bit is not set. */ if (!(PREQ_TFLAGS(0) & IEEE80211_MESHPREQ_TFLAGS_TO)) { struct ieee80211_meshprep_ie prep; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "intermediate reply for PREQ from %s", ether_sprintf(preq->preq_origaddr)); prep.prep_flags = 0; prep.prep_hopcount = rt->rt_nhops + 1; prep.prep_ttl = ms->ms_ttl; IEEE80211_ADDR_COPY(&prep.prep_targetaddr, PREQ_TADDR(0)); prep.prep_targetseq = hrorig->hr_seq; prep.prep_lifetime = preq->preq_lifetime; prep.prep_metric = rt->rt_metric + ms->ms_pmetric->mpm_metric(ni); IEEE80211_ADDR_COPY(&prep.prep_origaddr, preq->preq_origaddr); prep.prep_origseq = hrorig->hr_seq; hwmp_send_prep(ni, vap->iv_myaddr, broadcastaddr, &prep); } /* * We have no information about this path, * propagate the PREQ. 
*/ } else if (preq->preq_ttl > 1 && preq->preq_hopcount < hs->hs_maxhops) { if (rt == NULL) { rt = ieee80211_mesh_rt_add(vap, PREQ_TADDR(0)); if (rt == NULL) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "unable to add PREQ path to %s", ether_sprintf(PREQ_TADDR(0))); vap->iv_stats.is_mesh_rtaddfailed++; return; } } rt->rt_metric = preq->preq_metric; rt->rt_lifetime = preq->preq_lifetime; hrorig = IEEE80211_MESH_ROUTE_PRIV(rt, struct ieee80211_hwmp_route); hrorig->hr_seq = preq->preq_origseq; hrorig->hr_preqid = preq->preq_id; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "forward PREQ from %s", ether_sprintf(preq->preq_origaddr)); ppreq.preq_hopcount += 1; ppreq.preq_ttl -= 1; ppreq.preq_metric += ms->ms_pmetric->mpm_metric(ni); hwmp_send_preq(ni, vap->iv_myaddr, broadcastaddr, &ppreq); } } } #undef PREQ_TFLAGS #undef PREQ_TADDR #undef PREQ_TSEQ static int hwmp_send_preq(struct ieee80211_node *ni, const uint8_t sa[IEEE80211_ADDR_LEN], const uint8_t da[IEEE80211_ADDR_LEN], struct ieee80211_meshpreq_ie *preq) { struct ieee80211_hwmp_state *hs = ni->ni_vap->iv_hwmp; /* * Enforce PREQ interval. 
*/ if (ratecheck(&hs->hs_lastpreq, &ieee80211_hwmp_preqminint) == 0) return EALREADY; getmicrouptime(&hs->hs_lastpreq); /* * mesh preq action frame format * [6] da * [6] sa * [6] addr3 = sa * [1] action * [1] category * [tlv] mesh path request */ preq->preq_ie = IEEE80211_ELEMID_MESHPREQ; return hwmp_send_action(ni, sa, da, (uint8_t *)preq, sizeof(struct ieee80211_meshpreq_ie)); } static void hwmp_recv_prep(struct ieee80211vap *vap, struct ieee80211_node *ni, const struct ieee80211_frame *wh, const struct ieee80211_meshprep_ie *prep) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_hwmp_state *hs = vap->iv_hwmp; struct ieee80211_mesh_route *rt = NULL; struct ieee80211_hwmp_route *hr; struct ieee80211com *ic = vap->iv_ic; struct ifnet *ifp = vap->iv_ifp; struct mbuf *m, *next; /* * Acceptance criteria: if the corresponding PREQ was not generated * by us and forwarding is disabled, discard this PREP. */ if (ni == vap->iv_bss || ni->ni_mlstate != IEEE80211_NODE_MESH_ESTABLISHED) return; if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, prep->prep_origaddr) && !(ms->ms_flags & IEEE80211_MESHFLAGS_FWD)) return; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "received PREP from %s", ether_sprintf(prep->prep_targetaddr)); rt = ieee80211_mesh_rt_find(vap, prep->prep_targetaddr); if (rt == NULL) { /* * If we have no entry this could be a reply to a root PREQ. 
*/ if (hs->hs_rootmode != IEEE80211_HWMP_ROOTMODE_DISABLED) { rt = ieee80211_mesh_rt_add(vap, prep->prep_targetaddr); if (rt == NULL) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "unable to add PREP path to %s", ether_sprintf(prep->prep_targetaddr)); vap->iv_stats.is_mesh_rtaddfailed++; return; } IEEE80211_ADDR_COPY(rt->rt_nexthop, wh->i_addr2); rt->rt_nhops = prep->prep_hopcount; rt->rt_lifetime = prep->prep_lifetime; rt->rt_metric = prep->prep_metric; rt->rt_flags |= IEEE80211_MESHRT_FLAGS_VALID; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "add root path to %s nhops %d metric %d (PREP)", ether_sprintf(prep->prep_targetaddr), rt->rt_nhops, rt->rt_metric); return; } return; } /* * Sequence number validation. */ hr = IEEE80211_MESH_ROUTE_PRIV(rt, struct ieee80211_hwmp_route); if (HWMP_SEQ_LEQ(prep->prep_targetseq, hr->hr_seq)) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "discard PREP from %s, old seq no %u <= %u", ether_sprintf(prep->prep_targetaddr), prep->prep_targetseq, hr->hr_seq); return; } hr->hr_seq = prep->prep_targetseq; /* * If it's NOT for us, propagate the PREP. 
*/ if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, prep->prep_origaddr) && prep->prep_ttl > 1 && prep->prep_hopcount < hs->hs_maxhops) { struct ieee80211_meshprep_ie pprep; /* propagated PREP */ IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "propagate PREP from %s", ether_sprintf(prep->prep_targetaddr)); memcpy(&pprep, prep, sizeof(pprep)); pprep.prep_hopcount += 1; pprep.prep_ttl -= 1; pprep.prep_metric += ms->ms_pmetric->mpm_metric(ni); IEEE80211_ADDR_COPY(pprep.prep_targetaddr, vap->iv_myaddr); hwmp_send_prep(ni, vap->iv_myaddr, broadcastaddr, &pprep); } hr = IEEE80211_MESH_ROUTE_PRIV(rt, struct ieee80211_hwmp_route); if (rt->rt_flags & IEEE80211_MESHRT_FLAGS_PROXY) { /* NB: never clobber a proxy entry */; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "discard PREP for %s, route is marked PROXY", ether_sprintf(prep->prep_targetaddr)); vap->iv_stats.is_hwmp_proxy++; } else if (prep->prep_origseq == hr->hr_origseq) { /* * Check if we already have a path to this node. * If we do, check if this path reply contains a * better route. */ if ((rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID) == 0 || (prep->prep_hopcount < rt->rt_nhops || prep->prep_metric < rt->rt_metric)) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "%s path to %s, hopcount %d:%d metric %d:%d", rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID ? 
"prefer" : "update", ether_sprintf(prep->prep_origaddr), rt->rt_nhops, prep->prep_hopcount, rt->rt_metric, prep->prep_metric); IEEE80211_ADDR_COPY(rt->rt_nexthop, wh->i_addr2); rt->rt_nhops = prep->prep_hopcount; rt->rt_lifetime = prep->prep_lifetime; rt->rt_metric = prep->prep_metric; rt->rt_flags |= IEEE80211_MESHRT_FLAGS_VALID; } else { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "ignore PREP for %s, hopcount %d:%d metric %d:%d", ether_sprintf(prep->prep_targetaddr), rt->rt_nhops, prep->prep_hopcount, rt->rt_metric, prep->prep_metric); } } else { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "discard PREP for %s, wrong seqno %u != %u", ether_sprintf(prep->prep_targetaddr), prep->prep_origseq, hr->hr_seq); vap->iv_stats.is_hwmp_wrongseq++; } /* * Check for frames queued awaiting path discovery. * XXX probably can tell exactly and avoid remove call * NB: hash may have false matches, if so they will get * stuck back on the stageq because there won't be * a path. */ m = ieee80211_ageq_remove(&ic->ic_stageq, (struct ieee80211_node *)(uintptr_t) ieee80211_mac_hash(ic, rt->rt_dest)); for (; m != NULL; m = next) { next = m->m_nextpkt; m->m_nextpkt = NULL; IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "flush queued frame %p len %d", m, m->m_pkthdr.len); ifp->if_transmit(ifp, m); } } static int hwmp_send_prep(struct ieee80211_node *ni, const uint8_t sa[IEEE80211_ADDR_LEN], const uint8_t da[IEEE80211_ADDR_LEN], struct ieee80211_meshprep_ie *prep) { /* NB: there's no PREP minimum interval. 
*/ /* * mesh prep action frame format * [6] da * [6] sa * [6] addr3 = sa * [1] action * [1] category * [tlv] mesh path reply */ prep->prep_ie = IEEE80211_ELEMID_MESHPREP; return hwmp_send_action(ni, sa, da, (uint8_t *)prep, sizeof(struct ieee80211_meshprep_ie)); } #define PERR_DFLAGS(n) perr.perr_dests[n].dest_flags #define PERR_DADDR(n) perr.perr_dests[n].dest_addr #define PERR_DSEQ(n) perr.perr_dests[n].dest_seq #define PERR_DRCODE(n) perr.perr_dests[n].dest_rcode static void hwmp_peerdown(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_meshperr_ie perr; struct ieee80211_mesh_route *rt; struct ieee80211_hwmp_route *hr; rt = ieee80211_mesh_rt_find(vap, ni->ni_macaddr); if (rt == NULL) return; hr = IEEE80211_MESH_ROUTE_PRIV(rt, struct ieee80211_hwmp_route); IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "%s", "delete route entry"); perr.perr_ttl = ms->ms_ttl; perr.perr_ndests = 1; PERR_DFLAGS(0) = 0; if (hr->hr_seq == 0) PERR_DFLAGS(0) |= IEEE80211_MESHPERR_DFLAGS_USN; PERR_DFLAGS(0) |= IEEE80211_MESHPERR_DFLAGS_RC; IEEE80211_ADDR_COPY(PERR_DADDR(0), rt->rt_dest); PERR_DSEQ(0) = hr->hr_seq; PERR_DRCODE(0) = IEEE80211_REASON_MESH_PERR_DEST_UNREACH; /* NB: flush everything passing through peer */ ieee80211_mesh_rt_flush_peer(vap, ni->ni_macaddr); hwmp_send_perr(vap->iv_bss, vap->iv_myaddr, broadcastaddr, &perr); } #undef PERR_DFLAGS #undef PERR_DADDR #undef PERR_DSEQ #undef PERR_DRCODE #define PERR_DFLAGS(n) perr->perr_dests[n].dest_flags #define PERR_DADDR(n) perr->perr_dests[n].dest_addr #define PERR_DSEQ(n) perr->perr_dests[n].dest_seq #define PERR_DRCODE(n) perr->perr_dests[n].dest_rcode static void hwmp_recv_perr(struct ieee80211vap *vap, struct ieee80211_node *ni, const struct ieee80211_frame *wh, const struct ieee80211_meshperr_ie *perr) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_mesh_route *rt = NULL; struct ieee80211_hwmp_route *hr; struct 
ieee80211_meshperr_ie pperr; int i, forward = 0; /* * Acceptance criteria: check if we received a PERR from a * neighbor and forwarding is enabled. */ if (ni == vap->iv_bss || ni->ni_mlstate != IEEE80211_NODE_MESH_ESTABLISHED || !(ms->ms_flags & IEEE80211_MESHFLAGS_FWD)) return; /* * Find all routing entries that match and delete them. */ for (i = 0; i < perr->perr_ndests; i++) { rt = ieee80211_mesh_rt_find(vap, PERR_DADDR(i)); if (rt == NULL) continue; hr = IEEE80211_MESH_ROUTE_PRIV(rt, struct ieee80211_hwmp_route); if (!(PERR_DFLAGS(0) & IEEE80211_MESHPERR_DFLAGS_USN) && HWMP_SEQ_GEQ(PERR_DSEQ(i), hr->hr_seq)) { ieee80211_mesh_rt_del(vap, rt->rt_dest); ieee80211_mesh_rt_flush_peer(vap, rt->rt_dest); rt = NULL; forward = 1; } } /* * Propagate the PERR if we previously found it on our routing table. * XXX handle ndest > 1 */ if (forward && perr->perr_ttl > 1) { IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP, ni, "propagate PERR from %s", ether_sprintf(wh->i_addr2)); memcpy(&pperr, perr, sizeof(*perr)); pperr.perr_ttl--; hwmp_send_perr(vap->iv_bss, vap->iv_myaddr, broadcastaddr, &pperr); } } #undef PEER_DADDR #undef PERR_DSEQ static int hwmp_send_perr(struct ieee80211_node *ni, const uint8_t sa[IEEE80211_ADDR_LEN], const uint8_t da[IEEE80211_ADDR_LEN], struct ieee80211_meshperr_ie *perr) { struct ieee80211_hwmp_state *hs = ni->ni_vap->iv_hwmp; /* * Enforce PERR interval. 
*/
	if (ratecheck(&hs->hs_lastperr, &ieee80211_hwmp_perrminint) == 0)
		return EALREADY;
	getmicrouptime(&hs->hs_lastperr);

	/*
	 * mesh perr action frame format
	 *     [6] da
	 *     [6] sa
	 *     [6] addr3 = sa
	 *     [1] action
	 *     [1] category
	 *     [tlv] mesh path error
	 */
	perr->perr_ie = IEEE80211_ELEMID_MESHPERR;
	return hwmp_send_action(ni, sa, da, (uint8_t *)perr,
	    sizeof(struct ieee80211_meshperr_ie));
}

/*
 * Process a received Root Announcement (RANN) element.
 *
 * The element is ignored unless it comes from an established mesh peer and
 * announces a root other than ourselves.  With no route to the root, path
 * discovery is started and nothing else happens.  With a route, a RANN
 * carrying a newer sequence number updates the stored sequence number and
 * is re-broadcast (hop count, TTL and metric adjusted) when TTL, hop limit
 * and the forwarding flag allow it.
 */
static void
hwmp_recv_rann(struct ieee80211vap *vap, struct ieee80211_node *ni,
    const struct ieee80211_frame *wh, const struct ieee80211_meshrann_ie *rann)
{
	struct ieee80211_mesh_state *ms = vap->iv_mesh;
	struct ieee80211_hwmp_state *hs = vap->iv_hwmp;
	struct ieee80211_mesh_route *rt = NULL;
	struct ieee80211_hwmp_route *hr;
	struct ieee80211_meshrann_ie prann;	/* propagated RANN */

	if (ni == vap->iv_bss ||
	    ni->ni_mlstate != IEEE80211_NODE_MESH_ESTABLISHED ||
	    IEEE80211_ADDR_EQ(rann->rann_addr, vap->iv_myaddr))
		return;

	rt = ieee80211_mesh_rt_find(vap, rann->rann_addr);
	/*
	 * Discover the path to the root mesh STA.
	 * If we already know it, propagate the RANN element.
	 */
	if (rt == NULL) {
		hwmp_discover(vap, rann->rann_addr, NULL);
		return;
	}
	hr = IEEE80211_MESH_ROUTE_PRIV(rt, struct ieee80211_hwmp_route);
	if (HWMP_SEQ_GT(rann->rann_seq, hr->hr_seq)) {
		/* NB: the sequence number is recorded even if not forwarded */
		hr->hr_seq = rann->rann_seq;
		if (rann->rann_ttl > 1 &&
		    rann->rann_hopcount < hs->hs_maxhops &&
		    (ms->ms_flags & IEEE80211_MESHFLAGS_FWD)) {
			memcpy(&prann, rann, sizeof(prann));
			prann.rann_hopcount += 1;
			prann.rann_ttl -= 1;
			prann.rann_metric += ms->ms_pmetric->mpm_metric(ni);
			hwmp_send_rann(vap->iv_bss, vap->iv_myaddr,
			    broadcastaddr, &prann);
		}
	}
}

/*
 * Stamp the element id into the RANN and pass it to hwmp_send_action();
 * that call's result is returned.
 */
static int
hwmp_send_rann(struct ieee80211_node *ni,
    const uint8_t sa[IEEE80211_ADDR_LEN],
    const uint8_t da[IEEE80211_ADDR_LEN],
    struct ieee80211_meshrann_ie *rann)
{
	/*
	 * mesh rann action frame format
	 *     [6] da
	 *     [6] sa
	 *     [6] addr3 = sa
	 *     [1] action
	 *     [1] category
	 *     [tlv] root announcement
	 */
	rann->rann_ie = IEEE80211_ELEMID_MESHRANN;
	return hwmp_send_action(ni, sa, da, (uint8_t *)rann,
	    sizeof(struct ieee80211_meshrann_ie));
}

#define	PREQ_TFLAGS(n)	preq.preq_targets[n].target_flags
#define	PREQ_TADDR(n)	preq.preq_targets[n].target_addr
#define	PREQ_TSEQ(n)	preq.preq_targets[n].target_seq
/*
 * Find the node to transmit through in order to reach dest, starting path
 * discovery when no valid route exists.
 *
 *   vap   mesh vap (asserted)
 *   dest  destination address; must not be our own address (asserted)
 *   m     frame waiting for the path, or NULL
 *
 * Returns the transmit node, or NULL when there is no valid path (yet).
 * A multicast dest is looked up directly and returned without touching the
 * routing table.  For a unicast dest without a valid route a PREQ is sent;
 * m, if any, is then queued on the staging queue, whereas it is freed when
 * no PREQ could be sent.
 */
static struct ieee80211_node *
hwmp_discover(struct ieee80211vap *vap,
    const uint8_t dest[IEEE80211_ADDR_LEN], struct mbuf *m)
{
	struct ieee80211_hwmp_state *hs = vap->iv_hwmp;
	struct ieee80211_mesh_state *ms = vap->iv_mesh;
	struct ieee80211_mesh_route *rt = NULL;
	struct ieee80211_hwmp_route *hr;
	struct ieee80211_meshpreq_ie preq;
	struct ieee80211_node *ni;
	int sendpreq = 0;	/* set once a PREQ has been built for dest */

	KASSERT(vap->iv_opmode == IEEE80211_M_MBSS,
	    ("not a mesh vap, opmode %d", vap->iv_opmode));

	KASSERT(!IEEE80211_ADDR_EQ(vap->iv_myaddr, dest),
	    ("%s: discovering self!", __func__));

	ni = NULL;
	if (!IEEE80211_IS_MULTICAST(dest)) {
		rt = ieee80211_mesh_rt_find(vap, dest);
		if (rt == NULL) {
			rt = ieee80211_mesh_rt_add(vap, dest);
			if (rt == NULL) {
				IEEE80211_NOTE(vap, IEEE80211_MSG_HWMP,
				    ni, "unable to add discovery path to %s",
				    ether_sprintf(dest));
				vap->iv_stats.is_mesh_rtaddfailed++;
				/* NB: sendpreq is 0, so m gets freed at done */
				goto done;
			}
		}
		hr = IEEE80211_MESH_ROUTE_PRIV(rt,
		    struct ieee80211_hwmp_route);
		if ((rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID) == 0) {
			/* NB: an already assigned origseq is reused */
			if (hr->hr_origseq == 0)
				hr->hr_origseq = ++hs->hs_seq;
			rt->rt_metric = IEEE80211_MESHLMETRIC_INITIALVAL;
			rt->rt_lifetime =
			    ticks_to_msecs(ieee80211_hwmp_pathtimeout);
			/* XXX check preq retries */
			sendpreq = 1;
			IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_HWMP, dest,
			    "start path discovery (src %s)",
			    m == NULL ? "" : ether_sprintf(
				mtod(m, struct ether_header *)->ether_shost));
			/*
			 * Try to discover the path for this node.
			 */
			preq.preq_flags = 0;
			preq.preq_hopcount = 0;
			preq.preq_ttl = ms->ms_ttl;
			preq.preq_id = ++hs->hs_preqid;
			IEEE80211_ADDR_COPY(preq.preq_origaddr, vap->iv_myaddr);
			preq.preq_origseq = hr->hr_origseq;
			preq.preq_lifetime = rt->rt_lifetime;
			preq.preq_metric = rt->rt_metric;
			preq.preq_tcount = 1;
			IEEE80211_ADDR_COPY(PREQ_TADDR(0), dest);
			PREQ_TFLAGS(0) = 0;
			if (ieee80211_hwmp_targetonly)
				PREQ_TFLAGS(0) |= IEEE80211_MESHPREQ_TFLAGS_TO;
			if (ieee80211_hwmp_replyforward)
				PREQ_TFLAGS(0) |= IEEE80211_MESHPREQ_TFLAGS_RF;
			PREQ_TFLAGS(0) |= IEEE80211_MESHPREQ_TFLAGS_USN;
			PREQ_TSEQ(0) = 0;
			/* XXX check return value */
			hwmp_send_preq(vap->iv_bss, vap->iv_myaddr,
			    broadcastaddr, &preq);
		}
		if (rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID)
			ni = ieee80211_find_txnode(vap, rt->rt_nexthop);
	} else {
		ni = ieee80211_find_txnode(vap, dest);
		/* NB: if null then we leak mbuf */
		KASSERT(ni != NULL, ("leak mcast frame"));
		return ni;
	}
done:
	if (ni == NULL && m != NULL) {
		if (sendpreq) {
			struct ieee80211com *ic = vap->iv_ic;
			/*
			 * Queue packet for transmit when path discovery
			 * completes.  If discovery never completes the
			 * frame will be flushed by way of the aging timer.
*/ IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_HWMP, dest, "%s", "queue frame until path found"); m->m_pkthdr.rcvif = (void *)(uintptr_t) ieee80211_mac_hash(ic, dest); /* XXX age chosen randomly */ ieee80211_ageq_append(&ic->ic_stageq, m, IEEE80211_INACT_WAIT); } else { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_HWMP, dest, NULL, "%s", "no valid path to this node"); m_freem(m); } } return ni; } #undef PREQ_TFLAGS #undef PREQ_TADDR #undef PREQ_TSEQ static int hwmp_ioctl_get80211(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_hwmp_state *hs = vap->iv_hwmp; int error; if (vap->iv_opmode != IEEE80211_M_MBSS) return ENOSYS; error = 0; switch (ireq->i_type) { case IEEE80211_IOC_HWMP_ROOTMODE: ireq->i_val = hs->hs_rootmode; break; case IEEE80211_IOC_HWMP_MAXHOPS: ireq->i_val = hs->hs_maxhops; break; default: return ENOSYS; } return error; } IEEE80211_IOCTL_GET(hwmp, hwmp_ioctl_get80211); static int hwmp_ioctl_set80211(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_hwmp_state *hs = vap->iv_hwmp; int error; if (vap->iv_opmode != IEEE80211_M_MBSS) return ENOSYS; error = 0; switch (ireq->i_type) { case IEEE80211_IOC_HWMP_ROOTMODE: if (ireq->i_val < 0 || ireq->i_val > 3) return EINVAL; hs->hs_rootmode = ireq->i_val; hwmp_rootmode_setup(vap); break; case IEEE80211_IOC_HWMP_MAXHOPS: if (ireq->i_val <= 0 || ireq->i_val > 255) return EINVAL; hs->hs_maxhops = ireq->i_val; break; default: return ENOSYS; } return error; } IEEE80211_IOCTL_SET(hwmp, hwmp_ioctl_set80211); Index: stable/9/sys/net80211/ieee80211_mesh.c =================================================================== --- stable/9/sys/net80211/ieee80211_mesh.c (revision 273911) +++ stable/9/sys/net80211/ieee80211_mesh.c (revision 273912) @@ -1,2765 +1,2765 @@ /*- * Copyright (c) 2009 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #ifdef __FreeBSD__ __FBSDID("$FreeBSD$"); #endif /* * IEEE 802.11s Mesh Point (MBSS) support. * * Based on March 2009, D3.0 802.11s draft spec. 
*/ #include "opt_inet.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void mesh_rt_flush_invalid(struct ieee80211vap *); static int mesh_select_proto_path(struct ieee80211vap *, const char *); static int mesh_select_proto_metric(struct ieee80211vap *, const char *); static void mesh_vattach(struct ieee80211vap *); static int mesh_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void mesh_rt_cleanup_cb(void *); static void mesh_linkchange(struct ieee80211_node *, enum ieee80211_mesh_mlstate); static void mesh_checkid(void *, struct ieee80211_node *); static uint32_t mesh_generateid(struct ieee80211vap *); static int mesh_checkpseq(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN], uint32_t); static struct ieee80211_node * mesh_find_txnode(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void mesh_forward(struct ieee80211vap *, struct mbuf *, const struct ieee80211_meshcntl *); static int mesh_input(struct ieee80211_node *, struct mbuf *, int, int); static void mesh_recv_mgmt(struct ieee80211_node *, struct mbuf *, int, int, int); static void mesh_recv_ctl(struct ieee80211_node *, struct mbuf *, int); static void mesh_peer_timeout_setup(struct ieee80211_node *); static void mesh_peer_timeout_backoff(struct ieee80211_node *); static void mesh_peer_timeout_cb(void *); static __inline void mesh_peer_timeout_stop(struct ieee80211_node *); static int mesh_verify_meshid(struct ieee80211vap *, const uint8_t *); static int mesh_verify_meshconf(struct ieee80211vap *, const uint8_t *); static int mesh_verify_meshpeer(struct ieee80211vap *, uint8_t, const uint8_t *); uint32_t mesh_airtime_calc(struct ieee80211_node *); /* * Timeout values come from the specification and are in milliseconds. 
*/
static SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD, 0,
    "IEEE 802.11s parameters");
/*
 * NB: the three timeouts start out as -1 and are given their real values
 *     by ieee80211_mesh_init() through msecs_to_ticks().
 */
static int ieee80211_mesh_retrytimeout = -1;
SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, retrytimeout, CTLTYPE_INT | CTLFLAG_RW,
    &ieee80211_mesh_retrytimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "Retry timeout (msec)");
static int ieee80211_mesh_holdingtimeout = -1;
SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, holdingtimeout, CTLTYPE_INT | CTLFLAG_RW,
    &ieee80211_mesh_holdingtimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "Holding state timeout (msec)");
static int ieee80211_mesh_confirmtimeout = -1;
SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, confirmtimeout, CTLTYPE_INT | CTLFLAG_RW,
    &ieee80211_mesh_confirmtimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
    "Confirm state timeout (msec)");
static int ieee80211_mesh_maxretries = 2;
-SYSCTL_INT(_net_wlan_mesh, OID_AUTO, maxretries, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_INT(_net_wlan_mesh, OID_AUTO, maxretries, CTLFLAG_RW,
    &ieee80211_mesh_maxretries, 0,
    "Maximum retries during peer link establishment");

/* All-ones address; mesh_rt_add_locked() refuses to add a route to it. */
static const uint8_t broadcastaddr[IEEE80211_ADDR_LEN] =
	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/* Action frame handlers, registered by ieee80211_mesh_init(). */
static	ieee80211_recv_action_func mesh_recv_action_meshpeering_open;
static	ieee80211_recv_action_func mesh_recv_action_meshpeering_confirm;
static	ieee80211_recv_action_func mesh_recv_action_meshpeering_close;
static	ieee80211_recv_action_func mesh_recv_action_meshlmetric_req;
static	ieee80211_recv_action_func mesh_recv_action_meshlmetric_rep;

static	ieee80211_send_action_func mesh_send_action_meshpeering_open;
static	ieee80211_send_action_func mesh_send_action_meshpeering_confirm;
static	ieee80211_send_action_func mesh_send_action_meshpeering_close;
static	ieee80211_send_action_func mesh_send_action_meshlink_request;
static	ieee80211_send_action_func mesh_send_action_meshlink_reply;

/* The link metric registered by ieee80211_mesh_init(). */
static const struct ieee80211_mesh_proto_metric mesh_metric_airtime = {
	.mpm_descr	= "AIRTIME",
	.mpm_ie		= IEEE80211_MESHCONF_METRIC_AIRTIME,
	.mpm_metric	= mesh_airtime_calc,
};
static struct ieee80211_mesh_proto_path mesh_proto_paths[4]; static struct ieee80211_mesh_proto_metric mesh_proto_metrics[4]; #define MESH_RT_LOCK(ms) mtx_lock(&(ms)->ms_rt_lock) #define MESH_RT_LOCK_ASSERT(ms) mtx_assert(&(ms)->ms_rt_lock, MA_OWNED) #define MESH_RT_UNLOCK(ms) mtx_unlock(&(ms)->ms_rt_lock) MALLOC_DEFINE(M_80211_MESH_RT, "80211mesh", "802.11s routing table"); /* * Helper functions to manipulate the Mesh routing table. */ static struct ieee80211_mesh_route * mesh_rt_find_locked(struct ieee80211_mesh_state *ms, const uint8_t dest[IEEE80211_ADDR_LEN]) { struct ieee80211_mesh_route *rt; MESH_RT_LOCK_ASSERT(ms); TAILQ_FOREACH(rt, &ms->ms_routes, rt_next) { if (IEEE80211_ADDR_EQ(dest, rt->rt_dest)) return rt; } return NULL; } static struct ieee80211_mesh_route * mesh_rt_add_locked(struct ieee80211_mesh_state *ms, const uint8_t dest[IEEE80211_ADDR_LEN]) { struct ieee80211_mesh_route *rt; KASSERT(!IEEE80211_ADDR_EQ(broadcastaddr, dest), ("%s: adding broadcast to the routing table", __func__)); MESH_RT_LOCK_ASSERT(ms); rt = malloc(ALIGN(sizeof(struct ieee80211_mesh_route)) + ms->ms_ppath->mpp_privlen, M_80211_MESH_RT, M_NOWAIT | M_ZERO); if (rt != NULL) { IEEE80211_ADDR_COPY(rt->rt_dest, dest); rt->rt_priv = (void *)ALIGN(&rt[1]); rt->rt_crtime = ticks; TAILQ_INSERT_TAIL(&ms->ms_routes, rt, rt_next); } return rt; } struct ieee80211_mesh_route * ieee80211_mesh_rt_find(struct ieee80211vap *vap, const uint8_t dest[IEEE80211_ADDR_LEN]) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_mesh_route *rt; MESH_RT_LOCK(ms); rt = mesh_rt_find_locked(ms, dest); MESH_RT_UNLOCK(ms); return rt; } struct ieee80211_mesh_route * ieee80211_mesh_rt_add(struct ieee80211vap *vap, const uint8_t dest[IEEE80211_ADDR_LEN]) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_mesh_route *rt; KASSERT(ieee80211_mesh_rt_find(vap, dest) == NULL, ("%s: duplicate entry in the routing table", __func__)); KASSERT(!IEEE80211_ADDR_EQ(vap->iv_myaddr, dest), ("%s: 
adding self to the routing table", __func__)); MESH_RT_LOCK(ms); rt = mesh_rt_add_locked(ms, dest); MESH_RT_UNLOCK(ms); return rt; } /* * Add a proxy route (as needed) for the specified destination. */ void ieee80211_mesh_proxy_check(struct ieee80211vap *vap, const uint8_t dest[IEEE80211_ADDR_LEN]) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_mesh_route *rt; MESH_RT_LOCK(ms); rt = mesh_rt_find_locked(ms, dest); if (rt == NULL) { rt = mesh_rt_add_locked(ms, dest); if (rt == NULL) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_MESH, dest, "%s", "unable to add proxy entry"); vap->iv_stats.is_mesh_rtaddfailed++; } else { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_MESH, dest, "%s", "add proxy entry"); IEEE80211_ADDR_COPY(rt->rt_nexthop, vap->iv_myaddr); rt->rt_flags |= IEEE80211_MESHRT_FLAGS_VALID | IEEE80211_MESHRT_FLAGS_PROXY; } /* XXX assert PROXY? */ } else if ((rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID) == 0) { struct ieee80211com *ic = vap->iv_ic; /* * Fix existing entry created by received frames from * stations that have some memory of dest. We also * flush any frames held on the staging queue; delivering * them is too much trouble right now. */ IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_MESH, dest, "%s", "fix proxy entry"); IEEE80211_ADDR_COPY(rt->rt_nexthop, vap->iv_myaddr); rt->rt_flags |= IEEE80211_MESHRT_FLAGS_VALID | IEEE80211_MESHRT_FLAGS_PROXY; /* XXX belongs in hwmp */ ieee80211_ageq_drain_node(&ic->ic_stageq, (void *)(uintptr_t) ieee80211_mac_hash(ic, dest)); /* XXX stat? 
*/ } MESH_RT_UNLOCK(ms); } static __inline void mesh_rt_del(struct ieee80211_mesh_state *ms, struct ieee80211_mesh_route *rt) { TAILQ_REMOVE(&ms->ms_routes, rt, rt_next); free(rt, M_80211_MESH_RT); } void ieee80211_mesh_rt_del(struct ieee80211vap *vap, const uint8_t dest[IEEE80211_ADDR_LEN]) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_mesh_route *rt, *next; MESH_RT_LOCK(ms); TAILQ_FOREACH_SAFE(rt, &ms->ms_routes, rt_next, next) { if (IEEE80211_ADDR_EQ(rt->rt_dest, dest)) { mesh_rt_del(ms, rt); MESH_RT_UNLOCK(ms); return; } } MESH_RT_UNLOCK(ms); } void ieee80211_mesh_rt_flush(struct ieee80211vap *vap) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_mesh_route *rt, *next; if (ms == NULL) return; MESH_RT_LOCK(ms); TAILQ_FOREACH_SAFE(rt, &ms->ms_routes, rt_next, next) mesh_rt_del(ms, rt); MESH_RT_UNLOCK(ms); } void ieee80211_mesh_rt_flush_peer(struct ieee80211vap *vap, const uint8_t peer[IEEE80211_ADDR_LEN]) { struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_mesh_route *rt, *next; MESH_RT_LOCK(ms); TAILQ_FOREACH_SAFE(rt, &ms->ms_routes, rt_next, next) { if (IEEE80211_ADDR_EQ(rt->rt_nexthop, peer)) mesh_rt_del(ms, rt); } MESH_RT_UNLOCK(ms); } /* * Flush expired routing entries, i.e. those in invalid state for * some time. 
*/
/*
 * Walks the routing table under the table lock and deletes every entry
 * that does not have IEEE80211_MESHRT_FLAGS_VALID set and whose age
 * (ticks - rt_crtime) has reached the path protocol's mpp_inact value.
 * Does nothing when the vap has no mesh state.
 */
static void
mesh_rt_flush_invalid(struct ieee80211vap *vap)
{
	struct ieee80211_mesh_state *ms = vap->iv_mesh;
	struct ieee80211_mesh_route *rt, *next;

	if (ms == NULL)
		return;
	MESH_RT_LOCK(ms);
	TAILQ_FOREACH_SAFE(rt, &ms->ms_routes, rt_next, next) {
		if ((rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID) == 0 &&
		    ticks - rt->rt_crtime >= ms->ms_ppath->mpp_inact)
			mesh_rt_del(ms, rt);
	}
	MESH_RT_UNLOCK(ms);
}

/* Element count of a true array; only valid until the #undef below. */
#define	N(a)	(sizeof(a) / sizeof(a[0]))
/*
 * Register a path selection protocol.
 *
 * Returns EEXIST when an entry whose description matches mpp->mpp_descr
 * (compared over IEEE80211_MESH_PROTO_DSZ bytes) is already in the
 * table, ENOSPC when no inactive slot is left, and 0 after copying *mpp
 * into the first inactive slot and marking it active.
 */
int
ieee80211_mesh_register_proto_path(const struct ieee80211_mesh_proto_path *mpp)
{
	int i, firstempty = -1;

	for (i = 0; i < N(mesh_proto_paths); i++) {
		if (strncmp(mpp->mpp_descr, mesh_proto_paths[i].mpp_descr,
		    IEEE80211_MESH_PROTO_DSZ) == 0)
			return EEXIST;
		/* remember the first free slot but keep scanning for dups */
		if (!mesh_proto_paths[i].mpp_active && firstempty == -1)
			firstempty = i;
	}
	if (firstempty < 0)
		return ENOSPC;
	memcpy(&mesh_proto_paths[firstempty], mpp, sizeof(*mpp));
	mesh_proto_paths[firstempty].mpp_active = 1;
	return 0;
}

/*
 * Register a link metric protocol.  Same contract as
 * ieee80211_mesh_register_proto_path(), operating on the
 * mesh_proto_metrics table: EEXIST on a duplicate description,
 * ENOSPC when the table is full, 0 on success.
 */
int
ieee80211_mesh_register_proto_metric(const struct
    ieee80211_mesh_proto_metric *mpm)
{
	int i, firstempty = -1;

	for (i = 0; i < N(mesh_proto_metrics); i++) {
		if (strncmp(mpm->mpm_descr, mesh_proto_metrics[i].mpm_descr,
		    IEEE80211_MESH_PROTO_DSZ) == 0)
			return EEXIST;
		/* remember the first free slot but keep scanning for dups */
		if (!mesh_proto_metrics[i].mpm_active && firstempty == -1)
			firstempty = i;
	}
	if (firstempty < 0)
		return ENOSPC;
	memcpy(&mesh_proto_metrics[firstempty], mpm, sizeof(*mpm));
	mesh_proto_metrics[firstempty].mpm_active = 1;
	return 0;
}

/*
 * Point ms_ppath at the registered path protocol whose description
 * matches name (case-insensitive).  Returns 0 on success, ENOENT when
 * no table entry matches; ms_ppath is left untouched in that case.
 */
static int
mesh_select_proto_path(struct ieee80211vap *vap, const char *name)
{
	struct ieee80211_mesh_state *ms = vap->iv_mesh;
	int i;

	for (i = 0; i < N(mesh_proto_paths); i++) {
		if (strcasecmp(mesh_proto_paths[i].mpp_descr, name) == 0) {
			ms->ms_ppath = &mesh_proto_paths[i];
			return 0;
		}
	}
	return ENOENT;
}

/*
 * Point ms_pmetric at the registered metric protocol whose description
 * matches name (case-insensitive).  Returns 0 on success, ENOENT when
 * no table entry matches.
 */
static int
mesh_select_proto_metric(struct ieee80211vap *vap, const char *name)
{
	struct ieee80211_mesh_state *ms = vap->iv_mesh;
	int i;

	for (i = 0; i < N(mesh_proto_metrics); i++) {
		if (strcasecmp(mesh_proto_metrics[i].mpm_descr, name) == 0) {
			ms->ms_pmetric = &mesh_proto_metrics[i];
			return 0;
		}
	}
	return ENOENT;
}
#undef	N

/*
 * One-time module setup (run through the SYSINIT below): clears both
 * protocol tables, initialises the three peering timeouts to 40ms
 * worth of ticks, registers the receive and send handlers for the
 * mesh peering and mesh link metric action frames, and registers the
 * airtime link metric.
 */
static void
ieee80211_mesh_init(void)
{

	memset(mesh_proto_paths, 0, sizeof(mesh_proto_paths));
	memset(mesh_proto_metrics, 0, sizeof(mesh_proto_metrics));

	/*
	 * Setup mesh parameters that depends on the clock frequency.
	 */
	ieee80211_mesh_retrytimeout = msecs_to_ticks(40);
	ieee80211_mesh_holdingtimeout = msecs_to_ticks(40);
	ieee80211_mesh_confirmtimeout = msecs_to_ticks(40);

	/*
	 * Register action frame handlers.
	 */
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_MESHPEERING,
	    IEEE80211_ACTION_MESHPEERING_OPEN,
	    mesh_recv_action_meshpeering_open);
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_MESHPEERING,
	    IEEE80211_ACTION_MESHPEERING_CONFIRM,
	    mesh_recv_action_meshpeering_confirm);
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_MESHPEERING,
	    IEEE80211_ACTION_MESHPEERING_CLOSE,
	    mesh_recv_action_meshpeering_close);
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_MESHLMETRIC,
	    IEEE80211_ACTION_MESHLMETRIC_REQ, mesh_recv_action_meshlmetric_req);
	ieee80211_recv_action_register(IEEE80211_ACTION_CAT_MESHLMETRIC,
	    IEEE80211_ACTION_MESHLMETRIC_REP, mesh_recv_action_meshlmetric_rep);

	ieee80211_send_action_register(IEEE80211_ACTION_CAT_MESHPEERING,
	    IEEE80211_ACTION_MESHPEERING_OPEN,
	    mesh_send_action_meshpeering_open);
	ieee80211_send_action_register(IEEE80211_ACTION_CAT_MESHPEERING,
	    IEEE80211_ACTION_MESHPEERING_CONFIRM,
	    mesh_send_action_meshpeering_confirm);
	ieee80211_send_action_register(IEEE80211_ACTION_CAT_MESHPEERING,
	    IEEE80211_ACTION_MESHPEERING_CLOSE,
	    mesh_send_action_meshpeering_close);
	ieee80211_send_action_register(IEEE80211_ACTION_CAT_MESHLMETRIC,
	    IEEE80211_ACTION_MESHLMETRIC_REQ,
	    mesh_send_action_meshlink_request);
	ieee80211_send_action_register(IEEE80211_ACTION_CAT_MESHLMETRIC,
	    IEEE80211_ACTION_MESHLMETRIC_REP,
	    mesh_send_action_meshlink_reply);

	/*
	 * Register Airtime Link Metric.
	 */
	ieee80211_mesh_register_proto_metric(&mesh_metric_airtime);

}
SYSINIT(wlan_mesh, SI_SUB_DRIVERS, SI_ORDER_FIRST, ieee80211_mesh_init, NULL);

/*
 * Install mesh_vattach as the vap attach hook for IEEE80211_M_MBSS.
 */
void
ieee80211_mesh_attach(struct ieee80211com *ic)
{
	ic->ic_vattach[IEEE80211_M_MBSS] = mesh_vattach;
}

/*
 * Counterpart of ieee80211_mesh_attach(); currently has nothing to undo.
 */
void
ieee80211_mesh_detach(struct ieee80211com *ic)
{
}

/*
 * Node iterator callback used when the vap goes down (see mesh_vdetach
 * and the RUN -> INIT transition in mesh_newstate).  Sends a peering
 * CLOSE to nodes with an established link, drains the node's peer link
 * timer, and drains the node's entries from the ic_stageq age queue.
 * The arg parameter is not used.
 */
static void
mesh_vdetach_peers(void *arg, struct ieee80211_node *ni)
{
	struct ieee80211com *ic = ni->ni_ic;
	uint16_t args[3];

	if (ni->ni_mlstate == IEEE80211_NODE_MESH_ESTABLISHED) {
		args[0] = ni->ni_mlpid;
		args[1] = ni->ni_mllid;
		args[2] = IEEE80211_REASON_PEER_LINK_CANCELED;
		ieee80211_send_action(ni,
		    IEEE80211_ACTION_CAT_MESHPEERING,
		    IEEE80211_ACTION_MESHPEERING_CLOSE,
		    args);
	}
	callout_drain(&ni->ni_mltimer);
	/* XXX belongs in hwmp */
	ieee80211_ageq_drain_node(&ic->ic_stageq,
	   (void *)(uintptr_t) ieee80211_mac_hash(ic, ni->ni_macaddr));
}

/*
 * vap detach hook (installed as iv_opdetach by mesh_vattach): stops the
 * route cleanup timer, tears down peers, flushes the routing table,
 * destroys the table lock, lets the path protocol detach, then frees
 * the mesh state and clears vap->iv_mesh.
 *
 * NOTE(review): ms is dereferenced unconditionally, while mesh_vattach
 * can return with iv_mesh unset when its allocation fails -- confirm
 * callers never reach here in that case.
 */
static void
mesh_vdetach(struct ieee80211vap *vap)
{
	struct ieee80211_mesh_state *ms = vap->iv_mesh;

	callout_drain(&ms->ms_cleantimer);
	ieee80211_iterate_nodes(&vap->iv_ic->ic_sta, mesh_vdetach_peers,
	    NULL);
	ieee80211_mesh_rt_flush(vap);
	mtx_destroy(&ms->ms_rt_lock);
	ms->ms_ppath->mpp_vdetach(vap);
	free(vap->iv_mesh, M_80211_VAP);
	vap->iv_mesh = NULL;
}

/*
 * vap attach hook for MBSS vaps: installs the mesh method pointers,
 * allocates zeroed mesh state (non-sleeping allocation), initialises
 * its defaults, routing table, lock and cleanup timer, and selects the
 * "AIRTIME" metric and "HWMP" path protocols before calling the path
 * protocol's own attach hook.
 *
 * On allocation failure a message is printed and the function returns
 * with the method pointers installed but without assigning iv_mesh.
 */
static void
mesh_vattach(struct ieee80211vap *vap)
{
	struct ieee80211_mesh_state *ms;
	vap->iv_newstate = mesh_newstate;
	vap->iv_input = mesh_input;
	vap->iv_opdetach = mesh_vdetach;
	vap->iv_recv_mgmt = mesh_recv_mgmt;
	vap->iv_recv_ctl = mesh_recv_ctl;
	ms = malloc(sizeof(struct ieee80211_mesh_state), M_80211_VAP,
	    M_NOWAIT | M_ZERO);
	if (ms == NULL) {
		printf("%s: couldn't alloc MBSS state\n", __func__);
		return;
	}
	vap->iv_mesh = ms;
	ms->ms_seq = 0;
	ms->ms_flags = (IEEE80211_MESHFLAGS_AP | IEEE80211_MESHFLAGS_FWD);
	ms->ms_ttl = IEEE80211_MESH_DEFAULT_TTL;
	TAILQ_INIT(&ms->ms_routes);
	mtx_init(&ms->ms_rt_lock, "MBSS", "802.11s routing table", MTX_DEF);
	callout_init(&ms->ms_cleantimer, CALLOUT_MPSAFE);
	/* the select helpers return ENOENT on a miss; the asserts catch it */
	mesh_select_proto_metric(vap, "AIRTIME");
	KASSERT(ms->ms_pmetric, ("ms_pmetric == NULL"));
	mesh_select_proto_path(vap, "HWMP");
	KASSERT(ms->ms_ppath, ("ms_ppath == NULL"));
	ms->ms_ppath->mpp_vattach(vap);
}

/*
 * IEEE80211_M_MBSS vap state machine handler.
 *
 * Records nstate in vap->iv_state, performs the per-transition work in
 * the switch below, then notifies the path protocol with the OLD state.
 * The route cleanup timer is drained when leaving RUN and (re)armed
 * whenever RUN is entered.  Always returns 0.
 */
static int
mesh_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
{
	struct ieee80211_mesh_state *ms = vap->iv_mesh;
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211_node *ni;
	enum ieee80211_state ostate;

	IEEE80211_LOCK_ASSERT(ic);

	ostate = vap->iv_state;
	IEEE80211_DPRINTF(vap, IEEE80211_MSG_STATE, "%s: %s -> %s (%d)\n",
	    __func__, ieee80211_state_name[ostate],
	    ieee80211_state_name[nstate], arg);
	vap->iv_state = nstate;		/* state transition */
	if (ostate != IEEE80211_S_SCAN)
		ieee80211_cancel_scan(vap);	/* background scan */
	ni = vap->iv_bss;			/* NB: no reference held */
	if (nstate != IEEE80211_S_RUN && ostate == IEEE80211_S_RUN)
		callout_drain(&ms->ms_cleantimer);
	switch (nstate) {
	case IEEE80211_S_INIT:
		switch (ostate) {
		case IEEE80211_S_SCAN:
			ieee80211_cancel_scan(vap);
			break;
		case IEEE80211_S_CAC:
			ieee80211_dfs_cac_stop(vap);
			break;
		case IEEE80211_S_RUN:
			ieee80211_iterate_nodes(&ic->ic_sta,
			    mesh_vdetach_peers, NULL);
			break;
		default:
			break;
		}
		if (ostate != IEEE80211_S_INIT) {
			/* NB: optimize INIT -> INIT case */
			ieee80211_reset_bss(vap);
			ieee80211_mesh_rt_flush(vap);
		}
		break;
	case IEEE80211_S_SCAN:
		switch (ostate) {
		case IEEE80211_S_INIT:
			if (vap->iv_des_chan != IEEE80211_CHAN_ANYC &&
			    !IEEE80211_IS_CHAN_RADAR(vap->iv_des_chan) &&
			    ms->ms_idlen != 0) {
				/*
				 * Already have a channel and a mesh ID; bypass
				 * the scan and startup immediately.
				 */
				ieee80211_create_ibss(vap, vap->iv_des_chan);
				break;
			}
			/*
			 * Initiate a scan.  We can come here as a result
			 * of an IEEE80211_IOC_SCAN_REQ too in which case
			 * the vap will be marked with IEEE80211_FEXT_SCANREQ
			 * and the scan request parameters will be present
			 * in iv_scanreq.  Otherwise we do the default.
			*/
			if (vap->iv_flags_ext & IEEE80211_FEXT_SCANREQ) {
				ieee80211_check_scan(vap,
				    vap->iv_scanreq_flags,
				    vap->iv_scanreq_duration,
				    vap->iv_scanreq_mindwell,
				    vap->iv_scanreq_maxdwell,
				    vap->iv_scanreq_nssid, vap->iv_scanreq_ssid);
				vap->iv_flags_ext &= ~IEEE80211_FEXT_SCANREQ;
			} else
				ieee80211_check_scan_current(vap);
			break;
		default:
			break;
		}
		break;
	case IEEE80211_S_CAC:
		/*
		 * Start CAC on a DFS channel.  We come here when starting
		 * a bss on a DFS channel (see ieee80211_create_ibss).
		 */
		ieee80211_dfs_cac_start(vap);
		break;
	case IEEE80211_S_RUN:
		switch (ostate) {
		case IEEE80211_S_INIT:
			/*
			 * Already have a channel; bypass the
			 * scan and startup immediately.
			 * Note that ieee80211_create_ibss will call
			 * back to do a RUN->RUN state change.
			 */
			ieee80211_create_ibss(vap,
			    ieee80211_ht_adjust_channel(ic,
				ic->ic_curchan, vap->iv_flags_ht));
			/* NB: iv_bss is changed on return */
			break;
		case IEEE80211_S_CAC:
			/*
			 * NB: This is the normal state change when CAC
			 * expires and no radar was detected; no need to
			 * clear the CAC timer as it's already expired.
			 */
			/* fall thru... */
		case IEEE80211_S_CSA:
#if 0
			/*
			 * Shorten inactivity timer of associated stations
			 * to weed out sta's that don't follow a CSA.
			 */
			ieee80211_iterate_nodes(&ic->ic_sta, sta_csa, vap);
#endif
			/*
			 * Update bss node channel to reflect where
			 * we landed after CSA.
			 */
			ieee80211_node_set_chan(vap->iv_bss,
			    ieee80211_ht_adjust_channel(ic, ic->ic_curchan,
				ieee80211_htchanflags(vap->iv_bss->ni_chan)));
			/* XXX bypass debug msgs */
			break;
		case IEEE80211_S_SCAN:
		case IEEE80211_S_RUN:
#ifdef IEEE80211_DEBUG
			if (ieee80211_msg_debug(vap)) {
				/* NB: shadows the function-scope ni */
				struct ieee80211_node *ni = vap->iv_bss;
				ieee80211_note(vap,
				    "synchronized with %s meshid ",
				    ether_sprintf(ni->ni_meshid));
				ieee80211_print_essid(ni->ni_meshid,
				    ni->ni_meshidlen);
				/* XXX MCS/HT */
				printf(" channel %d\n",
				    ieee80211_chan2ieee(ic, ic->ic_curchan));
			}
#endif
			break;
		default:
			break;
		}
		ieee80211_node_authorize(vap->iv_bss);
		/* (re)arm periodic cleanup of invalid routes */
		callout_reset(&ms->ms_cleantimer, ms->ms_ppath->mpp_inact,
		    mesh_rt_cleanup_cb, vap);
		break;
	default:
		break;
	}
	/* NB: ostate not nstate */
	ms->ms_ppath->mpp_newstate(vap, ostate, arg);
	return 0;
}

/*
 * Callout handler for ms_cleantimer: flushes expired invalid routes
 * and re-arms itself with the path protocol's mpp_inact interval.
 * arg is the vap.
 */
static void
mesh_rt_cleanup_cb(void *arg)
{
	struct ieee80211vap *vap = arg;
	struct ieee80211_mesh_state *ms = vap->iv_mesh;

	mesh_rt_flush_invalid(vap);
	callout_reset(&ms->ms_cleantimer, ms->ms_ppath->mpp_inact,
	    mesh_rt_cleanup_cb, vap);
}


/*
 * Helper function to note the Mesh Peer Link FSM change.
*/ static void mesh_linkchange(struct ieee80211_node *ni, enum ieee80211_mesh_mlstate state) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_mesh_state *ms = vap->iv_mesh; #ifdef IEEE80211_DEBUG static const char *meshlinkstates[] = { [IEEE80211_NODE_MESH_IDLE] = "IDLE", [IEEE80211_NODE_MESH_OPENSNT] = "OPEN SENT", [IEEE80211_NODE_MESH_OPENRCV] = "OPEN RECEIVED", [IEEE80211_NODE_MESH_CONFIRMRCV] = "CONFIRM RECEIVED", [IEEE80211_NODE_MESH_ESTABLISHED] = "ESTABLISHED", [IEEE80211_NODE_MESH_HOLDING] = "HOLDING" }; #endif IEEE80211_NOTE(vap, IEEE80211_MSG_MESH, ni, "peer link: %s -> %s", meshlinkstates[ni->ni_mlstate], meshlinkstates[state]); /* track neighbor count */ if (state == IEEE80211_NODE_MESH_ESTABLISHED && ni->ni_mlstate != IEEE80211_NODE_MESH_ESTABLISHED) { KASSERT(ms->ms_neighbors < 65535, ("neighbor count overflow")); ms->ms_neighbors++; ieee80211_beacon_notify(vap, IEEE80211_BEACON_MESHCONF); } else if (ni->ni_mlstate == IEEE80211_NODE_MESH_ESTABLISHED && state != IEEE80211_NODE_MESH_ESTABLISHED) { KASSERT(ms->ms_neighbors > 0, ("neighbor count 0")); ms->ms_neighbors--; ieee80211_beacon_notify(vap, IEEE80211_BEACON_MESHCONF); } ni->ni_mlstate = state; switch (state) { case IEEE80211_NODE_MESH_HOLDING: ms->ms_ppath->mpp_peerdown(ni); break; case IEEE80211_NODE_MESH_ESTABLISHED: ieee80211_mesh_discover(vap, ni->ni_macaddr, NULL); break; default: break; } } /* * Helper function to generate a unique local ID required for mesh * peer establishment. */ static void mesh_checkid(void *arg, struct ieee80211_node *ni) { uint16_t *r = arg; if (*r == ni->ni_mllid) *(uint16_t *)arg = 0; } static uint32_t mesh_generateid(struct ieee80211vap *vap) { int maxiter = 4; uint16_t r; do { get_random_bytes(&r, 2); ieee80211_iterate_nodes(&vap->iv_ic->ic_sta, mesh_checkid, &r); maxiter--; } while (r == 0 && maxiter > 0); return r; } /* * Verifies if we already received this packet by checking its * sequence number. 
* Returns 0 if the frame is to be accepted, 1 otherwise. */ static int mesh_checkpseq(struct ieee80211vap *vap, const uint8_t source[IEEE80211_ADDR_LEN], uint32_t seq) { struct ieee80211_mesh_route *rt; rt = ieee80211_mesh_rt_find(vap, source); if (rt == NULL) { rt = ieee80211_mesh_rt_add(vap, source); if (rt == NULL) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_MESH, source, "%s", "add mcast route failed"); vap->iv_stats.is_mesh_rtaddfailed++; return 1; } IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_MESH, source, "add mcast route, mesh seqno %d", seq); rt->rt_lastmseq = seq; return 0; } if (IEEE80211_MESH_SEQ_GEQ(rt->rt_lastmseq, seq)) { return 1; } else { rt->rt_lastmseq = seq; return 0; } } /* * Iterate the routing table and locate the next hop. */ static struct ieee80211_node * mesh_find_txnode(struct ieee80211vap *vap, const uint8_t dest[IEEE80211_ADDR_LEN]) { struct ieee80211_mesh_route *rt; rt = ieee80211_mesh_rt_find(vap, dest); if (rt == NULL) return NULL; if ((rt->rt_flags & IEEE80211_MESHRT_FLAGS_VALID) == 0 || (rt->rt_flags & IEEE80211_MESHRT_FLAGS_PROXY)) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_MESH, dest, "%s: !valid or proxy, flags 0x%x", __func__, rt->rt_flags); /* XXX stat */ return NULL; } return ieee80211_find_txnode(vap, rt->rt_nexthop); } /* * Forward the specified frame. * Decrement the TTL and set TA to our MAC address. 
*/ static void mesh_forward(struct ieee80211vap *vap, struct mbuf *m, const struct ieee80211_meshcntl *mc) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ifnet *ifp = vap->iv_ifp; struct ifnet *parent = ic->ic_ifp; const struct ieee80211_frame *wh = mtod(m, const struct ieee80211_frame *); struct mbuf *mcopy; struct ieee80211_meshcntl *mccopy; struct ieee80211_frame *whcopy; struct ieee80211_node *ni; int err; if (mc->mc_ttl == 0) { IEEE80211_NOTE_FRAME(vap, IEEE80211_MSG_MESH, wh, "%s", "frame not fwd'd, ttl 0"); vap->iv_stats.is_mesh_fwd_ttl++; return; } if (!(ms->ms_flags & IEEE80211_MESHFLAGS_FWD)) { IEEE80211_NOTE_FRAME(vap, IEEE80211_MSG_MESH, wh, "%s", "frame not fwd'd, fwding disabled"); vap->iv_stats.is_mesh_fwd_disabled++; return; } mcopy = m_dup(m, M_DONTWAIT); if (mcopy == NULL) { IEEE80211_NOTE_FRAME(vap, IEEE80211_MSG_MESH, wh, "%s", "frame not fwd'd, cannot dup"); vap->iv_stats.is_mesh_fwd_nobuf++; ifp->if_oerrors++; return; } mcopy = m_pullup(mcopy, ieee80211_hdrspace(ic, wh) + sizeof(struct ieee80211_meshcntl)); if (mcopy == NULL) { IEEE80211_NOTE_FRAME(vap, IEEE80211_MSG_MESH, wh, "%s", "frame not fwd'd, too short"); vap->iv_stats.is_mesh_fwd_tooshort++; ifp->if_oerrors++; m_freem(mcopy); return; } whcopy = mtod(mcopy, struct ieee80211_frame *); mccopy = (struct ieee80211_meshcntl *) (mtod(mcopy, uint8_t *) + ieee80211_hdrspace(ic, wh)); /* XXX clear other bits? 
*/ whcopy->i_fc[1] &= ~IEEE80211_FC1_RETRY; IEEE80211_ADDR_COPY(whcopy->i_addr2, vap->iv_myaddr); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) { ni = ieee80211_ref_node(vap->iv_bss); mcopy->m_flags |= M_MCAST; } else { ni = mesh_find_txnode(vap, whcopy->i_addr3); if (ni == NULL) { IEEE80211_NOTE_FRAME(vap, IEEE80211_MSG_MESH, wh, "%s", "frame not fwd'd, no path"); vap->iv_stats.is_mesh_fwd_nopath++; m_freem(mcopy); return; } IEEE80211_ADDR_COPY(whcopy->i_addr1, ni->ni_macaddr); } KASSERT(mccopy->mc_ttl > 0, ("%s called with wrong ttl", __func__)); mccopy->mc_ttl--; /* XXX calculate priority so drivers can find the tx queue */ M_WME_SETAC(mcopy, WME_AC_BE); /* XXX do we know m_nextpkt is NULL? */ mcopy->m_pkthdr.rcvif = (void *) ni; err = parent->if_transmit(parent, mcopy); if (err != 0) { /* NB: IFQ_HANDOFF reclaims mbuf */ ieee80211_free_node(ni); } else { ifp->if_opackets++; } } static struct mbuf * mesh_decap(struct ieee80211vap *vap, struct mbuf *m, int hdrlen, int meshdrlen) { #define WHDIR(wh) ((wh)->i_fc[1] & IEEE80211_FC1_DIR_MASK) uint8_t b[sizeof(struct ieee80211_qosframe_addr4) + sizeof(struct ieee80211_meshcntl_ae11)]; const struct ieee80211_qosframe_addr4 *wh; const struct ieee80211_meshcntl_ae10 *mc; struct ether_header *eh; struct llc *llc; int ae; if (m->m_len < hdrlen + sizeof(*llc) && (m = m_pullup(m, hdrlen + sizeof(*llc))) == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_ANY, "discard data frame: %s", "m_pullup failed"); vap->iv_stats.is_rx_tooshort++; return NULL; } memcpy(b, mtod(m, caddr_t), hdrlen); wh = (const struct ieee80211_qosframe_addr4 *)&b[0]; mc = (const struct ieee80211_meshcntl_ae10 *)&b[hdrlen - meshdrlen]; KASSERT(WHDIR(wh) == IEEE80211_FC1_DIR_FROMDS || WHDIR(wh) == IEEE80211_FC1_DIR_DSTODS, ("bogus dir, fc 0x%x:0x%x", wh->i_fc[0], wh->i_fc[1])); llc = (struct llc *)(mtod(m, caddr_t) + hdrlen); if (llc->llc_dsap == LLC_SNAP_LSAP && llc->llc_ssap == LLC_SNAP_LSAP && llc->llc_control == LLC_UI && llc->llc_snap.org_code[0] == 0 && 
llc->llc_snap.org_code[1] == 0 && llc->llc_snap.org_code[2] == 0 && /* NB: preserve AppleTalk frames that have a native SNAP hdr */ !(llc->llc_snap.ether_type == htons(ETHERTYPE_AARP) || llc->llc_snap.ether_type == htons(ETHERTYPE_IPX))) { m_adj(m, hdrlen + sizeof(struct llc) - sizeof(*eh)); llc = NULL; } else { m_adj(m, hdrlen - sizeof(*eh)); } eh = mtod(m, struct ether_header *); ae = mc->mc_flags & 3; if (WHDIR(wh) == IEEE80211_FC1_DIR_FROMDS) { IEEE80211_ADDR_COPY(eh->ether_dhost, wh->i_addr1); if (ae == 0) { IEEE80211_ADDR_COPY(eh->ether_shost, wh->i_addr3); } else if (ae == 1) { IEEE80211_ADDR_COPY(eh->ether_shost, mc->mc_addr4); } else { IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY, (const struct ieee80211_frame *)wh, NULL, "bad AE %d", ae); vap->iv_stats.is_mesh_badae++; m_freem(m); return NULL; } } else { if (ae == 0) { IEEE80211_ADDR_COPY(eh->ether_dhost, wh->i_addr3); IEEE80211_ADDR_COPY(eh->ether_shost, wh->i_addr4); } else if (ae == 2) { IEEE80211_ADDR_COPY(eh->ether_dhost, mc->mc_addr4); IEEE80211_ADDR_COPY(eh->ether_shost, mc->mc_addr5); } else { IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY, (const struct ieee80211_frame *)wh, NULL, "bad AE %d", ae); vap->iv_stats.is_mesh_badae++; m_freem(m); return NULL; } } #ifdef ALIGNED_POINTER if (!ALIGNED_POINTER(mtod(m, caddr_t) + sizeof(*eh), uint32_t)) { m = ieee80211_realign(vap, m, sizeof(*eh)); if (m == NULL) return NULL; } #endif /* ALIGNED_POINTER */ if (llc != NULL) { eh = mtod(m, struct ether_header *); eh->ether_type = htons(m->m_pkthdr.len - sizeof(*eh)); } return m; #undef WDIR } /* * Return non-zero if the unicast mesh data frame should be processed * locally. Frames that are not proxy'd have our address, otherwise * we need to consult the routing table to look for a proxy entry. 
*/ static __inline int mesh_isucastforme(struct ieee80211vap *vap, const struct ieee80211_frame *wh, const struct ieee80211_meshcntl *mc) { int ae = mc->mc_flags & 3; KASSERT((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) == IEEE80211_FC1_DIR_DSTODS, ("bad dir 0x%x:0x%x", wh->i_fc[0], wh->i_fc[1])); KASSERT(ae == 0 || ae == 2, ("bad AE %d", ae)); if (ae == 2) { /* ucast w/ proxy */ const struct ieee80211_meshcntl_ae10 *mc10 = (const struct ieee80211_meshcntl_ae10 *) mc; struct ieee80211_mesh_route *rt = ieee80211_mesh_rt_find(vap, mc10->mc_addr4); /* check for proxy route to ourself */ return (rt != NULL && (rt->rt_flags & IEEE80211_MESHRT_FLAGS_PROXY)); } else /* ucast w/o proxy */ return IEEE80211_ADDR_EQ(wh->i_addr3, vap->iv_myaddr); } static int mesh_input(struct ieee80211_node *ni, struct mbuf *m, int rssi, int nf) { #define HAS_SEQ(type) ((type & 0x4) == 0) struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ifnet *ifp = vap->iv_ifp; struct ieee80211_frame *wh; const struct ieee80211_meshcntl *mc; int hdrspace, meshdrlen, need_tap; uint8_t dir, type, subtype, qos; uint32_t seq; uint8_t *addr; ieee80211_seq rxseq; KASSERT(ni != NULL, ("null node")); ni->ni_inact = ni->ni_inact_reload; need_tap = 1; /* mbuf need to be tapped. */ type = -1; /* undefined */ if (m->m_pkthdr.len < sizeof(struct ieee80211_frame_min)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY, ni->ni_macaddr, NULL, "too short (1): len %u", m->m_pkthdr.len); vap->iv_stats.is_rx_tooshort++; goto out; } /* * Bit of a cheat here, we use a pointer for a 3-address * frame format but don't reference fields past outside * ieee80211_frame_min w/o first validating the data is * present. 
*/ wh = mtod(m, struct ieee80211_frame *); if ((wh->i_fc[0] & IEEE80211_FC0_VERSION_MASK) != IEEE80211_FC0_VERSION_0) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY, ni->ni_macaddr, NULL, "wrong version %x", wh->i_fc[0]); vap->iv_stats.is_rx_badversion++; goto err; } dir = wh->i_fc[1] & IEEE80211_FC1_DIR_MASK; type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; if ((ic->ic_flags & IEEE80211_F_SCAN) == 0) { IEEE80211_RSSI_LPF(ni->ni_avgrssi, rssi); ni->ni_noise = nf; if (HAS_SEQ(type)) { uint8_t tid = ieee80211_gettid(wh); if (IEEE80211_QOS_HAS_SEQ(wh) && TID_TO_WME_AC(tid) >= WME_AC_VI) ic->ic_wme.wme_hipri_traffic++; rxseq = le16toh(*(uint16_t *)wh->i_seq); if (! ieee80211_check_rxseq(ni, wh)) { /* duplicate, discard */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT, wh->i_addr1, "duplicate", "seqno <%u,%u> fragno <%u,%u> tid %u", rxseq >> IEEE80211_SEQ_SEQ_SHIFT, ni->ni_rxseqs[tid] >> IEEE80211_SEQ_SEQ_SHIFT, rxseq & IEEE80211_SEQ_FRAG_MASK, ni->ni_rxseqs[tid] & IEEE80211_SEQ_FRAG_MASK, tid); vap->iv_stats.is_rx_dup++; IEEE80211_NODE_STAT(ni, rx_dup); goto out; } ni->ni_rxseqs[tid] = rxseq; } } #ifdef IEEE80211_DEBUG /* * It's easier, but too expensive, to simulate different mesh * topologies by consulting the ACL policy very early, so do this * only under DEBUG. * * NB: this check is also done upon peering link initiation. 
*/ if (vap->iv_acl != NULL && !vap->iv_acl->iac_check(vap, wh->i_addr2)) { IEEE80211_DISCARD(vap, IEEE80211_MSG_ACL, wh, NULL, "%s", "disallowed by ACL"); vap->iv_stats.is_rx_acl++; goto out; } #endif switch (type) { case IEEE80211_FC0_TYPE_DATA: if (ni == vap->iv_bss) goto out; if (ni->ni_mlstate != IEEE80211_NODE_MESH_ESTABLISHED) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_MESH, ni->ni_macaddr, NULL, "peer link not yet established (%d)", ni->ni_mlstate); vap->iv_stats.is_mesh_nolink++; goto out; } if (dir != IEEE80211_FC1_DIR_FROMDS && dir != IEEE80211_FC1_DIR_DSTODS) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, "data", "incorrect dir 0x%x", dir); vap->iv_stats.is_rx_wrongdir++; goto err; } /* pull up enough to get to the mesh control */ hdrspace = ieee80211_hdrspace(ic, wh); if (m->m_len < hdrspace + sizeof(struct ieee80211_meshcntl) && (m = m_pullup(m, hdrspace + sizeof(struct ieee80211_meshcntl))) == NULL) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY, ni->ni_macaddr, NULL, "data too short: expecting %u", hdrspace); vap->iv_stats.is_rx_tooshort++; goto out; /* XXX */ } /* * Now calculate the full extent of the headers. Note * mesh_decap will pull up anything we didn't get * above when it strips the 802.11 headers. */ mc = (const struct ieee80211_meshcntl *) (mtod(m, const uint8_t *) + hdrspace); meshdrlen = sizeof(struct ieee80211_meshcntl) + (mc->mc_flags & 3) * IEEE80211_ADDR_LEN; hdrspace += meshdrlen; seq = LE_READ_4(mc->mc_seq); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) addr = wh->i_addr3; else addr = ((struct ieee80211_qosframe_addr4 *)wh)->i_addr4; if (IEEE80211_ADDR_EQ(vap->iv_myaddr, addr)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT, addr, "data", "%s", "not to me"); vap->iv_stats.is_rx_wrongbss++; /* XXX kinda */ goto out; } if (mesh_checkpseq(vap, addr, seq) != 0) { vap->iv_stats.is_rx_dup++; goto out; } /* * Potentially forward packet. See table s36 (p140) * for the rules. XXX tap fwd'd packets not for us? 
*/ if (dir == IEEE80211_FC1_DIR_FROMDS || !mesh_isucastforme(vap, wh, mc)) { mesh_forward(vap, m, mc); if (dir == IEEE80211_FC1_DIR_DSTODS) goto out; /* NB: fall thru to deliver mcast frames locally */ } /* * Save QoS bits for use below--before we strip the header. */ if (subtype == IEEE80211_FC0_SUBTYPE_QOS) { qos = (dir == IEEE80211_FC1_DIR_DSTODS) ? ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] : ((struct ieee80211_qosframe *)wh)->i_qos[0]; } else qos = 0; /* * Next up, any fragmentation. */ if (!IEEE80211_IS_MULTICAST(wh->i_addr1)) { m = ieee80211_defrag(ni, m, hdrspace); if (m == NULL) { /* Fragment dropped or frame not complete yet */ goto out; } } wh = NULL; /* no longer valid, catch any uses */ if (ieee80211_radiotap_active_vap(vap)) ieee80211_radiotap_rx(vap, m); need_tap = 0; /* * Finally, strip the 802.11 header. */ m = mesh_decap(vap, m, hdrspace, meshdrlen); if (m == NULL) { /* XXX mask bit to check for both */ /* don't count Null data frames as errors */ if (subtype == IEEE80211_FC0_SUBTYPE_NODATA || subtype == IEEE80211_FC0_SUBTYPE_QOS_NULL) goto out; IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT, ni->ni_macaddr, "data", "%s", "decap error"); vap->iv_stats.is_rx_decap++; IEEE80211_NODE_STAT(ni, rx_decap); goto err; } if (qos & IEEE80211_QOS_AMSDU) { m = ieee80211_decap_amsdu(ni, m); if (m == NULL) return IEEE80211_FC0_TYPE_DATA; } ieee80211_deliver_data(vap, ni, m); return type; case IEEE80211_FC0_TYPE_MGT: vap->iv_stats.is_rx_mgmt++; IEEE80211_NODE_STAT(ni, rx_mgmt); if (dir != IEEE80211_FC1_DIR_NODS) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, "mgt", "incorrect dir 0x%x", dir); vap->iv_stats.is_rx_wrongdir++; goto err; } if (m->m_pkthdr.len < sizeof(struct ieee80211_frame)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY, ni->ni_macaddr, "mgt", "too short: len %u", m->m_pkthdr.len); vap->iv_stats.is_rx_tooshort++; goto out; } #ifdef IEEE80211_DEBUG if ((ieee80211_msg_debug(vap) && (vap->iv_ic->ic_flags & IEEE80211_F_SCAN)) || 
ieee80211_msg_dumppkts(vap)) { if_printf(ifp, "received %s from %s rssi %d\n", ieee80211_mgt_subtype_name[subtype >> IEEE80211_FC0_SUBTYPE_SHIFT], ether_sprintf(wh->i_addr2), rssi); } #endif if (wh->i_fc[1] & IEEE80211_FC1_WEP) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, NULL, "%s", "WEP set but not permitted"); vap->iv_stats.is_rx_mgtdiscard++; /* XXX */ goto out; } vap->iv_recv_mgmt(ni, m, subtype, rssi, nf); goto out; case IEEE80211_FC0_TYPE_CTL: vap->iv_stats.is_rx_ctl++; IEEE80211_NODE_STAT(ni, rx_ctrl); goto out; default: IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY, wh, "bad", "frame type 0x%x", type); /* should not come here */ break; } err: ifp->if_ierrors++; out: if (m != NULL) { if (need_tap && ieee80211_radiotap_active_vap(vap)) ieee80211_radiotap_rx(vap, m); m_freem(m); } return type; } static void mesh_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, int subtype, int rssi, int nf) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_frame *wh; uint8_t *frm, *efrm; wh = mtod(m0, struct ieee80211_frame *); frm = (uint8_t *)&wh[1]; efrm = mtod(m0, uint8_t *) + m0->m_len; switch (subtype) { case IEEE80211_FC0_SUBTYPE_PROBE_RESP: case IEEE80211_FC0_SUBTYPE_BEACON: { struct ieee80211_scanparams scan; /* * We process beacon/probe response * frames to discover neighbors. */ if (ieee80211_parse_beacon(ni, m0, &scan) != 0) return; /* * Count frame now that we know it's to be processed. */ if (subtype == IEEE80211_FC0_SUBTYPE_BEACON) { vap->iv_stats.is_rx_beacon++; /* XXX remove */ IEEE80211_NODE_STAT(ni, rx_beacons); } else IEEE80211_NODE_STAT(ni, rx_proberesp); /* * If scanning, just pass information to the scan module. */ if (ic->ic_flags & IEEE80211_F_SCAN) { if (ic->ic_flags_ext & IEEE80211_FEXT_PROBECHAN) { /* * Actively scanning a channel marked passive; * send a probe request now that we know there * is 802.11 traffic present. 
* * XXX check if the beacon we recv'd gives * us what we need and suppress the probe req */ ieee80211_probe_curchan(vap, 1); ic->ic_flags_ext &= ~IEEE80211_FEXT_PROBECHAN; } ieee80211_add_scan(vap, &scan, wh, subtype, rssi, nf); return; } /* The rest of this code assumes we are running */ if (vap->iv_state != IEEE80211_S_RUN) return; /* * Ignore non-mesh STAs. */ if ((scan.capinfo & (IEEE80211_CAPINFO_ESS|IEEE80211_CAPINFO_IBSS)) || scan.meshid == NULL || scan.meshconf == NULL) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, "beacon", "%s", "not a mesh sta"); vap->iv_stats.is_mesh_wrongmesh++; return; } /* * Ignore STAs for other mesh networks. */ if (memcmp(scan.meshid+2, ms->ms_id, ms->ms_idlen) != 0 || mesh_verify_meshconf(vap, scan.meshconf)) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, "beacon", "%s", "not for our mesh"); vap->iv_stats.is_mesh_wrongmesh++; return; } /* * Peer only based on the current ACL policy. */ if (vap->iv_acl != NULL && !vap->iv_acl->iac_check(vap, wh->i_addr2)) { IEEE80211_DISCARD(vap, IEEE80211_MSG_ACL, wh, NULL, "%s", "disallowed by ACL"); vap->iv_stats.is_rx_acl++; return; } /* * Do neighbor discovery. */ if (!IEEE80211_ADDR_EQ(wh->i_addr2, ni->ni_macaddr)) { /* * Create a new entry in the neighbor table. */ ni = ieee80211_add_neighbor(vap, wh, &scan); } /* * Automatically peer with discovered nodes if possible. 
* XXX backoff on repeated failure */ if (ni != vap->iv_bss && (ms->ms_flags & IEEE80211_MESHFLAGS_AP) && ni->ni_mlstate == IEEE80211_NODE_MESH_IDLE) { uint16_t args[1]; ni->ni_mlpid = mesh_generateid(vap); if (ni->ni_mlpid == 0) return; mesh_linkchange(ni, IEEE80211_NODE_MESH_OPENSNT); args[0] = ni->ni_mlpid; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_OPEN, args); ni->ni_mlrcnt = 0; mesh_peer_timeout_setup(ni); } break; } case IEEE80211_FC0_SUBTYPE_PROBE_REQ: { uint8_t *ssid, *meshid, *rates, *xrates; uint8_t *sfrm; if (vap->iv_state != IEEE80211_S_RUN) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, NULL, "wrong state %s", ieee80211_state_name[vap->iv_state]); vap->iv_stats.is_rx_mgtdiscard++; return; } if (IEEE80211_IS_MULTICAST(wh->i_addr2)) { /* frame must be directed */ IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, NULL, "%s", "not unicast"); vap->iv_stats.is_rx_mgtdiscard++; /* XXX stat */ return; } /* * prreq frame format * [tlv] ssid * [tlv] supported rates * [tlv] extended supported rates * [tlv] mesh id */ ssid = meshid = rates = xrates = NULL; sfrm = frm; while (efrm - frm > 1) { IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1] + 2, return); switch (*frm) { case IEEE80211_ELEMID_SSID: ssid = frm; break; case IEEE80211_ELEMID_RATES: rates = frm; break; case IEEE80211_ELEMID_XRATES: xrates = frm; break; case IEEE80211_ELEMID_MESHID: meshid = frm; break; } frm += frm[1] + 2; } IEEE80211_VERIFY_ELEMENT(ssid, IEEE80211_NWID_LEN, return); IEEE80211_VERIFY_ELEMENT(rates, IEEE80211_RATE_MAXSIZE, return); if (xrates != NULL) IEEE80211_VERIFY_ELEMENT(xrates, IEEE80211_RATE_MAXSIZE - rates[1], return); if (meshid != NULL) { IEEE80211_VERIFY_ELEMENT(meshid, IEEE80211_MESHID_LEN, return); /* NB: meshid, not ssid */ IEEE80211_VERIFY_SSID(vap->iv_bss, meshid, return); } /* XXX find a better class or define it's own */ IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_INPUT, wh->i_addr2, "%s", "recv probe req"); /* * Some legacy 11b 
clients cannot hack a complete * probe response frame. When the request includes * only a bare-bones rate set, communicate this to * the transmit side. */ ieee80211_send_proberesp(vap, wh->i_addr2, 0); break; } case IEEE80211_FC0_SUBTYPE_ACTION: case IEEE80211_FC0_SUBTYPE_ACTION_NOACK: if (ni == vap->iv_bss) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, NULL, "%s", "unknown node"); vap->iv_stats.is_rx_mgtdiscard++; } else if (!IEEE80211_ADDR_EQ(vap->iv_myaddr, wh->i_addr1) && !IEEE80211_IS_MULTICAST(wh->i_addr1)) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, NULL, "%s", "not for us"); vap->iv_stats.is_rx_mgtdiscard++; } else if (vap->iv_state != IEEE80211_S_RUN) { IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, NULL, "wrong state %s", ieee80211_state_name[vap->iv_state]); vap->iv_stats.is_rx_mgtdiscard++; } else { if (ieee80211_parse_action(ni, m0) == 0) (void)ic->ic_recv_action(ni, wh, frm, efrm); } break; case IEEE80211_FC0_SUBTYPE_ASSOC_REQ: case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: case IEEE80211_FC0_SUBTYPE_REASSOC_REQ: case IEEE80211_FC0_SUBTYPE_REASSOC_RESP: case IEEE80211_FC0_SUBTYPE_ATIM: case IEEE80211_FC0_SUBTYPE_DISASSOC: case IEEE80211_FC0_SUBTYPE_AUTH: case IEEE80211_FC0_SUBTYPE_DEAUTH: IEEE80211_DISCARD(vap, IEEE80211_MSG_INPUT, wh, NULL, "%s", "not handled"); vap->iv_stats.is_rx_mgtdiscard++; break; default: IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY, wh, "mgt", "subtype 0x%x not handled", subtype); vap->iv_stats.is_rx_badsubtype++; break; } } static void mesh_recv_ctl(struct ieee80211_node *ni, struct mbuf *m, int subtype) { switch (subtype) { case IEEE80211_FC0_SUBTYPE_BAR: ieee80211_recv_bar(ni, m); break; } } /* * Parse meshpeering action ie's for open+confirm frames; the * important bits are returned in the supplied structure. 
*/
/*
 * Walks the information elements in [frm, efrm), remembering the Mesh
 * ID, Mesh Configuration and Mesh Peering elements.  The peering
 * element's link IDs and reason code are converted from little-endian
 * into *mp.
 *
 * Returns mp (cast to const) when the mesh ID, peering element and
 * mesh configuration all verify.  Returns NULL when an element overruns
 * the frame or when verification fails; in the latter case a node that
 * is in the middle of peering (OPENSNT, OPENRCV, CONFIRMRCV) is sent a
 * CLOSE and moved to HOLDING.
 *
 * NOTE(review): *mp is only written when a Mesh Peering element is
 * present, and the KASSERT below fires on a frame without one -- the
 * frame contents presumably come from the peer, so confirm this cannot
 * be triggered remotely on INVARIANTS kernels.
 */
static const struct ieee80211_meshpeer_ie *
mesh_parse_meshpeering_action(struct ieee80211_node *ni,
	const struct ieee80211_frame *wh,	/* XXX for VERIFY_LENGTH */
	const uint8_t *frm, const uint8_t *efrm,
	struct ieee80211_meshpeer_ie *mp, uint8_t subtype)
{
	struct ieee80211vap *vap = ni->ni_vap;
	const struct ieee80211_meshpeer_ie *mpie;
	const uint8_t *meshid, *meshconf, *meshpeer;

	meshid = meshconf = meshpeer = NULL;
	while (efrm - frm > 1) {
		IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1] + 2, return NULL);
		switch (*frm) {
		case IEEE80211_ELEMID_MESHID:
			meshid = frm;
			break;
		case IEEE80211_ELEMID_MESHCONF:
			meshconf = frm;
			break;
		case IEEE80211_ELEMID_MESHPEER:
			meshpeer = frm;
			mpie = (const struct ieee80211_meshpeer_ie *) frm;
			memset(mp, 0, sizeof(*mp));
			mp->peer_llinkid = LE_READ_2(&mpie->peer_llinkid);
			/* NB: peer link ID is optional on these frames */
			/*
			 * NOTE(review): this compares against
			 * IEEE80211_MESH_PEER_LINK_CLOSE while the rest of
			 * the function uses IEEE80211_ACTION_MESHPEERING_*
			 * for subtype -- confirm the two agree.
			 */
			if (subtype == IEEE80211_MESH_PEER_LINK_CLOSE &&
			    mpie->peer_len == 8) {
				/* short form: reason code sits where the peer link ID would be */
				mp->peer_linkid = 0;
				mp->peer_rcode = LE_READ_2(&mpie->peer_linkid);
			} else {
				mp->peer_linkid = LE_READ_2(&mpie->peer_linkid);
				mp->peer_rcode = LE_READ_2(&mpie->peer_rcode);
			}
			break;
		}
		frm += frm[1] + 2;
	}

	/*
	 * Verify the contents of the frame. Action frames with
	 * close subtype don't have a Mesh Configuration IE.
	 * If it fails validation, close the peer link.
	 */
	KASSERT(meshpeer != NULL &&
	    subtype != IEEE80211_ACTION_MESHPEERING_CLOSE,
	    ("parsing close action"));

	if (mesh_verify_meshid(vap, meshid) ||
	    mesh_verify_meshpeer(vap, subtype, meshpeer) ||
	    mesh_verify_meshconf(vap, meshconf)) {
		uint16_t args[3];

		IEEE80211_DISCARD(vap,
		    IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH,
		    wh, NULL, "%s", "not for our mesh");
		vap->iv_stats.is_rx_mgtdiscard++;
		switch (ni->ni_mlstate) {
		case IEEE80211_NODE_MESH_IDLE:
		case IEEE80211_NODE_MESH_ESTABLISHED:
		case IEEE80211_NODE_MESH_HOLDING:
			/* ignore */
			break;
		case IEEE80211_NODE_MESH_OPENSNT:
		case IEEE80211_NODE_MESH_OPENRCV:
		case IEEE80211_NODE_MESH_CONFIRMRCV:
			args[0] = ni->ni_mlpid;
			args[1] = ni->ni_mllid;
			args[2] = IEEE80211_REASON_PEER_LINK_CANCELED;
			ieee80211_send_action(ni,
			    IEEE80211_ACTION_CAT_MESHPEERING,
			    IEEE80211_ACTION_MESHPEERING_CLOSE,
			    args);
			mesh_linkchange(ni, IEEE80211_NODE_MESH_HOLDING);
			mesh_peer_timeout_setup(ni);
			break;
		}
		return NULL;
	}
	return (const struct ieee80211_meshpeer_ie *) mp;
}

static int
mesh_recv_action_meshpeering_open(struct ieee80211_node *ni,
	const struct ieee80211_frame *wh,
	const uint8_t *frm, const uint8_t *efrm)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct ieee80211_meshpeer_ie ie;
	const struct ieee80211_meshpeer_ie *meshpeer;
	uint16_t args[3];

	/* +2+2 for action + code + capabilites */
	meshpeer = mesh_parse_meshpeering_action(ni, wh, frm+2+2, efrm, &ie,
	    IEEE80211_ACTION_MESHPEERING_OPEN);
	if (meshpeer == NULL) {
		return 0;
	}

	/* XXX move up */
	IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, ni,
	    "recv PEER OPEN, lid 0x%x", meshpeer->peer_llinkid);

	switch (ni->ni_mlstate) {
	case IEEE80211_NODE_MESH_IDLE:
		mesh_linkchange(ni, IEEE80211_NODE_MESH_OPENRCV);
		ni->ni_mllid = meshpeer->peer_llinkid;
		ni->ni_mlpid = mesh_generateid(vap);
		if (ni->ni_mlpid == 0)
			return 0;		/* XXX */
		args[0] = ni->ni_mlpid;
		/* Announce we're open too...
*/ ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_OPEN, args); /* ...and confirm the link. */ args[0] = ni->ni_mlpid; args[1] = ni->ni_mllid; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CONFIRM, args); mesh_peer_timeout_setup(ni); break; case IEEE80211_NODE_MESH_OPENRCV: /* Wrong Link ID */ if (ni->ni_mllid != meshpeer->peer_llinkid) { args[0] = ni->ni_mllid; args[1] = ni->ni_mlpid; args[2] = IEEE80211_REASON_PEER_LINK_CANCELED; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); mesh_linkchange(ni, IEEE80211_NODE_MESH_HOLDING); mesh_peer_timeout_setup(ni); break; } /* Duplicate open, confirm again. */ args[0] = ni->ni_mlpid; args[1] = ni->ni_mllid; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CONFIRM, args); break; case IEEE80211_NODE_MESH_OPENSNT: ni->ni_mllid = meshpeer->peer_llinkid; mesh_linkchange(ni, IEEE80211_NODE_MESH_OPENRCV); args[0] = ni->ni_mlpid; args[1] = ni->ni_mllid; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CONFIRM, args); /* NB: don't setup/clear any timeout */ break; case IEEE80211_NODE_MESH_CONFIRMRCV: if (ni->ni_mlpid != meshpeer->peer_linkid || ni->ni_mllid != meshpeer->peer_llinkid) { args[0] = ni->ni_mlpid; args[1] = ni->ni_mllid; args[2] = IEEE80211_REASON_PEER_LINK_CANCELED; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); mesh_linkchange(ni, IEEE80211_NODE_MESH_HOLDING); mesh_peer_timeout_setup(ni); break; } mesh_linkchange(ni, IEEE80211_NODE_MESH_ESTABLISHED); ni->ni_mllid = meshpeer->peer_llinkid; args[0] = ni->ni_mlpid; args[1] = ni->ni_mllid; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CONFIRM, args); mesh_peer_timeout_stop(ni); break; case IEEE80211_NODE_MESH_ESTABLISHED: if (ni->ni_mllid != 
meshpeer->peer_llinkid) { args[0] = ni->ni_mllid; args[1] = ni->ni_mlpid; args[2] = IEEE80211_REASON_PEER_LINK_CANCELED; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); mesh_linkchange(ni, IEEE80211_NODE_MESH_HOLDING); mesh_peer_timeout_setup(ni); break; } args[0] = ni->ni_mlpid; args[1] = ni->ni_mllid; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CONFIRM, args); break; case IEEE80211_NODE_MESH_HOLDING: args[0] = ni->ni_mlpid; args[1] = meshpeer->peer_llinkid; args[2] = IEEE80211_REASON_MESH_MAX_RETRIES; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); break; } return 0; } static int mesh_recv_action_meshpeering_confirm(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_meshpeer_ie ie; const struct ieee80211_meshpeer_ie *meshpeer; uint16_t args[3]; /* +2+2+2+2 for action + code + capabilites + status code + AID */ meshpeer = mesh_parse_meshpeering_action(ni, wh, frm+2+2+2+2, efrm, &ie, IEEE80211_ACTION_MESHPEERING_CONFIRM); if (meshpeer == NULL) { return 0; } IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, ni, "recv PEER CONFIRM, local id 0x%x, peer id 0x%x", meshpeer->peer_llinkid, meshpeer->peer_linkid); switch (ni->ni_mlstate) { case IEEE80211_NODE_MESH_OPENRCV: mesh_linkchange(ni, IEEE80211_NODE_MESH_ESTABLISHED); mesh_peer_timeout_stop(ni); break; case IEEE80211_NODE_MESH_OPENSNT: mesh_linkchange(ni, IEEE80211_NODE_MESH_CONFIRMRCV); break; case IEEE80211_NODE_MESH_HOLDING: args[0] = ni->ni_mlpid; args[1] = meshpeer->peer_llinkid; args[2] = IEEE80211_REASON_MESH_MAX_RETRIES; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); break; case IEEE80211_NODE_MESH_CONFIRMRCV: if (ni->ni_mllid != meshpeer->peer_llinkid) { args[0] = ni->ni_mlpid; 
args[1] = ni->ni_mllid; args[2] = IEEE80211_REASON_PEER_LINK_CANCELED; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); mesh_linkchange(ni, IEEE80211_NODE_MESH_HOLDING); mesh_peer_timeout_setup(ni); } break; default: IEEE80211_DISCARD(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, wh, NULL, "received confirm in invalid state %d", ni->ni_mlstate); vap->iv_stats.is_rx_mgtdiscard++; break; } return 0; } static int mesh_recv_action_meshpeering_close(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { uint16_t args[3]; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, ni, "%s", "recv PEER CLOSE"); switch (ni->ni_mlstate) { case IEEE80211_NODE_MESH_IDLE: /* ignore */ break; case IEEE80211_NODE_MESH_OPENRCV: case IEEE80211_NODE_MESH_OPENSNT: case IEEE80211_NODE_MESH_CONFIRMRCV: case IEEE80211_NODE_MESH_ESTABLISHED: args[0] = ni->ni_mlpid; args[1] = ni->ni_mllid; args[2] = IEEE80211_REASON_MESH_CLOSE_RCVD; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); mesh_linkchange(ni, IEEE80211_NODE_MESH_HOLDING); mesh_peer_timeout_setup(ni); break; case IEEE80211_NODE_MESH_HOLDING: mesh_linkchange(ni, IEEE80211_NODE_MESH_IDLE); mesh_peer_timeout_setup(ni); break; } return 0; } /* * Link Metric handling. 
*/ static int mesh_recv_action_meshlmetric_req(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { uint32_t metric; metric = mesh_airtime_calc(ni); ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHLMETRIC, IEEE80211_ACTION_MESHLMETRIC_REP, &metric); return 0; } static int mesh_recv_action_meshlmetric_rep(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { return 0; } static int mesh_send_action(struct ieee80211_node *ni, struct mbuf *m) { struct ieee80211_bpf_params params; memset(¶ms, 0, sizeof(params)); params.ibp_pri = WME_AC_VO; params.ibp_rate0 = ni->ni_txparms->mgmtrate; /* XXX ucast/mcast */ params.ibp_try0 = ni->ni_txparms->maxretry; params.ibp_power = ni->ni_txpower; return ieee80211_mgmt_output(ni, m, IEEE80211_FC0_SUBTYPE_ACTION, ¶ms); } #define ADDSHORT(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = (v) >> 8; \ frm += 2; \ } while (0) #define ADDWORD(frm, v) do { \ frm[0] = (v) & 0xff; \ frm[1] = ((v) >> 8) & 0xff; \ frm[2] = ((v) >> 16) & 0xff; \ frm[3] = ((v) >> 24) & 0xff; \ frm += 4; \ } while (0) static int mesh_send_action_meshpeering_open(struct ieee80211_node *ni, int category, int action, void *args0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = args0; const struct ieee80211_rateset *rs; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, ni, "send PEER OPEN action: localid 0x%x", args[0]); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ + sizeof(uint16_t) /* capabilites */ + 2 + IEEE80211_RATE_SIZE + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + 2 + IEEE80211_MESHID_LEN + 
sizeof(struct ieee80211_meshconf_ie) + sizeof(struct ieee80211_meshpeer_ie) ); if (m != NULL) { /* * mesh peer open action frame format: * [1] category * [1] action * [2] capabilities * [tlv] rates * [tlv] xrates * [tlv] mesh id * [tlv] mesh conf * [tlv] mesh peer link mgmt */ *frm++ = category; *frm++ = action; ADDSHORT(frm, ieee80211_getcapinfo(vap, ni->ni_chan)); rs = ieee80211_get_suprates(ic, ic->ic_curchan); frm = ieee80211_add_rates(frm, rs); frm = ieee80211_add_xrates(frm, rs); frm = ieee80211_add_meshid(frm, vap); frm = ieee80211_add_meshconf(frm, vap); frm = ieee80211_add_meshpeer(frm, IEEE80211_MESH_PEER_LINK_OPEN, args[0], 0, 0); m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return mesh_send_action(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int mesh_send_action_meshpeering_confirm(struct ieee80211_node *ni, int category, int action, void *args0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = args0; const struct ieee80211_rateset *rs; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, ni, "send PEER CONFIRM action: localid 0x%x, peerid 0x%x", args[0], args[1]); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ + sizeof(uint16_t) /* capabilites */ + sizeof(uint16_t) /* status code */ + sizeof(uint16_t) /* AID */ + 2 + IEEE80211_RATE_SIZE + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + 2 + IEEE80211_MESHID_LEN + sizeof(struct ieee80211_meshconf_ie) + sizeof(struct ieee80211_meshpeer_ie) ); if (m != NULL) { /* * mesh peer confirm action frame format: * [1] category * [1] action * [2] capabilities * [2] status code * [2] association 
id (peer ID) * [tlv] rates * [tlv] xrates * [tlv] mesh id * [tlv] mesh conf * [tlv] mesh peer link mgmt */ *frm++ = category; *frm++ = action; ADDSHORT(frm, ieee80211_getcapinfo(vap, ni->ni_chan)); ADDSHORT(frm, 0); /* status code */ ADDSHORT(frm, args[1]); /* AID */ rs = ieee80211_get_suprates(ic, ic->ic_curchan); frm = ieee80211_add_rates(frm, rs); frm = ieee80211_add_xrates(frm, rs); frm = ieee80211_add_meshid(frm, vap); frm = ieee80211_add_meshconf(frm, vap); frm = ieee80211_add_meshpeer(frm, IEEE80211_MESH_PEER_LINK_CONFIRM, args[0], args[1], 0); m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return mesh_send_action(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int mesh_send_action_meshpeering_close(struct ieee80211_node *ni, int category, int action, void *args0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint16_t *args = args0; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, ni, "send PEER CLOSE action: localid 0x%x, peerid 0x%x reason %d", args[0], args[1], args[2]); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ + sizeof(uint16_t) /* reason code */ + 2 + IEEE80211_MESHID_LEN + sizeof(struct ieee80211_meshpeer_ie) ); if (m != NULL) { /* * mesh peer close action frame format: * [1] category * [1] action * [2] reason code * [tlv] mesh id * [tlv] mesh peer link mgmt */ *frm++ = category; *frm++ = action; ADDSHORT(frm, args[2]); /* reason code */ frm = ieee80211_add_meshid(frm, vap); frm = ieee80211_add_meshpeer(frm, IEEE80211_MESH_PEER_LINK_CLOSE, args[0], args[1], args[2]); m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return 
mesh_send_action(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int mesh_send_action_meshlink_request(struct ieee80211_node *ni, int category, int action, void *arg0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, ni, "%s", "send LINK METRIC REQUEST action"); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ ); if (m != NULL) { /* * mesh link metric request * [1] category * [1] action */ *frm++ = category; *frm++ = action; m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); return mesh_send_action(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static int mesh_send_action_meshlink_reply(struct ieee80211_node *ni, int category, int action, void *args0) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; uint32_t *metric = args0; struct mbuf *m; uint8_t *frm; IEEE80211_NOTE(vap, IEEE80211_MSG_ACTION | IEEE80211_MSG_MESH, ni, "send LINK METRIC REPLY action: metric 0x%x", *metric); IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) /* action+category */ + sizeof(struct ieee80211_meshlmetric_ie) ); if (m != NULL) { /* * mesh link metric reply * [1] category * [1] action * [tlv] mesh link metric */ *frm++ = category; *frm++ = action; frm = ieee80211_add_meshlmetric(frm, *metric); m->m_pkthdr.len = m->m_len = frm - 
mtod(m, uint8_t *); return mesh_send_action(ni, m); } else { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); return ENOMEM; } } static void mesh_peer_timeout_setup(struct ieee80211_node *ni) { switch (ni->ni_mlstate) { case IEEE80211_NODE_MESH_HOLDING: ni->ni_mltval = ieee80211_mesh_holdingtimeout; break; case IEEE80211_NODE_MESH_CONFIRMRCV: ni->ni_mltval = ieee80211_mesh_confirmtimeout; break; case IEEE80211_NODE_MESH_IDLE: ni->ni_mltval = 0; break; default: ni->ni_mltval = ieee80211_mesh_retrytimeout; break; } if (ni->ni_mltval) callout_reset(&ni->ni_mltimer, ni->ni_mltval, mesh_peer_timeout_cb, ni); } /* * Same as above but backoffs timer statisically 50%. */ static void mesh_peer_timeout_backoff(struct ieee80211_node *ni) { uint32_t r; r = arc4random(); ni->ni_mltval += r % ni->ni_mltval; callout_reset(&ni->ni_mltimer, ni->ni_mltval, mesh_peer_timeout_cb, ni); } static __inline void mesh_peer_timeout_stop(struct ieee80211_node *ni) { callout_drain(&ni->ni_mltimer); } /* * Mesh Peer Link Management FSM timeout handling. 
*/ static void mesh_peer_timeout_cb(void *arg) { struct ieee80211_node *ni = (struct ieee80211_node *)arg; uint16_t args[3]; IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_MESH, ni, "mesh link timeout, state %d, retry counter %d", ni->ni_mlstate, ni->ni_mlrcnt); switch (ni->ni_mlstate) { case IEEE80211_NODE_MESH_IDLE: case IEEE80211_NODE_MESH_ESTABLISHED: break; case IEEE80211_NODE_MESH_OPENSNT: case IEEE80211_NODE_MESH_OPENRCV: if (ni->ni_mlrcnt == ieee80211_mesh_maxretries) { args[0] = ni->ni_mlpid; args[2] = IEEE80211_REASON_MESH_MAX_RETRIES; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); ni->ni_mlrcnt = 0; mesh_linkchange(ni, IEEE80211_NODE_MESH_HOLDING); mesh_peer_timeout_setup(ni); } else { args[0] = ni->ni_mlpid; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_OPEN, args); ni->ni_mlrcnt++; mesh_peer_timeout_backoff(ni); } break; case IEEE80211_NODE_MESH_CONFIRMRCV: if (ni->ni_mlrcnt == ieee80211_mesh_maxretries) { args[0] = ni->ni_mlpid; args[2] = IEEE80211_REASON_MESH_CONFIRM_TIMEOUT; ieee80211_send_action(ni, IEEE80211_ACTION_CAT_MESHPEERING, IEEE80211_ACTION_MESHPEERING_CLOSE, args); ni->ni_mlrcnt = 0; mesh_linkchange(ni, IEEE80211_NODE_MESH_HOLDING); mesh_peer_timeout_setup(ni); } else { ni->ni_mlrcnt++; mesh_peer_timeout_setup(ni); } break; case IEEE80211_NODE_MESH_HOLDING: mesh_linkchange(ni, IEEE80211_NODE_MESH_IDLE); break; } } static int mesh_verify_meshid(struct ieee80211vap *vap, const uint8_t *ie) { struct ieee80211_mesh_state *ms = vap->iv_mesh; if (ie == NULL || ie[1] != ms->ms_idlen) return 1; return memcmp(ms->ms_id, ie + 2, ms->ms_idlen); } /* * Check if we are using the same algorithms for this mesh. 
*/ static int mesh_verify_meshconf(struct ieee80211vap *vap, const uint8_t *ie) { const struct ieee80211_meshconf_ie *meshconf = (const struct ieee80211_meshconf_ie *) ie; const struct ieee80211_mesh_state *ms = vap->iv_mesh; uint16_t cap; if (meshconf == NULL) return 1; if (meshconf->conf_pselid != ms->ms_ppath->mpp_ie) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_MESH, "unknown path selection algorithm: 0x%x\n", meshconf->conf_pselid); return 1; } if (meshconf->conf_pmetid != ms->ms_pmetric->mpm_ie) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_MESH, "unknown path metric algorithm: 0x%x\n", meshconf->conf_pmetid); return 1; } if (meshconf->conf_ccid != 0) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_MESH, "unknown congestion control algorithm: 0x%x\n", meshconf->conf_ccid); return 1; } if (meshconf->conf_syncid != IEEE80211_MESHCONF_SYNC_NEIGHOFF) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_MESH, "unknown sync algorithm: 0x%x\n", meshconf->conf_syncid); return 1; } if (meshconf->conf_authid != 0) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_MESH, "unknown auth auth algorithm: 0x%x\n", meshconf->conf_pselid); return 1; } /* NB: conf_cap is only read correctly here */ cap = LE_READ_2(&meshconf->conf_cap); /* Not accepting peers */ if (!(cap & IEEE80211_MESHCONF_CAP_AP)) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_MESH, "not accepting peers: 0x%x\n", meshconf->conf_cap); return 1; } return 0; } static int mesh_verify_meshpeer(struct ieee80211vap *vap, uint8_t subtype, const uint8_t *ie) { const struct ieee80211_meshpeer_ie *meshpeer = (const struct ieee80211_meshpeer_ie *) ie; if (meshpeer == NULL || meshpeer->peer_len < 6 || meshpeer->peer_len > 10) return 1; switch (subtype) { case IEEE80211_MESH_PEER_LINK_OPEN: if (meshpeer->peer_len != 6) return 1; break; case IEEE80211_MESH_PEER_LINK_CONFIRM: if (meshpeer->peer_len != 8) return 1; break; case IEEE80211_MESH_PEER_LINK_CLOSE: if (meshpeer->peer_len < 8) return 1; if (meshpeer->peer_len == 8 && meshpeer->peer_linkid != 0) return 1; if 
(meshpeer->peer_rcode == 0) return 1; break; } return 0; } /* * Add a Mesh ID IE to a frame. */ uint8_t * ieee80211_add_meshid(uint8_t *frm, struct ieee80211vap *vap) { struct ieee80211_mesh_state *ms = vap->iv_mesh; KASSERT(vap->iv_opmode == IEEE80211_M_MBSS, ("not a mbss vap")); *frm++ = IEEE80211_ELEMID_MESHID; *frm++ = ms->ms_idlen; memcpy(frm, ms->ms_id, ms->ms_idlen); return frm + ms->ms_idlen; } /* * Add a Mesh Configuration IE to a frame. * For now just use HWMP routing, Airtime link metric, Null Congestion * Signaling, Null Sync Protocol and Null Authentication. */ uint8_t * ieee80211_add_meshconf(uint8_t *frm, struct ieee80211vap *vap) { const struct ieee80211_mesh_state *ms = vap->iv_mesh; uint16_t caps; KASSERT(vap->iv_opmode == IEEE80211_M_MBSS, ("not a MBSS vap")); *frm++ = IEEE80211_ELEMID_MESHCONF; *frm++ = sizeof(struct ieee80211_meshconf_ie) - 2; *frm++ = ms->ms_ppath->mpp_ie; /* path selection */ *frm++ = ms->ms_pmetric->mpm_ie; /* link metric */ *frm++ = IEEE80211_MESHCONF_CC_DISABLED; *frm++ = IEEE80211_MESHCONF_SYNC_NEIGHOFF; *frm++ = IEEE80211_MESHCONF_AUTH_DISABLED; /* NB: set the number of neighbors before the rest */ *frm = (ms->ms_neighbors > 15 ? 15 : ms->ms_neighbors) << 1; if (ms->ms_flags & IEEE80211_MESHFLAGS_PORTAL) *frm |= IEEE80211_MESHCONF_FORM_MP; frm += 1; caps = 0; if (ms->ms_flags & IEEE80211_MESHFLAGS_AP) caps |= IEEE80211_MESHCONF_CAP_AP; if (ms->ms_flags & IEEE80211_MESHFLAGS_FWD) caps |= IEEE80211_MESHCONF_CAP_FWRD; ADDSHORT(frm, caps); return frm; } /* * Add a Mesh Peer Management IE to a frame. 
*/
/*
 * The element always starts with the element id.  For the three known
 * subtypes it continues with a length byte, the 4-byte peering
 * protocol id and the local link ID; CONFIRM adds the peer link ID,
 * CLOSE adds the peer link ID (only when non-zero) and the reason
 * code.  Returns a pointer just past the bytes written.
 */
uint8_t *
ieee80211_add_meshpeer(uint8_t *frm, uint8_t subtype, uint16_t localid,
    uint16_t peerid, uint16_t reason)
{
	/* XXX change for AH */
	static const uint8_t meshpeerproto[4] = IEEE80211_MESH_PEER_PROTO;

	KASSERT(localid != 0, ("localid == 0"));

	frm[0] = IEEE80211_ELEMID_MESHPEER;
	frm += 1;
	switch (subtype) {
	case IEEE80211_MESH_PEER_LINK_OPEN:
		/* length: protocol id + local ID */
		frm[0] = 6;
		memcpy(&frm[1], meshpeerproto, 4);
		frm += 1 + 4;
		ADDSHORT(frm, localid);		/* local ID */
		break;
	case IEEE80211_MESH_PEER_LINK_CONFIRM:
		KASSERT(peerid != 0, ("sending peer confirm without peer id"));
		/* length: protocol id + local ID + peer ID */
		frm[0] = 8;
		memcpy(&frm[1], meshpeerproto, 4);
		frm += 1 + 4;
		ADDSHORT(frm, localid);		/* local ID */
		ADDSHORT(frm, peerid);		/* peer ID */
		break;
	case IEEE80211_MESH_PEER_LINK_CLOSE:
		/* length depends on whether the peer ID is included */
		frm[0] = (peerid != 0) ? 10 : 8;
		memcpy(&frm[1], meshpeerproto, 4);
		frm += 1 + 4;
		ADDSHORT(frm, localid);		/* local ID */
		if (peerid != 0) {
			ADDSHORT(frm, peerid);	/* peer ID */
		}
		ADDSHORT(frm, reason);
		break;
	}
	return frm;
}

/*
 * Compute an Airtime Link Metric for the link with this node.
 *
 * Based on Draft 3.0 spec (11B.10, p.149).
 */
/*
 * Max 802.11s overhead.
*/ #define IEEE80211_MESH_MAXOVERHEAD \ (sizeof(struct ieee80211_qosframe_addr4) \ + sizeof(struct ieee80211_meshcntl_ae11) \ + sizeof(struct llc) \ + IEEE80211_ADDR_LEN \ + IEEE80211_WEP_IVLEN \ + IEEE80211_WEP_KIDLEN \ + IEEE80211_WEP_CRCLEN \ + IEEE80211_WEP_MICLEN \ + IEEE80211_CRC_LEN) uint32_t mesh_airtime_calc(struct ieee80211_node *ni) { #define M_BITS 8 #define S_FACTOR (2 * M_BITS) struct ieee80211com *ic = ni->ni_ic; struct ifnet *ifp = ni->ni_vap->iv_ifp; const static int nbits = 8192 << M_BITS; uint32_t overhead, rate, errrate; uint64_t res; /* Time to transmit a frame */ rate = ni->ni_txrate; overhead = ieee80211_compute_duration(ic->ic_rt, ifp->if_mtu + IEEE80211_MESH_MAXOVERHEAD, rate, 0) << M_BITS; /* Error rate in percentage */ /* XXX assuming small failures are ok */ errrate = (((ifp->if_oerrors + ifp->if_ierrors) / 100) << M_BITS) / 100; res = (overhead + (nbits / rate)) * ((1 << S_FACTOR) / ((1 << M_BITS) - errrate)); return (uint32_t)(res >> S_FACTOR); #undef M_BITS #undef S_FACTOR } /* * Add a Mesh Link Metric report IE to a frame. */ uint8_t * ieee80211_add_meshlmetric(uint8_t *frm, uint32_t metric) { *frm++ = IEEE80211_ELEMID_MESHLINK; *frm++ = 4; ADDWORD(frm, metric); return frm; } #undef ADDSHORT #undef ADDWORD /* * Initialize any mesh-specific node state. */ void ieee80211_mesh_node_init(struct ieee80211vap *vap, struct ieee80211_node *ni) { ni->ni_flags |= IEEE80211_NODE_QOS; callout_init(&ni->ni_mltimer, CALLOUT_MPSAFE); } /* * Cleanup any mesh-specific node state. 
*/ void ieee80211_mesh_node_cleanup(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_mesh_state *ms = vap->iv_mesh; callout_drain(&ni->ni_mltimer); /* NB: short-circuit callbacks after mesh_vdetach */ if (vap->iv_mesh != NULL) ms->ms_ppath->mpp_peerdown(ni); } void ieee80211_parse_meshid(struct ieee80211_node *ni, const uint8_t *ie) { ni->ni_meshidlen = ie[1]; memcpy(ni->ni_meshid, ie + 2, ie[1]); } /* * Setup mesh-specific node state on neighbor discovery. */ void ieee80211_mesh_init_neighbor(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const struct ieee80211_scanparams *sp) { ieee80211_parse_meshid(ni, sp->meshid); } void ieee80211_mesh_update_beacon(struct ieee80211vap *vap, struct ieee80211_beacon_offsets *bo) { KASSERT(vap->iv_opmode == IEEE80211_M_MBSS, ("not a MBSS vap")); if (isset(bo->bo_flags, IEEE80211_BEACON_MESHCONF)) { (void)ieee80211_add_meshconf(bo->bo_meshconf, vap); clrbit(bo->bo_flags, IEEE80211_BEACON_MESHCONF); } } static int mesh_ioctl_get80211(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_mesh_state *ms = vap->iv_mesh; uint8_t tmpmeshid[IEEE80211_NWID_LEN]; struct ieee80211_mesh_route *rt; struct ieee80211req_mesh_route *imr; size_t len, off; uint8_t *p; int error; if (vap->iv_opmode != IEEE80211_M_MBSS) return ENOSYS; error = 0; switch (ireq->i_type) { case IEEE80211_IOC_MESH_ID: ireq->i_len = ms->ms_idlen; memcpy(tmpmeshid, ms->ms_id, ireq->i_len); error = copyout(tmpmeshid, ireq->i_data, ireq->i_len); break; case IEEE80211_IOC_MESH_AP: ireq->i_val = (ms->ms_flags & IEEE80211_MESHFLAGS_AP) != 0; break; case IEEE80211_IOC_MESH_FWRD: ireq->i_val = (ms->ms_flags & IEEE80211_MESHFLAGS_FWD) != 0; break; case IEEE80211_IOC_MESH_TTL: ireq->i_val = ms->ms_ttl; break; case IEEE80211_IOC_MESH_RTCMD: switch (ireq->i_val) { case IEEE80211_MESH_RTCMD_LIST: len = 0; MESH_RT_LOCK(ms); TAILQ_FOREACH(rt, &ms->ms_routes, rt_next) { len += sizeof(*imr); } MESH_RT_UNLOCK(ms); if (len 
> ireq->i_len || ireq->i_len < sizeof(*imr)) { ireq->i_len = len; return ENOMEM; } ireq->i_len = len; /* XXX M_WAIT? */ p = malloc(len, M_TEMP, M_NOWAIT | M_ZERO); if (p == NULL) return ENOMEM; off = 0; MESH_RT_LOCK(ms); TAILQ_FOREACH(rt, &ms->ms_routes, rt_next) { if (off >= len) break; imr = (struct ieee80211req_mesh_route *) (p + off); imr->imr_flags = rt->rt_flags; IEEE80211_ADDR_COPY(imr->imr_dest, rt->rt_dest); IEEE80211_ADDR_COPY(imr->imr_nexthop, rt->rt_nexthop); imr->imr_metric = rt->rt_metric; imr->imr_nhops = rt->rt_nhops; imr->imr_lifetime = rt->rt_lifetime; imr->imr_lastmseq = rt->rt_lastmseq; off += sizeof(*imr); } MESH_RT_UNLOCK(ms); error = copyout(p, (uint8_t *)ireq->i_data, ireq->i_len); free(p, M_TEMP); break; case IEEE80211_MESH_RTCMD_FLUSH: case IEEE80211_MESH_RTCMD_ADD: case IEEE80211_MESH_RTCMD_DELETE: return EINVAL; default: return ENOSYS; } break; case IEEE80211_IOC_MESH_PR_METRIC: len = strlen(ms->ms_pmetric->mpm_descr); if (ireq->i_len < len) return EINVAL; ireq->i_len = len; error = copyout(ms->ms_pmetric->mpm_descr, (uint8_t *)ireq->i_data, len); break; case IEEE80211_IOC_MESH_PR_PATH: len = strlen(ms->ms_ppath->mpp_descr); if (ireq->i_len < len) return EINVAL; ireq->i_len = len; error = copyout(ms->ms_ppath->mpp_descr, (uint8_t *)ireq->i_data, len); break; default: return ENOSYS; } return error; } IEEE80211_IOCTL_GET(mesh, mesh_ioctl_get80211); static int mesh_ioctl_set80211(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_mesh_state *ms = vap->iv_mesh; uint8_t tmpmeshid[IEEE80211_NWID_LEN]; uint8_t tmpaddr[IEEE80211_ADDR_LEN]; char tmpproto[IEEE80211_MESH_PROTO_DSZ]; int error; if (vap->iv_opmode != IEEE80211_M_MBSS) return ENOSYS; error = 0; switch (ireq->i_type) { case IEEE80211_IOC_MESH_ID: if (ireq->i_val != 0 || ireq->i_len > IEEE80211_MESHID_LEN) return EINVAL; error = copyin(ireq->i_data, tmpmeshid, ireq->i_len); if (error != 0) break; memset(ms->ms_id, 0, IEEE80211_NWID_LEN); ms->ms_idlen = ireq->i_len; 
memcpy(ms->ms_id, tmpmeshid, ireq->i_len); error = ENETRESET; break; case IEEE80211_IOC_MESH_AP: if (ireq->i_val) ms->ms_flags |= IEEE80211_MESHFLAGS_AP; else ms->ms_flags &= ~IEEE80211_MESHFLAGS_AP; error = ENETRESET; break; case IEEE80211_IOC_MESH_FWRD: if (ireq->i_val) ms->ms_flags |= IEEE80211_MESHFLAGS_FWD; else ms->ms_flags &= ~IEEE80211_MESHFLAGS_FWD; break; case IEEE80211_IOC_MESH_TTL: ms->ms_ttl = (uint8_t) ireq->i_val; break; case IEEE80211_IOC_MESH_RTCMD: switch (ireq->i_val) { case IEEE80211_MESH_RTCMD_LIST: return EINVAL; case IEEE80211_MESH_RTCMD_FLUSH: ieee80211_mesh_rt_flush(vap); break; case IEEE80211_MESH_RTCMD_ADD: if (IEEE80211_ADDR_EQ(vap->iv_myaddr, ireq->i_data) || IEEE80211_ADDR_EQ(broadcastaddr, ireq->i_data)) return EINVAL; error = copyin(ireq->i_data, &tmpaddr, IEEE80211_ADDR_LEN); if (error == 0) ieee80211_mesh_discover(vap, tmpaddr, NULL); break; case IEEE80211_MESH_RTCMD_DELETE: ieee80211_mesh_rt_del(vap, ireq->i_data); break; default: return ENOSYS; } break; case IEEE80211_IOC_MESH_PR_METRIC: error = copyin(ireq->i_data, tmpproto, sizeof(tmpproto)); if (error == 0) { error = mesh_select_proto_metric(vap, tmpproto); if (error == 0) error = ENETRESET; } break; case IEEE80211_IOC_MESH_PR_PATH: error = copyin(ireq->i_data, tmpproto, sizeof(tmpproto)); if (error == 0) { error = mesh_select_proto_path(vap, tmpproto); if (error == 0) error = ENETRESET; } break; default: return ENOSYS; } return error; } IEEE80211_IOCTL_SET(mesh, mesh_ioctl_set80211); Index: stable/9/sys/net80211/ieee80211_superg.c =================================================================== --- stable/9/sys/net80211/ieee80211_superg.c (revision 273911) +++ stable/9/sys/net80211/ieee80211_superg.c (revision 273912) @@ -1,900 +1,900 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Atheros fast-frame encapsulation format. 
* FF max payload:
 * 802.2 + FFHDR + HPAD + 802.3 + 802.2 + 1500 + SPAD + 802.3 + 802.2 + 1500:
 * 8 + 4 + 4 + 14 + 8 + 1500 + 6 + 14 + 8 + 1500
 * = 3066
 */
/* fast frame header is 32-bits */
/* NB: each ATH_FF_x field mask is paired with ATH_FF_x_S, its shift count */
#define ATH_FF_PROTO 0x0000003f /* protocol */
#define ATH_FF_PROTO_S 0
#define ATH_FF_FTYPE 0x000000c0 /* frame type */
#define ATH_FF_FTYPE_S 6
#define ATH_FF_HLEN32 0x00000300 /* optional hdr length */
#define ATH_FF_HLEN32_S 8
#define ATH_FF_SEQNUM 0x001ffc00 /* sequence number */
#define ATH_FF_SEQNUM_S 10
#define ATH_FF_OFFSET 0xffe00000 /* offset to 2nd payload */
#define ATH_FF_OFFSET_S 21

#define ATH_FF_MAX_HDR_PAD 4
#define ATH_FF_MAX_SEP_PAD 6
#define ATH_FF_MAX_HDR 30

#define ATH_FF_PROTO_L2TUNNEL 0 /* L2 tunnel protocol */
#define ATH_FF_ETH_TYPE 0x88bd /* Ether type for encapsulated frames */
#define ATH_FF_SNAP_ORGCODE_0 0x00
#define ATH_FF_SNAP_ORGCODE_1 0x03
#define ATH_FF_SNAP_ORGCODE_2 0x7f

#define ATH_FF_TXQMIN 2 /* min txq depth for staging */
#define ATH_FF_TXQMAX 50 /* maximum # of queued frames allowed */
#define ATH_FF_STAGEMAX 5 /* max waiting period for staged frame*/

/* Copy a complete struct ether_header from src to dst. */
#define ETHER_HEADER_COPY(dst, src) \
	memcpy(dst, src, sizeof(struct ether_header))

static int ieee80211_ffppsmin = 2; /* pps threshold for ff aggregation */
-SYSCTL_INT(_net_wlan, OID_AUTO, ffppsmin, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_INT(_net_wlan, OID_AUTO, ffppsmin, CTLFLAG_RW,
	&ieee80211_ffppsmin, 0, "min packet rate before fast-frame staging");
/* NB: -1 until the first ieee80211_superg_attach() stores a tick count */
static int ieee80211_ffagemax = -1; /* max time frames held on stage q */
SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax, CTLTYPE_INT | CTLFLAG_RW,
	&ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I",
	"max hold time for fast-frame staging (ms)");

/*
 * Device attach: when the device advertises IEEE80211_C_FF allocate
 * the zeroed SuperG state block and hang it off ic_superg.  If the
 * allocation fails a message is printed and we return early, leaving
 * ic_superg untouched and ieee80211_ffagemax unchanged.  Otherwise
 * the global staging age limit is set to 150ms worth of ticks.
 */
void
ieee80211_superg_attach(struct ieee80211com *ic)
{
	struct ieee80211_superg *sg;

	if (ic->ic_caps & IEEE80211_C_FF) {
		sg = (struct ieee80211_superg *) malloc(
		     sizeof(struct ieee80211_superg), M_80211_VAP,
		     M_NOWAIT | M_ZERO);
		if (sg == NULL) {
			printf("%s: cannot allocate SuperG state block\n",
			    __func__);
			return;
		}
		ic->ic_superg = sg;
	}
	/* NB: global, so every attach resets any sysctl-supplied value */
	ieee80211_ffagemax = msecs_to_ticks(150);
}

/*
 * Device detach: release the SuperG state block, if any, and clear
 * the reference so a repeated call is harmless.
 */
void
ieee80211_superg_detach(struct ieee80211com *ic)
{
	if (ic->ic_superg != NULL) {
		free(ic->ic_superg, M_80211_VAP);
		ic->ic_superg = NULL;
	}
}

/*
 * Vap attach: derive the vap's fast-frame and dynamic turbo flags
 * from its capabilities.  Fast-frames are disabled when the device
 * has no SuperG state block.
 */
void
ieee80211_superg_vattach(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;

	if (ic->ic_superg == NULL) /* NB: can't do fast-frames w/o state */
		vap->iv_caps &= ~IEEE80211_C_FF;
	if (vap->iv_caps & IEEE80211_C_FF)
		vap->iv_flags |= IEEE80211_F_FF;
	/* NB: we only implement sta mode */
	if (vap->iv_opmode == IEEE80211_M_STA &&
	    (vap->iv_caps & IEEE80211_C_TURBOP))
		vap->iv_flags |= IEEE80211_F_TURBOP;
}

/*
 * Vap detach: nothing to undo; vattach only sets flag bits.
 */
void
ieee80211_superg_vdetach(struct ieee80211vap *vap)
{
}

#define ATH_OUI_BYTES 0x00, 0x03, 0x7f
/*
 * Add an Atheros information element to a frame.
 *
 * A constant template is copied to frm and then the capability byte
 * and the default key index are filled in.  The key index is stored
 * low byte first; IEEE80211_KEYIX_NONE is encoded as the byte pair
 * 0xff, 0x7f.  Returns a pointer just past the element.
 */
uint8_t *
ieee80211_add_ath(uint8_t *frm, uint8_t caps, ieee80211_keyix defkeyix)
{
	static const struct ieee80211_ath_ie info = {
		.ath_id = IEEE80211_ELEMID_VENDOR,
		/* NB: length excludes the 2 leading bytes of the element */
		.ath_len = sizeof(struct ieee80211_ath_ie) - 2,
		.ath_oui = { ATH_OUI_BYTES },
		.ath_oui_type = ATH_OUI_TYPE,
		.ath_oui_subtype= ATH_OUI_SUBTYPE,
		.ath_version = ATH_OUI_VERSION,
	};
	struct ieee80211_ath_ie *ath = (struct ieee80211_ath_ie *) frm;

	memcpy(frm, &info, sizeof(info));
	ath->ath_capability = caps;
	if (defkeyix != IEEE80211_KEYIX_NONE) {
		ath->ath_defkeyix[0] = (defkeyix & 0xff);
		ath->ath_defkeyix[1] = ((defkeyix >> 8) & 0xff);
	} else {
		ath->ath_defkeyix[0] = 0xff;
		ath->ath_defkeyix[1] = 0x7f;
	}
	return frm + sizeof(info);
}
#undef ATH_OUI_BYTES

/*
 * Add the Atheros element for the given bss.  The capability byte is
 * the vap's IEEE80211_F_ATHEROS flag bits.  The vap's default tx key
 * is advertised only when WPA is off and the bss does not use 802.1x
 * authentication; otherwise IEEE80211_KEYIX_NONE is sent.
 */
uint8_t *
ieee80211_add_athcaps(uint8_t *frm, const struct ieee80211_node *bss)
{
	const struct ieee80211vap *vap = bss->ni_vap;

	return ieee80211_add_ath(frm,
	    vap->iv_flags & IEEE80211_F_ATHEROS,
	    ((vap->iv_flags & IEEE80211_F_WPA) == 0 &&
	     bss->ni_authmode != IEEE80211_AUTH_8021X) ?
	    vap->iv_def_txkey : IEEE80211_KEYIX_NONE);
}

/*
 * Record the capability byte and default key index of an Atheros
 * element in the node.  NB: no length check is done here.
 */
void
ieee80211_parse_ath(struct ieee80211_node *ni, uint8_t *ie)
{
	const struct ieee80211_ath_ie *ath =
		(const struct ieee80211_ath_ie *) ie;

	ni->ni_ath_flags = ath->ath_capability;
	ni->ni_ath_defkeyix = LE_READ_2(&ath->ath_defkeyix);
}

/*
 * Parse an Atheros element received in a frame and update the node
 * when the capabilities or the default key index differ from what is
 * recorded.  If turbo prime is in use, switch the bss channel into
 * or out of turbo mode according to the BOOST bit in the element.
 *
 * Returns -1 when the element is too short (it is discarded),
 * otherwise non-zero iff the capability byte changed.
 */
int
ieee80211_parse_athparams(struct ieee80211_node *ni, uint8_t *frm,
	const struct ieee80211_frame *wh)
{
	struct ieee80211vap *vap = ni->ni_vap;
	const struct ieee80211_ath_ie *ath;
	u_int len = frm[1];
	int capschanged;
	uint16_t defkeyix;

	if (len < sizeof(struct ieee80211_ath_ie)-2) {
		IEEE80211_DISCARD_IE(vap,
		    IEEE80211_MSG_ELEMID | IEEE80211_MSG_SUPERG,
		    wh, "Atheros", "too short, len %u", len);
		return -1;
	}
	ath = (const struct ieee80211_ath_ie *)frm;
	capschanged = (ni->ni_ath_flags != ath->ath_capability);
	defkeyix = LE_READ_2(ath->ath_defkeyix);
	if (capschanged || defkeyix != ni->ni_ath_defkeyix) {
		ni->ni_ath_flags = ath->ath_capability;
		ni->ni_ath_defkeyix = defkeyix;
		IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni,
		    "ath ie change: new caps 0x%x defkeyix 0x%x",
		    ni->ni_ath_flags, ni->ni_ath_defkeyix);
	}
	if (IEEE80211_ATH_CAP(vap, ni, ATHEROS_CAP_TURBO_PRIME)) {
		uint16_t curflags, newflags;

		/*
		 * Check for turbo mode switch. Calculate flags
		 * for the new mode and effect the switch.
		 */
		newflags = curflags = vap->iv_ic->ic_bsschan->ic_flags;
		/* NB: BOOST is not in ic_flags, so get it from the ie */
		if (ath->ath_capability & ATHEROS_CAP_BOOST)
			newflags |= IEEE80211_CHAN_TURBO;
		else
			newflags &= ~IEEE80211_CHAN_TURBO;
		if (newflags != curflags)
			ieee80211_dturbo_switch(vap, newflags);
	}
	return capschanged;
}

/*
 * Decap the encapsulated frame pair and dispatch the first
 * for delivery. The second frame is returned for delivery
 * via the normal path.
*/
/*
 * NB: return values:
 *   - m unchanged when the frame is shorter than 3*FF_LLC_SIZE or
 *     the SNAP ether type is not ATH_FF_ETH_TYPE (not a fast-frame);
 *   - NULL when the frame was consumed because of an error (pullup
 *     failure, unknown tunnel protocol, decap or split failure);
 *   - otherwise the decapsulated second frame; the first has already
 *     been handed to iv_deliver_data.
 */
struct mbuf *
ieee80211_ff_decap(struct ieee80211_node *ni, struct mbuf *m)
{
/* size of the Ethernet + LLC/SNAP header that fronts each payload */
#define FF_LLC_SIZE (sizeof(struct ether_header) + sizeof(struct llc))
/* extract field f from x using mask f and shift f##_S */
#define MS(x,f) (((x) & f) >> f##_S)
	struct ieee80211vap *vap = ni->ni_vap;
	struct llc *llc;
	uint32_t ath;
	struct mbuf *n;
	int framelen;

	/* NB: we assume caller does this check for us */
	KASSERT(IEEE80211_ATH_CAP(vap, ni, IEEE80211_NODE_FF),
	    ("ff not negotiated"));
	/*
	 * Check for fast-frame tunnel encapsulation.
	 */
	if (m->m_pkthdr.len < 3*FF_LLC_SIZE)
		return m;
	/* make the leading Ethernet+LLC header contiguous before peeking */
	if (m->m_len < FF_LLC_SIZE &&
	    (m = m_pullup(m, FF_LLC_SIZE)) == NULL) {
		IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
		    ni->ni_macaddr, "fast-frame",
		    "%s", "m_pullup(llc) failed");
		vap->iv_stats.is_rx_tooshort++;
		return NULL;
	}
	llc = (struct llc *)(mtod(m, uint8_t *) +
	    sizeof(struct ether_header));
	if (llc->llc_snap.ether_type != htons(ATH_FF_ETH_TYPE))
		return m;
	/* strip the tunnel Ethernet+LLC header, then read the ff header */
	m_adj(m, FF_LLC_SIZE);
	m_copydata(m, 0, sizeof(uint32_t), (caddr_t) &ath);
	if (MS(ath, ATH_FF_PROTO) != ATH_FF_PROTO_L2TUNNEL) {
		IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
		    ni->ni_macaddr, "fast-frame",
		    "unsupport tunnel protocol, header 0x%x", ath);
		vap->iv_stats.is_ff_badhdr++;
		m_freem(m);
		return NULL;
	}
	/* NB: skip header and alignment padding */
	m_adj(m, roundup(sizeof(uint32_t) - 2, 4) + 2);

	vap->iv_stats.is_ff_decap++;

	/*
	 * Decap the first frame, bust it apart from the
	 * second and deliver; then decap the second frame
	 * and return it to the caller for normal delivery.
	 */
	m = ieee80211_decap1(m, &framelen);
	if (m == NULL) {
		IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
		    ni->ni_macaddr, "fast-frame", "%s", "first decap failed");
		vap->iv_stats.is_ff_tooshort++;
		return NULL;
	}
	/* n takes everything past the first framelen bytes of m */
	n = m_split(m, framelen, M_NOWAIT);
	if (n == NULL) {
		IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
		    ni->ni_macaddr, "fast-frame",
		    "%s", "unable to split encapsulated frames");
		vap->iv_stats.is_ff_split++;
		m_freem(m); /* NB: must reclaim */
		return NULL;
	}
	/* XXX not right for WDS */
	vap->iv_deliver_data(vap, ni, m); /* 1st of pair */

	/*
	 * Decap second frame.
	 */
	m_adj(n, roundup2(framelen, 4) - framelen); /* padding */
	n = ieee80211_decap1(n, &framelen);
	if (n == NULL) {
		IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
		    ni->ni_macaddr, "fast-frame", "%s", "second decap failed");
		vap->iv_stats.is_ff_tooshort++;
	}
	/* XXX verify framelen against mbuf contents */
	return n; /* 2nd delivered by caller */
#undef MS
#undef FF_LLC_SIZE
}

/*
 * Do Ethernet-LLC encapsulation for each payload in a fast frame
 * tunnel encapsulation. The frame is assumed to have an Ethernet
 * header at the front that must be stripped before prepending the
 * LLC followed by the Ethernet header passed in (with an Ethernet
 * type that specifies the payload size).
*/ static struct mbuf * ff_encap1(struct ieee80211vap *vap, struct mbuf *m, const struct ether_header *eh) { struct llc *llc; uint16_t payload; /* XXX optimize by combining m_adj+M_PREPEND */ m_adj(m, sizeof(struct ether_header) - sizeof(struct llc)); llc = mtod(m, struct llc *); llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP; llc->llc_control = LLC_UI; llc->llc_snap.org_code[0] = 0; llc->llc_snap.org_code[1] = 0; llc->llc_snap.org_code[2] = 0; llc->llc_snap.ether_type = eh->ether_type; payload = m->m_pkthdr.len; /* NB: w/o Ethernet header */ M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT); if (m == NULL) { /* XXX cannot happen */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: no space for ether_header\n", __func__); vap->iv_stats.is_tx_nobuf++; return NULL; } ETHER_HEADER_COPY(mtod(m, void *), eh); mtod(m, struct ether_header *)->ether_type = htons(payload); return m; } /* * Fast frame encapsulation. There must be two packets * chained with m_nextpkt. We do header adjustment for * each, add the tunnel encapsulation, and then concatenate * the mbuf chains to form a single frame for transmission. */ struct mbuf * ieee80211_ff_encap(struct ieee80211vap *vap, struct mbuf *m1, int hdrspace, struct ieee80211_key *key) { struct mbuf *m2; struct ether_header eh1, eh2; struct llc *llc; struct mbuf *m; int pad; m2 = m1->m_nextpkt; if (m2 == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: only one frame\n", __func__); goto bad; } m1->m_nextpkt = NULL; /* * Include fast frame headers in adjusting header layout. */ KASSERT(m1->m_len >= sizeof(eh1), ("no ethernet header!")); ETHER_HEADER_COPY(&eh1, mtod(m1, caddr_t)); m1 = ieee80211_mbuf_adjust(vap, hdrspace + sizeof(struct llc) + sizeof(uint32_t) + 2 + sizeof(struct ether_header), key, m1); if (m1 == NULL) { /* NB: ieee80211_mbuf_adjust handles msgs+statistics */ m_freem(m2); goto bad; } /* * Copy second frame's Ethernet header out of line * and adjust for encapsulation headers. 
Note that * we make room for padding in case there isn't room * at the end of first frame. */ KASSERT(m2->m_len >= sizeof(eh2), ("no ethernet header!")); ETHER_HEADER_COPY(&eh2, mtod(m2, caddr_t)); m2 = ieee80211_mbuf_adjust(vap, ATH_FF_MAX_HDR_PAD + sizeof(struct ether_header), NULL, m2); if (m2 == NULL) { /* NB: ieee80211_mbuf_adjust handles msgs+statistics */ goto bad; } /* * Now do tunnel encapsulation. First, each * frame gets a standard encapsulation. */ m1 = ff_encap1(vap, m1, &eh1); if (m1 == NULL) goto bad; m2 = ff_encap1(vap, m2, &eh2); if (m2 == NULL) goto bad; /* * Pad leading frame to a 4-byte boundary. If there * is space at the end of the first frame, put it * there; otherwise prepend to the front of the second * frame. We know doing the second will always work * because we reserve space above. We prefer appending * as this typically has better DMA alignment properties. */ for (m = m1; m->m_next != NULL; m = m->m_next) ; pad = roundup2(m1->m_pkthdr.len, 4) - m1->m_pkthdr.len; if (pad) { if (M_TRAILINGSPACE(m) < pad) { /* prepend to second */ m2->m_data -= pad; m2->m_len += pad; m2->m_pkthdr.len += pad; } else { /* append to first */ m->m_len += pad; m1->m_pkthdr.len += pad; } } /* * Now, stick 'em together and prepend the tunnel headers; * first the Atheros tunnel header (all zero for now) and * then a special fast frame LLC. 
* * XXX optimize by prepending together */ m->m_next = m2; /* NB: last mbuf from above */ m1->m_pkthdr.len += m2->m_pkthdr.len; M_PREPEND(m1, sizeof(uint32_t)+2, M_DONTWAIT); if (m1 == NULL) { /* XXX cannot happen */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: no space for tunnel header\n", __func__); vap->iv_stats.is_tx_nobuf++; return NULL; } memset(mtod(m1, void *), 0, sizeof(uint32_t)+2); M_PREPEND(m1, sizeof(struct llc), M_DONTWAIT); if (m1 == NULL) { /* XXX cannot happen */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: no space for llc header\n", __func__); vap->iv_stats.is_tx_nobuf++; return NULL; } llc = mtod(m1, struct llc *); llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP; llc->llc_control = LLC_UI; llc->llc_snap.org_code[0] = ATH_FF_SNAP_ORGCODE_0; llc->llc_snap.org_code[1] = ATH_FF_SNAP_ORGCODE_1; llc->llc_snap.org_code[2] = ATH_FF_SNAP_ORGCODE_2; llc->llc_snap.ether_type = htons(ATH_FF_ETH_TYPE); vap->iv_stats.is_ff_encap++; return m1; bad: if (m1 != NULL) m_freem(m1); if (m2 != NULL) m_freem(m2); return NULL; } static void ff_transmit(struct ieee80211_node *ni, struct mbuf *m) { struct ieee80211vap *vap = ni->ni_vap; int error; /* encap and xmit */ m = ieee80211_encap(vap, ni, m); if (m != NULL) { struct ifnet *ifp = vap->iv_ifp; struct ifnet *parent = ni->ni_ic->ic_ifp; error = parent->if_transmit(parent, m); if (error != 0) { /* NB: IFQ_HANDOFF reclaims mbuf */ ieee80211_free_node(ni); } else { ifp->if_opackets++; } } else ieee80211_free_node(ni); } /* * Flush frames to device; note we re-use the linked list * the frames were stored on and use the sentinel (unchanged) * which may be non-NULL. 
*/ static void ff_flush(struct mbuf *head, struct mbuf *last) { struct mbuf *m, *next; struct ieee80211_node *ni; struct ieee80211vap *vap; for (m = head; m != last; m = next) { next = m->m_nextpkt; m->m_nextpkt = NULL; ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; vap = ni->ni_vap; IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni, "%s: flush frame, age %u", __func__, M_AGE_GET(m)); vap->iv_stats.is_ff_flush++; ff_transmit(ni, m); } } /* * Age frames on the staging queue. */ void ieee80211_ff_age(struct ieee80211com *ic, struct ieee80211_stageq *sq, int quanta) { struct ieee80211_superg *sg = ic->ic_superg; struct mbuf *m, *head; struct ieee80211_node *ni; struct ieee80211_tx_ampdu *tap; KASSERT(sq->head != NULL, ("stageq empty")); IEEE80211_LOCK(ic); head = sq->head; while ((m = sq->head) != NULL && M_AGE_GET(m) < quanta) { /* clear tap ref to frame */ ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; tap = &ni->ni_tx_ampdu[M_WME_GETAC(m)]; KASSERT(tap->txa_private == m, ("staging queue empty")); tap->txa_private = NULL; sq->head = m->m_nextpkt; sq->depth--; sg->ff_stageqdepth--; } if (m == NULL) sq->tail = NULL; else M_AGE_SUB(m, quanta); IEEE80211_UNLOCK(ic); ff_flush(head, m); } static void stageq_add(struct ieee80211_stageq *sq, struct mbuf *m) { int age = ieee80211_ffagemax; if (sq->tail != NULL) { sq->tail->m_nextpkt = m; age -= M_AGE_GET(sq->head); } else sq->head = m; KASSERT(age >= 0, ("age %d", age)); M_AGE_SET(m, age); m->m_nextpkt = NULL; sq->tail = m; sq->depth++; } static void stageq_remove(struct ieee80211_stageq *sq, struct mbuf *mstaged) { struct mbuf *m, *mprev; mprev = NULL; for (m = sq->head; m != NULL; m = m->m_nextpkt) { if (m == mstaged) { if (mprev == NULL) sq->head = m->m_nextpkt; else mprev->m_nextpkt = m->m_nextpkt; if (sq->tail == m) sq->tail = mprev; sq->depth--; return; } mprev = m; } printf("%s: packet not found\n", __func__); } static uint32_t ff_approx_txtime(struct ieee80211_node *ni, const struct mbuf *m1, const struct mbuf *m2) 
{ struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; uint32_t framelen; /* * Approximate the frame length to be transmitted. A swag to add * the following maximal values to the skb payload: * - 32: 802.11 encap + CRC * - 24: encryption overhead (if wep bit) * - 4 + 6: fast-frame header and padding * - 16: 2 LLC FF tunnel headers * - 14: 1 802.3 FF tunnel header (mbuf already accounts for 2nd) */ framelen = m1->m_pkthdr.len + 32 + ATH_FF_MAX_HDR_PAD + ATH_FF_MAX_SEP_PAD + ATH_FF_MAX_HDR; if (vap->iv_flags & IEEE80211_F_PRIVACY) framelen += 24; if (m2 != NULL) framelen += m2->m_pkthdr.len; return ieee80211_compute_duration(ic->ic_rt, framelen, ni->ni_txrate, 0); } /* * Check if the supplied frame can be partnered with an existing * or pending frame. Return a reference to any frame that should be * sent on return; otherwise return NULL. */ struct mbuf * ieee80211_ff_check(struct ieee80211_node *ni, struct mbuf *m) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_superg *sg = ic->ic_superg; const int pri = M_WME_GETAC(m); struct ieee80211_stageq *sq; struct ieee80211_tx_ampdu *tap; struct mbuf *mstaged; uint32_t txtime, limit; /* * Check if the supplied frame can be aggregated. * * NB: we allow EAPOL frames to be aggregated with other ucast traffic. * Do 802.1x EAPOL frames proceed in the clear? Then they couldn't * be aggregated with other types of frames when encryption is on? */ IEEE80211_LOCK(ic); tap = &ni->ni_tx_ampdu[pri]; mstaged = tap->txa_private; /* NB: we reuse AMPDU state */ ieee80211_txampdu_count_packet(tap); /* * When not in station mode never aggregate a multicast * frame; this insures, for example, that a combined frame * does not require multiple encryption keys. */ if (vap->iv_opmode != IEEE80211_M_STA && ETHER_IS_MULTICAST(mtod(m, struct ether_header *)->ether_dhost)) { /* XXX flush staged frame? 
*/ IEEE80211_UNLOCK(ic); return m; } /* * If there is no frame to combine with and the pps is * too low; then do not attempt to aggregate this frame. */ if (mstaged == NULL && ieee80211_txampdu_getpps(tap) < ieee80211_ffppsmin) { IEEE80211_UNLOCK(ic); return m; } sq = &sg->ff_stageq[pri]; /* * Check the txop limit to insure the aggregate fits. */ limit = IEEE80211_TXOP_TO_US( ic->ic_wme.wme_chanParams.cap_wmeParams[pri].wmep_txopLimit); if (limit != 0 && (txtime = ff_approx_txtime(ni, m, mstaged)) > limit) { /* * Aggregate too long, return to the caller for direct * transmission. In addition, flush any pending frame * before sending this one. */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: txtime %u exceeds txop limit %u\n", __func__, txtime, limit); tap->txa_private = NULL; if (mstaged != NULL) stageq_remove(sq, mstaged); IEEE80211_UNLOCK(ic); if (mstaged != NULL) { IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni, "%s: flush staged frame", __func__); /* encap and xmit */ ff_transmit(ni, mstaged); } return m; /* NB: original frame */ } /* * An aggregation candidate. If there's a frame to partner * with then combine and return for processing. Otherwise * save this frame and wait for a partner to show up (or * the frame to be flushed). Note that staged frames also * hold their node reference. */ if (mstaged != NULL) { tap->txa_private = NULL; stageq_remove(sq, mstaged); IEEE80211_UNLOCK(ic); IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni, "%s: aggregate fast-frame", __func__); /* * Release the node reference; we only need * the one already in mstaged. 
*/ KASSERT(mstaged->m_pkthdr.rcvif == (void *)ni, ("rcvif %p ni %p", mstaged->m_pkthdr.rcvif, ni)); ieee80211_free_node(ni); m->m_nextpkt = NULL; mstaged->m_nextpkt = m; mstaged->m_flags |= M_FF; /* NB: mark for encap work */ } else { KASSERT(tap->txa_private == NULL, ("txa_private %p", tap->txa_private)); tap->txa_private = m; stageq_add(sq, m); sg->ff_stageqdepth++; IEEE80211_UNLOCK(ic); IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni, "%s: stage frame, %u queued", __func__, sq->depth); /* NB: mstaged is NULL */ } return mstaged; } void ieee80211_ff_node_init(struct ieee80211_node *ni) { /* * Clean FF state on re-associate. This handles the case * where a station leaves w/o notifying us and then returns * before node is reaped for inactivity. */ ieee80211_ff_node_cleanup(ni); } void ieee80211_ff_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct ieee80211_superg *sg = ic->ic_superg; struct ieee80211_tx_ampdu *tap; struct mbuf *m, *head; int ac; IEEE80211_LOCK(ic); head = NULL; for (ac = 0; ac < WME_NUM_AC; ac++) { tap = &ni->ni_tx_ampdu[ac]; m = tap->txa_private; if (m != NULL) { tap->txa_private = NULL; stageq_remove(&sg->ff_stageq[ac], m); m->m_nextpkt = head; head = m; } } IEEE80211_UNLOCK(ic); for (m = head; m != NULL; m = m->m_nextpkt) { m_freem(m); ieee80211_free_node(ni); } } /* * Switch between turbo and non-turbo operating modes. * Use the specified channel flags to locate the new * channel, update 802.11 state, and then call back into * the driver to effect the change. 
*/
void
ieee80211_dturbo_switch(struct ieee80211vap *vap, int newflags)
{
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211_channel *newchan;

	/* Same frequency as the current bss channel, new flag set. */
	newchan = ieee80211_find_channel(ic, ic->ic_bsschan->ic_freq,
	    newflags);
	if (newchan == NULL) {
		/* XXX should not happen */
		IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
		    "%s: no channel with freq %u flags 0x%x\n",
		    __func__, ic->ic_bsschan->ic_freq, newflags);
		return;
	}

	IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
	    "%s: %s -> %s (freq %u flags 0x%x)\n", __func__,
	    ieee80211_phymode_name[ieee80211_chan2mode(ic->ic_bsschan)],
	    ieee80211_phymode_name[ieee80211_chan2mode(newchan)],
	    newchan->ic_freq, newchan->ic_flags);

	/* Remember where we were, then move bss and current channel. */
	ic->ic_prevchan = ic->ic_curchan;
	ic->ic_curchan = newchan;
	ic->ic_bsschan = newchan;

	/* Rate table first, then let the driver and radiotap catch up. */
	ic->ic_rt = ieee80211_get_ratetable(newchan);
	ic->ic_set_channel(ic);
	ieee80211_radiotap_chan_change(ic);
	/* NB: do not need to reset ERP state 'cuz we're in sta mode */
}

/*
 * Return the current ``state'' of an Atheros capability.
 * If associated in station mode report the negotiated
 * setting. Otherwise report the current setting.
*/ static int getathcap(struct ieee80211vap *vap, int cap) { if (vap->iv_opmode == IEEE80211_M_STA && vap->iv_state == IEEE80211_S_RUN) return IEEE80211_ATH_CAP(vap, vap->iv_bss, cap) != 0; else return (vap->iv_flags & cap) != 0; } static int superg_ioctl_get80211(struct ieee80211vap *vap, struct ieee80211req *ireq) { switch (ireq->i_type) { case IEEE80211_IOC_FF: ireq->i_val = getathcap(vap, IEEE80211_F_FF); break; case IEEE80211_IOC_TURBOP: ireq->i_val = getathcap(vap, IEEE80211_F_TURBOP); break; default: return ENOSYS; } return 0; } IEEE80211_IOCTL_GET(superg, superg_ioctl_get80211); static int superg_ioctl_set80211(struct ieee80211vap *vap, struct ieee80211req *ireq) { switch (ireq->i_type) { case IEEE80211_IOC_FF: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_FF) == 0) return EOPNOTSUPP; vap->iv_flags |= IEEE80211_F_FF; } else vap->iv_flags &= ~IEEE80211_F_FF; return ENETRESET; case IEEE80211_IOC_TURBOP: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_TURBOP) == 0) return EOPNOTSUPP; vap->iv_flags |= IEEE80211_F_TURBOP; } else vap->iv_flags &= ~IEEE80211_F_TURBOP; return ENETRESET; default: return ENOSYS; } return 0; } IEEE80211_IOCTL_SET(superg, superg_ioctl_set80211); Index: stable/9/sys/netgraph/bluetooth/common/ng_bluetooth.c =================================================================== --- stable/9/sys/netgraph/bluetooth/common/ng_bluetooth.c (revision 273911) +++ stable/9/sys/netgraph/bluetooth/common/ng_bluetooth.c (revision 273912) @@ -1,291 +1,291 @@ /* * bluetooth.c */ /*- * Copyright (c) 2001-2002 Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: ng_bluetooth.c,v 1.3 2003/04/26 22:37:31 max Exp $
 * $FreeBSD$
 */

#include
#include
#include
#include
#include
#include
#include

/*
 * Bluetooth stack sysctl globals
 */

static u_int32_t bluetooth_hci_command_timeout_value = 5; /* sec */
static u_int32_t bluetooth_hci_connect_timeout_value = 60; /* sec */
static u_int32_t bluetooth_hci_max_neighbor_age_value = 600; /* sec */
static u_int32_t bluetooth_l2cap_rtx_timeout_value = 60; /* sec */
static u_int32_t bluetooth_l2cap_ertx_timeout_value = 300; /* sec */
static u_int32_t bluetooth_sco_rtx_timeout_value = 60; /* sec */

/*
 * Define the sysctl tree that is shared by other parts of the Bluetooth stack
 */

SYSCTL_NODE(_net, OID_AUTO, bluetooth, CTLFLAG_RW, 0, "Bluetooth family");
SYSCTL_INT(_net_bluetooth, OID_AUTO, version,
-	CTLFLAG_RD, 0, NG_BLUETOOTH_VERSION, "Version of the stack");
+	CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_BLUETOOTH_VERSION, "Version of the stack");

/*
 * HCI
 */

SYSCTL_NODE(_net_bluetooth, OID_AUTO, hci, CTLFLAG_RW,
	0, "Bluetooth HCI family");

/*
 * Sysctl handler for the HCI command timeout.  A new value is
 * accepted only if it is greater than zero; otherwise EINVAL.
 */
static int
bluetooth_set_hci_command_timeout_value(SYSCTL_HANDLER_ARGS)
{
	u_int32_t value;
	int error;

	/* work on a copy so a rejected value never becomes visible */
	value = bluetooth_hci_command_timeout_value;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error == 0 && req->newptr != NULL) {
		if (value > 0)
			bluetooth_hci_command_timeout_value = value;
		else
			error = EINVAL;
	}

	return (error);
} /* bluetooth_set_hci_command_timeout_value */

SYSCTL_PROC(_net_bluetooth_hci, OID_AUTO, command_timeout,
	CTLTYPE_INT | CTLFLAG_RW,
	&bluetooth_hci_command_timeout_value, 5,
	bluetooth_set_hci_command_timeout_value,
	"I", "HCI command timeout (sec)");

/*
 * Sysctl handler for the HCI connect timeout.  A new value must be
 * greater than zero and no larger than the L2CAP RTX timeout;
 * otherwise EINVAL.
 */
static int
bluetooth_set_hci_connect_timeout_value(SYSCTL_HANDLER_ARGS)
{
	u_int32_t value;
	int error;

	value = bluetooth_hci_connect_timeout_value;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error == 0 && req->newptr != NULL) {
		if (0 < value && value <= bluetooth_l2cap_rtx_timeout_value)
			bluetooth_hci_connect_timeout_value = value;
		else
			error = EINVAL;
	}

	return (error);
} /* bluetooth_set_hci_connect_timeout_value */

SYSCTL_PROC(_net_bluetooth_hci, OID_AUTO, connection_timeout,
	CTLTYPE_INT | CTLFLAG_RW,
	&bluetooth_hci_connect_timeout_value, 60,
	bluetooth_set_hci_connect_timeout_value,
	"I", "HCI connect timeout (sec)");

SYSCTL_UINT(_net_bluetooth_hci, OID_AUTO, max_neighbor_age, CTLFLAG_RW,
	&bluetooth_hci_max_neighbor_age_value, 600,
	"Maximal HCI neighbor cache entry age (sec)");

/*
 * L2CAP
 */

SYSCTL_NODE(_net_bluetooth, OID_AUTO, l2cap, CTLFLAG_RW,
	0, "Bluetooth L2CAP family");

/*
 * Sysctl handler for the L2CAP RTX timeout.  A new value must lie
 * between the HCI connect timeout and the L2CAP ERTX timeout
 * (both inclusive); otherwise EINVAL.
 */
static int
bluetooth_set_l2cap_rtx_timeout_value(SYSCTL_HANDLER_ARGS)
{
	u_int32_t value;
	int error;

	value = bluetooth_l2cap_rtx_timeout_value;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error == 0 && req->newptr != NULL) {
		if (bluetooth_hci_connect_timeout_value <= value &&
		    value <= bluetooth_l2cap_ertx_timeout_value)
			bluetooth_l2cap_rtx_timeout_value = value;
		else
			error = EINVAL;
	}

	return (error);
} /* bluetooth_set_l2cap_rtx_timeout_value */

SYSCTL_PROC(_net_bluetooth_l2cap, OID_AUTO, rtx_timeout,
	CTLTYPE_INT | CTLFLAG_RW,
	&bluetooth_l2cap_rtx_timeout_value, 60,
	bluetooth_set_l2cap_rtx_timeout_value,
	"I", "L2CAP RTX timeout (sec)");

/*
 * Sysctl handler for the L2CAP ERTX timeout.  A new value must be
 * at least the L2CAP RTX timeout; otherwise EINVAL.
 */
static int
bluetooth_set_l2cap_ertx_timeout_value(SYSCTL_HANDLER_ARGS)
{
	u_int32_t value;
	int error;

	value = bluetooth_l2cap_ertx_timeout_value;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error == 0 && req->newptr != NULL) {
		if (value >= bluetooth_l2cap_rtx_timeout_value)
			bluetooth_l2cap_ertx_timeout_value = value;
		else
			error = EINVAL;
	}

	return (error);
} /* bluetooth_set_l2cap_ertx_timeout_value */

SYSCTL_PROC(_net_bluetooth_l2cap, OID_AUTO, ertx_timeout,
	CTLTYPE_INT | CTLFLAG_RW,
	&bluetooth_l2cap_ertx_timeout_value, 300,
	bluetooth_set_l2cap_ertx_timeout_value,
	"I", "L2CAP ERTX timeout (sec)");

/*
 * Return various sysctl values
 *
 * NB: the timeouts are returned multiplied by hz; the maximal
 * neighbor age is returned as stored.
 */

u_int32_t
bluetooth_hci_command_timeout(void)
{
	return (bluetooth_hci_command_timeout_value * hz);
} /* bluetooth_hci_command_timeout */

u_int32_t
bluetooth_hci_connect_timeout(void)
{
	return (bluetooth_hci_connect_timeout_value * hz);
} /* bluetooth_hci_connect_timeout */

u_int32_t
bluetooth_hci_max_neighbor_age(void)
{
	/* NOTE(review): unlike its siblings, not scaled by hz; presumably callers compare in seconds - confirm */
	return (bluetooth_hci_max_neighbor_age_value);
} /* bluetooth_hci_max_neighbor_age */

u_int32_t
bluetooth_l2cap_rtx_timeout(void)
{
	return (bluetooth_l2cap_rtx_timeout_value * hz);
} /* bluetooth_l2cap_rtx_timeout */

u_int32_t
bluetooth_l2cap_ertx_timeout(void)
{
	return (bluetooth_l2cap_ertx_timeout_value * hz);
} /* bluetooth_l2cap_ertx_timeout */

u_int32_t
bluetooth_sco_rtx_timeout(void)
{
	return (bluetooth_sco_rtx_timeout_value * hz);
} /* bluetooth_sco_rtx_timeout */

/*
 * RFCOMM
 */

SYSCTL_NODE(_net_bluetooth, OID_AUTO, rfcomm, CTLFLAG_RW,
	0, "Bluetooth RFCOMM family");

/*
 * SCO
 */

SYSCTL_NODE(_net_bluetooth, OID_AUTO, sco, CTLFLAG_RW,
	0, "Bluetooth SCO family");

/*
 * Sysctl handler for the SCO RTX timeout.  A new value must be at
 * least the HCI connect timeout; otherwise EINVAL.
 */
static int
bluetooth_set_sco_rtx_timeout_value(SYSCTL_HANDLER_ARGS)
{
	u_int32_t value;
	int error;

	value = bluetooth_sco_rtx_timeout_value;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error == 0 && req->newptr != NULL) {
		if (bluetooth_hci_connect_timeout_value <= value)
			bluetooth_sco_rtx_timeout_value = value;
		else
			error = EINVAL;
	}

	return (error);
} /* bluetooth_set_sco_rtx_timeout_value */

SYSCTL_PROC(_net_bluetooth_sco, OID_AUTO, rtx_timeout,
	CTLTYPE_INT | CTLFLAG_RW,
	&bluetooth_sco_rtx_timeout_value, 60,
	bluetooth_set_sco_rtx_timeout_value,
	"I", "SCO RTX timeout (sec)");

/*
 * Handle loading and unloading for this code.
 * Load and unload need no work; any other event is EOPNOTSUPP.
 */

static int
bluetooth_modevent(module_t mod, int event, void *data)
{
	int error = 0;

	switch (event) {
	case MOD_LOAD:
		break;

	case MOD_UNLOAD:
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
} /* bluetooth_modevent */

/*
 * Module
 */

static moduledata_t bluetooth_mod = {
	"ng_bluetooth",
	bluetooth_modevent,
	NULL
};

DECLARE_MODULE(ng_bluetooth, bluetooth_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(ng_bluetooth, NG_BLUETOOTH_VERSION);
Index: stable/9/sys/netgraph/ng_base.c
===================================================================
--- stable/9/sys/netgraph/ng_base.c (revision 273911)
+++ stable/9/sys/netgraph/ng_base.c (revision 273912)
@@ -1,3848 +1,3848 @@
/*-
 * Copyright (c) 1996-1999 Whistle Communications, Inc.
 * All rights reserved.
 *
 * Subject to the following obligations and disclaimer of warranty, use and
 * redistribution of this software, in source or object code forms, with or
 * without modifications are expressly permitted by Whistle Communications;
 * provided, however, that:
 * 1. Any and all reproductions of the source or object code must include the
 * copyright notice above and the following disclaimer of warranties; and
 * 2. No rights are granted, in any manner or form, to use Whistle
 * Communications, Inc. trademarks, including the mark "WHISTLE
 * COMMUNICATIONS" on advertising, endorsements, or otherwise except as
 * such appears in the above copyright notice or in the software.
* * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Authors: Julian Elischer * Archie Cobbs * * $FreeBSD$ * $Whistle: ng_base.c,v 1.39 1999/01/28 23:54:53 julian Exp $ */ /* * This file implements the base netgraph code. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(netgraph, NG_ABI_VERSION); /* Mutex to protect topology events. 
*/
static struct rwlock ng_topo_lock;
/* Shorthands for taking and dropping ng_topo_lock as reader or writer. */
#define TOPOLOGY_RLOCK() rw_rlock(&ng_topo_lock)
#define TOPOLOGY_RUNLOCK() rw_runlock(&ng_topo_lock)
#define TOPOLOGY_WLOCK() rw_wlock(&ng_topo_lock)
#define TOPOLOGY_WUNLOCK() rw_wunlock(&ng_topo_lock)
/* Assert that ng_topo_lock is not held. */
#define TOPOLOGY_NOTOWNED() rw_assert(&ng_topo_lock, RA_UNLOCKED)

#ifdef NETGRAPH_DEBUG
static struct mtx ng_nodelist_mtx; /* protects global node/hook lists */
static struct mtx ngq_mtx; /* protects the queue item list */

static SLIST_HEAD(, ng_node) ng_allnodes;
static LIST_HEAD(, ng_node) ng_freenodes; /* in debug, we never free() them */
static SLIST_HEAD(, ng_hook) ng_allhooks;
static LIST_HEAD(, ng_hook) ng_freehooks; /* in debug, we never free() them */

static void ng_dumpitems(void);
static void ng_dumpnodes(void);
static void ng_dumphooks(void);

#endif /* NETGRAPH_DEBUG */
/*
 * DEAD versions of the structures.
 * In order to avoid races, it is sometimes necessary to point
 * at SOMETHING even though theoretically, the current entity is
 * INVALID. Use these to avoid these races.
 */
/* NB: the initializers below are positional; keep them in field order. */
struct ng_type ng_deadtype = {
	NG_ABI_VERSION,
	"dead",
	NULL, /* modevent */
	NULL, /* constructor */
	NULL, /* rcvmsg */
	NULL, /* shutdown */
	NULL, /* newhook */
	NULL, /* findhook */
	NULL, /* connect */
	NULL, /* rcvdata */
	NULL, /* disconnect */
	NULL, /* cmdlist */
};

struct ng_node ng_deadnode = {
	"dead",
	&ng_deadtype,
	NGF_INVALID,
	0, /* numhooks */
	NULL, /* private */
	0, /* ID */
	LIST_HEAD_INITIALIZER(ng_deadnode.nd_hooks),
	{}, /* all_nodes list entry */
	{}, /* id hashtable list entry */
	{ 0,
	  0,
	  {}, /* should never use! (should hang) */
	  {}, /* workqueue entry */
	  STAILQ_HEAD_INITIALIZER(ng_deadnode.nd_input_queue.queue),
	},
	1, /* refs */
	NULL, /* vnet */
#ifdef NETGRAPH_DEBUG
	ND_MAGIC,
	__FILE__,
	__LINE__,
	{NULL}
#endif /* NETGRAPH_DEBUG */
};

struct ng_hook ng_deadhook = {
	"dead",
	NULL, /* private */
	HK_INVALID | HK_DEAD,
	0, /* undefined data link type */
	&ng_deadhook, /* Peer is self */
	&ng_deadnode, /* attached to deadnode */
	{}, /* hooks list */
	NULL, /* override rcvmsg() */
	NULL, /* override rcvdata() */
	1, /* refs always >= 1 */
#ifdef NETGRAPH_DEBUG
	HK_MAGIC,
	__FILE__,
	__LINE__,
	{NULL}
#endif /* NETGRAPH_DEBUG */
};

/*
 * END DEAD STRUCTURES
 */
/* List nodes with unallocated work */
static STAILQ_HEAD(, ng_node) ng_worklist = STAILQ_HEAD_INITIALIZER(ng_worklist);
static struct mtx ng_worklist_mtx; /* MUST LOCK NODE FIRST */

/* List of installed types */
static LIST_HEAD(, ng_type) ng_typelist;
static struct rwlock ng_typelist_lock;
#define TYPELIST_RLOCK() rw_rlock(&ng_typelist_lock)
#define TYPELIST_RUNLOCK() rw_runlock(&ng_typelist_lock)
#define TYPELIST_WLOCK() rw_wlock(&ng_typelist_lock)
#define TYPELIST_WUNLOCK() rw_wunlock(&ng_typelist_lock)

/* Hash related definitions. */
LIST_HEAD(nodehash, ng_node);
/* Per-vnet node tables: one set keyed by node ID, one by node name. */
static VNET_DEFINE(struct nodehash *, ng_ID_hash);
static VNET_DEFINE(u_long, ng_ID_hmask);
static VNET_DEFINE(u_long, ng_nodes);
static VNET_DEFINE(struct nodehash *, ng_name_hash);
static VNET_DEFINE(u_long, ng_name_hmask);
static VNET_DEFINE(u_long, ng_named_nodes);
#define V_ng_ID_hash VNET(ng_ID_hash)
#define V_ng_ID_hmask VNET(ng_ID_hmask)
#define V_ng_nodes VNET(ng_nodes)
#define V_ng_name_hash VNET(ng_name_hash)
#define V_ng_name_hmask VNET(ng_name_hmask)
#define V_ng_named_nodes VNET(ng_named_nodes)

static struct rwlock ng_idhash_lock;
#define IDHASH_RLOCK() rw_rlock(&ng_idhash_lock)
#define IDHASH_RUNLOCK() rw_runlock(&ng_idhash_lock)
#define IDHASH_WLOCK() rw_wlock(&ng_idhash_lock)
#define IDHASH_WUNLOCK() rw_wunlock(&ng_idhash_lock)

/* Method to find a node..
used twice so do it here */
/*
 * NG_IDHASH_FN maps an ID to a bucket index of the ID hash table.
 * NG_IDHASH_FIND walks that bucket and stops with 'node' set to the first
 * valid node whose ID equals ID; it asserts that ng_idhash_lock is held.
 * NOTE(review): when nothing matches, 'node' ends up as whatever
 * LIST_FOREACH leaves in its iterator (NULL per queue(3)) -- the callers
 * in this file test it against NULL.
 */
#define	NG_IDHASH_FN(ID)	((ID) % (V_ng_ID_hmask + 1))
#define	NG_IDHASH_FIND(ID, node)					\
	do { 								\
		rw_assert(&ng_idhash_lock, RA_LOCKED);			\
		LIST_FOREACH(node, &V_ng_ID_hash[NG_IDHASH_FN(ID)],	\
		    nd_idnodes) {					\
			if (NG_NODE_IS_VALID(node)			\
			    && (NG_NODE_ID(node) == ID)) {		\
				break;					\
			}						\
		}							\
	} while (0)

/* Lock protecting the name hash table, with shorthand wrappers. */
static struct rwlock	ng_namehash_lock;
#define	NAMEHASH_RLOCK()	rw_rlock(&ng_namehash_lock)
#define	NAMEHASH_RUNLOCK()	rw_runlock(&ng_namehash_lock)
#define	NAMEHASH_WLOCK()	rw_wlock(&ng_namehash_lock)
#define	NAMEHASH_WUNLOCK()	rw_wunlock(&ng_namehash_lock)

/* Internal functions */
static int	ng_add_hook(node_p node, const char *name, hook_p * hookp);
static int	ng_generic_msg(node_p here, item_p item, hook_p lasthook);
static ng_ID_t	ng_decodeidname(const char *name);
static int	ngb_mod_event(module_t mod, int event, void *data);
static void	ng_worklist_add(node_p node);
static void	ngthread(void *);
static int	ng_apply_item(node_p node, item_p item, int rw);
static void	ng_flush_input_queue(node_p node);
static node_p	ng_ID2noderef(ng_ID_t ID);
static int	ng_con_nodes(item_p item, node_p node, const char *name,
		    node_p node2, const char *name2);
static int	ng_con_part2(node_p node, item_p item, hook_p hook);
static int	ng_con_part3(node_p node, item_p item, hook_p hook);
static int	ng_mkpeer(node_p node, const char *name,
						const char *name2, char *type);
static void	ng_name_rehash(void);
static void	ng_ID_rehash(void);

/* Imported, these used to be externally visible, some may go back.
*/
void	ng_destroy_hook(hook_p hook);
int	ng_path2noderef(node_p here, const char *path,
	node_p *dest, hook_p *lasthook);
int	ng_make_node(const char *type, node_p *nodepp);
int	ng_path_parse(char *addr, char **node, char **path, char **hook);
void	ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3);
void	ng_unname(node_p node);

/* Our own netgraph malloc type */
MALLOC_DEFINE(M_NETGRAPH, "netgraph", "netgraph structures and ctrl messages");
MALLOC_DEFINE(M_NETGRAPH_MSG, "netgraph_msg", "netgraph name storage");
static MALLOC_DEFINE(M_NETGRAPH_HOOK, "netgraph_hook",
    "netgraph hook structures");
static MALLOC_DEFINE(M_NETGRAPH_NODE, "netgraph_node",
    "netgraph node structures");
static MALLOC_DEFINE(M_NETGRAPH_ITEM, "netgraph_item",
    "netgraph item structures");

/* Should not be visible outside this file */

/*
 * Raw allocators: zero-filled, non-sleeping (M_NOWAIT | M_ZERO), so the
 * result may be NULL and must be checked by the caller.
 */
#define _NG_ALLOC_HOOK(hook) \
	hook = malloc(sizeof(*hook), M_NETGRAPH_HOOK, M_NOWAIT | M_ZERO)
#define _NG_ALLOC_NODE(node) \
	node = malloc(sizeof(*node), M_NETGRAPH_NODE, M_NOWAIT | M_ZERO)

/* Per-queue mutex helpers; 'n' is a pointer to a structure with a q_mtx. */
#define	NG_QUEUE_LOCK_INIT(n)			\
	mtx_init(&(n)->q_mtx, "ng_node", NULL, MTX_DEF)
#define	NG_QUEUE_LOCK(n)			\
	mtx_lock(&(n)->q_mtx)
#define	NG_QUEUE_UNLOCK(n)			\
	mtx_unlock(&(n)->q_mtx)
/* Worklist mutex helpers; sleep and wakeup use &ng_worklist as the channel. */
#define	NG_WORKLIST_LOCK_INIT()			\
	mtx_init(&ng_worklist_mtx, "ng_worklist", NULL, MTX_DEF)
#define	NG_WORKLIST_LOCK()			\
	mtx_lock(&ng_worklist_mtx)
#define	NG_WORKLIST_UNLOCK()			\
	mtx_unlock(&ng_worklist_mtx)
#define	NG_WORKLIST_SLEEP()			\
	mtx_sleep(&ng_worklist, &ng_worklist_mtx, PI_NET, "sleep", 0)
#define	NG_WORKLIST_WAKEUP()			\
	wakeup_one(&ng_worklist)

#ifdef NETGRAPH_DEBUG /*----------------------------------------------*/
/*
 * In debug mode:
 * In an attempt to help track reference count screwups
 * we do not free objects back to the malloc system, but keep them
 * in a local cache where we can examine them and keep information safely
 * after they have been freed.
 * We use this scheme for nodes and hooks, and to some extent for items.
*/
/*
 * Debug allocator for hooks.  Reuses the first entry of ng_freehooks when
 * there is one: the structure is zeroed except for its hk_all linkage,
 * which is saved and restored around the bzero().  Otherwise a fresh hook
 * is allocated and, if that succeeds, linked onto ng_allhooks.
 * Returns NULL when the fresh allocation fails.
 */
static __inline hook_p
ng_alloc_hook(void)
{
	hook_p hook;
	SLIST_ENTRY(ng_hook) temp;
	mtx_lock(&ng_nodelist_mtx);
	hook = LIST_FIRST(&ng_freehooks);
	if (hook) {
		LIST_REMOVE(hook, hk_hooks);
		/* Preserve the all-hooks linkage across the wipe. */
		bcopy(&hook->hk_all, &temp, sizeof(temp));
		bzero(hook, sizeof(struct ng_hook));
		bcopy(&temp, &hook->hk_all, sizeof(temp));
		mtx_unlock(&ng_nodelist_mtx);
		hook->hk_magic = HK_MAGIC;
	} else {
		/* The list mutex is dropped around the allocation itself. */
		mtx_unlock(&ng_nodelist_mtx);
		_NG_ALLOC_HOOK(hook);
		if (hook) {
			hook->hk_magic = HK_MAGIC;
			mtx_lock(&ng_nodelist_mtx);
			SLIST_INSERT_HEAD(&ng_allhooks, hook, hk_all);
			mtx_unlock(&ng_nodelist_mtx);
		}
	}
	return (hook);
}

/*
 * Debug allocator for nodes; same scheme as ng_alloc_hook() using
 * ng_freenodes, ng_allnodes and the nd_all linkage.
 */
static __inline node_p
ng_alloc_node(void)
{
	node_p node;
	SLIST_ENTRY(ng_node) temp;
	mtx_lock(&ng_nodelist_mtx);
	node = LIST_FIRST(&ng_freenodes);
	if (node) {
		LIST_REMOVE(node, nd_nodes);
		/* Preserve the all-nodes linkage across the wipe. */
		bcopy(&node->nd_all, &temp, sizeof(temp));
		bzero(node, sizeof(struct ng_node));
		bcopy(&temp, &node->nd_all, sizeof(temp));
		mtx_unlock(&ng_nodelist_mtx);
		node->nd_magic = ND_MAGIC;
	} else {
		mtx_unlock(&ng_nodelist_mtx);
		_NG_ALLOC_NODE(node);
		if (node) {
			node->nd_magic = ND_MAGIC;
			mtx_lock(&ng_nodelist_mtx);
			SLIST_INSERT_HEAD(&ng_allnodes, node, nd_all);
			mtx_unlock(&ng_nodelist_mtx);
		}
	}
	return (node);
}

#define NG_ALLOC_HOOK(hook) do { (hook) = ng_alloc_hook(); } while (0)
#define NG_ALLOC_NODE(node) do { (node) = ng_alloc_node(); } while (0)

/* Debug "free": park the object on the free list and clear its magic. */
#define NG_FREE_HOOK(hook)						\
	do {								\
		mtx_lock(&ng_nodelist_mtx);				\
		LIST_INSERT_HEAD(&ng_freehooks, hook, hk_hooks);	\
		hook->hk_magic = 0;					\
		mtx_unlock(&ng_nodelist_mtx);				\
	} while (0)

#define NG_FREE_NODE(node)						\
	do {								\
		mtx_lock(&ng_nodelist_mtx);				\
		LIST_INSERT_HEAD(&ng_freenodes, node, nd_nodes);	\
		node->nd_magic = 0;					\
		mtx_unlock(&ng_nodelist_mtx);				\
	} while (0)

#else /* NETGRAPH_DEBUG */ /*----------------------------------------------*/

/* Non-debug build: plain malloc/free against the per-kind malloc types. */
#define NG_ALLOC_HOOK(hook) _NG_ALLOC_HOOK(hook)
#define NG_ALLOC_NODE(node) _NG_ALLOC_NODE(node)

#define NG_FREE_HOOK(hook) do { free((hook), M_NETGRAPH_HOOK); } while (0)
#define NG_FREE_NODE(node) do { free((node), M_NETGRAPH_NODE); } while (0)

#endif /* NETGRAPH_DEBUG */ /*----------------------------------------------*/

/* Set this to kdb_enter("X") to catch all errors as they occur */
#ifndef TRAP_ERROR
#define TRAP_ERROR()
#endif

/* Per-vnet counter supplying the next candidate node ID; starts at 1. */
static VNET_DEFINE(ng_ID_t, nextID) = 1;
#define	V_nextID			VNET(nextID)

/*
 * With INVARIANTS, panic unless the mbuf chain has a packet header, no
 * m_nextpkt links, and a pkthdr.len equal to the sum of the m_len values.
 * Without INVARIANTS the macro expands to nothing.
 */
#ifdef INVARIANTS
#define CHECK_DATA_MBUF(m)	do {					\
		struct mbuf *n;						\
		int total;						\
									\
		M_ASSERTPKTHDR(m);					\
		for (total = 0, n = (m); n != NULL; n = n->m_next) {	\
			total += n->m_len;				\
			if (n->m_nextpkt != NULL)			\
				panic("%s: m_nextpkt", __func__);	\
		}							\
									\
		if ((m)->m_pkthdr.len != total) {			\
			panic("%s: %d != %d",				\
			    __func__, (m)->m_pkthdr.len, total);	\
		}							\
	} while (0)
#else
#define CHECK_DATA_MBUF(m)
#endif

/* Store x in the local 'error' and jump to the function's 'done' label. */
#define ERROUT(x)	do { error = (x); goto done; } while (0)

/************************************************************************
	Parse type definitions for generic messages
************************************************************************/

/* Handy structure parse type defining macro */
#define DEFINE_PARSE_STRUCT_TYPE(lo, up, args)				\
static const struct ng_parse_struct_field				\
	ng_ ## lo ## _type_fields[] = NG_GENERIC_ ## up ## _INFO args;	\
static const struct ng_parse_type ng_generic_ ## lo ## _type = {	\
	&ng_parse_struct_type,						\
	&ng_ ## lo ## _type_fields					\
}

DEFINE_PARSE_STRUCT_TYPE(mkpeer, MKPEER, ());
DEFINE_PARSE_STRUCT_TYPE(connect, CONNECT, ());
DEFINE_PARSE_STRUCT_TYPE(name, NAME, ());
DEFINE_PARSE_STRUCT_TYPE(rmhook, RMHOOK, ());
DEFINE_PARSE_STRUCT_TYPE(nodeinfo, NODEINFO, ());
DEFINE_PARSE_STRUCT_TYPE(typeinfo, TYPEINFO, ());
DEFINE_PARSE_STRUCT_TYPE(linkinfo, LINKINFO, (&ng_generic_nodeinfo_type));

/* Get length of an array when the length is stored as a 32 bit
   value immediately preceding the array -- as with struct namelist
   and struct typelist.
*/
static int
ng_generic_list_getLength(const struct ng_parse_type *type,
	const u_char *start, const u_char *buf)
{
	/* The count is the 32-bit word directly in front of the array. */
	return *((const u_int32_t *)(buf - 4));
}

/* Get length of the array of struct linkinfo inside a struct hooklist */
static int
ng_generic_linkinfo_getLength(const struct ng_parse_type *type,
	const u_char *start, const u_char *buf)
{
	const struct hooklist *hl = (const struct hooklist *)start;

	return hl->nodeinfo.hooks;
}

/* Array type for a variable length array of struct namelist */
static const struct ng_parse_array_info ng_nodeinfoarray_type_info = {
	&ng_generic_nodeinfo_type,
	&ng_generic_list_getLength
};
static const struct ng_parse_type ng_generic_nodeinfoarray_type = {
	&ng_parse_array_type,
	&ng_nodeinfoarray_type_info
};

/* Array type for a variable length array of struct typelist */
static const struct ng_parse_array_info ng_typeinfoarray_type_info = {
	&ng_generic_typeinfo_type,
	&ng_generic_list_getLength
};
static const struct ng_parse_type ng_generic_typeinfoarray_type = {
	&ng_parse_array_type,
	&ng_typeinfoarray_type_info
};

/* Array type for array of struct linkinfo in struct hooklist */
static const struct ng_parse_array_info ng_generic_linkinfo_array_type_info = {
	&ng_generic_linkinfo_type,
	&ng_generic_linkinfo_getLength
};
static const struct ng_parse_type ng_generic_linkinfo_array_type = {
	&ng_parse_array_type,
	&ng_generic_linkinfo_array_type_info
};

DEFINE_PARSE_STRUCT_TYPE(typelist, TYPELIST, (&ng_generic_typeinfoarray_type));
DEFINE_PARSE_STRUCT_TYPE(hooklist, HOOKLIST,
	(&ng_generic_nodeinfo_type, &ng_generic_linkinfo_array_type));
DEFINE_PARSE_STRUCT_TYPE(listnodes, LISTNODES,
	(&ng_generic_nodeinfoarray_type));

/* List of commands and how to convert arguments to/from ASCII */
static const struct ng_cmdlist ng_generic_cmds[] = {
	{
	  NGM_GENERIC_COOKIE,
	  NGM_SHUTDOWN,
	  "shutdown",
	  NULL,
	  NULL
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_MKPEER,
	  "mkpeer",
	  &ng_generic_mkpeer_type,
	  NULL
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_CONNECT,
	  "connect",
	  &ng_generic_connect_type,
	  NULL
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_NAME,
	  "name",
	  &ng_generic_name_type,
	  NULL
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_RMHOOK,
	  "rmhook",
	  &ng_generic_rmhook_type,
	  NULL
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_NODEINFO,
	  "nodeinfo",
	  NULL,
	  &ng_generic_nodeinfo_type
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_LISTHOOKS,
	  "listhooks",
	  NULL,
	  &ng_generic_hooklist_type
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_LISTNAMES,
	  "listnames",
	  NULL,
	  &ng_generic_listnodes_type	/* same as NGM_LISTNODES */
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_LISTNODES,
	  "listnodes",
	  NULL,
	  &ng_generic_listnodes_type
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_LISTTYPES,
	  "listtypes",
	  NULL,
	  &ng_generic_typelist_type
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_TEXT_CONFIG,
	  "textconfig",
	  NULL,
	  &ng_parse_string_type
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_TEXT_STATUS,
	  "textstatus",
	  NULL,
	  &ng_parse_string_type
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_ASCII2BINARY,
	  "ascii2binary",
	  &ng_parse_ng_mesg_type,
	  &ng_parse_ng_mesg_type
	},
	{
	  NGM_GENERIC_COOKIE,
	  NGM_BINARY2ASCII,
	  "binary2ascii",
	  &ng_parse_ng_mesg_type,
	  &ng_parse_ng_mesg_type
	},
	{ 0 }
};

/************************************************************************
			Node routines
************************************************************************/

/*
 * Instantiate a node of the requested type
 *
 * Returns 0 on success with *nodepp set; EINVAL for a NULL type name or a
 * type without a constructor; ENXIO when the type is not registered;
 * otherwise the error from ng_make_node_common() or from the constructor.
 */
int
ng_make_node(const char *typename, node_p *nodepp)
{
	struct ng_type *type;
	int	error;

	/* Check that the type makes sense */
	if (typename == NULL) {
		TRAP_ERROR();
		return (EINVAL);
	}

	/* Locate the node type. If we fail we return. Do not try to load
	 * module.
	 */
	if ((type = ng_findtype(typename)) == NULL)
		return (ENXIO);

	/*
	 * If we have a constructor, then make the node and
	 * call the constructor to do type specific initialisation.
	 */
	if (type->constructor != NULL) {
		if ((error = ng_make_node_common(type, nodepp)) == 0) {
			if ((error = ((*type->constructor)(*nodepp))) != 0) {
				/* Constructor refused: drop the new node. */
				NG_NODE_UNREF(*nodepp);
			}
		}
	} else {
		/*
		 * Node has no constructor. We cannot ask for one
		 * to be made. It must be brought into existence by
		 * some external agency. The external agency should
		 * call ng_make_node_common() directly to get the
		 * netgraph part initialised.
		 */
		TRAP_ERROR();
		error = EINVAL;
	}
	return (error);
}

/*
 * Generic node creation. Called by node initialisation for externally
 * instantiated nodes (e.g. hardware, sockets, etc ).
 * The returned node has a reference count of 1.
 *
 * Returns EINVAL if the type is not registered, ENOMEM if the node cannot
 * be allocated, 0 otherwise with *nodepp set.
 */
int
ng_make_node_common(struct ng_type *type, node_p *nodepp)
{
	node_p node;

	/* Require the node type to have been already installed */
	if (ng_findtype(type->name) == NULL) {
		TRAP_ERROR();
		return (EINVAL);
	}

	/* Make a node and try attach it to the type */
	NG_ALLOC_NODE(node);
	if (node == NULL) {
		TRAP_ERROR();
		return (ENOMEM);
	}
	node->nd_type = type;
#ifdef VIMAGE
	node->nd_vnet = curvnet;
#endif
	NG_NODE_REF(node);				/* note reference */
	type->refs++;

	NG_QUEUE_LOCK_INIT(&node->nd_input_queue);
	STAILQ_INIT(&node->nd_input_queue.queue);
	node->nd_input_queue.q_flags = 0;

	/* Initialize hook list for new node */
	LIST_INIT(&node->nd_hooks);

	/* Get an ID and put us in the hash chain. */
	IDHASH_WLOCK();
	for (;;) { /* wrap protection, even if silly */
		node_p node2 = NULL;
		node->nd_ID = V_nextID++; /* 137/sec for 1 year before wrap */

		/* Is there a problem with the new number? */
		NG_IDHASH_FIND(node->nd_ID, node2); /* already taken? */
		if ((node->nd_ID != 0) && (node2 == NULL)) {
			break;
		}
	}
	V_ng_nodes++;
	/* Grow the table first so the insert below uses the final bucket. */
	if (V_ng_nodes * 2 > V_ng_ID_hmask)
		ng_ID_rehash();
	LIST_INSERT_HEAD(&V_ng_ID_hash[NG_IDHASH_FN(node->nd_ID)], node,
	    nd_idnodes);
	IDHASH_WUNLOCK();

	/* Done */
	*nodepp = node;
	return (0);
}

/*
 * Forceably start the shutdown process on a node. Either call
 * its shutdown method, or do the default shutdown if there is
 * no type-specific method.
 *
 * We can only be called from a shutdown message, so we know we have
 * a writer lock, and therefore exclusive access. It also means
 * that we should not be on the work queue, but we check anyhow.
*
 * Persistent node types must have a type-specific method which
 * allocates a new node in which case, this one is irretrievably going away,
 * or cleans up anything it needs, and just makes the node valid again,
 * in which case we allow the node to survive.
 *
 * XXX We need to think of how to tell a persistent node that we
 * REALLY need to go away because the hardware has gone or we
 * are rebooting.... etc.
 */
void
ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3)
{
	hook_p hook;

	/* Check if it's already shutting down */
	if ((node->nd_flags & NGF_CLOSING) != 0)
		return;

	if (node == &ng_deadnode) {
		printf ("shutdown called on deadnode\n");
		return;
	}

	/* Add an extra reference so it doesn't go away during this */
	NG_NODE_REF(node);

	/*
	 * Mark it invalid so any newcomers know not to try use it
	 * Also add our own mark so we can't recurse
	 * note that NGF_INVALID does not do this as it's also set during
	 * creation
	 */
	node->nd_flags |= NGF_INVALID|NGF_CLOSING;

	/* If node has its pre-shutdown method, then call it first*/
	if (node->nd_type && node->nd_type->close)
		(*node->nd_type->close)(node);

	/* Notify all remaining connected nodes to disconnect */
	while ((hook = LIST_FIRST(&node->nd_hooks)) != NULL)
		ng_destroy_hook(hook);

	/*
	 * Drain the input queue forceably.
	 * it has no hooks so what's it going to do, bleed on someone?
	 * Theoretically we came here from a queue entry that was added
	 * Just before the queue was closed, so it should be empty anyway.
	 * Also removes us from worklist if needed.
	 */
	ng_flush_input_queue(node);

	/* Ask the type if it has anything to do in this case */
	if (node->nd_type && node->nd_type->shutdown) {
		(*node->nd_type->shutdown)(node);
		if (NG_NODE_IS_VALID(node)) {
			/*
			 * Well, blow me down if the node code hasn't declared
			 * that it doesn't want to die.
			 * Presumably it is a persistent node.
			 * If we REALLY want it to go away,
			 *  e.g. hardware going away,
			 * Our caller should set NGF_REALLY_DIE in nd_flags.
			 */
			node->nd_flags &= ~(NGF_INVALID|NGF_CLOSING);
			NG_NODE_UNREF(node); /* Assume they still have theirs */
			return;
		}
	} else {				/* do the default thing */
		NG_NODE_UNREF(node);
	}

	ng_unname(node); /* basically a NOP these days */

	/*
	 * Remove extra reference, possibly the last
	 * Possible other holders of references may include
	 * timeout callouts, but theoretically the node's supposed to
	 * have cancelled them. Possibly hardware dependencies may
	 * force a driver to 'linger' with a reference.
	 */
	NG_NODE_UNREF(node);
}

/*
 * Remove a reference to the node, possibly the last.
 * Calls on ng_deadnode return immediately and never release anything.
 * When the last reference goes, the node is unlinked from the name hash
 * (if it has a name) and from the ID hash, its queue mutex is destroyed
 * and the node is freed.
 */
void
ng_unref_node(node_p node)
{

	if (node == &ng_deadnode)
		return;

	CURVNET_SET(node->nd_vnet);

	if (refcount_release(&node->nd_refs)) { /* we were the last */

		node->nd_type->refs--; /* XXX maybe should get types lock? */
		NAMEHASH_WLOCK();
		if (NG_NODE_HAS_NAME(node)) {
			V_ng_named_nodes--;
			LIST_REMOVE(node, nd_nodes);
		}
		NAMEHASH_WUNLOCK();

		IDHASH_WLOCK();
		V_ng_nodes--;
		LIST_REMOVE(node, nd_idnodes);
		IDHASH_WUNLOCK();

		mtx_destroy(&node->nd_input_queue.q_mtx);
		NG_FREE_NODE(node);
	}
	CURVNET_RESTORE();
}

/************************************************************************
			Node ID handling
************************************************************************/
/*
 * Look up a valid node by ID under the ID-hash read lock.  When a node is
 * found, a reference is taken on it before the lock is released.
 */
static node_p
ng_ID2noderef(ng_ID_t ID)
{
	node_p node;

	IDHASH_RLOCK();
	NG_IDHASH_FIND(ID, node);
	if (node)
		NG_NODE_REF(node);
	IDHASH_RUNLOCK();
	return(node);
}

/* Return the ID of 'node', or 0 when 'node' is NULL. */
ng_ID_t
ng_node2ID(node_p node)
{
	return (node ? NG_NODE_ID(node) : 0);
}

/************************************************************************
			Node name handling
************************************************************************/

/*
 * Assign a node a name.
 *
 * Returns EINVAL for an empty name, a name containing '.' or ':', a name
 * that does not terminate within NG_NODESIZ characters, or a name that
 * decodes as an ID; EADDRINUSE when a valid node already uses the name;
 * 0 on success.
 */
int
ng_name_node(node_p node, const char *name)
{
	uint32_t hash;
	node_p node2;
	int i;

	/* Check the name is valid */
	for (i = 0; i < NG_NODESIZ; i++) {
		if (name[i] == '\0' || name[i] == '.' || name[i] == ':')
			break;
	}
	if (i == 0 || name[i] != '\0') {
		TRAP_ERROR();
		return (EINVAL);
	}
	if (ng_decodeidname(name) != 0) { /* valid IDs not allowed here */
		TRAP_ERROR();
		return (EINVAL);
	}

	NAMEHASH_WLOCK();
	/* Grow the table before hashing so 'hash' matches the final mask. */
	if (V_ng_named_nodes * 2 > V_ng_name_hmask)
		ng_name_rehash();

	hash = hash32_str(name, HASHINIT) & V_ng_name_hmask;
	/* Check the name isn't already being used. */
	LIST_FOREACH(node2, &V_ng_name_hash[hash], nd_nodes)
		if (NG_NODE_IS_VALID(node2) &&
		    (strcmp(NG_NODE_NAME(node2), name) == 0)) {
			NAMEHASH_WUNLOCK();
			return (EADDRINUSE);
		}

	/* A rename moves the existing entry; a first name bumps the count. */
	if (NG_NODE_HAS_NAME(node))
		LIST_REMOVE(node, nd_nodes);
	else
		V_ng_named_nodes++;
	/* Copy it. */
	strlcpy(NG_NODE_NAME(node), name, NG_NODESIZ);
	/* Update name hash. */
	LIST_INSERT_HEAD(&V_ng_name_hash[hash], node, nd_nodes);
	NAMEHASH_WUNLOCK();

	return (0);
}

/*
 * Find a node by absolute name. The name should NOT end with ':'
 * The name "." means "this node" and "[xxx]" means "the node
 * with ID (ie, at address) xxx".
 *
 * Returns the node if found, else NULL.
 * Names are looked up in the bucket of the name hash selected by
 * hash32_str(); IDs are resolved through ng_ID2noderef().
 * Note it acquires a reference on the node so you can be sure it's still
 * there.
 */
node_p
ng_name2noderef(node_p here, const char *name)
{
	node_p node;
	ng_ID_t temp;
	int	hash;

	/* "." means "this node" */
	if (strcmp(name, ".") == 0) {
		NG_NODE_REF(here);
		return(here);
	}

	/* Check for name-by-ID */
	if ((temp = ng_decodeidname(name)) != 0) {
		return (ng_ID2noderef(temp));
	}

	/* Find node by name. */
	hash = hash32_str(name, HASHINIT) & V_ng_name_hmask;
	NAMEHASH_RLOCK();
	LIST_FOREACH(node, &V_ng_name_hash[hash], nd_nodes)
		if (NG_NODE_IS_VALID(node) &&
		    (strcmp(NG_NODE_NAME(node), name) == 0)) {
			NG_NODE_REF(node);
			break;
		}
	NAMEHASH_RUNLOCK();

	return (node);
}

/*
 * Decode an ID name, eg. "[f03034de]". Returns 0 if the
 * string is not valid, otherwise returns the value.
*/ static ng_ID_t ng_decodeidname(const char *name) { const int len = strlen(name); char *eptr; u_long val; /* Check for proper length, brackets, no leading junk */ if ((len < 3) || (name[0] != '[') || (name[len - 1] != ']') || (!isxdigit(name[1]))) return ((ng_ID_t)0); /* Decode number */ val = strtoul(name + 1, &eptr, 16); if ((eptr - name != len - 1) || (val == ULONG_MAX) || (val == 0)) return ((ng_ID_t)0); return ((ng_ID_t)val); } /* * Remove a name from a node. This should only be called * when shutting down and removing the node. */ void ng_unname(node_p node) { } /* * Allocate a bigger name hash. */ static void ng_name_rehash() { struct nodehash *new; uint32_t hash; u_long hmask; node_p node, node2; int i; new = hashinit_flags((V_ng_name_hmask + 1) * 2, M_NETGRAPH_NODE, &hmask, HASH_NOWAIT); if (new == NULL) return; for (i = 0; i <= V_ng_name_hmask; i++) LIST_FOREACH_SAFE(node, &V_ng_name_hash[i], nd_nodes, node2) { #ifdef INVARIANTS LIST_REMOVE(node, nd_nodes); #endif hash = hash32_str(NG_NODE_NAME(node), HASHINIT) & hmask; LIST_INSERT_HEAD(&new[hash], node, nd_nodes); } hashdestroy(V_ng_name_hash, M_NETGRAPH_NODE, V_ng_name_hmask); V_ng_name_hash = new; V_ng_name_hmask = hmask; } /* * Allocate a bigger ID hash. */ static void ng_ID_rehash() { struct nodehash *new; uint32_t hash; u_long hmask; node_p node, node2; int i; new = hashinit_flags((V_ng_ID_hmask + 1) * 2, M_NETGRAPH_NODE, &hmask, HASH_NOWAIT); if (new == NULL) return; for (i = 0; i <= V_ng_ID_hmask; i++) LIST_FOREACH_SAFE(node, &V_ng_ID_hash[i], nd_idnodes, node2) { #ifdef INVARIANTS LIST_REMOVE(node, nd_idnodes); #endif hash = (node->nd_ID % (hmask + 1)); LIST_INSERT_HEAD(&new[hash], node, nd_idnodes); } hashdestroy(V_ng_ID_hash, M_NETGRAPH_NODE, V_ng_name_hmask); V_ng_ID_hash = new; V_ng_ID_hmask = hmask; } /************************************************************************ Hook routines Names are not optional. Hooks are always connected, except for a brief moment within these routines. 
 On invalidation or during creation they are connected to the 'dead' hook.
************************************************************************/

/*
 * Remove a hook reference
 *
 * Calls on ng_deadhook return immediately.  When the last reference goes,
 * the hook's reference on its node (if it has one) is dropped and the hook
 * is freed.
 */
void
ng_unref_hook(hook_p hook)
{

	if (hook == &ng_deadhook)
		return;

	if (refcount_release(&hook->hk_refs)) { /* we were the last */
		if (_NG_HOOK_NODE(hook)) /* it'll probably be ng_deadnode */
			_NG_NODE_UNREF((_NG_HOOK_NODE(hook)));
		NG_FREE_HOOK(hook);
	}
}

/*
 * Add an unconnected hook to a node. Only used internally.
 * Assumes node is locked. (XXX not yet true )
 *
 * Returns EINVAL for a NULL name, EEXIST if ng_findhook() already finds
 * the name on the node, ENOMEM if allocation fails, or the error returned
 * by the type's newhook method.  On success the hook is on the node's hook
 * list, still flagged HK_INVALID and peered with ng_deadhook, and carries
 * two references: one for the caller and one for the node's list.
 */
static int
ng_add_hook(node_p node, const char *name, hook_p *hookp)
{
	hook_p hook;
	int error = 0;

	/* Check that the given name is good */
	if (name == NULL) {
		TRAP_ERROR();
		return (EINVAL);
	}
	if (ng_findhook(node, name) != NULL) {
		TRAP_ERROR();
		return (EEXIST);
	}

	/* Allocate the hook and link it up */
	NG_ALLOC_HOOK(hook);
	if (hook == NULL) {
		TRAP_ERROR();
		return (ENOMEM);
	}
	hook->hk_refs = 1;		/* add a reference for us to return */
	hook->hk_flags = HK_INVALID;
	hook->hk_peer = &ng_deadhook;	/* start off this way */
	hook->hk_node = node;
	NG_NODE_REF(node);		/* each hook counts as a reference */

	/* Set hook name */
	strlcpy(NG_HOOK_NAME(hook), name, NG_HOOKSIZ);

	/*
	 * Check if the node type code has something to say about it
	 * If it fails, the unref of the hook will also unref the node.
	 */
	if (node->nd_type->newhook != NULL) {
		if ((error = (*node->nd_type->newhook)(node, hook, name))) {
			NG_HOOK_UNREF(hook);	/* this frees the hook */
			return (error);
		}
	}
	/*
	 * The 'type' agrees so far, so go ahead and link it in.
	 * We'll ask again later when we actually connect the hooks.
	 */
	LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks);
	node->nd_numhooks++;
	NG_HOOK_REF(hook);	/* one for the node */

	if (hookp)
		*hookp = hook;
	return (0);
}

/*
 * Find a hook
 *
 * Node types may supply their own optimized routines for finding
 * hooks.  If none is supplied, we just do a linear search.
 * XXX Possibly we should add a reference to the hook?
*/
hook_p
ng_findhook(node_p node, const char *name)
{
	hook_p hook;

	/* A type-specific finder, when present, fully replaces the search. */
	if (node->nd_type->findhook != NULL)
		return (*node->nd_type->findhook)(node, name);
	LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) {
		if (NG_HOOK_IS_VALID(hook) &&
		    (strcmp(NG_HOOK_NAME(hook), name) == 0))
			return (hook);
	}
	return (NULL);
}

/*
 * Destroy a hook
 *
 * As hooks are always attached, this really destroys two hooks.
 * The one given, and the one attached to it. Disconnect the hooks
 * from each other first. We reconnect the peer hook to the 'dead'
 * hook so that it can still exist after we depart. We then
 * send the peer its own destroy message. This ensures that we only
 * interact with the peer's structures when it is locked processing that
 * message. We hold a reference to the peer hook so we are guaranteed that
 * the peer hook and node are still going to exist until
 * we are finished there as the hook holds a ref on the node.
 * We run this same code again on the peer hook, but that time it is already
 * attached to the 'dead' hook.
 *
 * This routine is called at all stages of hook creation
 * on error detection and must be able to handle any such stage.
 */
void
ng_destroy_hook(hook_p hook)
{
	hook_p peer;
	node_p node;

	if (hook == &ng_deadhook) {	/* better safe than sorry */
		printf("ng_destroy_hook called on deadhook\n");
		return;
	}

	/*
	 * Protect divorce process with mutex, to avoid races on
	 * simultaneous disconnect.
	 */
	TOPOLOGY_WLOCK();

	hook->hk_flags |= HK_INVALID;

	peer = NG_HOOK_PEER(hook);
	node = NG_HOOK_NODE(hook);

	if (peer && (peer != &ng_deadhook)) {
		/*
		 * Set the peer to point to ng_deadhook
		 * from this moment on we are effectively independent it.
		 * send it an rmhook message of its own.
		 */
		peer->hk_peer = &ng_deadhook;	/* They no longer know us */
		hook->hk_peer = &ng_deadhook;	/* Nor us, them */
		if (NG_HOOK_NODE(peer) == &ng_deadnode) {
			/*
			 * If it's already divorced from a node,
			 * just free it.
			 */
			TOPOLOGY_WUNLOCK();
		} else {
			/* The lock is dropped before messaging the peer. */
			TOPOLOGY_WUNLOCK();
			ng_rmhook_self(peer); 	/* Send it a surprise */
		}
		NG_HOOK_UNREF(peer);		/* account for peer link */
		NG_HOOK_UNREF(hook);		/* account for peer link */
	} else
		TOPOLOGY_WUNLOCK();

	TOPOLOGY_NOTOWNED();

	/*
	 * Remove the hook from the node's list to avoid possible recursion
	 * in case the disconnection results in node shutdown.
	 */
	if (node == &ng_deadnode) { /* happens if called from ng_con_nodes() */
		return;
	}
	LIST_REMOVE(hook, hk_hooks);
	node->nd_numhooks--;
	if (node->nd_type->disconnect) {
		/*
		 * The type handler may elect to destroy the node so don't
		 * trust its existence after this point. (except
		 * that we still hold a reference on it. (which we
		 * inherited from the hook we are destroying)
		 */
		(*node->nd_type->disconnect) (hook);
	}

	/*
	 * Note that because we will point to ng_deadnode, the original node
	 * is not decremented automatically so we do that manually.
	 */
	_NG_HOOK_NODE(hook) = &ng_deadnode;
	NG_NODE_UNREF(node);	/* We no longer point to it so adjust count */
	NG_HOOK_UNREF(hook);	/* Account for linkage (in list) to node */
}

/*
 * Take two hooks on a node and merge the connection so that the given node
 * is effectively bypassed.
*/
int
ng_bypass(hook_p hook1, hook_p hook2)
{
	/* Both hooks must belong to the same node. */
	if (hook1->hk_node != hook2->hk_node) {
		TRAP_ERROR();
		return (EINVAL);
	}
	TOPOLOGY_WLOCK();
	if (NG_HOOK_NOT_VALID(hook1) || NG_HOOK_NOT_VALID(hook2)) {
		TOPOLOGY_WUNLOCK();
		return (EINVAL);
	}
	/* Point the two far-end peers at each other ... */
	hook1->hk_peer->hk_peer = hook2->hk_peer;
	hook2->hk_peer->hk_peer = hook1->hk_peer;

	/* ... and detach the two local hooks onto the dead hook. */
	hook1->hk_peer = &ng_deadhook;
	hook2->hk_peer = &ng_deadhook;
	TOPOLOGY_WUNLOCK();

	NG_HOOK_UNREF(hook1);
	NG_HOOK_UNREF(hook2);

	/* XXX If we ever cache methods on hooks update them as well */
	ng_destroy_hook(hook1);
	ng_destroy_hook(hook2);
	return (0);
}

/*
 * Install a new netgraph type
 *
 * Returns EINVAL when the ABI version does not match or the name is empty
 * or not shorter than NG_TYPESIZ, EEXIST when a type of that name is
 * already registered, 0 on success.
 */
int
ng_newtype(struct ng_type *tp)
{
	const size_t namelen = strlen(tp->name);

	/* Check version and type name fields */
	if ((tp->version != NG_ABI_VERSION) || (namelen == 0) ||
	    (namelen >= NG_TYPESIZ)) {
		TRAP_ERROR();
		if (tp->version != NG_ABI_VERSION) {
			printf("Netgraph: Node type rejected. ABI mismatch. "
			    "Suggest recompile\n");
		}
		return (EINVAL);
	}

	/* Check for name collision */
	if (ng_findtype(tp->name) != NULL) {
		TRAP_ERROR();
		return (EEXIST);
	}

	/* Link in new type */
	TYPELIST_WLOCK();
	LIST_INSERT_HEAD(&ng_typelist, tp, types);
	tp->refs = 1;	/* first ref is linked list */
	TYPELIST_WUNLOCK();
	return (0);
}

/*
 * unlink a netgraph type
 * If no examples exist
 *
 * Returns EBUSY when the reference count is anything other than 1,
 * otherwise unlinks the type and returns 0.
 */
int
ng_rmtype(struct ng_type *tp)
{
	/* Refuse unless the type list holds the only remaining reference */
	if (tp->refs != 1) {
		TRAP_ERROR();
		return (EBUSY);
	}

	/* Unlink type */
	TYPELIST_WLOCK();
	LIST_REMOVE(tp, types);
	TYPELIST_WUNLOCK();
	return (0);
}

/*
 * Look for a type of the name given
 *
 * Walks the type list under its read lock and returns the first entry
 * whose name compares equal.  No reference is taken on the result.
 */
struct ng_type *
ng_findtype(const char *typename)
{
	struct ng_type *type;

	TYPELIST_RLOCK();
	LIST_FOREACH(type, &ng_typelist, types) {
		if (strcmp(type->name, typename) == 0)
			break;
	}
	TYPELIST_RUNLOCK();
	return (type);
}

/************************************************************************
			Composite routines
************************************************************************/
/*
 * Connect two nodes using the specified hooks, using queued functions.
*/
/*
 * Final stage of a queued connect: invoke the type's connect method for
 * 'hook' (if any) and clear HK_INVALID on it.  Every path frees 'item'.
 * Returns ENOENT if the hook has been moved to ng_deadnode, or the error
 * from the connect method, else 0.
 */
static int
ng_con_part3(node_p node, item_p item, hook_p hook)
{
	int	error = 0;

	/*
	 * When we run, we know that the node 'node' is locked for us.
	 * Our caller has a reference on the hook.
	 * Our caller has a reference on the node.
	 * (In this case our caller is ng_apply_item() ).
	 * The peer hook has a reference on the hook.
	 * We are all set up except for the final call to the node, and
	 * the clearing of the INVALID flag.
	 */
	if (NG_HOOK_NODE(hook) == &ng_deadnode) {
		/*
		 * The node must have been freed again since we last visited
		 * here. ng_destroy_hook() has this effect but nothing else does.
		 * We should just release our references and
		 * free anything we can think of.
		 * Since we know it's been destroyed, and it's our caller
		 * that holds the references, just return.
		 */
		ERROUT(ENOENT);
	}
	if (hook->hk_node->nd_type->connect) {
		if ((error = (*hook->hk_node->nd_type->connect) (hook))) {
			ng_destroy_hook(hook);	/* also zaps peer */
			printf("failed in ng_con_part3()\n");
			ERROUT(error);
		}
	}
	/*
	 *  XXX this is wrong for SMP. Possibly we need
	 * to separate out 'create' and 'invalid' flags.
	 * should only set flags on hooks we have locked under our node.
	 */
	hook->hk_flags &= ~HK_INVALID;
done:
	NG_FREE_ITEM(item);
	return (error);
}

/*
 * Middle stage of a queued connect: attach 'hook' to 'node', call the
 * type's newhook and connect methods, then hand 'item' on to
 * ng_con_part3() on the peer's node via ng_send_fn2().  Failures before
 * that hand-off free 'item' at 'done'; after ng_send_fn2() has been called
 * the function returns without freeing it.
 */
static int
ng_con_part2(node_p node, item_p item, hook_p hook)
{
	hook_p	peer;
	int	error = 0;

	/*
	 * When we run, we know that the node 'node' is locked for us.
	 * Our caller has a reference on the hook.
	 * Our caller has a reference on the node.
	 * (In this case our caller is ng_apply_item() ).
	 * The peer hook has a reference on the hook.
	 * our node pointer points to the 'dead' node.
	 * First check the hook name is unique.
	 * Should not happen because we checked before queueing this.
	 */
	if (ng_findhook(node, NG_HOOK_NAME(hook)) != NULL) {
		TRAP_ERROR();
		ng_destroy_hook(hook); /* should destroy peer too */
		printf("failed in ng_con_part2()\n");
		ERROUT(EEXIST);
	}
	/*
	 * Check if the node type code has something to say about it
	 * If it fails, the unref of the hook will also unref the attached node,
	 * however since that node is 'ng_deadnode' this will do nothing.
	 * The peer hook will also be destroyed.
	 */
	if (node->nd_type->newhook != NULL) {
		if ((error = (*node->nd_type->newhook)(node, hook,
		    hook->hk_name))) {
			ng_destroy_hook(hook); /* should destroy peer too */
			printf("failed in ng_con_part2()\n");
			ERROUT(error);
		}
	}

	/*
	 * The 'type' agrees so far, so go ahead and link it in.
	 * We'll ask again later when we actually connect the hooks.
	 */
	hook->hk_node = node;		/* just overwrite ng_deadnode */
	NG_NODE_REF(node);		/* each hook counts as a reference */
	LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks);
	node->nd_numhooks++;
	NG_HOOK_REF(hook);	/* one for the node */

	/*
	 * We now have a symmetrical situation, where both hooks have been
	 * linked to their nodes, the newhook methods have been called
	 * And the references are all correct. The hooks are still marked
	 * as invalid, as we have not called the 'connect' methods
	 * yet.
	 * We can call the local one immediately as we have the
	 * node locked, but we need to queue the remote one.
	 */
	if (hook->hk_node->nd_type->connect) {
		if ((error = (*hook->hk_node->nd_type->connect) (hook))) {
			ng_destroy_hook(hook);	/* also zaps peer */
			printf("failed in ng_con_part2(A)\n");
			ERROUT(error);
		}
	}

	/*
	 * Acquire topo mutex to avoid race with ng_destroy_hook().
	 */
	TOPOLOGY_RLOCK();
	peer = hook->hk_peer;
	if (peer == &ng_deadhook) {
		TOPOLOGY_RUNLOCK();
		printf("failed in ng_con_part2(B)\n");
		ng_destroy_hook(hook);
		ERROUT(ENOENT);
	}
	TOPOLOGY_RUNLOCK();

	if ((error = ng_send_fn2(peer->hk_node, peer, item, &ng_con_part3,
	    NULL, 0, NG_REUSE_ITEM))) {
		printf("failed in ng_con_part2(C)\n");
		ng_destroy_hook(hook);	/* also zaps peer */
		return (error);		/* item was consumed. */
	}
	hook->hk_flags &= ~HK_INVALID; /* need both to be able to work */
	return (0);			/* item was consumed. */
done:
	NG_FREE_ITEM(item);
	return (error);
}

/*
 * Connect this node with another node. We assume that this node is
 * currently locked, as we are only called from an NGM_CONNECT message.
 *
 * Returns EEXIST if 'name2' already exists on node2, the error from
 * ng_add_hook(), ENOMEM if the second hook cannot be allocated, or the
 * result of queueing ng_con_part2() on node2.
 */
static int
ng_con_nodes(item_p item, node_p node, const char *name,
    node_p node2, const char *name2)
{
	int	error;
	hook_p	hook;
	hook_p	hook2;

	if (ng_findhook(node2, name2) != NULL) {
		return(EEXIST);
	}
	if ((error = ng_add_hook(node, name, &hook)))  /* gives us a ref */
		return (error);
	/* Allocate the other hook and link it up */
	NG_ALLOC_HOOK(hook2);
	if (hook2 == NULL) {
		TRAP_ERROR();
		ng_destroy_hook(hook);	/* XXX check ref counts so far */
		NG_HOOK_UNREF(hook);	/* including our ref */
		return (ENOMEM);
	}
	hook2->hk_refs = 1;		/* start with a reference for us. */
	hook2->hk_flags = HK_INVALID;
	hook2->hk_peer = hook;		/* Link the two together */
	hook->hk_peer = hook2;
	NG_HOOK_REF(hook);		/* Add a ref for the peer to each*/
	NG_HOOK_REF(hook2);
	hook2->hk_node = &ng_deadnode;
	strlcpy(NG_HOOK_NAME(hook2), name2, NG_HOOKSIZ);

	/*
	 * Queue the function above.
	 * Processing continues in that function in the lock context of
	 * the other node.
	 */
	if ((error = ng_send_fn2(node2, hook2, item, &ng_con_part2, NULL, 0,
	    NG_NOFLAGS))) {
		printf("failed in ng_con_nodes(): %d\n", error);
		ng_destroy_hook(hook);	/* also zaps peer */
	}

	NG_HOOK_UNREF(hook);		/* Let each hook go if it wants to */
	NG_HOOK_UNREF(hook2);
	return (error);
}

/*
 * Make a peer and connect.
 * We assume that the local node is locked.
* The new node probably doesn't need a lock until
 * it has a hook, because it cannot really have any work until then,
 * but we should think about it a bit more.
 *
 * The problem may come if the other node also fires up
 * some hardware or a timer or some other source of activation,
 * also it may already get a command msg via its ID.
 *
 * We could use the same method as ng_con_nodes() but we'd have
 * to add ability to remove the node when failing. (Not hard, just
 * make arg1 point to the node to remove).
 * Unless of course we just ignore failure to connect and leave
 * an unconnected node?
 *
 * Summary of what the code below does: create a node of type 'type',
 * add hook 'name' to 'node' and hook 'name2' to the new node, cross-link
 * the two hooks, then call each node type's connect method (the second
 * only if the first succeeded).  On any failure the new node is removed
 * with ng_rmnode() and the error is returned; on success HK_INVALID is
 * cleared on both hooks and 0 is returned.
 */
static int
ng_mkpeer(node_p node, const char *name, const char *name2, char *type)
{
	node_p	node2;
	hook_p	hook1, hook2;
	int	error;

	if ((error = ng_make_node(type, &node2))) {
		return (error);
	}

	if ((error = ng_add_hook(node, name, &hook1))) { /* gives us a ref */
		ng_rmnode(node2, NULL, NULL, 0);
		return (error);
	}

	if ((error = ng_add_hook(node2, name2, &hook2))) {
		/* Undo both the new node and the first hook. */
		ng_rmnode(node2, NULL, NULL, 0);
		ng_destroy_hook(hook1);
		NG_HOOK_UNREF(hook1);
		return (error);
	}

	/*
	 * Actually link the two hooks together.
	 */
	hook1->hk_peer = hook2;
	hook2->hk_peer = hook1;

	/* Each hook is referenced by the other */
	NG_HOOK_REF(hook1);
	NG_HOOK_REF(hook2);

	/* Give each node the opportunity to veto the pending connection */
	if (hook1->hk_node->nd_type->connect) {
		error = (*hook1->hk_node->nd_type->connect) (hook1);
	}

	if ((error == 0) && hook2->hk_node->nd_type->connect) {
		error = (*hook2->hk_node->nd_type->connect) (hook2);

	}

	/*
	 * On a veto, tear down the hooks and the new node; otherwise make
	 * the hooks usable.  Either way, drop the references we were
	 * holding on the two hooks.
	 */
	if (error) {
		ng_destroy_hook(hook2);	/* also zaps hook1 */
		ng_rmnode(node2, NULL, NULL, 0);
	} else {
		/* As a last act, allow the hooks to be used */
		hook1->hk_flags &= ~HK_INVALID;
		hook2->hk_flags &= ~HK_INVALID;
	}
	NG_HOOK_UNREF(hook1);
	NG_HOOK_UNREF(hook2);
	return (error);
}

/************************************************************************
		Utility routines to send self messages
************************************************************************/

/*
 * Shut this node down as soon as everyone is clear of it.
 * Should add arg "immediately" to jump the queue.
 *
 * Returns 0 without doing anything for ng_deadnode.  Otherwise the node
 * is marked NGF_INVALID; if NGF_CLOSING is already set nothing more is
 * queued, else ng_rmnode() is queued on the node via ng_send_fn() and
 * that call's result is returned.
 */
int
ng_rmnode_self(node_p node)
{
	int		error;

	if (node == &ng_deadnode)
		return (0);
	node->nd_flags |= NGF_INVALID;
	if (node->nd_flags & NGF_CLOSING)
		return (0);

	error = ng_send_fn(node, NULL, &ng_rmnode, NULL, 0);
	return (error);
}

/*
 * Queued half of ng_rmhook_self(): destroys 'hook'.  The node, arg1 and
 * arg2 parameters are unused; they are there to fit the function-item
 * signature expected by ng_send_fn().
 */
static void
ng_rmhook_part2(node_p node, hook_p hook, void *arg1, int arg2)
{
	ng_destroy_hook(hook);
	return ;
}

/*
 * Ask for 'hook' to be destroyed from the context of its own node by
 * queueing ng_rmhook_part2() there.  Returns 0 immediately if the hook's
 * node is ng_deadnode, otherwise the result of ng_send_fn().
 */
int
ng_rmhook_self(hook_p hook)
{
	int		error;
	node_p node = NG_HOOK_NODE(hook);

	if (node == &ng_deadnode)
		return (0);

	error = ng_send_fn(node, hook, &ng_rmhook_part2, NULL, 0);
	return (error);
}

/***********************************************************************
 * Parse and verify a string of the form:  <NODE>:<PATH>
 *
 * Such a string can refer to a specific node or a specific hook
 * on a specific node, depending on how you look at it. In the
 * latter case, the PATH component must not end in a dot.
 *
 * Both <NODE>: and <PATH> are optional. The <PATH> is a string
 * of hook names separated by dots. This breaks out the original
 * string, setting *nodep to "NODE" (or NULL if none) and *pathp
 * to "PATH" (or NULL if degenerate). Also, *hookp will point to
 * the final hook component of <PATH>, if any, otherwise NULL.
 *
 * This returns -1 if the path is malformed. The char ** are optional.
***********************************************************************/ int ng_path_parse(char *addr, char **nodep, char **pathp, char **hookp) { char *node, *path, *hook; int k; /* * Extract absolute NODE, if any */ for (path = addr; *path && *path != ':'; path++); if (*path) { node = addr; /* Here's the NODE */ *path++ = '\0'; /* Here's the PATH */ /* Node name must not be empty */ if (!*node) return -1; /* A name of "." is OK; otherwise '.' not allowed */ if (strcmp(node, ".") != 0) { for (k = 0; node[k]; k++) if (node[k] == '.') return -1; } } else { node = NULL; /* No absolute NODE */ path = addr; /* Here's the PATH */ } /* Snoop for illegal characters in PATH */ for (k = 0; path[k]; k++) if (path[k] == ':') return -1; /* Check for no repeated dots in PATH */ for (k = 0; path[k]; k++) if (path[k] == '.' && path[k + 1] == '.') return -1; /* Remove extra (degenerate) dots from beginning or end of PATH */ if (path[0] == '.') path++; if (*path && path[strlen(path) - 1] == '.') path[strlen(path) - 1] = 0; /* If PATH has a dot, then we're not talking about a hook */ if (*path) { for (hook = path, k = 0; path[k]; k++) if (path[k] == '.') { hook = NULL; break; } } else path = hook = NULL; /* Done */ if (nodep) *nodep = node; if (pathp) *pathp = path; if (hookp) *hookp = hook; return (0); } /* * Given a path, which may be absolute or relative, and a starting node, * return the destination node. */ int ng_path2noderef(node_p here, const char *address, node_p *destp, hook_p *lasthook) { char fullpath[NG_PATHSIZ]; char *nodename, *path; node_p node, oldnode; /* Initialize */ if (destp == NULL) { TRAP_ERROR(); return EINVAL; } *destp = NULL; /* Make a writable copy of address for ng_path_parse() */ strncpy(fullpath, address, sizeof(fullpath) - 1); fullpath[sizeof(fullpath) - 1] = '\0'; /* Parse out node and sequence of hooks */ if (ng_path_parse(fullpath, &nodename, &path, NULL) < 0) { TRAP_ERROR(); return EINVAL; } /* * For an absolute address, jump to the starting node. 
* Note that this holds a reference on the node for us. * Don't forget to drop the reference if we don't need it. */ if (nodename) { node = ng_name2noderef(here, nodename); if (node == NULL) { TRAP_ERROR(); return (ENOENT); } } else { if (here == NULL) { TRAP_ERROR(); return (EINVAL); } node = here; NG_NODE_REF(node); } if (path == NULL) { if (lasthook != NULL) *lasthook = NULL; *destp = node; return (0); } /* * Now follow the sequence of hooks * * XXXGL: The path may demolish as we go the sequence, but if * we hold the topology mutex at critical places, then, I hope, * we would always have valid pointers in hand, although the * path behind us may no longer exist. */ for (;;) { hook_p hook; char *segment; /* * Break out the next path segment. Replace the dot we just * found with a NUL; "path" points to the next segment (or the * NUL at the end). */ for (segment = path; *path != '\0'; path++) { if (*path == '.') { *path++ = '\0'; break; } } /* We have a segment, so look for a hook by that name */ hook = ng_findhook(node, segment); TOPOLOGY_WLOCK(); /* Can't get there from here... */ if (hook == NULL || NG_HOOK_PEER(hook) == NULL || NG_HOOK_NOT_VALID(hook) || NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))) { TRAP_ERROR(); NG_NODE_UNREF(node); TOPOLOGY_WUNLOCK(); return (ENOENT); } /* * Hop on over to the next node * XXX * Big race conditions here as hooks and nodes go away * *** Idea.. store an ng_ID_t in each hook and use that * instead of the direct hook in this crawl? 
*/ oldnode = node; if ((node = NG_PEER_NODE(hook))) NG_NODE_REF(node); /* XXX RACE */ NG_NODE_UNREF(oldnode); /* XXX another race */ if (NG_NODE_NOT_VALID(node)) { NG_NODE_UNREF(node); /* XXX more races */ TOPOLOGY_WUNLOCK(); TRAP_ERROR(); return (ENXIO); } if (*path == '\0') { if (lasthook != NULL) { if (hook != NULL) { *lasthook = NG_HOOK_PEER(hook); NG_HOOK_REF(*lasthook); } else *lasthook = NULL; } TOPOLOGY_WUNLOCK(); *destp = node; return (0); } TOPOLOGY_WUNLOCK(); } } /***************************************************************\ * Input queue handling. * All activities are submitted to the node via the input queue * which implements a multiple-reader/single-writer gate. * Items which cannot be handled immediately are queued. * * read-write queue locking inline functions * \***************************************************************/ static __inline void ng_queue_rw(node_p node, item_p item, int rw); static __inline item_p ng_dequeue(node_p node, int *rw); static __inline item_p ng_acquire_read(node_p node, item_p item); static __inline item_p ng_acquire_write(node_p node, item_p item); static __inline void ng_leave_read(node_p node); static __inline void ng_leave_write(node_p node); /* * Definition of the bits fields in the ng_queue flag word. * Defined here rather than in netgraph.h because no-one should fiddle * with them. * * The ordering here may be important! don't shuffle these. 
*/
/*-
 Safety Barrier--------+ (adjustable to suit taste) (not used yet)
                       |
                       V
+-------+-------+-------+-------+-------+-------+-------+-------+
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
| |A|c|t|i|v|e| |R|e|a|d|e|r| |C|o|u|n|t| | | | | | | | | | |P|A|
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |O|W|
+-------+-------+-------+-------+-------+-------+-------+-------+
\___________________________ ____________________________/ | |
                            V                              | |
                  [active reader count]                    | |
                                                           | |
            Operation Pending -----------------------------+ |
                                                             |
          Active Writer -------------------------------------+

Node queue has such semantics:
- All flags modifications are atomic.
- Reader count can be incremented only if there is no writer or pending flags.
  As soon as this can't be done with single operation, it is implemented with
  spin loop and atomic_cmpset().
- Writer flag can be set only if there is no any bits set.
  It is implemented with atomic_cmpset().
- Pending flag can be set any time, but to avoid collision on queue processing
  all queue fields are protected by the mutex.
- Queue processing thread reads queue holding the mutex, but releases it while
  processing. When queue is empty pending flag is removed.
*/

/* Bit 0: a writer currently owns the node. */
#define WRITER_ACTIVE	0x00000001
/* Bit 1: at least one item is waiting on the node's input queue. */
#define OP_PENDING	0x00000002
/* Adding/subtracting this value bumps the reader count held in bits 2..31. */
#define READER_INCREMENT 0x00000004
#define READER_MASK	0xfffffffc	/* Not valid if WRITER_ACTIVE is set */
#define SAFETY_BARRIER	0x00100000	/* 128K items queued should be enough */

/* Defines of more elaborate states on the queue */
/* Mask of bits a new read cares about */
#define NGQ_RMASK	(WRITER_ACTIVE|OP_PENDING)

/* Mask of bits a new write cares about */
#define NGQ_WMASK	(NGQ_RMASK|READER_MASK)

/* Test to decide if there is something on the queue. */
#define QUEUE_ACTIVE(QP) ((QP)->q_flags & OP_PENDING)

/* How to decide what the next queued item is.
*/
#define HEAD_IS_READER(QP)  NGI_QUEUED_READER(STAILQ_FIRST(&(QP)->queue))
#define HEAD_IS_WRITER(QP)  NGI_QUEUED_WRITER(STAILQ_FIRST(&(QP)->queue)) /* notused */

/*
 * Read the status to decide if the next item on the queue can now run.
 * OP_PENDING is masked out of both tests: a queued reader only needs
 * WRITER_ACTIVE to be clear, a queued writer needs WRITER_ACTIVE and the
 * whole reader count to be clear.
 */
#define QUEUED_READER_CAN_PROCEED(QP)			\
		(((QP)->q_flags & (NGQ_RMASK & ~OP_PENDING)) == 0)
#define QUEUED_WRITER_CAN_PROCEED(QP)			\
		(((QP)->q_flags & (NGQ_WMASK & ~OP_PENDING)) == 0)

/* Is there a chance of getting ANY work off the queue? */
#define NEXT_QUEUED_ITEM_CAN_PROCEED(QP)				\
	((HEAD_IS_READER(QP)) ? QUEUED_READER_CAN_PROCEED(QP) :		\
				QUEUED_WRITER_CAN_PROCEED(QP))

/* Values for the 'rw' arguments used by the functions below. */
#define NGQRW_R 0
#define NGQRW_W 1

#define NGQ2_WORKQ	0x00000001

/*
 * Taking into account the current state of the queue and node, possibly take
 * the next entry off the queue and return it. Return NULL if there was
 * nothing we could return, either because there really was nothing there, or
 * because the node was in a state where it cannot yet process the next item
 * on the queue.
 *
 * On success *rw is set to NGQRW_R or NGQRW_W according to the kind of
 * lock that was taken on the node for the returned item; *rw is left
 * untouched when NULL is returned.  Must be called with the queue mutex
 * held.
 */
static __inline item_p
ng_dequeue(node_p node, int *rw)
{
	item_p item;
	struct ng_queue *ngq = &node->nd_input_queue;

	/* This MUST be called with the mutex held. */
	mtx_assert(&ngq->q_mtx, MA_OWNED);

	/* If there is nothing queued, then just return. */
	if (!QUEUE_ACTIVE(ngq)) {
		CTR4(KTR_NET, "%20s: node [%x] (%p) queue empty; "
		    "queue flags 0x%lx", __func__,
		    node->nd_ID, node, ngq->q_flags);
		return (NULL);
	}

	/*
	 * From here, we can assume there is a head item.
	 * We need to find out what it is and if it can be dequeued, given
	 * the current state of the node.
	 */
	if (HEAD_IS_READER(ngq)) {
		/* Retry the compare-and-set until it wins or a writer shows. */
		while (1) {
			/*
			 * NOTE(review): the snapshot is a long while the
			 * compare-and-set below is the _int variant; confirm
			 * the declared type of q_flags.
			 */
			long t = ngq->q_flags;
			if (t & WRITER_ACTIVE) {
				/* There is writer, reader can't proceed. */
				CTR4(KTR_NET, "%20s: node [%x] (%p) queued "
				    "reader can't proceed; queue flags 0x%lx",
				    __func__, node->nd_ID, node, t);
				return (NULL);
			}
			if (atomic_cmpset_acq_int(&ngq->q_flags, t,
			    t + READER_INCREMENT))
				break;
			cpu_spinwait();
		}
		/* We have got reader lock for the node. */
		*rw = NGQRW_R;
	} else if (atomic_cmpset_acq_int(&ngq->q_flags, OP_PENDING,
	    OP_PENDING + WRITER_ACTIVE)) {
		/*
		 * The flag word was exactly OP_PENDING (no readers, no
		 * writer), so we have got writer lock for the node.
		 */
		*rw = NGQRW_W;
	} else {
		/* There is somebody other, writer can't proceed. */
		CTR4(KTR_NET, "%20s: node [%x] (%p) queued writer can't "
		    "proceed; queue flags 0x%lx", __func__, node->nd_ID, node,
		    ngq->q_flags);
		return (NULL);
	}

	/*
	 * Now we dequeue the request (whatever it may be) and correct the
	 * pending flags and the next and last pointers.
	 */
	item = STAILQ_FIRST(&ngq->queue);
	STAILQ_REMOVE_HEAD(&ngq->queue, el_next);
	if (STAILQ_EMPTY(&ngq->queue))
		atomic_clear_int(&ngq->q_flags, OP_PENDING);
	CTR6(KTR_NET, "%20s: node [%x] (%p) returning item %p as %s; queue "
	    "flags 0x%lx", __func__, node->nd_ID, node, item, *rw ? "WRITER" :
	    "READER", ngq->q_flags);
	return (item);
}

/*
 * Queue a packet to be picked up later by someone else.
 * If the queue could be run now, add node to the queue handler's worklist.
 *
 * The item is tagged as reader or writer according to 'rw', its depth is
 * reset to 1, and it is appended to the node's input queue with
 * OP_PENDING set, all under the queue lock.
 */
static __inline void
ng_queue_rw(node_p node, item_p  item, int rw)
{
	struct ng_queue *ngq = &node->nd_input_queue;
	if (rw == NGQRW_W)
		NGI_SET_WRITER(item);
	else
		NGI_SET_READER(item);
	item->depth = 1;

	NG_QUEUE_LOCK(ngq);
	/* Set OP_PENDING flag and enqueue the item. */
	atomic_set_int(&ngq->q_flags, OP_PENDING);
	STAILQ_INSERT_TAIL(&ngq->queue, item, el_next);

	CTR5(KTR_NET, "%20s: node [%x] (%p) queued item %p as %s", __func__,
	    node->nd_ID, node, item, rw ? "WRITER" : "READER" );

	/*
	 * We can take the worklist lock with the node locked
	 * BUT NOT THE REVERSE!
	 */
	if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq))
		ng_worklist_add(node);
	NG_QUEUE_UNLOCK(ngq);
}

/*
 * Acquire reader lock on node. If node is busy, queue the packet.
 *
 * Returns 'item' when the reader count was bumped and the caller may
 * process it now; returns NULL when the item was queued instead.
 */
static __inline item_p
ng_acquire_read(node_p node, item_p item)
{
	KASSERT(node != &ng_deadnode,
	    ("%s: working on deadnode", __func__));

	/* Reader needs node without writer and pending items. */
	for (;;) {
		long t = node->nd_input_queue.q_flags;
		if (t & NGQ_RMASK)
			break; /* Node is not ready for reader. */
		if (atomic_cmpset_acq_int(&node->nd_input_queue.q_flags, t,
		    t + READER_INCREMENT)) {
			/* Successfully grabbed node */
			CTR4(KTR_NET, "%20s: node [%x] (%p) acquired item %p",
			    __func__, node->nd_ID, node, item);
			return (item);
		}
		/* Lost a race with another flag update; try again. */
		cpu_spinwait();
	};

	/* Queue the request for later. */
	ng_queue_rw(node, item, NGQRW_R);

	return (NULL);
}

/*
 * Acquire writer lock on node. If node is busy, queue the packet.
 *
 * Returns 'item' when the flag word went from 0 to WRITER_ACTIVE and the
 * caller may process it now; returns NULL when the item was queued
 * instead.  Unlike the reader path there is a single attempt, no retry.
 */
static __inline item_p
ng_acquire_write(node_p node, item_p item)
{
	KASSERT(node != &ng_deadnode,
	    ("%s: working on deadnode", __func__));

	/* Writer needs completely idle node. */
	if (atomic_cmpset_acq_int(&node->nd_input_queue.q_flags, 0,
	    WRITER_ACTIVE)) {
		/* Successfully grabbed node */
		CTR4(KTR_NET, "%20s: node [%x] (%p) acquired item %p",
		    __func__, node->nd_ID, node, item);
		return (item);
	}

	/* Queue the request for later. */
	ng_queue_rw(node, item, NGQRW_W);

	return (NULL);
}

/*
 * Disabled code: upgrade of a reader to a writer.  It is compiled out and
 * kept for reference only.
 * NOTE(review): it is declared to return item_p but uses bare "return;"
 * statements, so it would need fixing before being enabled.
 */
#if	0
static __inline item_p
ng_upgrade_write(node_p node, item_p item)
{
	struct ng_queue *ngq = &node->nd_input_queue;
	KASSERT(node != &ng_deadnode,
	    ("%s: working on deadnode", __func__));

	NGI_SET_WRITER(item);

	NG_QUEUE_LOCK(ngq);

	/*
	 * There will never be no readers as we are there ourselves.
	 * Set the WRITER_ACTIVE flags ASAP to block out fast track readers.
	 * The caller we are running from will call ng_leave_read()
	 * soon, so we must account for that. We must leave again with the
	 * READER lock. If we find other readers, then
	 * queue the request for later. However "later" may be right now
	 * if there are no readers. We don't really care if there are queued
	 * items as we will bypass them anyhow.
	 */
	atomic_add_int(&ngq->q_flags, WRITER_ACTIVE - READER_INCREMENT);
	if ((ngq->q_flags & (NGQ_WMASK & ~OP_PENDING)) == WRITER_ACTIVE) {
		NG_QUEUE_UNLOCK(ngq);

		/* It's just us, act on the item. */
		/* will NOT drop writer lock when done */
		ng_apply_item(node, item, 0);

		/*
		 * Having acted on the item, atomically
		 * downgrade back to READER and finish up.
		 */
		atomic_add_int(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE);

		/* Our caller will call ng_leave_read() */
		return;
	}
	/*
	 * It's not just us active, so queue us AT THE HEAD.
	 * "Why?" I hear you ask.
	 * Put us at the head of the queue as we've already been
	 * through it once. If there is nothing else waiting,
	 * set the correct flags.
	 */
	if (STAILQ_EMPTY(&ngq->queue)) {
		/* We've gone from, 0 to 1 item in the queue */
		atomic_set_int(&ngq->q_flags, OP_PENDING);

		CTR3(KTR_NET, "%20s: node [%x] (%p) set OP_PENDING", __func__,
		    node->nd_ID, node);
	};
	STAILQ_INSERT_HEAD(&ngq->queue, item, el_next);
	CTR4(KTR_NET, "%20s: node [%x] (%p) requeued item %p as WRITER",
	    __func__, node->nd_ID, node, item );

	/* Reverse what we did above. That downgrades us back to reader */
	atomic_add_int(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE);
	if (QUEUE_ACTIVE(ngq) && NEXT_QUEUED_ITEM_CAN_PROCEED(ngq))
		ng_worklist_add(node);
	NG_QUEUE_UNLOCK(ngq);

	return;
}
#endif

/* Release reader lock: subtract one reader from the flag word. */
static __inline void
ng_leave_read(node_p node)
{
	atomic_subtract_rel_int(&node->nd_input_queue.q_flags, READER_INCREMENT);
}

/* Release writer lock: clear WRITER_ACTIVE in the flag word. */
static __inline void
ng_leave_write(node_p node)
{
	atomic_clear_rel_int(&node->nd_input_queue.q_flags, WRITER_ACTIVE);
}

/* Purge node queue. Called on node shutdown.
*/
/*
 * ng_flush_input_queue: remove and free every item on the node's input
 * queue.  For an item carrying an apply callback, the callback's error is
 * set to ENOENT when the item's depth is 1, and the callback is invoked
 * if this was the last reference on it.  The queue lock is dropped around
 * the callback and the free, and re-taken before looking at the next
 * item.
 */
static void
ng_flush_input_queue(node_p node)
{
	struct ng_queue *ngq = &node->nd_input_queue;
	item_p item;

	NG_QUEUE_LOCK(ngq);
	while ((item = STAILQ_FIRST(&ngq->queue)) != NULL) {
		STAILQ_REMOVE_HEAD(&ngq->queue, el_next);
		if (STAILQ_EMPTY(&ngq->queue))
			atomic_clear_int(&ngq->q_flags, OP_PENDING);
		NG_QUEUE_UNLOCK(ngq);

		/* If the item is supplying a callback, call it with an error */
		if (item->apply != NULL) {
			if (item->depth == 1)
				item->apply->error = ENOENT;
			if (refcount_release(&item->apply->refs)) {
				(*item->apply->apply)(item->apply->context,
				    item->apply->error);
			}
		}
		NG_FREE_ITEM(item);
		NG_QUEUE_LOCK(ngq);
	}
	NG_QUEUE_UNLOCK(ngq);
}

/***********************************************************************
* Externally visible method for sending or queueing messages or data.
***********************************************************************/

/*
 * The module code should have filled out the item correctly by this stage:
 * Common:
 *    reference to destination node.
 *    Reference to destination rcv hook if relevant.
 *    apply pointer must be either NULL or a reference to a valid
 *    struct ng_apply_info.
 * Data:
 *    pointer to mbuf
 * Control_Message:
 *    pointer to msg.
 *    ID of original sender node. (return address)
 * Function:
 *    Function pointer
 *    void * argument
 *    integer argument
 *
 * The nodes have several routines and macros to help with this task:
 *
 * Return value: when the item ends up queued rather than run, EINPROGRESS
 * if NG_PROGRESS is set in 'flags' and 0 otherwise; when it is run
 * directly, the result of ng_apply_item(); EINVAL for a data item with no
 * mbuf (in which case the apply callback is released and the item freed
 * here).
 */
int
ng_snd_item(item_p item, int flags)
{
	hook_p hook;
	node_p node;
	int queue, rw;
	struct ng_queue *ngq;
	int error = 0;

	/* We are sending item, so it must be present! */
	KASSERT(item != NULL, ("ng_snd_item: item is NULL"));

#ifdef	NETGRAPH_DEBUG
	_ngi_check(item, __FILE__, __LINE__);
#endif

	/* Item was sent once more, postpone apply() call. */
	if (item->apply)
		refcount_acquire(&item->apply->refs);

	node = NGI_NODE(item);
	/* Node is never optional. */
	KASSERT(node != NULL, ("ng_snd_item: node is NULL"));

	hook = NGI_HOOK(item);
	/* Valid hook and mbuf are mandatory for data. */
	if ((item->el_flags & NGQF_TYPE) == NGQF_DATA) {
		KASSERT(hook != NULL, ("ng_snd_item: hook for data is NULL"));
		if (NGI_M(item) == NULL)
			ERROUT(EINVAL);
		CHECK_DATA_MBUF(NGI_M(item));
	}

	/*
	 * If the item or the node specifies single threading, force
	 * writer semantics. Similarly, the node may say one hook always
	 * produces writers. These are overrides.
	 */
	if (((item->el_flags & NGQF_RW) == NGQF_WRITER) ||
	    (node->nd_flags & NGF_FORCE_WRITER) ||
	    (hook && (hook->hk_flags & HK_FORCE_WRITER))) {
		rw = NGQRW_W;
	} else {
		rw = NGQRW_R;
	}

	/*
	 * If sender or receiver requests queued delivery, or call graph
	 * loops back from outbound to inbound path, or stack usage
	 * level is dangerous - enqueue message.
	 */
	if ((flags & NG_QUEUE) || (hook && (hook->hk_flags & HK_QUEUE))) {
		queue = 1;
	} else if (hook && (hook->hk_flags & HK_TO_INBOUND) &&
	    curthread->td_ng_outbound) {
		queue = 1;
	} else {
		queue = 0;
#ifdef GET_STACK_USAGE
		/*
		 * Most of netgraph nodes have small stack consumption and
		 * for them 25% of free stack space is more than enough.
		 * Nodes/hooks with higher stack usage should be marked as
		 * HI_STACK. For them 50% of stack will be guaranteed then.
		 * XXX: Values 25% and 50% are completely empirical.
		 */
		size_t	st, su, sl;
		GET_STACK_USAGE(st, su);
		sl = st - su;
		if ((sl * 4 < st) || ((sl * 2 < st) &&
		    ((node->nd_flags & NGF_HI_STACK) || (hook &&
		    (hook->hk_flags & HK_HI_STACK)))))
			queue = 1;
#endif
	}

	if (queue) {
		/* Put it on the queue for that node */
		ng_queue_rw(node, item, rw);
		return ((flags & NG_PROGRESS) ? EINPROGRESS : 0);
	}

	/*
	 * We already decided how we will be queued or treated.
	 * Try get the appropriate operating permission.
	 */
	if (rw == NGQRW_R)
		item = ng_acquire_read(node, item);
	else
		item = ng_acquire_write(node, item);

	/* Item was queued while trying to get permission. */
	if (item == NULL)
		return ((flags & NG_PROGRESS) ? EINPROGRESS : 0);

	NGI_GET_NODE(item, node); /* zaps stored node */

	item->depth++;
	error = ng_apply_item(node, item, rw); /* drops r/w lock when done */

	/*
	 * If something is waiting on queue and ready, schedule it.
	 * The flag is tested once without the lock as a cheap filter and
	 * again under the lock before acting on it.
	 */
	ngq = &node->nd_input_queue;
	if (QUEUE_ACTIVE(ngq)) {
		NG_QUEUE_LOCK(ngq);
		if (QUEUE_ACTIVE(ngq) && NEXT_QUEUED_ITEM_CAN_PROCEED(ngq))
			ng_worklist_add(node);
		NG_QUEUE_UNLOCK(ngq);
	}

	/*
	 * Node may go away as soon as we remove the reference.
	 * Whatever we do, DO NOT access the node again!
	 */
	NG_NODE_UNREF(node);

	return (error);

done:
	/* If was not sent, apply callback here. */
	if (item->apply != NULL) {
		if (item->depth == 0 && error != 0)
			item->apply->error = error;
		if (refcount_release(&item->apply->refs)) {
			(*item->apply->apply)(item->apply->context,
			    item->apply->error);
		}
	}

	NG_FREE_ITEM(item);
	return (error);
}

/*
 * We have an item that was possibly queued somewhere.
 * It should contain all the information needed
 * to run it on the appropriate node/hook.
 * If there is apply pointer and we own the last reference, call apply().
 *
 * 'rw' tells which lock the caller took on the node (NGQRW_R, or anything
 * else for the writer lock); that lock is released here once the item has
 * been dispatched.  The apply pointer and depth are copied out of the
 * item before dispatch and only those copies are used afterwards.
 * Returns the error produced by the handler that was invoked, EIO for a
 * data item whose hook or node is no longer valid, or EINVAL for a
 * message or function item aimed at an invalid node or hook.
 */
static int
ng_apply_item(node_p node, item_p item, int rw)
{
	hook_p  hook;
	ng_rcvdata_t *rcvdata;
	ng_rcvmsg_t *rcvmsg;
	struct ng_apply_info *apply;
	int	error = 0, depth;

	/* Node and item are never optional. */
	KASSERT(node != NULL, ("ng_apply_item: node is NULL"));
	KASSERT(item != NULL, ("ng_apply_item: item is NULL"));

	NGI_GET_HOOK(item, hook); /* clears stored hook */
#ifdef	NETGRAPH_DEBUG
	_ngi_check(item, __FILE__, __LINE__);
#endif

	/* Snapshot what the callback step at the end of the function needs. */
	apply = item->apply;
	depth = item->depth;

	switch (item->el_flags & NGQF_TYPE) {
	case NGQF_DATA:
		/*
		 * Check things are still ok as when we were queued.
		 */
		KASSERT(hook != NULL, ("ng_apply_item: hook for data is NULL"));
		if (NG_HOOK_NOT_VALID(hook) ||
		    NG_NODE_NOT_VALID(node)) {
			error = EIO;
			NG_FREE_ITEM(item);
			break;
		}
		/*
		 * If no receive method, just silently drop it.
		 * Give preference to the hook over-ride method.
		 */
		if ((!(rcvdata = hook->hk_rcvdata)) &&
		    (!(rcvdata = NG_HOOK_NODE(hook)->nd_type->rcvdata))) {
			error = 0;
			NG_FREE_ITEM(item);
			break;
		}
		error = (*rcvdata)(hook, item);
		break;
	case NGQF_MESG:
		if (hook && NG_HOOK_NOT_VALID(hook)) {
			/*
			 * The hook has been zapped then we can't use it.
			 * Immediately drop its reference.
			 * The message may not need it.
			 */
			NG_HOOK_UNREF(hook);
			hook = NULL;
		}
		/*
		 * Similarly, if the node is a zombie there is
		 * nothing we can do with it, drop everything.
		 */
		if (NG_NODE_NOT_VALID(node)) {
			TRAP_ERROR();
			error = EINVAL;
			NG_FREE_ITEM(item);
			break;
		}
		/*
		 * Call the appropriate message handler for the object.
		 * It is up to the message handler to free the message.
		 * If it's a generic message, handle it generically,
		 * otherwise call the type's message handler (if it exists).
		 * XXX (race). Remember that a queued message may
		 * reference a node or hook that has just been
		 * invalidated. It will exist as the queue code
		 * is holding a reference, but..
		 */
		if ((NGI_MSG(item)->header.typecookie == NGM_GENERIC_COOKIE) &&
		    ((NGI_MSG(item)->header.flags & NGF_RESP) == 0)) {
			error = ng_generic_msg(node, item, hook);
			break;
		}
		/* Prefer the hook's own handler, then the node type's. */
		if (((!hook) || (!(rcvmsg = hook->hk_rcvmsg))) &&
		    (!(rcvmsg = node->nd_type->rcvmsg))) {
			TRAP_ERROR();
			error = 0;
			NG_FREE_ITEM(item);
			break;
		}
		error = (*rcvmsg)(node, item, hook);
		break;
	case NGQF_FN:
	case NGQF_FN2:
		/*
		 * In the case of the shutdown message we allow it to hit
		 * even if the node is invalid.
		 */
		if (NG_NODE_NOT_VALID(node) &&
		    NGI_FN(item) != &ng_rmnode) {
			TRAP_ERROR();
			error = EINVAL;
			NG_FREE_ITEM(item);
			break;
		}
		/* Same is about some internal functions and invalid hook. */
		if (hook && NG_HOOK_NOT_VALID(hook) &&
		    NGI_FN2(item) != &ng_con_part2 &&
		    NGI_FN2(item) != &ng_con_part3 &&
		    NGI_FN(item) != &ng_rmhook_part2) {
			TRAP_ERROR();
			error = EINVAL;
			NG_FREE_ITEM(item);
			break;
		}

		/*
		 * NGQF_FN callbacks return nothing and the item is freed
		 * here; NGQF_FN2 callbacks receive the item itself and
		 * supply the error code.
		 */
		if ((item->el_flags & NGQF_TYPE) == NGQF_FN) {
			(*NGI_FN(item))(node, hook, NGI_ARG1(item),
			    NGI_ARG2(item));
			NG_FREE_ITEM(item);
		} else	/* it is NGQF_FN2 */
			error = (*NGI_FN2(item))(node, item, hook);
		break;
	}
	/*
	 * We held references on some of the resources
	 * that we took from the item. Now that we have
	 * finished doing everything, drop those references.
	 */
	if (hook)
		NG_HOOK_UNREF(hook);

	if (rw == NGQRW_R)
		ng_leave_read(node);
	else
		ng_leave_write(node);

	/* Apply callback. */
	if (apply != NULL) {
		if (depth == 1 && error != 0)
			apply->error = error;
		if (refcount_release(&apply->refs))
			(*apply->apply)(apply->context, apply->error);
	}

	return (error);
}

/***********************************************************************
 * Implement the 'generic' control messages
 ***********************************************************************/
static int
ng_generic_msg(node_p here, item_p item, hook_p lasthook)
{
	int error = 0;
	struct ng_mesg *msg;
	struct ng_mesg *resp = NULL;

	NGI_GET_MSG(item, msg);
	if (msg->header.typecookie != NGM_GENERIC_COOKIE) {
		TRAP_ERROR();
		error = EINVAL;
		goto out;
	}
	switch (msg->header.cmd) {
	case NGM_SHUTDOWN:
		ng_rmnode(here, NULL, NULL, 0);
		break;
	case NGM_MKPEER:
	    {
		struct ngm_mkpeer *const mkp = (struct ngm_mkpeer *) msg->data;

		if (msg->header.arglen != sizeof(*mkp)) {
			TRAP_ERROR();
			error = EINVAL;
			break;
		}
		mkp->type[sizeof(mkp->type) - 1] = '\0';
		mkp->ourhook[sizeof(mkp->ourhook) - 1] = '\0';
		mkp->peerhook[sizeof(mkp->peerhook) - 1] = '\0';
		error = ng_mkpeer(here, mkp->ourhook, mkp->peerhook, mkp->type);
		break;
	    }
	case NGM_CONNECT:
	    {
		struct ngm_connect *const con =
			(struct ngm_connect *) msg->data;
		node_p node2;

		if (msg->header.arglen != sizeof(*con)) {
			TRAP_ERROR();
			error = EINVAL;
			break;
		}
		con->path[sizeof(con->path) - 1] = '\0';
con->ourhook[sizeof(con->ourhook) - 1] = '\0'; con->peerhook[sizeof(con->peerhook) - 1] = '\0'; /* Don't forget we get a reference.. */ error = ng_path2noderef(here, con->path, &node2, NULL); if (error) break; error = ng_con_nodes(item, here, con->ourhook, node2, con->peerhook); NG_NODE_UNREF(node2); break; } case NGM_NAME: { struct ngm_name *const nam = (struct ngm_name *) msg->data; if (msg->header.arglen != sizeof(*nam)) { TRAP_ERROR(); error = EINVAL; break; } nam->name[sizeof(nam->name) - 1] = '\0'; error = ng_name_node(here, nam->name); break; } case NGM_RMHOOK: { struct ngm_rmhook *const rmh = (struct ngm_rmhook *) msg->data; hook_p hook; if (msg->header.arglen != sizeof(*rmh)) { TRAP_ERROR(); error = EINVAL; break; } rmh->ourhook[sizeof(rmh->ourhook) - 1] = '\0'; if ((hook = ng_findhook(here, rmh->ourhook)) != NULL) ng_destroy_hook(hook); break; } case NGM_NODEINFO: { struct nodeinfo *ni; NG_MKRESPONSE(resp, msg, sizeof(*ni), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } /* Fill in node info */ ni = (struct nodeinfo *) resp->data; if (NG_NODE_HAS_NAME(here)) strcpy(ni->name, NG_NODE_NAME(here)); strcpy(ni->type, here->nd_type->name); ni->id = ng_node2ID(here); ni->hooks = here->nd_numhooks; break; } case NGM_LISTHOOKS: { const int nhooks = here->nd_numhooks; struct hooklist *hl; struct nodeinfo *ni; hook_p hook; /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*hl) + (nhooks * sizeof(struct linkinfo)), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } hl = (struct hooklist *) resp->data; ni = &hl->nodeinfo; /* Fill in node info */ if (NG_NODE_HAS_NAME(here)) strcpy(ni->name, NG_NODE_NAME(here)); strcpy(ni->type, here->nd_type->name); ni->id = ng_node2ID(here); /* Cycle through the linked list of hooks */ ni->hooks = 0; LIST_FOREACH(hook, &here->nd_hooks, hk_hooks) { struct linkinfo *const link = &hl->link[ni->hooks]; if (ni->hooks >= nhooks) { log(LOG_ERR, "%s: number of %s changed\n", __func__, "hooks"); break; } if 
(NG_HOOK_NOT_VALID(hook)) continue; strcpy(link->ourhook, NG_HOOK_NAME(hook)); strcpy(link->peerhook, NG_PEER_HOOK_NAME(hook)); if (NG_PEER_NODE_NAME(hook)[0] != '\0') strcpy(link->nodeinfo.name, NG_PEER_NODE_NAME(hook)); strcpy(link->nodeinfo.type, NG_PEER_NODE(hook)->nd_type->name); link->nodeinfo.id = ng_node2ID(NG_PEER_NODE(hook)); link->nodeinfo.hooks = NG_PEER_NODE(hook)->nd_numhooks; ni->hooks++; } break; } case NGM_LISTNODES: { struct namelist *nl; node_p node; int i; IDHASH_RLOCK(); /* Get response struct. */ NG_MKRESPONSE(resp, msg, sizeof(*nl) + (V_ng_nodes * sizeof(struct nodeinfo)), M_NOWAIT | M_ZERO); if (resp == NULL) { IDHASH_RUNLOCK(); error = ENOMEM; break; } nl = (struct namelist *) resp->data; /* Cycle through the lists of nodes. */ nl->numnames = 0; for (i = 0; i <= V_ng_ID_hmask; i++) { LIST_FOREACH(node, &V_ng_ID_hash[i], nd_idnodes) { struct nodeinfo *const np = &nl->nodeinfo[nl->numnames]; if (NG_NODE_NOT_VALID(node)) continue; if (NG_NODE_HAS_NAME(node)) strcpy(np->name, NG_NODE_NAME(node)); strcpy(np->type, node->nd_type->name); np->id = ng_node2ID(node); np->hooks = node->nd_numhooks; KASSERT(nl->numnames < V_ng_nodes, ("%s: no space", __func__)); nl->numnames++; } } IDHASH_RUNLOCK(); break; } case NGM_LISTNAMES: { struct namelist *nl; node_p node; int i; NAMEHASH_RLOCK(); /* Get response struct. */ NG_MKRESPONSE(resp, msg, sizeof(*nl) + (V_ng_named_nodes * sizeof(struct nodeinfo)), M_NOWAIT); if (resp == NULL) { NAMEHASH_RUNLOCK(); error = ENOMEM; break; } nl = (struct namelist *) resp->data; /* Cycle through the lists of nodes. 
*/ nl->numnames = 0; for (i = 0; i <= V_ng_name_hmask; i++) { LIST_FOREACH(node, &V_ng_name_hash[i], nd_nodes) { struct nodeinfo *const np = &nl->nodeinfo[nl->numnames]; if (NG_NODE_NOT_VALID(node)) continue; strcpy(np->name, NG_NODE_NAME(node)); strcpy(np->type, node->nd_type->name); np->id = ng_node2ID(node); np->hooks = node->nd_numhooks; KASSERT(nl->numnames < V_ng_named_nodes, ("%s: no space", __func__)); nl->numnames++; } } NAMEHASH_RUNLOCK(); break; } case NGM_LISTTYPES: { struct typelist *tl; struct ng_type *type; int num = 0; TYPELIST_RLOCK(); /* Count number of types */ LIST_FOREACH(type, &ng_typelist, types) num++; /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*tl) + (num * sizeof(struct typeinfo)), M_NOWAIT); if (resp == NULL) { TYPELIST_RUNLOCK(); error = ENOMEM; break; } tl = (struct typelist *) resp->data; /* Cycle through the linked list of types */ tl->numtypes = 0; LIST_FOREACH(type, &ng_typelist, types) { struct typeinfo *const tp = &tl->typeinfo[tl->numtypes]; strcpy(tp->type_name, type->name); tp->numnodes = type->refs - 1; /* don't count list */ KASSERT(tl->numtypes < num, ("%s: no space", __func__)); tl->numtypes++; } TYPELIST_RUNLOCK(); break; } case NGM_BINARY2ASCII: { int bufSize = 20 * 1024; /* XXX hard coded constant */ const struct ng_parse_type *argstype; const struct ng_cmdlist *c; struct ng_mesg *binary, *ascii; /* Data area must contain a valid netgraph message */ binary = (struct ng_mesg *)msg->data; if (msg->header.arglen < sizeof(struct ng_mesg) || (msg->header.arglen - sizeof(struct ng_mesg) < binary->header.arglen)) { TRAP_ERROR(); error = EINVAL; break; } /* Get a response message with lots of room */ NG_MKRESPONSE(resp, msg, sizeof(*ascii) + bufSize, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } ascii = (struct ng_mesg *)resp->data; /* Copy binary message header to response message payload */ bcopy(binary, ascii, sizeof(*binary)); /* Find command by matching typecookie and command number */ for (c = 
here->nd_type->cmdlist; c != NULL && c->name != NULL; c++) { if (binary->header.typecookie == c->cookie && binary->header.cmd == c->cmd) break; } if (c == NULL || c->name == NULL) { for (c = ng_generic_cmds; c->name != NULL; c++) { if (binary->header.typecookie == c->cookie && binary->header.cmd == c->cmd) break; } if (c->name == NULL) { NG_FREE_MSG(resp); error = ENOSYS; break; } } /* Convert command name to ASCII */ snprintf(ascii->header.cmdstr, sizeof(ascii->header.cmdstr), "%s", c->name); /* Convert command arguments to ASCII */ argstype = (binary->header.flags & NGF_RESP) ? c->respType : c->mesgType; if (argstype == NULL) { *ascii->data = '\0'; } else { if ((error = ng_unparse(argstype, (u_char *)binary->data, ascii->data, bufSize)) != 0) { NG_FREE_MSG(resp); break; } } /* Return the result as struct ng_mesg plus ASCII string */ bufSize = strlen(ascii->data) + 1; ascii->header.arglen = bufSize; resp->header.arglen = sizeof(*ascii) + bufSize; break; } case NGM_ASCII2BINARY: { int bufSize = 20 * 1024; /* XXX hard coded constant */ const struct ng_cmdlist *c; const struct ng_parse_type *argstype; struct ng_mesg *ascii, *binary; int off = 0; /* Data area must contain at least a struct ng_mesg + '\0' */ ascii = (struct ng_mesg *)msg->data; if ((msg->header.arglen < sizeof(*ascii) + 1) || (ascii->header.arglen < 1) || (msg->header.arglen < sizeof(*ascii) + ascii->header.arglen)) { TRAP_ERROR(); error = EINVAL; break; } ascii->data[ascii->header.arglen - 1] = '\0'; /* Get a response message with lots of room */ NG_MKRESPONSE(resp, msg, sizeof(*binary) + bufSize, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } binary = (struct ng_mesg *)resp->data; /* Copy ASCII message header to response message payload */ bcopy(ascii, binary, sizeof(*ascii)); /* Find command by matching ASCII command string */ for (c = here->nd_type->cmdlist; c != NULL && c->name != NULL; c++) { if (strcmp(ascii->header.cmdstr, c->name) == 0) break; } if (c == NULL || c->name == NULL) { for 
(c = ng_generic_cmds; c->name != NULL; c++) { if (strcmp(ascii->header.cmdstr, c->name) == 0) break; } if (c->name == NULL) { NG_FREE_MSG(resp); error = ENOSYS; break; } } /* Convert command name to binary */ binary->header.cmd = c->cmd; binary->header.typecookie = c->cookie; /* Convert command arguments to binary */ argstype = (binary->header.flags & NGF_RESP) ? c->respType : c->mesgType; if (argstype == NULL) { bufSize = 0; } else { if ((error = ng_parse(argstype, ascii->data, &off, (u_char *)binary->data, &bufSize)) != 0) { NG_FREE_MSG(resp); break; } } /* Return the result */ binary->header.arglen = bufSize; resp->header.arglen = sizeof(*binary) + bufSize; break; } case NGM_TEXT_CONFIG: case NGM_TEXT_STATUS: /* * This one is tricky as it passes the command down to the * actual node, even though it is a generic type command. * This means we must assume that the item/msg is already freed * when control passes back to us. */ if (here->nd_type->rcvmsg != NULL) { NGI_MSG(item) = msg; /* put it back as we found it */ return((*here->nd_type->rcvmsg)(here, item, lasthook)); } /* Fall through if rcvmsg not supported */ default: TRAP_ERROR(); error = EINVAL; } /* * Sometimes a generic message may be statically allocated * to avoid problems with allocating when in tight memory situations. * Don't free it if it is so. * I break them appart here, because erros may cause a free if the item * in which case we'd be doing it twice. * they are kept together above, to simplify freeing. 
*/ out: NG_RESPOND_MSG(error, here, item, resp); NG_FREE_MSG(msg); return (error); } /************************************************************************ Queue element get/free routines ************************************************************************/ uma_zone_t ng_qzone; uma_zone_t ng_qdzone; static int numthreads = 0; /* number of queue threads */ static int maxalloc = 4096;/* limit the damage of a leak */ static int maxdata = 512; /* limit the damage of a DoS */ TUNABLE_INT("net.graph.threads", &numthreads); SYSCTL_INT(_net_graph, OID_AUTO, threads, CTLFLAG_RDTUN, &numthreads, 0, "Number of queue processing threads"); TUNABLE_INT("net.graph.maxalloc", &maxalloc); SYSCTL_INT(_net_graph, OID_AUTO, maxalloc, CTLFLAG_RDTUN, &maxalloc, 0, "Maximum number of non-data queue items to allocate"); TUNABLE_INT("net.graph.maxdata", &maxdata); SYSCTL_INT(_net_graph, OID_AUTO, maxdata, CTLFLAG_RDTUN, &maxdata, 0, "Maximum number of data queue items to allocate"); #ifdef NETGRAPH_DEBUG static TAILQ_HEAD(, ng_item) ng_itemlist = TAILQ_HEAD_INITIALIZER(ng_itemlist); static int allocated; /* number of items malloc'd */ #endif /* * Get a queue entry. * This is usually called when a packet first enters netgraph. * By definition, this is usually from an interrupt, or from a user. * Users are not so important, but try be quick for the times that it's * an interrupt. */ static __inline item_p ng_alloc_item(int type, int flags) { item_p item; KASSERT(((type & ~NGQF_TYPE) == 0), ("%s: incorrect item type: %d", __func__, type)); item = uma_zalloc((type == NGQF_DATA) ? ng_qdzone : ng_qzone, ((flags & NG_WAITOK) ? M_WAITOK : M_NOWAIT) | M_ZERO); if (item) { item->el_flags = type; #ifdef NETGRAPH_DEBUG mtx_lock(&ngq_mtx); TAILQ_INSERT_TAIL(&ng_itemlist, item, all); allocated++; mtx_unlock(&ngq_mtx); #endif } return (item); } /* * Release a queue entry */ void ng_free_item(item_p item) { /* * The item may hold resources on it's own. 
We need to free * these before we can free the item. What they are depends upon * what kind of item it is. it is important that nodes zero * out pointers to resources that they remove from the item * or we release them again here. */ switch (item->el_flags & NGQF_TYPE) { case NGQF_DATA: /* If we have an mbuf still attached.. */ NG_FREE_M(_NGI_M(item)); break; case NGQF_MESG: _NGI_RETADDR(item) = 0; NG_FREE_MSG(_NGI_MSG(item)); break; case NGQF_FN: case NGQF_FN2: /* nothing to free really, */ _NGI_FN(item) = NULL; _NGI_ARG1(item) = NULL; _NGI_ARG2(item) = 0; break; } /* If we still have a node or hook referenced... */ _NGI_CLR_NODE(item); _NGI_CLR_HOOK(item); #ifdef NETGRAPH_DEBUG mtx_lock(&ngq_mtx); TAILQ_REMOVE(&ng_itemlist, item, all); allocated--; mtx_unlock(&ngq_mtx); #endif uma_zfree(((item->el_flags & NGQF_TYPE) == NGQF_DATA) ? ng_qdzone : ng_qzone, item); } /* * Change type of the queue entry. * Possibly reallocates it from another UMA zone. */ static __inline item_p ng_realloc_item(item_p pitem, int type, int flags) { item_p item; int from, to; KASSERT((pitem != NULL), ("%s: can't reallocate NULL", __func__)); KASSERT(((type & ~NGQF_TYPE) == 0), ("%s: incorrect item type: %d", __func__, type)); from = ((pitem->el_flags & NGQF_TYPE) == NGQF_DATA); to = (type == NGQF_DATA); if (from != to) { /* If reallocation is required do it and copy item. 
*/ if ((item = ng_alloc_item(type, flags)) == NULL) { ng_free_item(pitem); return (NULL); } *item = *pitem; ng_free_item(pitem); } else item = pitem; item->el_flags = (item->el_flags & ~NGQF_TYPE) | type; return (item); } /************************************************************************ Module routines ************************************************************************/ /* * Handle the loading/unloading of a netgraph node type module */ int ng_mod_event(module_t mod, int event, void *data) { struct ng_type *const type = data; int error = 0; switch (event) { case MOD_LOAD: /* Register new netgraph node type */ if ((error = ng_newtype(type)) != 0) break; /* Call type specific code */ if (type->mod_event != NULL) if ((error = (*type->mod_event)(mod, event, data))) { TYPELIST_WLOCK(); type->refs--; /* undo it */ LIST_REMOVE(type, types); TYPELIST_WUNLOCK(); } break; case MOD_UNLOAD: if (type->refs > 1) { /* make sure no nodes exist! */ error = EBUSY; } else { if (type->refs == 0) /* failed load, nothing to undo */ break; if (type->mod_event != NULL) { /* check with type */ error = (*type->mod_event)(mod, event, data); if (error != 0) /* type refuses.. */ break; } TYPELIST_WLOCK(); LIST_REMOVE(type, types); TYPELIST_WUNLOCK(); } break; default: if (type->mod_event != NULL) error = (*type->mod_event)(mod, event, data); else error = EOPNOTSUPP; /* XXX ? */ break; } return (error); } static void vnet_netgraph_init(const void *unused __unused) { /* We start with small hashes, but they can grow. 
/*
 * Per-vnet teardown: remove every node still present in this vnet's ID
 * hash, then destroy the name and ID hash tables.
 *
 * Each pass of the outer loop scans the ID hash under the read lock for
 * the first node that is not &ng_deadnode, takes a reference on it, drops
 * the lock and asks the node to shut down via ng_rmnode().  The loop ends
 * when a full scan finds no such node.
 */
static void
vnet_netgraph_uninit(const void *unused __unused)
{
	node_p node = NULL, last_killed = NULL;
	int i;

	do {
		/* Find a node to kill */
		IDHASH_RLOCK();
		for (i = 0; i <= V_ng_ID_hmask; i++) {
			/*
			 * LIST_FOREACH leaves "node" NULL when the bucket is
			 * exhausted without a break, which is what the test
			 * after the inner loop depends on.
			 */
			LIST_FOREACH(node, &V_ng_ID_hash[i], nd_idnodes) {
				if (node != &ng_deadnode) {
					/* Hold it across the unlock below. */
					NG_NODE_REF(node);
					break;
				}
			}
			if (node != NULL)
				break;
		}
		IDHASH_RUNLOCK();

		/* Attempt to kill it only if it is a regular node */
		if (node != NULL) {
			if (node == last_killed) {
				/*
				 * This should never happen: the same node was
				 * found again after the previous pass already
				 * tried to remove it.  Escalate by setting
				 * NGF_REALLY_DIE, and panic if that flag was
				 * already set.
				 */
				printf("ng node %s needs NGF_REALLY_DIE\n",
				    node->nd_name);
				if (node->nd_flags & NGF_REALLY_DIE)
					panic("ng node %s won't die",
					    node->nd_name);
				node->nd_flags |= NGF_REALLY_DIE;
			}
			ng_rmnode(node, NULL, NULL, 0);
			/* Drop the reference taken during the scan. */
			NG_NODE_UNREF(node);
			/* Remembered only for the pointer comparison above. */
			last_killed = node;
		}
	} while (node != NULL);

	hashdestroy(V_ng_name_hash, M_NETGRAPH_NODE, V_ng_name_hmask);
	hashdestroy(V_ng_ID_hash, M_NETGRAPH_NODE, V_ng_ID_hmask);
}
*/ NG_WORKLIST_LOCK_INIT(); rw_init(&ng_typelist_lock, "netgraph types"); rw_init(&ng_idhash_lock, "netgraph idhash"); rw_init(&ng_namehash_lock, "netgraph namehash"); rw_init(&ng_topo_lock, "netgraph topology mutex"); #ifdef NETGRAPH_DEBUG mtx_init(&ng_nodelist_mtx, "netgraph nodelist mutex", NULL, MTX_DEF); mtx_init(&ngq_mtx, "netgraph item list mutex", NULL, MTX_DEF); #endif ng_qzone = uma_zcreate("NetGraph items", sizeof(struct ng_item), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(ng_qzone, maxalloc); ng_qdzone = uma_zcreate("NetGraph data items", sizeof(struct ng_item), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(ng_qdzone, maxdata); /* Autoconfigure number of threads. */ if (numthreads <= 0) numthreads = mp_ncpus; /* Create threads. */ p = NULL; /* start with no process */ for (i = 0; i < numthreads; i++) { if (kproc_kthread_add(ngthread, NULL, &p, &td, RFHIGHPID, 0, "ng_queue", "ng_queue%d", i)) { numthreads = i; break; } } break; case MOD_UNLOAD: /* You can't unload it because an interface may be using it. 
*/ error = EBUSY; break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t netgraph_mod = { "netgraph", ngb_mod_event, (NULL) }; DECLARE_MODULE(netgraph, netgraph_mod, SI_SUB_NETGRAPH, SI_ORDER_FIRST); SYSCTL_NODE(_net, OID_AUTO, graph, CTLFLAG_RW, 0, "netgraph Family"); -SYSCTL_INT(_net_graph, OID_AUTO, abi_version, CTLFLAG_RD, 0, NG_ABI_VERSION,""); -SYSCTL_INT(_net_graph, OID_AUTO, msg_version, CTLFLAG_RD, 0, NG_VERSION, ""); +SYSCTL_INT(_net_graph, OID_AUTO, abi_version, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_ABI_VERSION,""); +SYSCTL_INT(_net_graph, OID_AUTO, msg_version, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_VERSION, ""); #ifdef NETGRAPH_DEBUG void dumphook (hook_p hook, char *file, int line) { printf("hook: name %s, %d refs, Last touched:\n", _NG_HOOK_NAME(hook), hook->hk_refs); printf(" Last active @ %s, line %d\n", hook->lastfile, hook->lastline); if (line) { printf(" problem discovered at file %s, line %d\n", file, line); #ifdef KDB kdb_backtrace(); #endif } } void dumpnode(node_p node, char *file, int line) { printf("node: ID [%x]: type '%s', %d hooks, flags 0x%x, %d refs, %s:\n", _NG_NODE_ID(node), node->nd_type->name, node->nd_numhooks, node->nd_flags, node->nd_refs, node->nd_name); printf(" Last active @ %s, line %d\n", node->lastfile, node->lastline); if (line) { printf(" problem discovered at file %s, line %d\n", file, line); #ifdef KDB kdb_backtrace(); #endif } } void dumpitem(item_p item, char *file, int line) { printf(" ACTIVE item, last used at %s, line %d", item->lastfile, item->lastline); switch(item->el_flags & NGQF_TYPE) { case NGQF_DATA: printf(" - [data]\n"); break; case NGQF_MESG: printf(" - retaddr[%d]:\n", _NGI_RETADDR(item)); break; case NGQF_FN: printf(" - fn@%p (%p, %p, %p, %d (%x))\n", _NGI_FN(item), _NGI_NODE(item), _NGI_HOOK(item), item->body.fn.fn_arg1, item->body.fn.fn_arg2, item->body.fn.fn_arg2); break; case NGQF_FN2: printf(" - fn2@%p (%p, %p, %p, %d (%x))\n", _NGI_FN2(item), _NGI_NODE(item), 
_NGI_HOOK(item), item->body.fn.fn_arg1, item->body.fn.fn_arg2, item->body.fn.fn_arg2); break; } if (line) { printf(" problem discovered at file %s, line %d\n", file, line); if (_NGI_NODE(item)) { printf("node %p ([%x])\n", _NGI_NODE(item), ng_node2ID(_NGI_NODE(item))); } } } static void ng_dumpitems(void) { item_p item; int i = 1; TAILQ_FOREACH(item, &ng_itemlist, all) { printf("[%d] ", i++); dumpitem(item, NULL, 0); } } static void ng_dumpnodes(void) { node_p node; int i = 1; mtx_lock(&ng_nodelist_mtx); SLIST_FOREACH(node, &ng_allnodes, nd_all) { printf("[%d] ", i++); dumpnode(node, NULL, 0); } mtx_unlock(&ng_nodelist_mtx); } static void ng_dumphooks(void) { hook_p hook; int i = 1; mtx_lock(&ng_nodelist_mtx); SLIST_FOREACH(hook, &ng_allhooks, hk_all) { printf("[%d] ", i++); dumphook(hook, NULL, 0); } mtx_unlock(&ng_nodelist_mtx); } static int sysctl_debug_ng_dump_items(SYSCTL_HANDLER_ARGS) { int error; int val; int i; val = allocated; i = 1; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == 42) { ng_dumpitems(); ng_dumpnodes(); ng_dumphooks(); } return (0); } SYSCTL_PROC(_debug, OID_AUTO, ng_dump_items, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(int), sysctl_debug_ng_dump_items, "I", "Number of allocated items"); #endif /* NETGRAPH_DEBUG */ /*********************************************************************** * Worklist routines **********************************************************************/ /* * Pick a node off the list of nodes with work, * try get an item to process off it. Remove the node from the list. */ static void ngthread(void *arg) { for (;;) { node_p node; /* Get node from the worklist. 
*/ NG_WORKLIST_LOCK(); while ((node = STAILQ_FIRST(&ng_worklist)) == NULL) NG_WORKLIST_SLEEP(); STAILQ_REMOVE_HEAD(&ng_worklist, nd_input_queue.q_work); NG_WORKLIST_UNLOCK(); CURVNET_SET(node->nd_vnet); CTR3(KTR_NET, "%20s: node [%x] (%p) taken off worklist", __func__, node->nd_ID, node); /* * We have the node. We also take over the reference * that the list had on it. * Now process as much as you can, until it won't * let you have another item off the queue. * All this time, keep the reference * that lets us be sure that the node still exists. * Let the reference go at the last minute. */ for (;;) { item_p item; int rw; NG_QUEUE_LOCK(&node->nd_input_queue); item = ng_dequeue(node, &rw); if (item == NULL) { node->nd_input_queue.q_flags2 &= ~NGQ2_WORKQ; NG_QUEUE_UNLOCK(&node->nd_input_queue); break; /* go look for another node */ } else { NG_QUEUE_UNLOCK(&node->nd_input_queue); NGI_GET_NODE(item, node); /* zaps stored node */ ng_apply_item(node, item, rw); NG_NODE_UNREF(node); } } NG_NODE_UNREF(node); CURVNET_RESTORE(); } } /* * XXX * It's posible that a debugging NG_NODE_REF may need * to be outside the mutex zone */ static void ng_worklist_add(node_p node) { mtx_assert(&node->nd_input_queue.q_mtx, MA_OWNED); if ((node->nd_input_queue.q_flags2 & NGQ2_WORKQ) == 0) { /* * If we are not already on the work queue, * then put us on. */ node->nd_input_queue.q_flags2 |= NGQ2_WORKQ; NG_NODE_REF(node); /* XXX safe in mutex? 
*/ NG_WORKLIST_LOCK(); STAILQ_INSERT_TAIL(&ng_worklist, node, nd_input_queue.q_work); NG_WORKLIST_UNLOCK(); CTR3(KTR_NET, "%20s: node [%x] (%p) put on worklist", __func__, node->nd_ID, node); NG_WORKLIST_WAKEUP(); } else { CTR3(KTR_NET, "%20s: node [%x] (%p) already on worklist", __func__, node->nd_ID, node); } } /*********************************************************************** * Externally useable functions to set up a queue item ready for sending ***********************************************************************/ #ifdef NETGRAPH_DEBUG #define ITEM_DEBUG_CHECKS \ do { \ if (NGI_NODE(item) ) { \ printf("item already has node"); \ kdb_enter(KDB_WHY_NETGRAPH, "has node"); \ NGI_CLR_NODE(item); \ } \ if (NGI_HOOK(item) ) { \ printf("item already has hook"); \ kdb_enter(KDB_WHY_NETGRAPH, "has hook"); \ NGI_CLR_HOOK(item); \ } \ } while (0) #else #define ITEM_DEBUG_CHECKS #endif /* * Put mbuf into the item. * Hook and node references will be removed when the item is dequeued. * (or equivalent) * (XXX) Unsafe because no reference held by peer on remote node. * remote node might go away in this timescale. * We know the hooks can't go away because that would require getting * a writer item on both nodes and we must have at least a reader * here to be able to do this. * Note that the hook loaded is the REMOTE hook. * * This is possibly in the critical path for new data. */ item_p ng_package_data(struct mbuf *m, int flags) { item_p item; if ((item = ng_alloc_item(NGQF_DATA, flags)) == NULL) { NG_FREE_M(m); return (NULL); } ITEM_DEBUG_CHECKS; item->el_flags |= NGQF_READER; NGI_M(item) = m; return (item); } /* * Allocate a queue item and put items into it.. * Evaluate the address as this will be needed to queue it and * to work out what some of the fields should be. * Hook and node references will be removed when the item is dequeued. 
* (or equivalent) */ item_p ng_package_msg(struct ng_mesg *msg, int flags) { item_p item; if ((item = ng_alloc_item(NGQF_MESG, flags)) == NULL) { NG_FREE_MSG(msg); return (NULL); } ITEM_DEBUG_CHECKS; /* Messages items count as writers unless explicitly exempted. */ if (msg->header.cmd & NGM_READONLY) item->el_flags |= NGQF_READER; else item->el_flags |= NGQF_WRITER; /* * Set the current lasthook into the queue item */ NGI_MSG(item) = msg; NGI_RETADDR(item) = 0; return (item); } #define SET_RETADDR(item, here, retaddr) \ do { /* Data or fn items don't have retaddrs */ \ if ((item->el_flags & NGQF_TYPE) == NGQF_MESG) { \ if (retaddr) { \ NGI_RETADDR(item) = retaddr; \ } else { \ /* \ * The old return address should be ok. \ * If there isn't one, use the address \ * here. \ */ \ if (NGI_RETADDR(item) == 0) { \ NGI_RETADDR(item) \ = ng_node2ID(here); \ } \ } \ } \ } while (0) int ng_address_hook(node_p here, item_p item, hook_p hook, ng_ID_t retaddr) { hook_p peer; node_p peernode; ITEM_DEBUG_CHECKS; /* * Quick sanity check.. * Since a hook holds a reference on it's node, once we know * that the peer is still connected (even if invalid,) we know * that the peer node is present, though maybe invalid. */ TOPOLOGY_RLOCK(); if ((hook == NULL) || NG_HOOK_NOT_VALID(hook) || NG_HOOK_NOT_VALID(peer = NG_HOOK_PEER(hook)) || NG_NODE_NOT_VALID(peernode = NG_PEER_NODE(hook))) { NG_FREE_ITEM(item); TRAP_ERROR(); TOPOLOGY_RUNLOCK(); return (ENETDOWN); } /* * Transfer our interest to the other (peer) end. */ NG_HOOK_REF(peer); NG_NODE_REF(peernode); NGI_SET_HOOK(item, peer); NGI_SET_NODE(item, peernode); SET_RETADDR(item, here, retaddr); TOPOLOGY_RUNLOCK(); return (0); } int ng_address_path(node_p here, item_p item, char *address, ng_ID_t retaddr) { node_p dest = NULL; hook_p hook = NULL; int error; ITEM_DEBUG_CHECKS; /* * Note that ng_path2noderef increments the reference count * on the node for us if it finds one. So we don't have to. 
*/ error = ng_path2noderef(here, address, &dest, &hook); if (error) { NG_FREE_ITEM(item); return (error); } NGI_SET_NODE(item, dest); if (hook) NGI_SET_HOOK(item, hook); SET_RETADDR(item, here, retaddr); return (0); } int ng_address_ID(node_p here, item_p item, ng_ID_t ID, ng_ID_t retaddr) { node_p dest; ITEM_DEBUG_CHECKS; /* * Find the target node. */ dest = ng_ID2noderef(ID); /* GETS REFERENCE! */ if (dest == NULL) { NG_FREE_ITEM(item); TRAP_ERROR(); return(EINVAL); } /* Fill out the contents */ NGI_SET_NODE(item, dest); NGI_CLR_HOOK(item); SET_RETADDR(item, here, retaddr); return (0); } /* * special case to send a message to self (e.g. destroy node) * Possibly indicate an arrival hook too. * Useful for removing that hook :-) */ item_p ng_package_msg_self(node_p here, hook_p hook, struct ng_mesg *msg) { item_p item; /* * Find the target node. * If there is a HOOK argument, then use that in preference * to the address. */ if ((item = ng_alloc_item(NGQF_MESG, NG_NOFLAGS)) == NULL) { NG_FREE_MSG(msg); return (NULL); } /* Fill out the contents */ item->el_flags |= NGQF_WRITER; NG_NODE_REF(here); NGI_SET_NODE(item, here); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_MSG(item) = msg; NGI_RETADDR(item) = ng_node2ID(here); return (item); } /* * Send ng_item_fn function call to the specified node. */ int ng_send_fn(node_p node, hook_p hook, ng_item_fn *fn, void * arg1, int arg2) { return ng_send_fn1(node, hook, fn, arg1, arg2, NG_NOFLAGS); } int ng_send_fn1(node_p node, hook_p hook, ng_item_fn *fn, void * arg1, int arg2, int flags) { item_p item; if ((item = ng_alloc_item(NGQF_FN, flags)) == NULL) { return (ENOMEM); } item->el_flags |= NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; return(ng_snd_item(item, flags)); } /* * Send ng_item_fn2 function call to the specified node. 
* * If an optional pitem parameter is supplied, its apply * callback will be copied to the new item. If also NG_REUSE_ITEM * flag is set, no new item will be allocated, but pitem will * be used. */ int ng_send_fn2(node_p node, hook_p hook, item_p pitem, ng_item_fn2 *fn, void *arg1, int arg2, int flags) { item_p item; KASSERT((pitem != NULL || (flags & NG_REUSE_ITEM) == 0), ("%s: NG_REUSE_ITEM but no pitem", __func__)); /* * Allocate a new item if no supplied or * if we can't use supplied one. */ if (pitem == NULL || (flags & NG_REUSE_ITEM) == 0) { if ((item = ng_alloc_item(NGQF_FN2, flags)) == NULL) return (ENOMEM); if (pitem != NULL) item->apply = pitem->apply; } else { if ((item = ng_realloc_item(pitem, NGQF_FN2, flags)) == NULL) return (ENOMEM); } item->el_flags = (item->el_flags & ~NGQF_RW) | NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN2(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; return(ng_snd_item(item, flags)); } /* * Official timeout routines for Netgraph nodes. 
/*
 * A special modified version of untimeout().
 *
 * Stops callout "c" and, if it was stopped before it ran and it was an
 * ng_callout() timeout armed for "node", frees the queue item that the
 * timeout would have delivered (NG_FREE_ITEM drops the references the
 * item was given).  The callout's argument pointer is always cleared.
 *
 * Returns the value returned by callout_stop().
 */
int
ng_uncallout(struct callout *c, node_p node)
{
	item_p item;
	int rval;

	KASSERT(c != NULL, ("ng_uncallout: NULL callout"));
	KASSERT(node != NULL, ("ng_uncallout: NULL node"));

	rval = callout_stop(c);
	item = c->c_arg;
	/*
	 * Do an extra check: only touch the item if the callout was really
	 * cancelled, it is one of ours, and it targets this node.  The
	 * argument pointer is tested for NULL first because this function
	 * clears c->c_arg on every call (below) and NGI_NODE() would
	 * otherwise be applied to a NULL item.
	 */
	if ((rval > 0) && (c->c_func == &ng_callout_trampoline) &&
	    (item != NULL) && (NGI_NODE(item) == node)) {
		/*
		 * We successfully removed it from the queue before it ran
		 * So now we need to unreference everything that was
		 * given extra references. (NG_FREE_ITEM does this).
		 */
		NG_FREE_ITEM(item);
	}
	c->c_arg = NULL;

	return (rval);
}
* * Subject to the following obligations and disclaimer of warranty, use and * redistribution of this software, in source or object code forms, with or * without modifications are expressly permitted by Whistle Communications; * provided, however, that: * 1. Any and all reproductions of the source or object code must include the * copyright notice above and the following disclaimer of warranties; and * 2. No rights are granted, in any manner or form, to use Whistle * Communications, Inc. trademarks, including the mark "WHISTLE * COMMUNICATIONS" on advertising, endorsements, or otherwise except as * such appears in the above copyright notice or in the software. * * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. 
* * Author: Julian Elischer * * $FreeBSD$ * $Whistle: ng_socket.c,v 1.28 1999/11/01 09:24:52 julian Exp $ */ /* * Netgraph socket nodes * * There are two types of netgraph sockets, control and data. * Control sockets have a netgraph node, but data sockets are * parasitic on control sockets, and have no node of their own. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NG_SEPARATE_MALLOC static MALLOC_DEFINE(M_NETGRAPH_PATH, "netgraph_path", "netgraph path info"); static MALLOC_DEFINE(M_NETGRAPH_SOCK, "netgraph_sock", "netgraph socket info"); #else #define M_NETGRAPH_PATH M_NETGRAPH #define M_NETGRAPH_SOCK M_NETGRAPH #endif /* * It's Ascii-art time! * +-------------+ +-------------+ * |socket (ctl)| |socket (data)| * +-------------+ +-------------+ * ^ ^ * | | * v v * +-----------+ +-----------+ * |pcb (ctl)| |pcb (data)| * +-----------+ +-----------+ * ^ ^ * | | * v v * +--------------------------+ * | Socket type private | * | data | * +--------------------------+ * ^ * | * v * +----------------+ * | struct ng_node | * +----------------+ */ /* Netgraph node methods */ static ng_constructor_t ngs_constructor; static ng_rcvmsg_t ngs_rcvmsg; static ng_shutdown_t ngs_shutdown; static ng_newhook_t ngs_newhook; static ng_connect_t ngs_connect; static ng_findhook_t ngs_findhook; static ng_rcvdata_t ngs_rcvdata; static ng_disconnect_t ngs_disconnect; /* Internal methods */ static int ng_attach_data(struct socket *so); static int ng_attach_cntl(struct socket *so); static int ng_attach_common(struct socket *so, int type); static void ng_detach_common(struct ngpcb *pcbp, int type); static void ng_socket_free_priv(struct ngsock *priv); static int ng_connect_data(struct sockaddr *nam, struct ngpcb *pcbp); static int ng_bind(struct sockaddr *nam, struct ngpcb *pcbp); static int ngs_mod_event(module_t mod, int event, void *data); 
static void ng_socket_item_applied(void *context, int error); /* Netgraph type descriptor */ static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_SOCKET_NODE_TYPE, .mod_event = ngs_mod_event, .constructor = ngs_constructor, .rcvmsg = ngs_rcvmsg, .shutdown = ngs_shutdown, .newhook = ngs_newhook, .connect = ngs_connect, .findhook = ngs_findhook, .rcvdata = ngs_rcvdata, .disconnect = ngs_disconnect, }; NETGRAPH_INIT_ORDERED(socket, &typestruct, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); /* Buffer space */ static u_long ngpdg_sendspace = 20 * 1024; /* really max datagram size */ SYSCTL_ULONG(_net_graph, OID_AUTO, maxdgram, CTLFLAG_RW, &ngpdg_sendspace , 0, "Maximum outgoing Netgraph datagram size"); static u_long ngpdg_recvspace = 20 * 1024; SYSCTL_ULONG(_net_graph, OID_AUTO, recvspace, CTLFLAG_RW, &ngpdg_recvspace , 0, "Maximum space for incoming Netgraph datagrams"); /* List of all sockets (for netstat -f netgraph) */ static LIST_HEAD(, ngpcb) ngsocklist; static struct mtx ngsocketlist_mtx; #define sotongpcb(so) ((struct ngpcb *)(so)->so_pcb) /* If getting unexplained errors returned, set this to "kdb_enter("X"); */ #ifndef TRAP_ERROR #define TRAP_ERROR #endif /*************************************************************** Control sockets ***************************************************************/ static int ngc_attach(struct socket *so, int proto, struct thread *td) { struct ngpcb *const pcbp = sotongpcb(so); int error; error = priv_check(td, PRIV_NETGRAPH_CONTROL); if (error) return (error); if (pcbp != NULL) return (EISCONN); return (ng_attach_cntl(so)); } static void ngc_detach(struct socket *so) { struct ngpcb *const pcbp = sotongpcb(so); KASSERT(pcbp != NULL, ("ngc_detach: pcbp == NULL")); ng_detach_common(pcbp, NG_CONTROL); } static int ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct ngpcb *const pcbp = sotongpcb(so); struct ngsock *const priv = 
NG_NODE_PRIVATE(pcbp->sockdata->node); struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr; struct ng_mesg *msg; struct mbuf *m0; item_p item; char *path = NULL; int len, error = 0; struct ng_apply_info apply; if (control) { error = EINVAL; goto release; } /* Require destination as there may be >= 1 hooks on this node. */ if (addr == NULL) { error = EDESTADDRREQ; goto release; } /* * Allocate an expendable buffer for the path, chop off * the sockaddr header, and make sure it's NUL terminated. */ len = sap->sg_len - 2; path = malloc(len + 1, M_NETGRAPH_PATH, M_WAITOK); bcopy(sap->sg_data, path, len); path[len] = '\0'; /* * Move the actual message out of mbufs into a linear buffer. * Start by adding up the size of the data. (could use mh_len?) */ for (len = 0, m0 = m; m0 != NULL; m0 = m0->m_next) len += m0->m_len; /* * Move the data into a linear buffer as well. * Messages are not delivered in mbufs. */ msg = malloc(len + 1, M_NETGRAPH_MSG, M_WAITOK); m_copydata(m, 0, len, (char *)msg); if (msg->header.version != NG_VERSION) { free(msg, M_NETGRAPH_MSG); error = EINVAL; goto release; } /* * Hack alert! * We look into the message and if it mkpeers a node of unknown type, we * try to load it. We need to do this now, in syscall thread, because if * message gets queued and applied later we will get panic. */ if (msg->header.typecookie == NGM_GENERIC_COOKIE && msg->header.cmd == NGM_MKPEER) { struct ngm_mkpeer *const mkp = (struct ngm_mkpeer *) msg->data; if (ng_findtype(mkp->type) == NULL) { char filename[NG_TYPESIZ + 3]; int fileid; /* Not found, try to load it as a loadable module. */ snprintf(filename, sizeof(filename), "ng_%s", mkp->type); error = kern_kldload(curthread, filename, &fileid); if (error != 0) { free(msg, M_NETGRAPH_MSG); goto release; } /* See if type has been loaded successfully. 
*/ if (ng_findtype(mkp->type) == NULL) { free(msg, M_NETGRAPH_MSG); (void)kern_kldunload(curthread, fileid, LINKER_UNLOAD_NORMAL); error = ENXIO; goto release; } } } item = ng_package_msg(msg, M_WAITOK); if ((error = ng_address_path((pcbp->sockdata->node), item, path, 0)) != 0) { #ifdef TRACE_MESSAGES printf("ng_address_path: errx=%d\n", error); #endif goto release; } #ifdef TRACE_MESSAGES printf("[%x]:<---------[socket]: c=<%d>cmd=%x(%s) f=%x #%d (%s)\n", item->el_dest->nd_ID, msg->header.typecookie, msg->header.cmd, msg->header.cmdstr, msg->header.flags, msg->header.token, item->el_dest->nd_type->name); #endif SAVE_LINE(item); /* * We do not want to return from syscall until the item * is processed by destination node. We register callback * on the item, which will update priv->error when item * was applied. * If ng_snd_item() has queued item, we sleep until * callback wakes us up. */ bzero(&apply, sizeof(apply)); apply.apply = ng_socket_item_applied; apply.context = priv; item->apply = &apply; priv->error = -1; error = ng_snd_item(item, 0); mtx_lock(&priv->mtx); if (priv->error == -1) msleep(priv, &priv->mtx, 0, "ngsock", 0); mtx_unlock(&priv->mtx); KASSERT(priv->error != -1, ("ng_socket: priv->error wasn't updated")); error = priv->error; release: if (path != NULL) free(path, M_NETGRAPH_PATH); if (control != NULL) m_freem(control); if (m != NULL) m_freem(m); return (error); } static int ngc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct ngpcb *const pcbp = sotongpcb(so); if (pcbp == 0) return (EINVAL); return (ng_bind(nam, pcbp)); } static int ngc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { /* * At this time refuse to do this.. it used to * do something but it was undocumented and not used. 
*/ printf("program tried to connect control socket to remote node\n"); return (EINVAL); } /*************************************************************** Data sockets ***************************************************************/ static int ngd_attach(struct socket *so, int proto, struct thread *td) { struct ngpcb *const pcbp = sotongpcb(so); if (pcbp != NULL) return (EISCONN); return (ng_attach_data(so)); } static void ngd_detach(struct socket *so) { struct ngpcb *const pcbp = sotongpcb(so); KASSERT(pcbp != NULL, ("ngd_detach: pcbp == NULL")); ng_detach_common(pcbp, NG_DATA); } static int ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct ngpcb *const pcbp = sotongpcb(so); struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr; int len, error; hook_p hook = NULL; char hookname[NG_HOOKSIZ]; if ((pcbp == NULL) || (control != NULL)) { error = EINVAL; goto release; } if (pcbp->sockdata == NULL) { error = ENOTCONN; goto release; } if (sap == NULL) len = 0; /* Make compiler happy. */ else len = sap->sg_len - 2; /* * If the user used any of these ways to not specify an address * then handle specially. */ if ((sap == NULL) || (len <= 0) || (*sap->sg_data == '\0')) { if (NG_NODE_NUMHOOKS(pcbp->sockdata->node) != 1) { error = EDESTADDRREQ; goto release; } /* * If exactly one hook exists, just use it. * Special case to allow write(2) to work on an ng_socket. */ hook = LIST_FIRST(&pcbp->sockdata->node->nd_hooks); } else { if (len >= NG_HOOKSIZ) { error = EINVAL; goto release; } /* * chop off the sockaddr header, and make sure it's NUL * terminated */ bcopy(sap->sg_data, hookname, len); hookname[len] = '\0'; /* Find the correct hook from 'hookname' */ hook = ng_findhook(pcbp->sockdata->node, hookname); if (hook == NULL) { error = EHOSTUNREACH; goto release; } } /* Send data. 
*/ NG_SEND_DATA_FLAGS(error, hook, m, NG_WAITOK); release: if (control != NULL) m_freem(control); if (m != NULL) m_freem(m); return (error); } static int ngd_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct ngpcb *const pcbp = sotongpcb(so); if (pcbp == 0) return (EINVAL); return (ng_connect_data(nam, pcbp)); } /* * Used for both data and control sockets */ static int ng_getsockaddr(struct socket *so, struct sockaddr **addr) { struct ngpcb *pcbp; struct sockaddr_ng *sg; int sg_len; int error = 0; pcbp = sotongpcb(so); if ((pcbp == NULL) || (pcbp->sockdata == NULL)) /* XXXGL: can this still happen? */ return (EINVAL); sg_len = sizeof(struct sockaddr_ng) + NG_NODESIZ - sizeof(sg->sg_data); sg = malloc(sg_len, M_SONAME, M_WAITOK | M_ZERO); mtx_lock(&pcbp->sockdata->mtx); if (pcbp->sockdata->node != NULL) { node_p node = pcbp->sockdata->node; if (NG_NODE_HAS_NAME(node)) bcopy(NG_NODE_NAME(node), sg->sg_data, strlen(NG_NODE_NAME(node))); mtx_unlock(&pcbp->sockdata->mtx); sg->sg_len = sg_len; sg->sg_family = AF_NETGRAPH; *addr = (struct sockaddr *)sg; } else { mtx_unlock(&pcbp->sockdata->mtx); free(sg, M_SONAME); error = EINVAL; } return (error); } /* * Attach a socket to it's protocol specific partner. * For a control socket, actually create a netgraph node and attach * to it as well. */ static int ng_attach_cntl(struct socket *so) { struct ngsock *priv; struct ngpcb *pcbp; node_p node; int error; /* Setup protocol control block */ if ((error = ng_attach_common(so, NG_CONTROL)) != 0) return (error); pcbp = sotongpcb(so); /* Make the generic node components */ if ((error = ng_make_node_common(&typestruct, &node)) != 0) { ng_detach_common(pcbp, NG_CONTROL); return (error); } /* * Allocate node private info and hash. We start * with 16 hash entries, however we may grow later * in ngs_newhook(). We can't predict how much hooks * does this node plan to have. 
*/ priv = malloc(sizeof(*priv), M_NETGRAPH_SOCK, M_WAITOK | M_ZERO); priv->hash = hashinit(16, M_NETGRAPH_SOCK, &priv->hmask); /* Initialize mutex. */ mtx_init(&priv->mtx, "ng_socket", NULL, MTX_DEF); /* Link the pcb the private data. */ priv->ctlsock = pcbp; pcbp->sockdata = priv; priv->refs++; priv->node = node; /* Store a hint for netstat(1). */ priv->node_id = priv->node->nd_ID; /* Link the node and the private data. */ NG_NODE_SET_PRIVATE(priv->node, priv); NG_NODE_REF(priv->node); priv->refs++; return (0); } static int ng_attach_data(struct socket *so) { return (ng_attach_common(so, NG_DATA)); } /* * Set up a socket protocol control block. * This code is shared between control and data sockets. */ static int ng_attach_common(struct socket *so, int type) { struct ngpcb *pcbp; int error; /* Standard socket setup stuff. */ error = soreserve(so, ngpdg_sendspace, ngpdg_recvspace); if (error) return (error); /* Allocate the pcb. */ pcbp = malloc(sizeof(struct ngpcb), M_PCB, M_WAITOK | M_ZERO); pcbp->type = type; /* Link the pcb and the socket. */ so->so_pcb = (caddr_t)pcbp; pcbp->ng_socket = so; /* Add the socket to linked list */ mtx_lock(&ngsocketlist_mtx); LIST_INSERT_HEAD(&ngsocklist, pcbp, socks); mtx_unlock(&ngsocketlist_mtx); return (0); } /* * Disassociate the socket from it's protocol specific * partner. If it's attached to a node's private data structure, * then unlink from that too. If we were the last socket attached to it, * then shut down the entire node. Shared code for control and data sockets. 
*/ static void ng_detach_common(struct ngpcb *pcbp, int which) { struct ngsock *priv = pcbp->sockdata; if (priv != NULL) { mtx_lock(&priv->mtx); switch (which) { case NG_CONTROL: priv->ctlsock = NULL; break; case NG_DATA: priv->datasock = NULL; break; default: panic("%s", __func__); } pcbp->sockdata = NULL; ng_socket_free_priv(priv); } pcbp->ng_socket->so_pcb = NULL; mtx_lock(&ngsocketlist_mtx); LIST_REMOVE(pcbp, socks); mtx_unlock(&ngsocketlist_mtx); free(pcbp, M_PCB); } /* * Remove a reference from node private data. */ static void ng_socket_free_priv(struct ngsock *priv) { mtx_assert(&priv->mtx, MA_OWNED); priv->refs--; if (priv->refs == 0) { mtx_destroy(&priv->mtx); hashdestroy(priv->hash, M_NETGRAPH_SOCK, priv->hmask); free(priv, M_NETGRAPH_SOCK); return; } if ((priv->refs == 1) && (priv->node != NULL)) { node_p node = priv->node; priv->node = NULL; mtx_unlock(&priv->mtx); NG_NODE_UNREF(node); ng_rmnode_self(node); } else mtx_unlock(&priv->mtx); } /* * Connect the data socket to a named control socket node. */ static int ng_connect_data(struct sockaddr *nam, struct ngpcb *pcbp) { struct sockaddr_ng *sap; node_p farnode; struct ngsock *priv; int error; item_p item; /* If we are already connected, don't do it again. */ if (pcbp->sockdata != NULL) return (EISCONN); /* * Find the target (victim) and check it doesn't already have * a data socket. Also check it is a 'socket' type node. * Use ng_package_data() and ng_address_path() to do this. */ sap = (struct sockaddr_ng *) nam; /* The item will hold the node reference. */ item = ng_package_data(NULL, NG_WAITOK); if ((error = ng_address_path(NULL, item, sap->sg_data, 0))) return (error); /* item is freed on failure */ /* * Extract node from item and free item. Remember we now have * a reference on the node. The item holds it for us. * when we free the item we release the reference. 
*/ farnode = item->el_dest; /* shortcut */ if (strcmp(farnode->nd_type->name, NG_SOCKET_NODE_TYPE) != 0) { NG_FREE_ITEM(item); /* drop the reference to the node */ return (EINVAL); } priv = NG_NODE_PRIVATE(farnode); if (priv->datasock != NULL) { NG_FREE_ITEM(item); /* drop the reference to the node */ return (EADDRINUSE); } /* * Link the PCB and the private data struct. and note the extra * reference. Drop the extra reference on the node. */ mtx_lock(&priv->mtx); priv->datasock = pcbp; pcbp->sockdata = priv; priv->refs++; mtx_unlock(&priv->mtx); NG_FREE_ITEM(item); /* drop the reference to the node */ return (0); } /* * Binding a socket means giving the corresponding node a name */ static int ng_bind(struct sockaddr *nam, struct ngpcb *pcbp) { struct ngsock *const priv = pcbp->sockdata; struct sockaddr_ng *const sap = (struct sockaddr_ng *) nam; if (priv == NULL) { TRAP_ERROR; return (EINVAL); } if ((sap->sg_len < 4) || (sap->sg_len > (NG_NODESIZ + 2)) || (sap->sg_data[0] == '\0') || (sap->sg_data[sap->sg_len - 3] != '\0')) { TRAP_ERROR; return (EINVAL); } return (ng_name_node(priv->node, sap->sg_data)); } /*************************************************************** Netgraph node ***************************************************************/ /* * You can only create new nodes from the socket end of things. 
*/ static int ngs_constructor(node_p nodep) { return (EINVAL); } static void ngs_rehash(node_p node) { struct ngsock *priv = NG_NODE_PRIVATE(node); struct ngshash *new; struct hookpriv *hp; hook_p hook; uint32_t h; u_long hmask; new = hashinit_flags((priv->hmask + 1) * 2, M_NETGRAPH_SOCK, &hmask, HASH_NOWAIT); if (new == NULL) return; LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) { hp = NG_HOOK_PRIVATE(hook); #ifdef INVARIANTS LIST_REMOVE(hp, next); #endif h = hash32_str(NG_HOOK_NAME(hook), HASHINIT) & hmask; LIST_INSERT_HEAD(&new[h], hp, next); } hashdestroy(priv->hash, M_NETGRAPH_SOCK, priv->hmask); priv->hash = new; priv->hmask = hmask; } /* * We allow any hook to be connected to the node. * There is no per-hook private information though. */ static int ngs_newhook(node_p node, hook_p hook, const char *name) { struct ngsock *const priv = NG_NODE_PRIVATE(node); struct hookpriv *hp; uint32_t h; hp = malloc(sizeof(*hp), M_NETGRAPH_SOCK, M_NOWAIT); if (hp == NULL) return (ENOMEM); if (node->nd_numhooks * 2 > priv->hmask) ngs_rehash(node); hp->hook = hook; h = hash32_str(name, HASHINIT) & priv->hmask; LIST_INSERT_HEAD(&priv->hash[h], hp, next); NG_HOOK_SET_PRIVATE(hook, hp); return (0); } /* * If only one hook, allow read(2) and write(2) to work. */ static int ngs_connect(hook_p hook) { node_p node = NG_HOOK_NODE(hook); struct ngsock *priv = NG_NODE_PRIVATE(node); if ((priv->datasock) && (priv->datasock->ng_socket)) { if (NG_NODE_NUMHOOKS(node) == 1) priv->datasock->ng_socket->so_state |= SS_ISCONNECTED; else priv->datasock->ng_socket->so_state &= ~SS_ISCONNECTED; } return (0); } /* Look up hook by name */ static hook_p ngs_findhook(node_p node, const char *name) { struct ngsock *priv = NG_NODE_PRIVATE(node); struct hookpriv *hp; uint32_t h; /* * Microoptimisation for an ng_socket with * a single hook, which is a common case. 
*/ if (node->nd_numhooks == 1) { hook_p hook; hook = LIST_FIRST(&node->nd_hooks); if (strcmp(NG_HOOK_NAME(hook), name) == 0) return (hook); else return (NULL); } h = hash32_str(name, HASHINIT) & priv->hmask; LIST_FOREACH(hp, &priv->hash[h], next) if (strcmp(NG_HOOK_NAME(hp->hook), name) == 0) return (hp->hook); return (NULL); } /* * Incoming messages get passed up to the control socket. * Unless they are for us specifically (socket_type) */ static int ngs_rcvmsg(node_p node, item_p item, hook_p lasthook) { struct ngsock *const priv = NG_NODE_PRIVATE(node); struct ngpcb *pcbp; struct socket *so; struct sockaddr_ng addr; struct ng_mesg *msg; struct mbuf *m; ng_ID_t retaddr = NGI_RETADDR(item); int addrlen; int error = 0; NGI_GET_MSG(item, msg); NG_FREE_ITEM(item); /* * Grab priv->mtx here to prevent destroying of control socket * after checking that priv->ctlsock is not NULL. */ mtx_lock(&priv->mtx); pcbp = priv->ctlsock; /* * Only allow mesgs to be passed if we have the control socket. * Data sockets can only support the generic messages. */ if (pcbp == NULL) { mtx_unlock(&priv->mtx); TRAP_ERROR; NG_FREE_MSG(msg); return (EINVAL); } so = pcbp->ng_socket; SOCKBUF_LOCK(&so->so_rcv); /* As long as the race is handled, priv->mtx may be unlocked now. */ mtx_unlock(&priv->mtx); #ifdef TRACE_MESSAGES printf("[%x]:---------->[socket]: c=<%d>cmd=%x(%s) f=%x #%d\n", retaddr, msg->header.typecookie, msg->header.cmd, msg->header.cmdstr, msg->header.flags, msg->header.token); #endif if (msg->header.typecookie == NGM_SOCKET_COOKIE) { switch (msg->header.cmd) { case NGM_SOCK_CMD_NOLINGER: priv->flags |= NGS_FLAG_NOLINGER; break; case NGM_SOCK_CMD_LINGER: priv->flags &= ~NGS_FLAG_NOLINGER; break; default: error = EINVAL; /* unknown command */ } SOCKBUF_UNLOCK(&so->so_rcv); /* Free the message and return. */ NG_FREE_MSG(msg); return (error); } /* Get the return address into a sockaddr. 
*/ bzero(&addr, sizeof(addr)); addr.sg_len = sizeof(addr); addr.sg_family = AF_NETGRAPH; addrlen = snprintf((char *)&addr.sg_data, sizeof(addr.sg_data), "[%x]:", retaddr); if (addrlen < 0 || addrlen > sizeof(addr.sg_data)) { SOCKBUF_UNLOCK(&so->so_rcv); printf("%s: snprintf([%x]) failed - %d\n", __func__, retaddr, addrlen); NG_FREE_MSG(msg); return (EINVAL); } /* Copy the message itself into an mbuf chain. */ m = m_devget((caddr_t)msg, sizeof(struct ng_mesg) + msg->header.arglen, 0, NULL, NULL); /* * Here we free the message. We need to do that * regardless of whether we got mbufs. */ NG_FREE_MSG(msg); if (m == NULL) { SOCKBUF_UNLOCK(&so->so_rcv); TRAP_ERROR; return (ENOBUFS); } /* Send it up to the socket. */ if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&addr, m, NULL) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); TRAP_ERROR; m_freem(m); return (ENOBUFS); } sorwakeup_locked(so); return (error); } /* * Receive data on a hook */ static int ngs_rcvdata(hook_p hook, item_p item) { struct ngsock *const priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); struct ngpcb *const pcbp = priv->datasock; struct socket *so; struct sockaddr_ng *addr; char *addrbuf[NG_HOOKSIZ + 4]; int addrlen; struct mbuf *m; NGI_GET_M(item, m); NG_FREE_ITEM(item); /* If there is no data socket, black-hole it. */ if (pcbp == NULL) { NG_FREE_M(m); return (0); } so = pcbp->ng_socket; /* Get the return address into a sockaddr. */ addrlen = strlen(NG_HOOK_NAME(hook)); /* <= NG_HOOKSIZ - 1 */ addr = (struct sockaddr_ng *) addrbuf; addr->sg_len = addrlen + 3; addr->sg_family = AF_NETGRAPH; bcopy(NG_HOOK_NAME(hook), addr->sg_data, addrlen); addr->sg_data[addrlen] = '\0'; /* Try to tell the socket which hook it came in on. */ if (sbappendaddr(&so->so_rcv, (struct sockaddr *)addr, m, NULL) == 0) { m_freem(m); TRAP_ERROR; return (ENOBUFS); } sorwakeup(so); return (0); } /* * Hook disconnection * * For this type, removal of the last link destroys the node * if the NOLINGER flag is set. 
*/ static int ngs_disconnect(hook_p hook) { node_p node = NG_HOOK_NODE(hook); struct ngsock *const priv = NG_NODE_PRIVATE(node); struct hookpriv *hp = NG_HOOK_PRIVATE(hook); LIST_REMOVE(hp, next); free(hp, M_NETGRAPH_SOCK); if ((priv->datasock) && (priv->datasock->ng_socket)) { if (NG_NODE_NUMHOOKS(node) == 1) priv->datasock->ng_socket->so_state |= SS_ISCONNECTED; else priv->datasock->ng_socket->so_state &= ~SS_ISCONNECTED; } if ((priv->flags & NGS_FLAG_NOLINGER) && (NG_NODE_NUMHOOKS(node) == 0) && (NG_NODE_IS_VALID(node))) ng_rmnode_self(node); return (0); } /* * Do local shutdown processing. * In this case, that involves making sure the socket * knows we should be shutting down. */ static int ngs_shutdown(node_p node) { struct ngsock *const priv = NG_NODE_PRIVATE(node); struct ngpcb *dpcbp, *pcbp; mtx_lock(&priv->mtx); dpcbp = priv->datasock; pcbp = priv->ctlsock; if (dpcbp != NULL) soisdisconnected(dpcbp->ng_socket); if (pcbp != NULL) soisdisconnected(pcbp->ng_socket); priv->node = NULL; NG_NODE_SET_PRIVATE(node, NULL); ng_socket_free_priv(priv); NG_NODE_UNREF(node); return (0); } static void ng_socket_item_applied(void *context, int error) { struct ngsock *const priv = (struct ngsock *)context; mtx_lock(&priv->mtx); priv->error = error; wakeup(priv); mtx_unlock(&priv->mtx); } static int dummy_disconnect(struct socket *so) { return (0); } /* * Control and data socket type descriptors * * XXXRW: Perhaps _close should do something? 
*/
/* User-request methods for control sockets. */
static struct pr_usrreqs ngc_usrreqs = {
	.pru_abort =		NULL,
	.pru_attach =		ngc_attach,
	.pru_bind =		ngc_bind,
	.pru_connect =		ngc_connect,
	.pru_detach =		ngc_detach,
	.pru_disconnect =	dummy_disconnect,
	.pru_peeraddr =		NULL,
	.pru_send =		ngc_send,
	.pru_shutdown =		NULL,
	.pru_sockaddr =		ng_getsockaddr,
	.pru_close =		NULL,
};

/* User-request methods for data sockets (no bind method). */
static struct pr_usrreqs ngd_usrreqs = {
	.pru_abort =		NULL,
	.pru_attach =		ngd_attach,
	.pru_bind =		NULL,
	.pru_connect =		ngd_connect,
	.pru_detach =		ngd_detach,
	.pru_disconnect =	dummy_disconnect,
	.pru_peeraddr =		NULL,
	.pru_send =		ngd_send,
	.pru_shutdown =		NULL,
	.pru_sockaddr =		ng_getsockaddr,
	.pru_close =		NULL,
};

/*
 * Definitions of protocols supported in the NETGRAPH domain.
 */

extern struct domain ngdomain;		/* stop compiler warnings */

/* One datagram protocol per socket flavour: control and data. */
static struct protosw ngsw[] = {
{
	.pr_type =		SOCK_DGRAM,
	.pr_domain =		&ngdomain,
	.pr_protocol =		NG_CONTROL,
	.pr_flags =		PR_ATOMIC | PR_ADDR /* | PR_RIGHTS */,
	.pr_usrreqs =		&ngc_usrreqs
},
{
	.pr_type =		SOCK_DGRAM,
	.pr_domain =		&ngdomain,
	.pr_protocol =		NG_DATA,
	.pr_flags =		PR_ATOMIC | PR_ADDR,
	.pr_usrreqs =		&ngd_usrreqs
}
};

struct domain ngdomain = {
	.dom_family =		AF_NETGRAPH,
	.dom_name =		"netgraph",
	.dom_protosw =		ngsw,
	.dom_protoswNPROTOSW =	&ngsw[sizeof(ngsw) / sizeof(ngsw[0])]
};

/*
 * Handle loading and unloading for this node type.
 * This is to handle auxiliary linkages (e.g protocol domain addition).
 *
 * MOD_LOAD initialises the socket-list mutex.  MOD_UNLOAD always
 * answers EBUSY, whether or not sockets are open.  Any other event
 * yields EOPNOTSUPP.
 */
static int
ngs_mod_event(module_t mod, int event, void *data)
{
	int error = 0;

	switch (event) {
	case MOD_LOAD:
		mtx_init(&ngsocketlist_mtx, "ng_socketlist", NULL, MTX_DEF);
		break;
	case MOD_UNLOAD:
		/* Ensure there are no open netgraph sockets. */
		if (!LIST_EMPTY(&ngsocklist)) {
			error = EBUSY;
			break;
		}
#ifdef NOTYET
		/* Unregister protocol domain XXX can't do this yet.. */
#endif
		/* Unload is refused even when no sockets are open. */
		error = EBUSY;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

VNET_DOMAIN_SET(ng);

-SYSCTL_INT(_net_graph, OID_AUTO, family, CTLFLAG_RD, 0, AF_NETGRAPH, "");
+SYSCTL_INT(_net_graph, OID_AUTO, family, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, AF_NETGRAPH, "");
static SYSCTL_NODE(_net_graph, OID_AUTO, data, CTLFLAG_RW, 0, "DATA");
-SYSCTL_INT(_net_graph_data, OID_AUTO, proto, CTLFLAG_RD, 0, NG_DATA, "");
+SYSCTL_INT(_net_graph_data, OID_AUTO, proto, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_DATA, "");
static SYSCTL_NODE(_net_graph, OID_AUTO, control, CTLFLAG_RW, 0, "CONTROL");
-SYSCTL_INT(_net_graph_control, OID_AUTO, proto, CTLFLAG_RD, 0, NG_CONTROL, "");
+SYSCTL_INT(_net_graph_control, OID_AUTO, proto, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_CONTROL, "");
Index: stable/9/sys/netinet/cc/cc_chd.c
===================================================================
--- stable/9/sys/netinet/cc/cc_chd.c (revision 273911)
+++ stable/9/sys/netinet/cc/cc_chd.c (revision 273912)
@@ -1,496 +1,496 @@
/*-
 * Copyright (c) 2009-2010
 * 	Swinburne University of Technology, Melbourne, Australia
 * Copyright (c) 2010-2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by David Hayes and
 * Lawrence Stewart, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, Melbourne, Australia by
 * David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * An implementation of the CAIA-Hamilton delay based congestion control * algorithm, based on "Improved coexistence and loss tolerance for delay based * TCP congestion control" by D. A. Hayes and G. Armitage., in 35th Annual IEEE * Conference on Local Computer Networks (LCN 2010), Denver, Colorado, USA, * 11-14 October 2010. * * Originally released as part of the NewTCP research project at Swinburne * University of Technology's Centre for Advanced Internet Architectures, * Melbourne, Australia, which was made possible in part by a grant from the * Cisco University Research Program Fund at Community Foundation Silicon * Valley. More details are available at: * http://caia.swin.edu.au/urp/newtcp/ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define CAST_PTR_INT(X) (*((int*)(X))) /* * Private signal type for rate based congestion signal. 
* See <netinet/cc.h> for appropriate bit-range to use for private signals.
 */
#define	CC_CHD_DELAY	0x02000000

/* Largest possible number returned by random(). */
#define	RANDOM_MAX	INT_MAX

static void	chd_ack_received(struct cc_var *ccv, uint16_t ack_type);
static void	chd_cb_destroy(struct cc_var *ccv);
static int	chd_cb_init(struct cc_var *ccv);
static void	chd_cong_signal(struct cc_var *ccv, uint32_t signal_type);
static void	chd_conn_init(struct cc_var *ccv);
static int	chd_mod_init(void);

/* Per-connection CHD state, stored in ccv->cc_data. */
struct chd {
	/*
	 * Shadow window - keeps track of what the NewReno congestion window
	 * would have been if delay-based cwnd backoffs had not been made. This
	 * functionality aids coexistence with loss-based TCP flows which may be
	 * sharing links along the path.
	 */
	unsigned long shadow_w;
	/*
	 * Loss-based TCP compatibility flag - When set, it turns on the shadow
	 * window functionality.
	 */
	int loss_compete;
	 /* The maximum round trip time seen within a measured rtt period. */
	int maxrtt_in_rtt;
	/* The previous qdly that caused cwnd to backoff. */
	int prev_backoff_qdly;
};

/* Id of the "ertt" helper, looked up in chd_mod_init(). */
static int ertt_id;

/* Per-vnet tunables with their initial values. */
static VNET_DEFINE(uint32_t, chd_qmin) = 5;
static VNET_DEFINE(uint32_t, chd_pmax) = 50;
static VNET_DEFINE(uint32_t, chd_loss_fair) = 1;
static VNET_DEFINE(uint32_t, chd_use_max) = 1;
static VNET_DEFINE(uint32_t, chd_qthresh) = 20;
#define	V_chd_qthresh	VNET(chd_qthresh)
#define	V_chd_qmin	VNET(chd_qmin)
#define	V_chd_pmax	VNET(chd_pmax)
#define	V_chd_loss_fair	VNET(chd_loss_fair)
#define	V_chd_use_max	VNET(chd_use_max)

static MALLOC_DEFINE(M_CHD, "chd data",
    "Per connection data required for the CHD congestion control algorithm");

/*
 * Method table for the algorithm.  after_idle and post_recovery are
 * not set here; chd_mod_init() fills them in from newreno_cc_algo.
 */
struct cc_algo chd_cc_algo = {
	.name = "chd",
	.ack_received = chd_ack_received,
	.cb_destroy = chd_cb_destroy,
	.cb_init = chd_cb_init,
	.cong_signal = chd_cong_signal,
	.conn_init = chd_conn_init,
	.mod_init = chd_mod_init
};

/*
 * Set snd_ssthresh from the smaller of snd_wnd and snd_cwnd: the
 * window, in segments, is reduced by half of itself (by at least one
 * segment) and the result is floored at two segments.  snd_cwnd
 * itself is not modified here.
 */
static __inline void
chd_window_decrease(struct cc_var *ccv)
{
	unsigned long win;

	win = min(CCV(ccv, snd_wnd), CCV(ccv, snd_cwnd)) / CCV(ccv, t_maxseg);
	win -= max((win / 2), 1);
	CCV(ccv, snd_ssthresh) = max(win, 2) * CCV(ccv, t_maxseg);
}

/*
 * Probabilistic backoff function. Returns 1 if we should backoff or 0
 * otherwise. The calculation of p is similar to the calculation of p in cc_hd.
*/
static __inline int
should_backoff(int qdly, int maxqdly, struct chd *chd_data)
{
	unsigned long p, rand;

	rand = random();

	if (qdly < V_chd_qthresh) {
		/* Below the threshold: p scales with (qdly - qmin). */
		chd_data->loss_compete = 0;
		p = (((RANDOM_MAX / 100) * V_chd_pmax) /
		    (V_chd_qthresh - V_chd_qmin)) *
		    (qdly - V_chd_qmin);
	} else {
		if (qdly > V_chd_qthresh) {
			/* Above the threshold: p scales with (maxqdly - qdly). */
			p = (((RANDOM_MAX / 100) * V_chd_pmax) /
			    (maxqdly - V_chd_qthresh)) *
			    (maxqdly - qdly);
			if (V_chd_loss_fair && rand < p)
				chd_data->loss_compete = 1;
		} else {
			/* Exactly at the threshold: p is pmax percent of RANDOM_MAX. */
			p = (RANDOM_MAX / 100) * V_chd_pmax;
			chd_data->loss_compete = 0;
		}
	}

	return (rand < p);
}

/*
 * Grow snd_cwnd, and the shadow window when it is active, by an
 * increment chosen from the slow-start or congestion-avoidance rules
 * below.  Both windows are capped at TCP_MAXWIN << snd_scale.
 */
static __inline void
chd_window_increase(struct cc_var *ccv, int new_measurement)
{
	struct chd *chd_data;
	int incr;

	chd_data = ccv->cc_data;
	incr = 0;

	if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) {
		/* Adapted from NewReno slow start. */
		if (V_tcp_do_rfc3465) {
			/* In slow-start with ABC enabled. */
			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) {
				/* Not due to RTO. */
				incr = min(ccv->bytes_this_ack,
				    V_tcp_abc_l_var * CCV(ccv, t_maxseg));
			} else {
				/* Due to RTO. */
				incr = min(ccv->bytes_this_ack,
				    CCV(ccv, t_maxseg));
			}
		} else
			incr = CCV(ccv, t_maxseg);

	} else { /* Congestion avoidance. */
		if (V_tcp_do_rfc3465) {
			/* With ABC, grow only when CCF_ABC_SENTAWND is set. */
			if (ccv->flags & CCF_ABC_SENTAWND) {
				ccv->flags &= ~CCF_ABC_SENTAWND;
				incr = CCV(ccv, t_maxseg);
			}
		} else if (new_measurement)
			incr = CCV(ccv, t_maxseg);
	}

	if (chd_data->shadow_w > 0) {
		/* Track NewReno window. */
		chd_data->shadow_w = min(chd_data->shadow_w + incr,
		    TCP_MAXWIN << CCV(ccv, snd_scale));
	}

	CCV(ccv,snd_cwnd) = min(CCV(ccv, snd_cwnd) + incr,
	    TCP_MAXWIN << CCV(ccv, snd_scale));
}

/*
 * All ACK signals are used for timing measurements to determine delay-based
 * congestion. However, window increases are only performed when
 * ack_type == CC_ACK.
*/ static void chd_ack_received(struct cc_var *ccv, uint16_t ack_type) { struct chd *chd_data; struct ertt *e_t; int backoff, new_measurement, qdly, rtt; e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); chd_data = ccv->cc_data; new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT; backoff = qdly = 0; chd_data->maxrtt_in_rtt = imax(e_t->rtt, chd_data->maxrtt_in_rtt); if (new_measurement) { /* * There is a new per RTT measurement, so check to see if there * is delay based congestion. */ rtt = V_chd_use_max ? chd_data->maxrtt_in_rtt : e_t->rtt; chd_data->maxrtt_in_rtt = 0; if (rtt && e_t->minrtt && !IN_RECOVERY(CCV(ccv, t_flags))) { qdly = rtt - e_t->minrtt; if (qdly > V_chd_qmin) { /* * Probabilistic delay based congestion * indication. */ backoff = should_backoff(qdly, e_t->maxrtt - e_t->minrtt, chd_data); } else chd_data->loss_compete = 0; } /* Reset per RTT measurement flag to start a new measurement. */ e_t->flags &= ~ERTT_NEW_MEASUREMENT; } if (backoff) { /* * Update shadow_w before delay based backoff. */ if (chd_data->loss_compete || qdly > chd_data->prev_backoff_qdly) { /* * Delay is higher than when we backed off previously, * so it is possible that this flow is competing with * loss based flows. */ chd_data->shadow_w = max(CCV(ccv, snd_cwnd), chd_data->shadow_w); } else { /* * Reset shadow_w, as it is probable that this flow is * not competing with loss based flows at the moment. */ chd_data->shadow_w = 0; } chd_data->prev_backoff_qdly = qdly; /* * Send delay-based congestion signal to the congestion signal * handler. 
*/ chd_cong_signal(ccv, CC_CHD_DELAY); } else if (ack_type == CC_ACK) chd_window_increase(ccv, new_measurement); } static void chd_cb_destroy(struct cc_var *ccv) { if (ccv->cc_data != NULL) free(ccv->cc_data, M_CHD); } static int chd_cb_init(struct cc_var *ccv) { struct chd *chd_data; chd_data = malloc(sizeof(struct chd), M_CHD, M_NOWAIT); if (chd_data == NULL) return (ENOMEM); chd_data->shadow_w = 0; ccv->cc_data = chd_data; return (0); } static void chd_cong_signal(struct cc_var *ccv, uint32_t signal_type) { struct ertt *e_t; struct chd *chd_data; int qdly; e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); chd_data = ccv->cc_data; qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt; switch(signal_type) { case CC_CHD_DELAY: chd_window_decrease(ccv); /* Set new ssthresh. */ CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); CCV(ccv, snd_recover) = CCV(ccv, snd_max); ENTER_CONGRECOVERY(CCV(ccv, t_flags)); break; case CC_NDUPACK: /* Packet loss. */ /* * Only react to loss as a congestion signal if qdly > * V_chd_qthresh. If qdly is less than qthresh, presume that * this is a non congestion related loss. If qdly is greater * than qthresh, assume that we are competing with loss based * tcp flows and restore window from any unnecessary backoffs, * before the decrease. */ if (!IN_RECOVERY(CCV(ccv, t_flags)) && qdly > V_chd_qthresh) { if (chd_data->loss_compete) { CCV(ccv, snd_cwnd) = max(CCV(ccv, snd_cwnd), chd_data->shadow_w); } chd_window_decrease(ccv); } else { /* * This loss isn't congestion related, or already * recovering from congestion. 
*/ CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); CCV(ccv, snd_recover) = CCV(ccv, snd_max); } if (chd_data->shadow_w > 0) { chd_data->shadow_w = max(chd_data->shadow_w / CCV(ccv, t_maxseg) / 2, 2) * CCV(ccv, t_maxseg); } ENTER_FASTRECOVERY(CCV(ccv, t_flags)); break; default: newreno_cc_algo.cong_signal(ccv, signal_type); } } static void chd_conn_init(struct cc_var *ccv) { struct chd *chd_data; chd_data = ccv->cc_data; chd_data->prev_backoff_qdly = 0; chd_data->maxrtt_in_rtt = 0; chd_data->loss_compete = 0; /* * Initialise the shadow_cwnd to be equal to snd_cwnd in case we are * competing with loss based flows from the start. */ chd_data->shadow_w = CCV(ccv, snd_cwnd); } static int chd_mod_init(void) { ertt_id = khelp_get_id("ertt"); if (ertt_id <= 0) { printf("%s: h_ertt module not found\n", __func__); return (ENOENT); } chd_cc_algo.after_idle = newreno_cc_algo.after_idle; chd_cc_algo.post_recovery = newreno_cc_algo.post_recovery; return (0); } static int chd_loss_fair_handler(SYSCTL_HANDLER_ARGS) { int error; uint32_t new; new = V_chd_loss_fair; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr != NULL) { if (CAST_PTR_INT(req->newptr) > 1) error = EINVAL; else V_chd_loss_fair = new; } return (error); } static int chd_pmax_handler(SYSCTL_HANDLER_ARGS) { int error; uint32_t new; new = V_chd_pmax; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr != NULL) { if (CAST_PTR_INT(req->newptr) == 0 || CAST_PTR_INT(req->newptr) > 100) error = EINVAL; else V_chd_pmax = new; } return (error); } static int chd_qthresh_handler(SYSCTL_HANDLER_ARGS) { int error; uint32_t new; new = V_chd_qthresh; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr != NULL) { if (CAST_PTR_INT(req->newptr) <= V_chd_qmin) error = EINVAL; else V_chd_qthresh = new; } return (error); } SYSCTL_DECL(_net_inet_tcp_cc_chd); SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, chd, CTLFLAG_RW, NULL, "CAIA Hamilton delay-based congestion control 
related settings"); SYSCTL_VNET_PROC(_net_inet_tcp_cc_chd, OID_AUTO, loss_fair, CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_loss_fair), 1, &chd_loss_fair_handler, "IU", "Flag to enable shadow window functionality."); SYSCTL_VNET_PROC(_net_inet_tcp_cc_chd, OID_AUTO, pmax, CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_pmax), 5, &chd_pmax_handler, "IU", "Per RTT maximum backoff probability as a percentage"); SYSCTL_VNET_PROC(_net_inet_tcp_cc_chd, OID_AUTO, queue_threshold, CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_qthresh), 20, &chd_qthresh_handler, "IU", "Queueing congestion threshold in ticks"); SYSCTL_VNET_UINT(_net_inet_tcp_cc_chd, OID_AUTO, queue_min, - CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_qmin), 5, + CTLFLAG_RW, &VNET_NAME(chd_qmin), 5, "Minimum queueing delay threshold in ticks"); SYSCTL_VNET_UINT(_net_inet_tcp_cc_chd, OID_AUTO, use_max, - CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(chd_use_max), 1, + CTLFLAG_RW, &VNET_NAME(chd_use_max), 1, "Use the maximum RTT seen within the measurement period (RTT) " "as the basic delay measurement for the algorithm."); DECLARE_CC_MODULE(chd, &chd_cc_algo); MODULE_DEPEND(chd, ertt, 1, 1, 1); Index: stable/9/sys/netinet/tcp_reass.c =================================================================== --- stable/9/sys/netinet/tcp_reass.c (revision 273911) +++ stable/9/sys/netinet/tcp_reass.c (revision 273912) @@ -1,363 +1,363 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif /* TCPDEBUG */ static int tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS); static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0, "TCP Segment Reassembly Queue"); static VNET_DEFINE(int, tcp_reass_maxseg) = 0; #define V_tcp_reass_maxseg VNET(tcp_reass_maxseg) SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN, &VNET_NAME(tcp_reass_maxseg), 0, "Global maximum number of TCP Segments in Reassembly Queue"); SYSCTL_VNET_PROC(_net_inet_tcp_reass, OID_AUTO, cursegments, (CTLTYPE_INT | CTLFLAG_RD), NULL, 0, &tcp_reass_sysctl_qsize, "I", "Global number of TCP 
Segments currently in Reassembly Queue");

static VNET_DEFINE(int, tcp_reass_overflows) = 0;
#define	V_tcp_reass_overflows		VNET(tcp_reass_overflows)
SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows,
-    CTLTYPE_INT | CTLFLAG_RD,
+    CTLFLAG_RD,
    &VNET_NAME(tcp_reass_overflows), 0,
    "Global number of TCP Segment Reassembly Queue Overflows");

static VNET_DEFINE(uma_zone_t, tcp_reass_zone);
#define	V_tcp_reass_zone		VNET(tcp_reass_zone)

/*
 * Handler registered for the nmbclusters_change event by tcp_reass_init():
 * recompute the segment limit as nmbclusters / 16 and apply it to the
 * reassembly zone.  The tag argument is not used.
 */
static void
tcp_reass_zone_change(void *tag)
{

	/* Set the zone limit and read back the effective value. */
	V_tcp_reass_maxseg = nmbclusters / 16;
	V_tcp_reass_maxseg = uma_zone_set_max(V_tcp_reass_zone,
	    V_tcp_reass_maxseg);
}

/*
 * Initialize the TCP reassembly queue: pick the default segment limit
 * (nmbclusters / 16, overridable by the net.inet.tcp.reass.maxsegments
 * tunable), create the UMA zone for queue entries, apply the limit and
 * register for nmbclusters_change events.
 */
void
tcp_reass_init(void)
{

	V_tcp_reass_maxseg = nmbclusters / 16;
	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
	    &V_tcp_reass_maxseg);
	V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	/* Set the zone limit and read back the effective value. */
	V_tcp_reass_maxseg = uma_zone_set_max(V_tcp_reass_zone,
	    V_tcp_reass_maxseg);
	EVENTHANDLER_REGISTER(nmbclusters_change,
	    tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
}

#ifdef VIMAGE
/* Destroy the reassembly zone (only built with VIMAGE). */
void
tcp_reass_destroy(void)
{

	uma_zdestroy(V_tcp_reass_zone);
}
#endif

/*
 * Drop every segment queued on tp: free the mbuf chain and the queue entry
 * of each one and bring t_segqlen back to zero.  The inpcb write lock must
 * be held.
 */
void
tcp_reass_flush(struct tcpcb *tp)
{
	struct tseg_qent *qe;

	INP_WLOCK_ASSERT(tp->t_inpcb);

	while ((qe = LIST_FIRST(&tp->t_segq)) != NULL) {
		LIST_REMOVE(qe, tqe_q);
		m_freem(qe->tqe_m);
		uma_zfree(V_tcp_reass_zone, qe);
		tp->t_segqlen--;
	}

	KASSERT((tp->t_segqlen == 0),
	    ("TCP reass queue %p segment count is %d instead of 0 after flush.",
	    tp, tp->t_segqlen));
}

/* Sysctl handler reporting the current item count of the reassembly zone. */
static int
tcp_reass_sysctl_qsize(SYSCTL_HANDLER_ARGS)
{
	int qsize;

	qsize = uma_zone_get_cur(V_tcp_reass_zone);
	return (sysctl_handle_int(oidp, &qsize, 0, req));
}

/*
 * Queue the segment described by th/m (*tlenp bytes) on tp's reassembly
 * queue, trimming data that overlaps neighbouring entries, then append any
 * data that is now in sequence at rcv_nxt to the socket receive buffer.
 *
 * With th == NULL nothing is queued and only the presentation step runs.
 * *tlenp is set to 0 when the segment is dropped because of a limit and is
 * reduced by the amount trimmed from the front otherwise.
 *
 * Returns the TH_FIN bit of the last segment handed to the socket, or 0
 * when nothing was presented.  The inpcb write lock must be held.
 */
int
tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
{
	struct tseg_qent *q;
	struct tseg_qent *p = NULL;
	struct tseg_qent *nq;
	struct tseg_qent *te = NULL;
	struct socket *so = tp->t_inpcb->inp_socket;
	char *s = NULL;
	int flags;
	struct tseg_qent tqs;	/* stack fallback entry, see below */

	INP_WLOCK_ASSERT(tp->t_inpcb);

	/*
	 * XXX: tcp_reass() is rather inefficient with its data structures
	 * and should be rewritten (see NetBSD for optimizations).
	 */

	/*
	 * Call with th==NULL after become established to
	 * force pre-ESTABLISHED data up to user socket.
	 */
	if (th == NULL)
		goto present;

	/*
	 * Limit the number of segments that can be queued to reduce the
	 * potential for mbuf exhaustion. For best performance, we want to be
	 * able to queue a full window's worth of segments. The size of the
	 * socket receive buffer determines our advertised window and grows
	 * automatically when socket buffer autotuning is enabled. Use it as the
	 * basis for our queue limit.
	 * Always let the missing segment through which caused this queue.
	 * NB: Access to the socket buffer is left intentionally unlocked as we
	 * can tolerate stale information here.
	 *
	 * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
	 * should work but causes packets to be dropped when they shouldn't.
	 * Investigate why and re-evaluate the below limit after the behaviour
	 * is understood.
	 */
	if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
	    tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
		V_tcp_reass_overflows++;
		TCPSTAT_INC(tcps_rcvmemdrop);
		m_freem(m);
		*tlenp = 0;
		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
			log(LOG_DEBUG, "%s; %s: queue limit reached, "
			    "segment dropped\n", s, __func__);
			free(s, M_TCPLOG);
		}
		return (0);
	}

	/*
	 * Allocate a new queue entry. If we can't, or hit the zone limit
	 * just drop the pkt.
	 *
	 * Use a temporary structure on the stack for the missing segment
	 * when the zone is exhausted. Otherwise we may get stuck.
	 */
	te = uma_zalloc(V_tcp_reass_zone, M_NOWAIT);
	if (te == NULL) {
		if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
			TCPSTAT_INC(tcps_rcvmemdrop);
			m_freem(m);
			*tlenp = 0;
			if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
			    NULL))) {
				log(LOG_DEBUG, "%s; %s: global zone limit "
				    "reached, segment dropped\n", s, __func__);
				free(s, M_TCPLOG);
			}
			return (0);
		} else {
			bzero(&tqs, sizeof(struct tseg_qent));
			te = &tqs;
			if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
			    NULL))) {
				log(LOG_DEBUG,
				    "%s; %s: global zone limit reached, using "
				    "stack for missing segment\n", s, __func__);
				free(s, M_TCPLOG);
			}
		}
	}
	/* Count the entry now; the duplicate path below undoes this. */
	tp->t_segqlen++;

	/*
	 * Find a segment which begins after this one does.
	 */
	LIST_FOREACH(q, &tp->t_segq, tqe_q) {
		if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
			break;
		p = q;
	}

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us.
	 */
	if (p != NULL) {
		int i;
		/* conversion to int (in i) handles seq wraparound */
		i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
		if (i > 0) {
			if (i >= *tlenp) {
				TCPSTAT_INC(tcps_rcvduppack);
				TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
				m_freem(m);
				/* The stack entry was never allocated from the zone. */
				if (te != &tqs)
					uma_zfree(V_tcp_reass_zone, te);
				tp->t_segqlen--;
				/*
				 * Try to present any queued data
				 * at the left window edge to the user.
				 * This is needed after the 3-WHS
				 * completes.
				 */
				goto present;	/* ??? */
			}
			m_adj(m, i);
			*tlenp -= i;
			th->th_seq += i;
		}
	}
	tp->t_rcvoopack++;
	TCPSTAT_INC(tcps_rcvoopack);
	TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (q) {
		int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
		if (i <= 0)
			break;
		if (i < q->tqe_len) {
			q->tqe_th->th_seq += i;
			q->tqe_len -= i;
			m_adj(q->tqe_m, i);
			break;
		}

		nq = LIST_NEXT(q, tqe_q);
		LIST_REMOVE(q, tqe_q);
		m_freem(q->tqe_m);
		uma_zfree(V_tcp_reass_zone, q);
		tp->t_segqlen--;
		q = nq;
	}

	/* Insert the new segment queue entry into place. */
	te->tqe_m = m;
	te->tqe_th = th;
	te->tqe_len = *tlenp;

	if (p == NULL) {
		LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
	} else {
		KASSERT(te != &tqs, ("%s: temporary stack based entry not "
		    "first element in queue", __func__));
		LIST_INSERT_AFTER(p, te, tqe_q);
	}

present:
	/*
	 * Present data to user, advancing rcv_nxt through
	 * completed sequence space.
	 */
	if (!TCPS_HAVEESTABLISHED(tp->t_state))
		return (0);
	q = LIST_FIRST(&tp->t_segq);
	if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
		return (0);
	SOCKBUF_LOCK(&so->so_rcv);
	do {
		tp->rcv_nxt += q->tqe_len;
		/* Only the FIN bit of the last presented segment is returned. */
		flags = q->tqe_th->th_flags & TH_FIN;
		nq = LIST_NEXT(q, tqe_q);
		LIST_REMOVE(q, tqe_q);
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
			m_freem(q->tqe_m);
		else
			sbappendstream_locked(&so->so_rcv, q->tqe_m);
		/* The stack based entry must not be returned to the zone. */
		if (q != &tqs)
			uma_zfree(V_tcp_reass_zone, q);
		tp->t_segqlen--;
		q = nq;
	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
	ND6_HINT(tp);
	sorwakeup_locked(so);
	return (flags);
}
Index: stable/9/sys/netipsec/ipsec.h
===================================================================
--- stable/9/sys/netipsec/ipsec.h (revision 273911)
+++ stable/9/sys/netipsec/ipsec.h (revision 273912)
@@ -1,458 +1,458 @@
/* $FreeBSD$ */
/* $KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $ */

/*-
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * IPsec controller part.
 */

#ifndef _NETIPSEC_IPSEC_H_
#define _NETIPSEC_IPSEC_H_

#if defined(_KERNEL) && !defined(_LKM) && !defined(KLD_MODULE)
#include "opt_inet.h"
#include "opt_ipsec.h"
#endif

#include
#include

#ifdef _KERNEL

#define	IPSEC_ASSERT(_c,_m)	KASSERT(_c, _m)

/* True when the socket has credentials holding PRIV_NETINET_IPSEC. */
#define	IPSEC_IS_PRIVILEGED_SO(_so) \
	((_so)->so_cred != NULL && \
	 priv_check_cred((_so)->so_cred, PRIV_NETINET_IPSEC, 0) \
	 == 0)

/*
 * Security Policy Index
 * Ensure that both address families in the "src" and "dst" are same.
 * When the value of the ul_proto is ICMPv6, the port field in "src"
 * specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code.
 */
struct secpolicyindex {
	u_int8_t dir;			/* direction of packet flow, see below */
	union sockaddr_union src;	/* IP src address for SP */
	union sockaddr_union dst;	/* IP dst address for SP */
	u_int8_t prefs;			/* prefix length in bits for src */
	u_int8_t prefd;			/* prefix length in bits for dst */
	u_int16_t ul_proto;		/* upper layer Protocol */
#ifdef notyet
	uid_t uids;
	uid_t uidd;
	gid_t gids;
	gid_t gidd;
#endif
};

/* Security Policy Data Base */
struct secpolicy {
	LIST_ENTRY(secpolicy) chain;
	struct mtx lock;

	u_int refcnt;			/* reference count */
	struct secpolicyindex spidx;	/* selector */
	u_int32_t id;			/* Unique number on the system. */
	u_int state;			/* 0: dead, others: alive */
#define IPSEC_SPSTATE_DEAD	0
#define IPSEC_SPSTATE_ALIVE	1
-	u_int16_t policy;		/* policy_type per pfkeyv2.h */
+	u_int policy;			/* policy_type per pfkeyv2.h */
	u_int16_t scangen;		/* scan generation # */
	struct ipsecrequest *req;
				/* pointer to the ipsec request tree, */
				/* if policy == IPSEC else this value == NULL.*/

	/*
	 * lifetime handler.
	 * the policy can be used without limitation if both lifetime and
	 * validtime are zero.
	 * "lifetime" is passed by sadb_lifetime.sadb_lifetime_addtime.
	 * "validtime" is passed by sadb_lifetime.sadb_lifetime_usetime.
	 */
	time_t created;		/* time the policy was created */
	time_t lastused;	/* updated every time the kernel sends a packet */
	long lifetime;		/* duration of the lifetime of this policy */
	long validtime;		/* duration this policy is valid without use */
};

/* Helpers for the mutex embedded in struct secpolicy. */
#define	SECPOLICY_LOCK_INIT(_sp) \
	mtx_init(&(_sp)->lock, "ipsec policy", NULL, MTX_DEF)
#define	SECPOLICY_LOCK(_sp)		mtx_lock(&(_sp)->lock)
#define	SECPOLICY_UNLOCK(_sp)		mtx_unlock(&(_sp)->lock)
#define	SECPOLICY_LOCK_DESTROY(_sp)	mtx_destroy(&(_sp)->lock)
#define	SECPOLICY_LOCK_ASSERT(_sp)	mtx_assert(&(_sp)->lock, MA_OWNED)

/* Request for IPsec */
struct ipsecrequest {
	struct ipsecrequest *next;
				/* pointer to next structure */
				/* If NULL, it means the end of chain. */
	struct secasindex saidx;/* hint for search proper SA */
				/* if __ss_len == 0 then no address specified.*/
	u_int level;		/* IPsec level defined below. */

	struct secasvar *sav;	/* place holder of SA for use */
	struct secpolicy *sp;	/* back pointer to SP */
	struct rwlock lock;	/* to interlock updates */
};

/*
 * Need recursion for when crypto callbacks happen directly,
 * as in the case of software crypto.  Need to look at how
 * hard it is to remove this...
 */
#define	IPSECREQUEST_LOCK_INIT(_isr) \
	rw_init_flags(&(_isr)->lock, "ipsec request", RW_RECURSE)
#define	IPSECREQUEST_LOCK(_isr)		rw_rlock(&(_isr)->lock)
#define	IPSECREQUEST_UNLOCK(_isr)	rw_runlock(&(_isr)->lock)
#define	IPSECREQUEST_WLOCK(_isr)	rw_wlock(&(_isr)->lock)
#define	IPSECREQUEST_WUNLOCK(_isr)	rw_wunlock(&(_isr)->lock)
#define	IPSECREQUEST_UPGRADE(_isr)	rw_try_upgrade(&(_isr)->lock)
#define	IPSECREQUEST_DOWNGRADE(_isr)	rw_downgrade(&(_isr)->lock)
#define	IPSECREQUEST_LOCK_DESTROY(_isr)	rw_destroy(&(_isr)->lock)
#define	IPSECREQUEST_LOCK_ASSERT(_isr)	rw_assert(&(_isr)->lock, RA_LOCKED)

/* security policy in PCB */
struct inpcbpolicy {
	struct secpolicy *sp_in;
	struct secpolicy *sp_out;
	int priv;			/* privileged socket ? */
};

/* SP acquiring list table. */
struct secspacq {
	LIST_ENTRY(secspacq) chain;

	struct secpolicyindex spidx;

	time_t created;		/* for lifetime */
	int count;		/* for lifetime */
	/* XXX: here is mbuf place holder to be sent ? */
};
#endif /* _KERNEL */

/* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */
#define IPSEC_PORT_ANY		0
#define IPSEC_ULPROTO_ANY	255
#define IPSEC_PROTO_ANY		255

/* mode of security protocol */
/* NOTE: DON'T use IPSEC_MODE_ANY at SPD.  It is only used in SAD */
#define	IPSEC_MODE_ANY		0	/* i.e. wildcard. */
#define	IPSEC_MODE_TRANSPORT	1
#define	IPSEC_MODE_TUNNEL	2
#define	IPSEC_MODE_TCPMD5	3	/* TCP MD5 mode */

/*
 * Direction of security policy.
 * NOTE: INVALID is used just as a flag.
 * The others are used as loop counters too.
 */
#define IPSEC_DIR_ANY		0
#define IPSEC_DIR_INBOUND	1
#define IPSEC_DIR_OUTBOUND	2
#define IPSEC_DIR_MAX		3
#define IPSEC_DIR_INVALID	4

/* Policy level */
/*
 * IPSEC, ENTRUST and BYPASS are allowed for setsockopt() in PCB,
 * DISCARD, IPSEC and NONE are allowed for setkey() in SPD.
 * DISCARD and NONE are allowed for system default.
*/
#define IPSEC_POLICY_DISCARD	0	/* discarding packet */
#define IPSEC_POLICY_NONE	1	/* through IPsec engine */
#define IPSEC_POLICY_IPSEC	2	/* do IPsec */
#define IPSEC_POLICY_ENTRUST	3	/* consulting SPD if present. */
#define IPSEC_POLICY_BYPASS	4	/* only for privileged socket. */

/* Security protocol level */
#define	IPSEC_LEVEL_DEFAULT	0	/* reference to system default */
#define	IPSEC_LEVEL_USE		1	/* use SA if present. */
#define	IPSEC_LEVEL_REQUIRE	2	/* require SA. */
#define	IPSEC_LEVEL_UNIQUE	3	/* unique SA. */

#define IPSEC_MANUAL_REQID_MAX	0x3fff
				/*
				 * if security policy level == unique, this id
				 * indicates a relative SA for use, else it is
				 * zero.
				 * 1 - 0x3fff are reserved for manual keying.
				 * 0 is reserved for the above reason.  Others
				 * are for kernel use.
				 * Note that this id doesn't identify an SA
				 * by itself.
				 */
#define IPSEC_REPLAYWSIZE  32

/* statistics for ipsec processing */
struct ipsecstat {
	u_quad_t in_success;  /* succeeded inbound process */
	u_quad_t in_polvio;
			/* security policy violation for inbound process */
	u_quad_t in_nosa;     /* inbound SA is unavailable */
	u_quad_t in_inval;    /* inbound processing failed due to EINVAL */
	u_quad_t in_nomem;    /* inbound processing failed due to ENOBUFS */
	u_quad_t in_badspi;   /* failed getting a SPI */
	u_quad_t in_ahreplay; /* AH replay check failed */
	u_quad_t in_espreplay; /* ESP replay check failed */
	u_quad_t in_ahauthsucc; /* AH authentication success */
	u_quad_t in_ahauthfail; /* AH authentication failure */
	u_quad_t in_espauthsucc; /* ESP authentication success */
	u_quad_t in_espauthfail; /* ESP authentication failure */
	/* Histograms with 256 buckets each. */
	u_quad_t in_esphist[256];
	u_quad_t in_ahhist[256];
	u_quad_t in_comphist[256];
	u_quad_t out_success; /* succeeded outbound process */
	u_quad_t out_polvio;
			/* security policy violation for outbound process */
	u_quad_t out_nosa;    /* outbound SA is unavailable */
	u_quad_t out_inval;   /* outbound process failed due to EINVAL */
	u_quad_t out_nomem;   /* outbound processing failed due to ENOBUFS */
	u_quad_t out_noroute; /* there is no route */
	u_quad_t out_esphist[256];
	u_quad_t out_ahhist[256];
	u_quad_t out_comphist[256];

	u_quad_t spdcachelookup;
	u_quad_t spdcachemiss;

	u_int32_t ips_in_polvio;	/* input: sec policy violation */
	u_int32_t ips_out_polvio;	/* output: sec policy violation */
	u_int32_t ips_out_nosa;		/* output: SA unavailable  */
	u_int32_t ips_out_nomem;	/* output: no memory available */
	u_int32_t ips_out_noroute;	/* output: no route available */
	u_int32_t ips_out_inval;	/* output: generic error */
	u_int32_t ips_out_bundlesa;	/* output: bundled SA processed */
	u_int32_t ips_mbcoalesced;	/* mbufs coalesced during clone */
	u_int32_t ips_clcoalesced;	/* clusters coalesced during clone */
	u_int32_t ips_clcopied;		/* clusters copied during clone */
	u_int32_t ips_mbinserted;	/* mbufs inserted during makespace */
	/*
	 * Temporary statistics for performance analysis.
	 */
	/* See where ESP/AH/IPCOMP header land in mbuf on input */
	u_int32_t ips_input_front;
	u_int32_t ips_input_middle;
	u_int32_t ips_input_end;
};

/*
 * Definitions for IPsec & Key sysctl operations.
*/ /* * Names for IPsec & Key sysctl objects */ #define IPSECCTL_STATS 1 /* stats */ #define IPSECCTL_DEF_POLICY 2 #define IPSECCTL_DEF_ESP_TRANSLEV 3 /* int; ESP transport mode */ #define IPSECCTL_DEF_ESP_NETLEV 4 /* int; ESP tunnel mode */ #define IPSECCTL_DEF_AH_TRANSLEV 5 /* int; AH transport mode */ #define IPSECCTL_DEF_AH_NETLEV 6 /* int; AH tunnel mode */ #if 0 /* obsolete, do not reuse */ #define IPSECCTL_INBOUND_CALL_IKE 7 #endif #define IPSECCTL_AH_CLEARTOS 8 #define IPSECCTL_AH_OFFSETMASK 9 #define IPSECCTL_DFBIT 10 #define IPSECCTL_ECN 11 #define IPSECCTL_DEBUG 12 #define IPSECCTL_ESP_RANDPAD 13 #define IPSECCTL_MAXID 14 #define IPSECCTL_NAMES { \ { 0, 0 }, \ { 0, 0 }, \ { "def_policy", CTLTYPE_INT }, \ { "esp_trans_deflev", CTLTYPE_INT }, \ { "esp_net_deflev", CTLTYPE_INT }, \ { "ah_trans_deflev", CTLTYPE_INT }, \ { "ah_net_deflev", CTLTYPE_INT }, \ { 0, 0 }, \ { "ah_cleartos", CTLTYPE_INT }, \ { "ah_offsetmask", CTLTYPE_INT }, \ { "dfbit", CTLTYPE_INT }, \ { "ecn", CTLTYPE_INT }, \ { "debug", CTLTYPE_INT }, \ { "esp_randpad", CTLTYPE_INT }, \ } #define IPSEC6CTL_NAMES { \ { 0, 0 }, \ { 0, 0 }, \ { "def_policy", CTLTYPE_INT }, \ { "esp_trans_deflev", CTLTYPE_INT }, \ { "esp_net_deflev", CTLTYPE_INT }, \ { "ah_trans_deflev", CTLTYPE_INT }, \ { "ah_net_deflev", CTLTYPE_INT }, \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { "ecn", CTLTYPE_INT }, \ { "debug", CTLTYPE_INT }, \ { "esp_randpad", CTLTYPE_INT }, \ } #ifdef _KERNEL struct ipsec_output_state { struct mbuf *m; struct route *ro; struct sockaddr *dst; }; struct ipsec_history { int ih_proto; u_int32_t ih_spi; }; VNET_DECLARE(int, ipsec_debug); #define V_ipsec_debug VNET(ipsec_debug) #ifdef REGRESSION VNET_DECLARE(int, ipsec_replay); VNET_DECLARE(int, ipsec_integrity); #define V_ipsec_replay VNET(ipsec_replay) #define V_ipsec_integrity VNET(ipsec_integrity) #endif VNET_DECLARE(struct ipsecstat, ipsec4stat); VNET_DECLARE(struct secpolicy, ip4_def_policy); VNET_DECLARE(int, ip4_esp_trans_deflev); 
VNET_DECLARE(int, ip4_esp_net_deflev); VNET_DECLARE(int, ip4_ah_trans_deflev); VNET_DECLARE(int, ip4_ah_net_deflev); VNET_DECLARE(int, ip4_ah_offsetmask); VNET_DECLARE(int, ip4_ipsec_dfbit); VNET_DECLARE(int, ip4_ipsec_ecn); VNET_DECLARE(int, ip4_esp_randpad); VNET_DECLARE(int, crypto_support); #define IPSECSTAT_INC(name) V_ipsec4stat.name += 1 #define V_ipsec4stat VNET(ipsec4stat) #define V_ip4_def_policy VNET(ip4_def_policy) #define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) #define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) #define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) #define V_ip4_ah_net_deflev VNET(ip4_ah_net_deflev) #define V_ip4_ah_offsetmask VNET(ip4_ah_offsetmask) #define V_ip4_ipsec_dfbit VNET(ip4_ipsec_dfbit) #define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) #define V_ip4_esp_randpad VNET(ip4_esp_randpad) #define V_crypto_support VNET(crypto_support) #define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) /* for openbsd compatibility */ #define DPRINTF(x) do { if (V_ipsec_debug) printf x; } while (0) extern struct ipsecrequest *ipsec_newisr(void); extern void ipsec_delisr(struct ipsecrequest *); struct tdb_ident; extern struct secpolicy *ipsec_getpolicy __P((struct tdb_ident*, u_int)); struct inpcb; extern struct secpolicy *ipsec4_checkpolicy __P((struct mbuf *, u_int, u_int, int *, struct inpcb *)); extern struct secpolicy * ipsec_getpolicybyaddr(struct mbuf *, u_int, int, int *); struct inpcb; extern int ipsec_init_policy __P((struct socket *so, struct inpcbpolicy **)); extern int ipsec_copy_policy __P((struct inpcbpolicy *, struct inpcbpolicy *)); extern u_int ipsec_get_reqlevel __P((struct ipsecrequest *)); extern int ipsec_in_reject __P((struct secpolicy *, struct mbuf *)); extern int ipsec_set_policy __P((struct inpcb *inp, int optname, caddr_t request, size_t len, struct ucred *cred)); extern int ipsec_get_policy __P((struct inpcb *inpcb, caddr_t request, size_t len, struct mbuf **mp)); extern int ipsec_delete_pcbpolicy 
__P((struct inpcb *)); extern int ipsec4_in_reject __P((struct mbuf *, struct inpcb *)); struct secas; struct tcpcb; extern int ipsec_chkreplay __P((u_int32_t, struct secasvar *)); extern int ipsec_updatereplay __P((u_int32_t, struct secasvar *)); extern size_t ipsec_hdrsiz __P((struct mbuf *, u_int, struct inpcb *)); extern size_t ipsec_hdrsiz_tcp __P((struct tcpcb *)); union sockaddr_union; extern char * ipsec_address(union sockaddr_union* sa); extern const char *ipsec_logsastr __P((struct secasvar *)); extern void ipsec_dumpmbuf __P((struct mbuf *)); struct m_tag; extern void ah4_input(struct mbuf *m, int off); extern void ah4_ctlinput(int cmd, struct sockaddr *sa, void *); extern void esp4_input(struct mbuf *m, int off); extern void esp4_ctlinput(int cmd, struct sockaddr *sa, void *); extern void ipcomp4_input(struct mbuf *m, int off); extern int ipsec4_common_input(struct mbuf *m, ...); extern int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff, struct m_tag *mt); extern int ipsec4_process_packet __P((struct mbuf *, struct ipsecrequest *, int, int)); extern int ipsec_process_done __P((struct mbuf *, struct ipsecrequest *)); extern struct mbuf *ipsec_copypkt __P((struct mbuf *)); extern void m_checkalignment(const char* where, struct mbuf *m0, int off, int len); extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off); extern caddr_t m_pad(struct mbuf *m, int n); extern int m_striphdr(struct mbuf *m, int skip, int hlen); #ifdef DEV_ENC #define ENC_BEFORE 0x0001 #define ENC_AFTER 0x0002 #define ENC_IN 0x0100 #define ENC_OUT 0x0200 extern int ipsec_filter(struct mbuf **, int, int); extern void ipsec_bpf(struct mbuf *, struct secasvar *, int, int); #endif #endif /* _KERNEL */ #ifndef _KERNEL extern caddr_t ipsec_set_policy __P((char *, int)); extern int ipsec_get_policylen __P((caddr_t)); extern char *ipsec_dump_policy __P((caddr_t, char *)); extern const char *ipsec_strerror __P((void)); #endif /* ! 
KERNEL */ #endif /* _NETIPSEC_IPSEC_H_ */ Index: stable/9/sys/netipx/ipx_proto.c =================================================================== --- stable/9/sys/netipx/ipx_proto.c (revision 273911) +++ stable/9/sys/netipx/ipx_proto.c (revision 273912) @@ -1,159 +1,160 @@ /*- * Copyright (c) 1984, 1985, 1986, 1987, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * Copyright (c) 1995, Mike Mitchell * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ipx_proto.c */ #include __FBSDID("$FreeBSD$"); #include "opt_ipx.h" #include +#include #include #include #include #include #include #include #include #include #include #include static struct pr_usrreqs nousrreqs; /* * IPX protocol family: IPX, ERR, PXP, SPX, ROUTE. 
*/ static struct domain ipxdomain; static struct protosw ipxsw[] = { { .pr_domain = &ipxdomain, .pr_init = ipx_init, .pr_usrreqs = &nousrreqs }, { .pr_type = SOCK_DGRAM, .pr_domain = &ipxdomain, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_ctlinput = ipx_ctlinput, .pr_ctloutput = ipx_ctloutput, .pr_usrreqs = &ipx_usrreqs }, { .pr_type = SOCK_STREAM, .pr_domain = &ipxdomain, .pr_protocol = IPXPROTO_SPX, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctlinput = spx_ctlinput, .pr_ctloutput = spx_ctloutput, .pr_init = spx_init, .pr_fasttimo = spx_fasttimo, .pr_slowtimo = spx_slowtimo, .pr_usrreqs = &spx_usrreqs }, { .pr_type = SOCK_SEQPACKET, .pr_domain = &ipxdomain, .pr_protocol = IPXPROTO_SPX, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ATOMIC, .pr_ctlinput = spx_ctlinput, .pr_ctloutput = spx_ctloutput, .pr_usrreqs = &spx_usrreq_sps }, { .pr_type = SOCK_RAW, .pr_domain = &ipxdomain, .pr_protocol = IPXPROTO_RAW, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_ctloutput = ipx_ctloutput, .pr_usrreqs = &ripx_usrreqs }, }; extern int ipx_inithead(void **, int); static struct domain ipxdomain = { .dom_family = AF_IPX, .dom_name = "network systems", .dom_protosw = ipxsw, .dom_protoswNPROTOSW = &ipxsw[sizeof(ipxsw)/sizeof(ipxsw[0])], .dom_rtattach = ipx_inithead, .dom_rtoffset = 16, .dom_maxrtkey = sizeof(struct sockaddr_ipx) }; /* shim to adapt arguments */ int ipx_inithead(void **head, int offset) { return rn_inithead(head, offset); } DOMAIN_SET(ipx); SYSCTL_NODE(_net, PF_IPX, ipx, CTLFLAG_RW, 0, "IPX/SPX"); SYSCTL_NODE(_net_ipx, IPXPROTO_RAW, ipx, CTLFLAG_RW, 0, "IPX"); static SYSCTL_NODE(_net_ipx, IPXPROTO_SPX, spx, CTLFLAG_RW, 0, "SPX"); Index: stable/9/sys/netpfil =================================================================== --- stable/9/sys/netpfil (revision 273911) +++ stable/9/sys/netpfil (revision 273912) Property changes on: stable/9/sys/netpfil ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged 
/head/sys/netpfil:r263710,273377-273378,273423,273455 Index: stable/9/sys/ofed/drivers/net/mlx4/mlx4_en.h =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/mlx4_en.h (revision 273911) +++ stable/9/sys/ofed/drivers/net/mlx4/mlx4_en.h (revision 273912) @@ -1,950 +1,950 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #ifndef _MLX4_EN_H_ #define _MLX4_EN_H_ #include #include #include #include #include #include #include #include #ifdef CONFIG_MLX4_EN_DCB #include #endif #include #include #include #include #include #include #include #include "en_port.h" #include "mlx4_stats.h" #define DRV_NAME "mlx4_en" #define DRV_VERSION "2.1" #define DRV_RELDATE __DATE__ #define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN) /* * Device constants */ #define MLX4_EN_PAGE_SHIFT 12 #define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT) #define DEF_RX_RINGS 16 #define MAX_RX_RINGS 128 #define MIN_RX_RINGS 4 #define TXBB_SIZE 64 #define HEADROOM (2048 / TXBB_SIZE + 1) #define STAMP_STRIDE 64 #define STAMP_DWORDS (STAMP_STRIDE / 4) #define STAMP_SHIFT 31 #define STAMP_VAL 0x7fffffff #define STATS_DELAY (HZ / 4) #define SERVICE_TASK_DELAY (HZ / 4) #define MAX_NUM_OF_FS_RULES 256 #define MLX4_EN_FILTER_HASH_SHIFT 4 #define MLX4_EN_FILTER_EXPIRY_QUOTA 60 #ifdef CONFIG_NET_RX_BUSY_POLL #define LL_EXTENDED_STATS #endif /* vlan valid range */ #define VLAN_MIN_VALUE 1 #define VLAN_MAX_VALUE 4094 /* Typical TSO descriptor with 16 gather entries is 352 bytes... 
*/ #define MAX_DESC_SIZE 512 #define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE) /* * OS related constants and tunables */ #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(PAGE_SIZE) #define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE) enum mlx4_en_alloc_type { MLX4_EN_ALLOC_NEW = 0, MLX4_EN_ALLOC_REPLACEMENT = 1, }; /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU * and 4K allocations) */ #if MJUMPAGESIZE == 4096 enum { FRAG_SZ0 = MCLBYTES, FRAG_SZ1 = MJUMPAGESIZE, FRAG_SZ2 = MJUMPAGESIZE, }; #define MLX4_EN_MAX_RX_FRAGS 3 #elif MJUMPAGESIZE == 8192 enum { FRAG_SZ0 = MCLBYTES, FRAG_SZ1 = MJUMPAGESIZE, }; #define MLX4_EN_MAX_RX_FRAGS 2 #elif MJUMPAGESIZE == 8192 #else #error "Unknown PAGE_SIZE" #endif /* Maximum ring sizes */ #define MLX4_EN_DEF_TX_QUEUE_SIZE 4096 /* Minimum packet number till arming the CQ */ #define MLX4_EN_MIN_RX_ARM 2048 #define MLX4_EN_MIN_TX_ARM 2048 /* Maximum ring sizes */ #define MLX4_EN_MAX_TX_SIZE 8192 #define MLX4_EN_MAX_RX_SIZE 8192 /* Minimum ring sizes */ #define MLX4_EN_MIN_RX_SIZE (4096 / TXBB_SIZE) #define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE) #define MLX4_EN_SMALL_PKT_SIZE 64 #define MLX4_EN_MAX_TX_RING_P_UP 32 #define MLX4_EN_NUM_UP 1 #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ (MLX4_EN_NUM_UP + 1)) #define MLX4_EN_DEF_TX_RING_SIZE 1024 #define MLX4_EN_DEF_RX_RING_SIZE 1024 /* Target number of bytes to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 0x20000 #define MLX4_EN_RX_COAL_TIME 0x10 #define MLX4_EN_TX_COAL_PKTS 64 #define MLX4_EN_TX_COAL_TIME 64 #define MLX4_EN_RX_RATE_LOW 400000 #define MLX4_EN_RX_COAL_TIME_LOW 0 #define MLX4_EN_RX_RATE_HIGH 450000 #define MLX4_EN_RX_COAL_TIME_HIGH 128 #define MLX4_EN_RX_SIZE_THRESH 1024 #define MLX4_EN_RX_RATE_THRESH (1000000 / MLX4_EN_RX_COAL_TIME_HIGH) #define MLX4_EN_SAMPLE_INTERVAL 0 #define MLX4_EN_AVG_PKT_SMALL 256 #define MLX4_EN_AUTO_CONF 0xffff #define MLX4_EN_DEF_RX_PAUSE 1 #define 
MLX4_EN_DEF_TX_PAUSE 1 /* Interval between successive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define MLX4_EN_TX_POLL_MODER 16 #define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4) #define MLX4_EN_64_ALIGN (64 - NET_SKB_PAD) #define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) #define HEADER_COPY_SIZE (128) #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETHER_HDR_LEN) #define MLX4_EN_MIN_MTU 46 #define ETH_BCAST 0xffffffffffffULL #define MLX4_EN_LOOPBACK_RETRIES 5 #define MLX4_EN_LOOPBACK_TIMEOUT 100 #ifdef MLX4_EN_PERF_STAT /* Number of samples to 'average' */ #define AVG_SIZE 128 #define AVG_FACTOR 1024 #define INC_PERF_COUNTER(cnt) (++(cnt)) #define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add)) #define AVG_PERF_COUNTER(cnt, sample) \ ((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE) #define GET_PERF_COUNTER(cnt) (cnt) #define GET_AVG_PERF_COUNTER(cnt) ((cnt) / AVG_FACTOR) #else #define INC_PERF_COUNTER(cnt) do {} while (0) #define ADD_PERF_COUNTER(cnt, add) do {} while (0) #define AVG_PERF_COUNTER(cnt, sample) do {} while (0) #define GET_PERF_COUNTER(cnt) (0) #define GET_AVG_PERF_COUNTER(cnt) (0) #endif /* MLX4_EN_PERF_STAT */ /* * Configurables */ enum cq_type { RX = 0, TX = 1, }; /* * Useful macros */ #define ROUNDUP_LOG2(x) ilog2(roundup_pow_of_two(x)) #define XNOR(x, y) (!(x) == !(y)) #define ILLEGAL_MAC(addr) (addr == 0xffffffffffffULL || addr == 0x0) struct mlx4_en_tx_info { struct mbuf *mb; u32 nr_txbb; u32 nr_bytes; u8 linear; u8 nr_segs; u8 data_offset; u8 inl; #if 0 u8 ts_requested; #endif }; #define MLX4_EN_BIT_DESC_OWN 0x80000000 #define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) #define MLX4_EN_MEMTYPE_PAD 0x100 #define DS_SIZE sizeof(struct mlx4_wqe_data_seg) struct mlx4_en_tx_desc { struct mlx4_wqe_ctrl_seg ctrl; union { struct mlx4_wqe_data_seg data; /* at least one data segment */ struct mlx4_wqe_lso_seg lso; struct mlx4_wqe_inline_seg inl; }; }; #define 
MLX4_EN_USE_SRQ 0x01000000 #define MLX4_EN_TX_BUDGET 64*4 //Compensate for no NAPI in freeBSD - might need some fine tunning in the future. #define MLX4_EN_RX_BUDGET 64 #define MLX4_EN_CX3_LOW_ID 0x1000 #define MLX4_EN_CX3_HIGH_ID 0x1005 struct mlx4_en_tx_ring { spinlock_t tx_lock; struct mlx4_hwq_resources wqres; u32 size ; /* number of TXBBs */ u32 size_mask; u16 stride; u16 cqn; /* index of port CQ associated with this ring */ u32 prod; u32 cons; u32 buf_size; u32 doorbell_qpn; void *buf; u16 poll_cnt; int blocked; struct mlx4_en_tx_info *tx_info; u8 *bounce_buf; u8 queue_index; cpuset_t affinity_mask; struct buf_ring *br; u32 last_nr_txbb; struct mlx4_qp qp; struct mlx4_qp_context context; int qpn; enum mlx4_qp_state qp_state; struct mlx4_srq dummy; unsigned long bytes; unsigned long packets; unsigned long tx_csum; unsigned long queue_stopped; unsigned long wake_queue; struct mlx4_bf bf; bool bf_enabled; struct netdev_queue *tx_queue; int hwtstamp_tx_type; spinlock_t comp_lock; int full_size; int inline_thold; u64 watchdog_time; }; struct mlx4_en_rx_desc { /* actual number of entries depends on rx ring stride */ struct mlx4_wqe_data_seg data[0]; }; struct mlx4_en_rx_buf { dma_addr_t dma; struct page *page; unsigned int page_offset; }; struct mlx4_en_rx_ring { struct mlx4_hwq_resources wqres; u32 size ; /* number of Rx descs*/ u32 actual_size; u32 size_mask; u16 stride; u16 log_stride; u16 cqn; /* index of port CQ associated with this ring */ u32 prod; u32 cons; u32 buf_size; u8 fcs_del; u16 rx_alloc_order; u32 rx_alloc_size; u32 rx_buf_size; u32 rx_mb_size; int qpn; void *buf; void *rx_info; unsigned long errors; unsigned long bytes; unsigned long packets; #ifdef LL_EXTENDED_STATS unsigned long yields; unsigned long misses; unsigned long cleaned; #endif unsigned long csum_ok; unsigned long csum_none; int hwtstamp_rx_filter; int numa_node; struct lro_ctrl lro; }; static inline int mlx4_en_can_lro(__be16 status) { static __be16 status_all; static __be16 
status_ipv4_ipok_tcp; static __be16 status_ipv6_ipok_tcp; status_all = cpu_to_be16( MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F | MLX4_CQE_STATUS_IPV6 | MLX4_CQE_STATUS_IPV4OPT | MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP | MLX4_CQE_STATUS_IPOK); status_ipv4_ipok_tcp = cpu_to_be16( MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPOK | MLX4_CQE_STATUS_TCP); status_ipv6_ipok_tcp = cpu_to_be16( MLX4_CQE_STATUS_IPV6 | MLX4_CQE_STATUS_IPOK | MLX4_CQE_STATUS_TCP); status &= status_all; return (status == status_ipv4_ipok_tcp || status == status_ipv6_ipok_tcp); } struct mlx4_en_cq { struct mlx4_cq mcq; struct mlx4_hwq_resources wqres; int ring; spinlock_t lock; struct net_device *dev; /* Per-core Tx cq processing support */ struct timer_list timer; int size; int buf_size; unsigned vector; enum cq_type is_tx; u16 moder_time; u16 moder_cnt; struct mlx4_cqe *buf; struct task cq_task; struct taskqueue *tq; #define MLX4_EN_OPCODE_ERROR 0x1e u32 tot_rx; u32 tot_tx; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define MLX4_EN_CQ_STATEIDLE 0 #define MLX4_EN_CQ_STATENAPI 1 /* NAPI owns this CQ */ #define MLX4_EN_CQ_STATEPOLL 2 /* poll owns this CQ */ #define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATENAPI | MLX4_EN_CQ_STATEPOLL) #define MLX4_EN_CQ_STATENAPI_YIELD 4 /* NAPI yielded this CQ */ #define MLX4_EN_CQ_STATEPOLL_YIELD 8 /* poll yielded this CQ */ #define CQ_YIELD (MLX4_EN_CQ_STATENAPI_YIELD | MLX4_EN_CQ_STATEPOLL_YIELD) #define CQ_USER_PEND (MLX4_EN_CQ_STATEPOLL | MLX4_EN_CQ_STATEPOLL_YIELD) spinlock_t poll_lock; /* protects from LLS/napi conflicts */ #endif /* CONFIG_NET_RX_BUSY_POLL */ }; struct mlx4_en_port_profile { u32 flags; u32 tx_ring_num; u32 rx_ring_num; u32 tx_ring_size; u32 rx_ring_size; u8 rx_pause; u8 rx_ppp; u8 tx_pause; u8 tx_ppp; int rss_rings; }; struct mlx4_en_profile { int rss_xor; int udp_rss; u8 rss_mask; u32 active_ports; u32 small_pkt_int; u8 no_reset; u8 num_tx_rings_p_up; struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; }; struct mlx4_en_dev { struct 
mlx4_dev *dev; struct pci_dev *pdev; struct mutex state_lock; struct net_device *pndev[MLX4_MAX_PORTS + 1]; u32 port_cnt; bool device_up; struct mlx4_en_profile profile; u32 LSO_support; struct workqueue_struct *workqueue; struct device *dma_device; void __iomem *uar_map; struct mlx4_uar priv_uar; struct mlx4_mr mr; u32 priv_pdn; spinlock_t uar_lock; u8 mac_removed[MLX4_MAX_PORTS + 1]; unsigned long last_overflow_check; unsigned long overflow_period; }; struct mlx4_en_rss_map { int base_qpn; struct mlx4_qp qps[MAX_RX_RINGS]; enum mlx4_qp_state state[MAX_RX_RINGS]; struct mlx4_qp indir_qp; enum mlx4_qp_state indir_state; }; struct mlx4_en_port_state { int link_state; int link_speed; int transciver; int autoneg; }; enum mlx4_en_mclist_act { MCLIST_NONE, MCLIST_REM, MCLIST_ADD, }; struct mlx4_en_mc_list { struct list_head list; enum mlx4_en_mclist_act action; u8 addr[ETH_ALEN]; u64 reg_id; }; #ifdef CONFIG_MLX4_EN_DCB /* Minimal TC BW - setting to 0 will block traffic */ #define MLX4_EN_BW_MIN 1 #define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */ #define MLX4_EN_TC_ETS 7 #endif enum { MLX4_EN_FLAG_PROMISC = (1 << 0), MLX4_EN_FLAG_MC_PROMISC = (1 << 1), /* whether we need to enable hardware loopback by putting dmac * in Tx WQE */ MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2), /* whether we need to drop packets that hardware loopback-ed */ MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), #ifdef CONFIG_MLX4_EN_DCB MLX4_EN_FLAG_DCB_ENABLED = (1 << 5) #endif }; #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE) #define MLX4_EN_MAC_HASH_IDX 5 struct en_port { struct kobject kobj; struct mlx4_dev *dev; u8 port_num; u8 vport_num; }; struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; }; struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; struct net_device *dev; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx4_en_port_state port_state; spinlock_t stats_lock; /* To allow rules 
removal while port is going down */ struct list_head ethtool_list; unsigned long last_moder_packets[MAX_RX_RINGS]; unsigned long last_moder_tx_packets; unsigned long last_moder_bytes[MAX_RX_RINGS]; unsigned long last_moder_jiffies; int last_moder_time[MAX_RX_RINGS]; u16 rx_usecs; u16 rx_frames; u16 tx_usecs; u16 tx_frames; u32 pkt_rate_low; - u16 rx_usecs_low; + u32 rx_usecs_low; u32 pkt_rate_high; - u16 rx_usecs_high; - u16 sample_interval; - u16 adaptive_rx_coal; + u32 rx_usecs_high; + u32 sample_interval; + u32 adaptive_rx_coal; u32 msg_enable; u32 loopback_ok; u32 validate_loopback; struct mlx4_hwq_resources res; int link_state; int last_link_state; bool port_up; int port; int registered; int allocated; int stride; unsigned char current_mac[ETH_ALEN + 2]; u64 mac; int mac_index; unsigned max_mtu; int base_qpn; int cqe_factor; struct mlx4_en_rss_map rss_map; __be32 ctrl_flags; u32 flags; u8 num_tx_rings_p_up; u32 tx_ring_num; u32 rx_ring_num; u32 rx_mb_size; struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; u16 rx_alloc_order; u32 rx_alloc_size; u32 rx_buf_size; u16 num_frags; u16 log_rx_info; struct mlx4_en_tx_ring **tx_ring; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; struct mlx4_en_cq **tx_cq; struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; struct mlx4_qp drop_qp; struct work_struct rx_mode_task; struct work_struct watchdog_task; struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_flow_stats flowstats[MLX4_NUM_PRIORITIES]; struct mlx4_en_port_stats port_stats; struct mlx4_en_vport_stats vport_stats; struct mlx4_en_vf_stats vf_stats; DECLARE_BITMAP(stats_bitmap, NUM_ALL_STATS); struct list_head mc_list; struct list_head curr_list; u64 broadcast_id; struct mlx4_en_stat_out_mbox hw_stats; int vids[128]; bool wol; struct device *ddev; struct dentry *dev_root; u32 counter_index; eventhandler_tag vlan_attach; eventhandler_tag 
vlan_detach; struct callout watchdog_timer; struct ifmedia media; volatile int blocked; struct sysctl_oid *sysctl; struct sysctl_ctx_list conf_ctx; struct sysctl_ctx_list stat_ctx; #define MLX4_EN_MAC_HASH_IDX 5 struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; #ifdef CONFIG_MLX4_EN_DCB struct ieee_ets ets; u16 maxrate[IEEE_8021QAZ_MAX_TCS]; u8 dcbx_cap; #endif #ifdef CONFIG_RFS_ACCEL spinlock_t filters_lock; int last_filter_id; struct list_head filters; struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT]; #endif struct en_port *vf_ports[MLX4_MAX_NUM_VF]; unsigned long last_ifq_jiffies; u64 if_counters_rx_errors; u64 if_counters_rx_no_buffer; }; enum mlx4_en_wol { MLX4_EN_WOL_MAGIC = (1ULL << 61), MLX4_EN_WOL_ENABLED = (1ULL << 62), }; struct mlx4_mac_entry { struct hlist_node hlist; unsigned char mac[ETH_ALEN + 2]; u64 reg_id; }; #ifdef CONFIG_NET_RX_BUSY_POLL static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) { spin_lock_init(&cq->poll_lock); cq->state = MLX4_EN_CQ_STATEIDLE; } /* called from the device poll rutine to get ownership of a cq */ static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) { int rc = true; spin_lock(&cq->poll_lock); if (cq->state & MLX4_CQ_LOCKED) { WARN_ON(cq->state & MLX4_EN_CQ_STATENAPI); cq->state |= MLX4_EN_CQ_STATENAPI_YIELD; rc = false; } else /* we don't care if someone yielded */ cq->state = MLX4_EN_CQ_STATENAPI; spin_unlock(&cq->poll_lock); return rc; } /* returns true is someone tried to get the cq while napi had it */ static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) { int rc = false; spin_lock(&cq->poll_lock); WARN_ON(cq->state & (MLX4_EN_CQ_STATEPOLL | MLX4_EN_CQ_STATENAPI_YIELD)); if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD) rc = true; cq->state = MLX4_EN_CQ_STATEIDLE; spin_unlock(&cq->poll_lock); return rc; } /* called from mlx4_en_low_latency_poll() */ static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) { int rc = true; spin_lock_bh(&cq->poll_lock); if ((cq->state & 
MLX4_CQ_LOCKED)) { struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; cq->state |= MLX4_EN_CQ_STATEPOLL_YIELD; rc = false; #ifdef LL_EXTENDED_STATS rx_ring->yields++; #endif } else /* preserve yield marks */ cq->state |= MLX4_EN_CQ_STATEPOLL; spin_unlock_bh(&cq->poll_lock); return rc; } /* returns true if someone tried to get the cq while it was locked */ static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) { int rc = false; spin_lock_bh(&cq->poll_lock); WARN_ON(cq->state & (MLX4_EN_CQ_STATENAPI)); if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD) rc = true; cq->state = MLX4_EN_CQ_STATEIDLE; spin_unlock_bh(&cq->poll_lock); return rc; } /* true if a socket is polling, even if it did not get the lock */ static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) { WARN_ON(!(cq->state & MLX4_CQ_LOCKED)); return cq->state & CQ_USER_PEND; } #else static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) { } static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) { return true; } static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) { return false; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) void mlx4_en_destroy_netdev(struct net_device *dev); int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof); int mlx4_en_start_port(struct net_device *dev); void mlx4_en_stop_port(struct net_device *dev); void mlx4_en_free_resources(struct mlx4_en_priv *priv); int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); int mlx4_en_pre_config(struct mlx4_en_priv *priv); int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, int 
entries, int ring, enum cq_type mode, int node); void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq); int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb); int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, u32 size, u16 stride, int node, int queue_idx); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio); void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); void mlx4_en_qflush(struct ifnet *dev); int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, int node); void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride); void mlx4_en_tx_que(void *context, int pending); void mlx4_en_rx_que(void *context, int pending); int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv); void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring); int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget); void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, struct mlx4_qp_context *context); void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); int mlx4_en_map_buffer(struct mlx4_buf *buf); void mlx4_en_unmap_buffer(struct mlx4_buf *buf); void 
mlx4_en_calc_rx_buf(struct net_device *dev); int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv); void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv); int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring); void mlx4_en_rx_irq(struct mlx4_cq *mcq); int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv); int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); int mlx4_en_get_vport_stats(struct mlx4_en_dev *mdev, u8 port); void mlx4_en_create_debug_files(struct mlx4_en_priv *priv); void mlx4_en_delete_debug_files(struct mlx4_en_priv *priv); int mlx4_en_register_debugfs(void); void mlx4_en_unregister_debugfs(void); #ifdef CONFIG_MLX4_EN_DCB extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; #endif int mlx4_en_setup_tc(struct net_device *dev, u8 up); #ifdef CONFIG_RFS_ACCEL void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *rx_ring); #endif #define MLX4_EN_NUM_SELF_TEST 5 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); /* * Functions for time stamping */ #define SKBTX_HW_TSTAMP (1 << 0) #define SKBTX_IN_PROGRESS (1 << 2) u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe); /* Functions for caching and restoring statistics */ int mlx4_en_get_sset_count(struct net_device *dev, int sset); void mlx4_en_restore_ethtool_stats(struct mlx4_en_priv *priv, u64 *data); /* * Globals */ extern const struct ethtool_ops mlx4_en_ethtool_ops; /* * Defines for link speed - needed by selftest */ #define MLX4_EN_LINK_SPEED_1G 1000 #define MLX4_EN_LINK_SPEED_10G 10000 #define MLX4_EN_LINK_SPEED_40G 40000 enum 
{ NETIF_MSG_DRV = 0x0001, NETIF_MSG_PROBE = 0x0002, NETIF_MSG_LINK = 0x0004, NETIF_MSG_TIMER = 0x0008, NETIF_MSG_IFDOWN = 0x0010, NETIF_MSG_IFUP = 0x0020, NETIF_MSG_RX_ERR = 0x0040, NETIF_MSG_TX_ERR = 0x0080, NETIF_MSG_TX_QUEUED = 0x0100, NETIF_MSG_INTR = 0x0200, NETIF_MSG_TX_DONE = 0x0400, NETIF_MSG_RX_STATUS = 0x0800, NETIF_MSG_PKTDATA = 0x1000, NETIF_MSG_HW = 0x2000, NETIF_MSG_WOL = 0x4000, }; /* * printk / logging functions */ #define en_print(level, priv, format, arg...) \ { \ if ((priv)->registered) \ printk(level "%s: %s: " format, DRV_NAME, \ (priv->dev)->if_xname, ## arg); \ else \ printk(level "%s: %s: Port %d: " format, \ DRV_NAME, dev_name(&priv->mdev->pdev->dev), \ (priv)->port, ## arg); \ } #define en_dbg(mlevel, priv, format, arg...) \ do { \ if (NETIF_MSG_##mlevel & priv->msg_enable) \ en_print(KERN_DEBUG, priv, format, ##arg); \ } while (0) #define en_warn(priv, format, arg...) \ en_print(KERN_WARNING, priv, format, ##arg) #define en_err(priv, format, arg...) \ en_print(KERN_ERR, priv, format, ##arg) #define en_info(priv, format, arg...) \ en_print(KERN_INFO, priv, format, ## arg) #define mlx4_err(mdev, format, arg...) \ pr_err("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #define mlx4_info(mdev, format, arg...) \ pr_info("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #define mlx4_warn(mdev, format, arg...) \ pr_warning("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #endif Index: stable/9/sys/powerpc/powermac/fcu.c =================================================================== --- stable/9/sys/powerpc/powermac/fcu.c (revision 273911) +++ stable/9/sys/powerpc/powermac/fcu.c (revision 273912) @@ -1,677 +1,677 @@ /*- * Copyright (c) 2010 Andreas Tobler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* FCU registers * /u3@0,f8000000/i2c@f8001000/fan@15e */ #define FCU_RPM_FAIL 0x0b /* fans states in bits 0<1-6>7 */ #define FCU_RPM_AVAILABLE 0x0c #define FCU_RPM_ACTIVE 0x0d #define FCU_RPM_READ(x) 0x11 + (x) * 2 #define FCU_RPM_SET(x) 0x10 + (x) * 2 #define FCU_PWM_FAIL 0x2b #define FCU_PWM_AVAILABLE 0x2c #define FCU_PWM_ACTIVE 0x2d #define FCU_PWM_RPM(x) 0x31 + (x) * 2 /* Get RPM. */ #define FCU_PWM_SGET(x) 0x30 + (x) * 2 /* Set or get PWM. 
*/ struct fcu_fan { struct pmac_fan fan; device_t dev; int id; enum { FCU_FAN_RPM, FCU_FAN_PWM } type; int setpoint; int rpm; }; struct fcu_softc { device_t sc_dev; struct intr_config_hook enum_hook; uint32_t sc_addr; struct fcu_fan *sc_fans; int sc_nfans; }; /* We can read the PWM and the RPM from a PWM controlled fan. * Offer both values via sysctl. */ enum { FCU_PWM_SYSCTL_PWM = 1 << 8, FCU_PWM_SYSCTL_RPM = 2 << 8 }; static int fcu_rpm_shift; /* Regular bus attachment functions */ static int fcu_probe(device_t); static int fcu_attach(device_t); /* Utility functions */ static void fcu_attach_fans(device_t dev); static int fcu_fill_fan_prop(device_t dev); static int fcu_fan_set_rpm(struct fcu_fan *fan, int rpm); static int fcu_fan_get_rpm(struct fcu_fan *fan); static int fcu_fan_set_pwm(struct fcu_fan *fan, int pwm); static int fcu_fan_get_pwm(device_t dev, struct fcu_fan *fan, int *pwm, int *rpm); static int fcu_fanrpm_sysctl(SYSCTL_HANDLER_ARGS); static void fcu_start(void *xdev); static int fcu_write(device_t dev, uint32_t addr, uint8_t reg, uint8_t *buf, int len); static int fcu_read_1(device_t dev, uint32_t addr, uint8_t reg, uint8_t *data); static device_method_t fcu_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fcu_probe), DEVMETHOD(device_attach, fcu_attach), { 0, 0 }, }; static driver_t fcu_driver = { "fcu", fcu_methods, sizeof(struct fcu_softc) }; static devclass_t fcu_devclass; DRIVER_MODULE(fcu, iicbus, fcu_driver, fcu_devclass, 0, 0); static MALLOC_DEFINE(M_FCU, "fcu", "FCU Sensor Information"); static int fcu_write(device_t dev, uint32_t addr, uint8_t reg, uint8_t *buff, int len) { unsigned char buf[4]; int try = 0; struct iic_msg msg[] = { { addr, IIC_M_WR, 0, buf } }; msg[0].len = len + 1; buf[0] = reg; memcpy(buf + 1, buff, len); for (;;) { if (iicbus_transfer(dev, msg, 1) == 0) return (0); if (++try > 5) { device_printf(dev, "iicbus write failed\n"); return (-1); } pause("fcu_write", hz); } } static int fcu_read_1(device_t dev, 
uint32_t addr, uint8_t reg, uint8_t *data) { uint8_t buf[4]; int err, try = 0; struct iic_msg msg[2] = { { addr, IIC_M_WR | IIC_M_NOSTOP, 1, ® }, { addr, IIC_M_RD, 1, buf }, }; for (;;) { err = iicbus_transfer(dev, msg, 2); if (err != 0) goto retry; *data = *((uint8_t*)buf); return (0); retry: if (++try > 5) { device_printf(dev, "iicbus read failed\n"); return (-1); } pause("fcu_read_1", hz); } } static int fcu_probe(device_t dev) { const char *name, *compatible; struct fcu_softc *sc; name = ofw_bus_get_name(dev); compatible = ofw_bus_get_compat(dev); if (!name) return (ENXIO); if (strcmp(name, "fan") != 0 || strcmp(compatible, "fcu") != 0) return (ENXIO); sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_addr = iicbus_get_addr(dev); device_set_desc(dev, "Apple Fan Control Unit"); return (0); } static int fcu_attach(device_t dev) { struct fcu_softc *sc; sc = device_get_softc(dev); sc->enum_hook.ich_func = fcu_start; sc->enum_hook.ich_arg = dev; /* We have to wait until interrupts are enabled. I2C read and write * only works if the interrupts are available. * The unin/i2c is controlled by the htpic on unin. But this is not * the master. The openpic on mac-io is controlling the htpic. * This one gets attached after the mac-io probing and then the * interrupts will be available. */ if (config_intrhook_establish(&sc->enum_hook) != 0) return (ENOMEM); return (0); } static void fcu_start(void *xdev) { unsigned char buf[1] = { 0xff }; struct fcu_softc *sc; device_t dev = (device_t)xdev; sc = device_get_softc(dev); /* Start the fcu device. */ fcu_write(sc->sc_dev, sc->sc_addr, 0xe, buf, 1); fcu_write(sc->sc_dev, sc->sc_addr, 0x2e, buf, 1); fcu_read_1(sc->sc_dev, sc->sc_addr, 0, buf); fcu_rpm_shift = (buf[0] == 1) ? 2 : 3; device_printf(dev, "FCU initialized, RPM shift: %d\n", fcu_rpm_shift); /* Detect and attach child devices. 
*/ fcu_attach_fans(dev); config_intrhook_disestablish(&sc->enum_hook); } static int fcu_fan_set_rpm(struct fcu_fan *fan, int rpm) { uint8_t reg; struct fcu_softc *sc; unsigned char buf[2]; sc = device_get_softc(fan->dev); /* Clamp to allowed range */ rpm = max(fan->fan.min_rpm, rpm); rpm = min(fan->fan.max_rpm, rpm); if (fan->type == FCU_FAN_RPM) { reg = FCU_RPM_SET(fan->id); fan->setpoint = rpm; } else { device_printf(fan->dev, "Unknown fan type: %d\n", fan->type); return (ENXIO); } buf[0] = rpm >> (8 - fcu_rpm_shift); buf[1] = rpm << fcu_rpm_shift; if (fcu_write(sc->sc_dev, sc->sc_addr, reg, buf, 2) < 0) return (EIO); return (0); } static int fcu_fan_get_rpm(struct fcu_fan *fan) { uint8_t reg; struct fcu_softc *sc; uint8_t buff[2] = { 0, 0 }; uint8_t active = 0, avail = 0, fail = 0; int rpm; sc = device_get_softc(fan->dev); if (fan->type == FCU_FAN_RPM) { /* Check if the fan is available. */ reg = FCU_RPM_AVAILABLE; if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &avail) < 0) return (-1); if ((avail & (1 << fan->id)) == 0) { device_printf(fan->dev, "RPM Fan not available ID: %d\n", fan->id); return (-1); } /* Check if we have a failed fan. */ reg = FCU_RPM_FAIL; if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &fail) < 0) return (-1); if ((fail & (1 << fan->id)) != 0) { device_printf(fan->dev, "RPM Fan failed ID: %d\n", fan->id); return (-1); } /* Check if fan is active. */ reg = FCU_RPM_ACTIVE; if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &active) < 0) return (-1); if ((active & (1 << fan->id)) == 0) { device_printf(fan->dev, "RPM Fan not active ID: %d\n", fan->id); return (-1); } reg = FCU_RPM_READ(fan->id); } else { device_printf(fan->dev, "Unknown fan type: %d\n", fan->type); return (-1); } /* It seems that we can read the fans rpm. 
*/ if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buff) < 0) return (-1); rpm = (buff[0] << (8 - fcu_rpm_shift)) | buff[1] >> fcu_rpm_shift; return (rpm); } static int fcu_fan_set_pwm(struct fcu_fan *fan, int pwm) { uint8_t reg; struct fcu_softc *sc; uint8_t buf[2]; sc = device_get_softc(fan->dev); /* Clamp to allowed range */ pwm = max(fan->fan.min_rpm, pwm); pwm = min(fan->fan.max_rpm, pwm); if (fan->type == FCU_FAN_PWM) { reg = FCU_PWM_SGET(fan->id); if (pwm > 100) pwm = 100; if (pwm < 30) pwm = 30; fan->setpoint = pwm; } else { device_printf(fan->dev, "Unknown fan type: %d\n", fan->type); return (EIO); } buf[0] = (pwm * 2550) / 1000; if (fcu_write(sc->sc_dev, sc->sc_addr, reg, buf, 1) < 0) return (EIO); return (0); } static int fcu_fan_get_pwm(device_t dev, struct fcu_fan *fan, int *pwm, int *rpm) { uint8_t reg; struct fcu_softc *sc; uint8_t buf[2]; uint8_t active = 0, avail = 0, fail = 0; sc = device_get_softc(dev); if (fan->type == FCU_FAN_PWM) { /* Check if the fan is available. */ reg = FCU_PWM_AVAILABLE; if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &avail) < 0) return (-1); if ((avail & (1 << fan->id)) == 0) { device_printf(dev, "PWM Fan not available ID: %d\n", fan->id); return (-1); } /* Check if we have a failed fan. */ reg = FCU_PWM_FAIL; if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &fail) < 0) return (-1); if ((fail & (1 << fan->id)) != 0) { device_printf(dev, "PWM Fan failed ID: %d\n", fan->id); return (-1); } /* Check if fan is active. */ reg = FCU_PWM_ACTIVE; if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &active) < 0) return (-1); if ((active & (1 << fan->id)) == 0) { device_printf(dev, "PWM Fan not active ID: %d\n", fan->id); return (-1); } reg = FCU_PWM_SGET(fan->id); } else { device_printf(dev, "Unknown fan type: %d\n", fan->type); return (EIO); } /* It seems that we can read the fans pwm. */ if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buf) < 0) return (-1); *pwm = (buf[0] * 1000) / 2550; /* Now read the rpm. 
*/ reg = FCU_PWM_RPM(fan->id); if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buf) < 0) return (-1); *rpm = (buf[0] << (8 - fcu_rpm_shift)) | buf[1] >> fcu_rpm_shift; return (0); } /* * This function returns the number of fans. If we call it the second time * and we have allocated memory for sc->sc_fans, we fill in the properties. */ static int fcu_fill_fan_prop(device_t dev) { phandle_t child; struct fcu_softc *sc; u_int id[8]; char location[96]; char type[64]; int i = 0, j, len = 0, prop_len, prev_len = 0; sc = device_get_softc(dev); child = ofw_bus_get_node(dev); /* Fill the fan location property. */ prop_len = OF_getprop(child, "hwctrl-location", location, sizeof(location)); while (len < prop_len) { if (sc->sc_fans != NULL) { strcpy(sc->sc_fans[i].fan.name, location + len); } prev_len = strlen(location + len) + 1; len += prev_len; i++; } if (sc->sc_fans == NULL) return (i); /* Fill the fan type property. */ len = 0; i = 0; prev_len = 0; prop_len = OF_getprop(child, "hwctrl-type", type, sizeof(type)); while (len < prop_len) { if (strcmp(type + len, "fan-rpm") == 0) sc->sc_fans[i].type = FCU_FAN_RPM; else sc->sc_fans[i].type = FCU_FAN_PWM; prev_len = strlen(type + len) + 1; len += prev_len; i++; } /* Fill the fan ID property. */ prop_len = OF_getprop(child, "hwctrl-id", id, sizeof(id)); for (j = 0; j < i; j++) sc->sc_fans[j].id = ((id[j] >> 8) & 0x0f) % 8; /* Fill the fan zone property. 
*/ prop_len = OF_getprop(child, "hwctrl-zone", id, sizeof(id)); for (j = 0; j < i; j++) sc->sc_fans[j].fan.zone = id[j]; /* Finish setting up fan properties */ for (j = 0; j < i; j++) { sc->sc_fans[j].dev = sc->sc_dev; if (sc->sc_fans[j].type == FCU_FAN_RPM) { sc->sc_fans[j].fan.min_rpm = 4800 >> fcu_rpm_shift; sc->sc_fans[j].fan.max_rpm = 56000 >> fcu_rpm_shift; sc->sc_fans[j].setpoint = fcu_fan_get_rpm(&sc->sc_fans[j]); sc->sc_fans[j].fan.read = (int (*)(struct pmac_fan *))(fcu_fan_get_rpm); sc->sc_fans[j].fan.set = (int (*)(struct pmac_fan *, int))(fcu_fan_set_rpm); } else { sc->sc_fans[j].fan.min_rpm = 30; /* Percent */ sc->sc_fans[j].fan.max_rpm = 100; sc->sc_fans[j].fan.read = NULL; sc->sc_fans[j].fan.set = (int (*)(struct pmac_fan *, int))(fcu_fan_set_pwm); } sc->sc_fans[j].fan.default_rpm = sc->sc_fans[j].fan.max_rpm; } return (i); } static int fcu_fanrpm_sysctl(SYSCTL_HANDLER_ARGS) { device_t fcu; struct fcu_softc *sc; struct fcu_fan *fan; int rpm = 0, pwm = 0, error = 0; fcu = arg1; sc = device_get_softc(fcu); fan = &sc->sc_fans[arg2 & 0x00ff]; if (fan->type == FCU_FAN_RPM) { rpm = fcu_fan_get_rpm(fan); if (rpm < 0) return (EIO); error = sysctl_handle_int(oidp, &rpm, 0, req); } else { error = fcu_fan_get_pwm(fcu, fan, &pwm, &rpm); if (error < 0) return (EIO); switch (arg2 & 0xff00) { case FCU_PWM_SYSCTL_PWM: error = sysctl_handle_int(oidp, &pwm, 0, req); break; case FCU_PWM_SYSCTL_RPM: error = sysctl_handle_int(oidp, &rpm, 0, req); break; default: /* This should never happen */ return (EINVAL); }; } /* We can only read the RPM from a PWM controlled fan, so return. 
*/ if ((arg2 & 0xff00) == FCU_PWM_SYSCTL_RPM) return (0); if (error || !req->newptr) return (error); if (fan->type == FCU_FAN_RPM) return (fcu_fan_set_rpm(fan, rpm)); else return (fcu_fan_set_pwm(fan, pwm)); } static void fcu_attach_fans(device_t dev) { struct fcu_softc *sc; struct sysctl_oid *oid, *fanroot_oid; struct sysctl_ctx_list *ctx; char sysctl_name[32]; int i, j; sc = device_get_softc(dev); sc->sc_nfans = 0; /* Count the actual number of fans. */ sc->sc_nfans = fcu_fill_fan_prop(dev); device_printf(dev, "%d fans detected!\n", sc->sc_nfans); if (sc->sc_nfans == 0) { device_printf(dev, "WARNING: No fans detected!\n"); return; } sc->sc_fans = malloc(sc->sc_nfans * sizeof(struct fcu_fan), M_FCU, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(dev); fanroot_oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fans", CTLFLAG_RD, 0, "FCU Fan Information"); /* Now we can fill the properties into the allocated struct. */ sc->sc_nfans = fcu_fill_fan_prop(dev); /* Register fans with pmac_thermal */ for (i = 0; i < sc->sc_nfans; i++) pmac_thermal_fan_register(&sc->sc_fans[i].fan); /* Add sysctls for the fans. 
*/ for (i = 0; i < sc->sc_nfans; i++) { for (j = 0; j < strlen(sc->sc_fans[i].fan.name); j++) { sysctl_name[j] = tolower(sc->sc_fans[i].fan.name[j]); if (isspace(sysctl_name[j])) sysctl_name[j] = '_'; } sysctl_name[j] = 0; if (sc->sc_fans[i].type == FCU_FAN_RPM) { oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(fanroot_oid), OID_AUTO, sysctl_name, CTLFLAG_RD, 0, "Fan Information"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "minrpm", CTLTYPE_INT | CTLFLAG_RD, - &(sc->sc_fans[i].fan.min_rpm), - sizeof(int), "Minimum allowed RPM"); + "minrpm", CTLFLAG_RD, + &(sc->sc_fans[i].fan.min_rpm), 0, + "Minimum allowed RPM"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "maxrpm", CTLTYPE_INT | CTLFLAG_RD, - &(sc->sc_fans[i].fan.max_rpm), - sizeof(int), "Maximum allowed RPM"); + "maxrpm", CTLFLAG_RD, + &(sc->sc_fans[i].fan.max_rpm), 0, + "Maximum allowed RPM"); /* I use i to pass the fan id. */ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "rpm", CTLTYPE_INT | CTLFLAG_RW, dev, i, fcu_fanrpm_sysctl, "I", "Fan RPM"); } else { fcu_fan_get_pwm(dev, &sc->sc_fans[i], &sc->sc_fans[i].setpoint, &sc->sc_fans[i].rpm); oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(fanroot_oid), OID_AUTO, sysctl_name, CTLFLAG_RD, 0, "Fan Information"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "minpwm", CTLTYPE_INT | CTLFLAG_RD, - &(sc->sc_fans[i].fan.min_rpm), - sizeof(int), "Minimum allowed PWM in %"); + "minpwm", CTLFLAG_RD, + &(sc->sc_fans[i].fan.min_rpm), 0, + "Minimum allowed PWM in %"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "maxpwm", CTLTYPE_INT | CTLFLAG_RD, - &(sc->sc_fans[i].fan.max_rpm), - sizeof(int), "Maximum allowed PWM in %"); + "maxpwm", CTLFLAG_RD, + &(sc->sc_fans[i].fan.max_rpm), 0, + "Maximum allowed PWM in %"); /* I use i to pass the fan id or'ed with the type * of info I want to display/modify. 
*/ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "pwm", CTLTYPE_INT | CTLFLAG_RW, dev, FCU_PWM_SYSCTL_PWM | i, fcu_fanrpm_sysctl, "I", "Fan PWM in %"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "rpm", CTLTYPE_INT | CTLFLAG_RD, dev, FCU_PWM_SYSCTL_RPM | i, fcu_fanrpm_sysctl, "I", "Fan RPM"); } } /* Dump fan location, type & RPM. */ if (bootverbose) { device_printf(dev, "Fans\n"); for (i = 0; i < sc->sc_nfans; i++) { device_printf(dev, "Location: %s type: %d ID: %d " "RPM: %d\n", sc->sc_fans[i].fan.name, sc->sc_fans[i].type, sc->sc_fans[i].id, (sc->sc_fans[i].type == FCU_FAN_RPM) ? sc->sc_fans[i].setpoint : sc->sc_fans[i].rpm ); } } } Index: stable/9/sys/powerpc/powermac/smu.c =================================================================== --- stable/9/sys/powerpc/powermac/smu.c (revision 273911) +++ stable/9/sys/powerpc/powermac/smu.c (revision 273912) @@ -1,1579 +1,1579 @@ /*- * Copyright (c) 2009 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "clock_if.h" #include "iicbus_if.h" struct smu_cmd { volatile uint8_t cmd; uint8_t len; uint8_t data[254]; STAILQ_ENTRY(smu_cmd) cmd_q; }; STAILQ_HEAD(smu_cmdq, smu_cmd); struct smu_fan { struct pmac_fan fan; device_t dev; cell_t reg; enum { SMU_FAN_RPM, SMU_FAN_PWM } type; int setpoint; int old_style; int rpm; }; /* We can read the PWM and the RPM from a PWM controlled fan. * Offer both values via sysctl. 
*/ enum { SMU_PWM_SYSCTL_PWM = 1 << 8, SMU_PWM_SYSCTL_RPM = 2 << 8 }; struct smu_sensor { struct pmac_therm therm; device_t dev; cell_t reg; enum { SMU_CURRENT_SENSOR, SMU_VOLTAGE_SENSOR, SMU_POWER_SENSOR, SMU_TEMP_SENSOR } type; }; struct smu_softc { device_t sc_dev; struct mtx sc_mtx; struct resource *sc_memr; int sc_memrid; int sc_u3; bus_dma_tag_t sc_dmatag; bus_space_tag_t sc_bt; bus_space_handle_t sc_mailbox; struct smu_cmd *sc_cmd, *sc_cur_cmd; bus_addr_t sc_cmd_phys; bus_dmamap_t sc_cmd_dmamap; struct smu_cmdq sc_cmdq; struct smu_fan *sc_fans; int sc_nfans; int old_style_fans; struct smu_sensor *sc_sensors; int sc_nsensors; int sc_doorbellirqid; struct resource *sc_doorbellirq; void *sc_doorbellirqcookie; struct proc *sc_fanmgt_proc; time_t sc_lastuserchange; /* Calibration data */ uint16_t sc_cpu_diode_scale; int16_t sc_cpu_diode_offset; uint16_t sc_cpu_volt_scale; int16_t sc_cpu_volt_offset; uint16_t sc_cpu_curr_scale; int16_t sc_cpu_curr_offset; uint16_t sc_slots_pow_scale; int16_t sc_slots_pow_offset; struct cdev *sc_leddev; }; /* regular bus attachment functions */ static int smu_probe(device_t); static int smu_attach(device_t); static const struct ofw_bus_devinfo * smu_get_devinfo(device_t bus, device_t dev); /* cpufreq notification hooks */ static void smu_cpufreq_pre_change(device_t, const struct cf_level *level); static void smu_cpufreq_post_change(device_t, const struct cf_level *level); /* clock interface */ static int smu_gettime(device_t dev, struct timespec *ts); static int smu_settime(device_t dev, struct timespec *ts); /* utility functions */ static int smu_run_cmd(device_t dev, struct smu_cmd *cmd, int wait); static int smu_get_datablock(device_t dev, int8_t id, uint8_t *buf, size_t len); static void smu_attach_i2c(device_t dev, phandle_t i2croot); static void smu_attach_fans(device_t dev, phandle_t fanroot); static void smu_attach_sensors(device_t dev, phandle_t sensroot); static void smu_set_sleepled(void *xdev, int onoff); static int 
smu_server_mode(SYSCTL_HANDLER_ARGS); static void smu_doorbell_intr(void *xdev); static void smu_shutdown(void *xdev, int howto); /* where to find the doorbell GPIO */ static device_t smu_doorbell = NULL; static device_method_t smu_methods[] = { /* Device interface */ DEVMETHOD(device_probe, smu_probe), DEVMETHOD(device_attach, smu_attach), /* Clock interface */ DEVMETHOD(clock_gettime, smu_gettime), DEVMETHOD(clock_settime, smu_settime), /* ofw_bus interface */ DEVMETHOD(bus_child_pnpinfo_str,ofw_bus_gen_child_pnpinfo_str), DEVMETHOD(ofw_bus_get_devinfo, smu_get_devinfo), DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), { 0, 0 }, }; static driver_t smu_driver = { "smu", smu_methods, sizeof(struct smu_softc) }; static devclass_t smu_devclass; DRIVER_MODULE(smu, nexus, smu_driver, smu_devclass, 0, 0); static MALLOC_DEFINE(M_SMU, "smu", "SMU Sensor Information"); #define SMU_MAILBOX 0x8000860c #define SMU_FANMGT_INTERVAL 1000 /* ms */ /* Command types */ #define SMU_ADC 0xd8 #define SMU_FAN 0x4a #define SMU_RPM_STATUS 0x01 #define SMU_RPM_SETPOINT 0x02 #define SMU_PWM_STATUS 0x11 #define SMU_PWM_SETPOINT 0x12 #define SMU_I2C 0x9a #define SMU_I2C_SIMPLE 0x00 #define SMU_I2C_NORMAL 0x01 #define SMU_I2C_COMBINED 0x02 #define SMU_MISC 0xee #define SMU_MISC_GET_DATA 0x02 #define SMU_MISC_LED_CTRL 0x04 #define SMU_POWER 0xaa #define SMU_POWER_EVENTS 0x8f #define SMU_PWR_GET_POWERUP 0x00 #define SMU_PWR_SET_POWERUP 0x01 #define SMU_PWR_CLR_POWERUP 0x02 #define SMU_RTC 0x8e #define SMU_RTC_GET 0x81 #define SMU_RTC_SET 0x80 /* Power event types */ #define SMU_WAKEUP_KEYPRESS 0x01 #define SMU_WAKEUP_AC_INSERT 0x02 #define SMU_WAKEUP_AC_CHANGE 0x04 #define SMU_WAKEUP_RING 0x10 /* Data blocks */ #define SMU_CPUTEMP_CAL 0x18 #define SMU_CPUVOLT_CAL 0x21 #define 
SMU_SLOTPW_CAL 0x78 /* Partitions */ #define SMU_PARTITION 0x3e #define SMU_PARTITION_LATEST 0x01 #define SMU_PARTITION_BASE 0x02 #define SMU_PARTITION_UPDATE 0x03 static int smu_probe(device_t dev) { const char *name = ofw_bus_get_name(dev); if (strcmp(name, "smu") != 0) return (ENXIO); device_set_desc(dev, "Apple System Management Unit"); return (0); } static void smu_phys_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct smu_softc *sc = xsc; sc->sc_cmd_phys = segs[0].ds_addr; } static int smu_attach(device_t dev) { struct smu_softc *sc; phandle_t node, child; uint8_t data[12]; sc = device_get_softc(dev); mtx_init(&sc->sc_mtx, "smu", NULL, MTX_DEF); sc->sc_cur_cmd = NULL; sc->sc_doorbellirqid = -1; sc->sc_u3 = 0; if (OF_finddevice("/u3") != -1) sc->sc_u3 = 1; /* * Map the mailbox area. This should be determined from firmware, * but I have not found a simple way to do that. */ bus_dma_tag_create(NULL, 16, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, PAGE_SIZE, 1, PAGE_SIZE, 0, NULL, NULL, &(sc->sc_dmatag)); sc->sc_bt = &bs_le_tag; bus_space_map(sc->sc_bt, SMU_MAILBOX, 4, 0, &sc->sc_mailbox); /* * Allocate the command buffer. This can be anywhere in the low 4 GB * of memory. */ bus_dmamem_alloc(sc->sc_dmatag, (void **)&sc->sc_cmd, BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->sc_cmd_dmamap); bus_dmamap_load(sc->sc_dmatag, sc->sc_cmd_dmamap, sc->sc_cmd, PAGE_SIZE, smu_phys_callback, sc, 0); STAILQ_INIT(&sc->sc_cmdq); /* * Set up handlers to change CPU voltage when CPU frequency is changed. */ EVENTHANDLER_REGISTER(cpufreq_pre_change, smu_cpufreq_pre_change, dev, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(cpufreq_post_change, smu_cpufreq_post_change, dev, EVENTHANDLER_PRI_ANY); node = ofw_bus_get_node(dev); /* Some SMUs have RPM and PWM controlled fans which do not sit * under the same node. So we have to attach them separately. */ smu_attach_fans(dev, node); /* * Now detect and attach the other child devices. 
*/ for (child = OF_child(node); child != 0; child = OF_peer(child)) { char name[32]; memset(name, 0, sizeof(name)); OF_getprop(child, "name", name, sizeof(name)); if (strncmp(name, "sensors", 8) == 0) smu_attach_sensors(dev, child); if (strncmp(name, "smu-i2c-control", 15) == 0) smu_attach_i2c(dev, child); } /* Some SMUs have the I2C children directly under the bus. */ smu_attach_i2c(dev, node); /* * Collect calibration constants. */ smu_get_datablock(dev, SMU_CPUTEMP_CAL, data, sizeof(data)); sc->sc_cpu_diode_scale = (data[4] << 8) + data[5]; sc->sc_cpu_diode_offset = (data[6] << 8) + data[7]; smu_get_datablock(dev, SMU_CPUVOLT_CAL, data, sizeof(data)); sc->sc_cpu_volt_scale = (data[4] << 8) + data[5]; sc->sc_cpu_volt_offset = (data[6] << 8) + data[7]; sc->sc_cpu_curr_scale = (data[8] << 8) + data[9]; sc->sc_cpu_curr_offset = (data[10] << 8) + data[11]; smu_get_datablock(dev, SMU_SLOTPW_CAL, data, sizeof(data)); sc->sc_slots_pow_scale = (data[4] << 8) + data[5]; sc->sc_slots_pow_offset = (data[6] << 8) + data[7]; /* * Set up LED interface */ sc->sc_leddev = led_create(smu_set_sleepled, dev, "sleepled"); /* * Reset on power loss behavior */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "server_mode", CTLTYPE_INT | CTLFLAG_RW, dev, 0, smu_server_mode, "I", "Enable reboot after power failure"); /* * Set up doorbell interrupt. */ sc->sc_doorbellirqid = 0; sc->sc_doorbellirq = bus_alloc_resource_any(smu_doorbell, SYS_RES_IRQ, &sc->sc_doorbellirqid, RF_ACTIVE); bus_setup_intr(smu_doorbell, sc->sc_doorbellirq, INTR_TYPE_MISC | INTR_MPSAFE, NULL, smu_doorbell_intr, dev, &sc->sc_doorbellirqcookie); powerpc_config_intr(rman_get_start(sc->sc_doorbellirq), INTR_TRIGGER_EDGE, INTR_POLARITY_LOW); /* * Connect RTC interface. 
*/ clock_register(dev, 1000); /* * Learn about shutdown events */ EVENTHANDLER_REGISTER(shutdown_final, smu_shutdown, dev, SHUTDOWN_PRI_LAST); return (bus_generic_attach(dev)); } static const struct ofw_bus_devinfo * smu_get_devinfo(device_t bus, device_t dev) { return (device_get_ivars(dev)); } static void smu_send_cmd(device_t dev, struct smu_cmd *cmd) { struct smu_softc *sc; sc = device_get_softc(dev); mtx_assert(&sc->sc_mtx, MA_OWNED); if (sc->sc_u3) powerpc_pow_enabled = 0; /* SMU cannot work if we go to NAP */ sc->sc_cur_cmd = cmd; /* Copy the command to the mailbox */ sc->sc_cmd->cmd = cmd->cmd; sc->sc_cmd->len = cmd->len; memcpy(sc->sc_cmd->data, cmd->data, sizeof(cmd->data)); bus_dmamap_sync(sc->sc_dmatag, sc->sc_cmd_dmamap, BUS_DMASYNC_PREWRITE); bus_space_write_4(sc->sc_bt, sc->sc_mailbox, 0, sc->sc_cmd_phys); /* Flush the cacheline it is in -- SMU bypasses the cache */ __asm __volatile("sync; dcbf 0,%0; sync" :: "r"(sc->sc_cmd): "memory"); /* Ring SMU doorbell */ macgpio_write(smu_doorbell, GPIO_DDR_OUTPUT); } static void smu_doorbell_intr(void *xdev) { device_t smu; struct smu_softc *sc; int doorbell_ack; smu = xdev; doorbell_ack = macgpio_read(smu_doorbell); sc = device_get_softc(smu); if (doorbell_ack != (GPIO_DDR_OUTPUT | GPIO_LEVEL_RO | GPIO_DATA)) return; mtx_lock(&sc->sc_mtx); if (sc->sc_cur_cmd == NULL) /* spurious */ goto done; /* Check result. First invalidate the cache again... 
*/ __asm __volatile("dcbf 0,%0; sync" :: "r"(sc->sc_cmd) : "memory"); bus_dmamap_sync(sc->sc_dmatag, sc->sc_cmd_dmamap, BUS_DMASYNC_POSTREAD); sc->sc_cur_cmd->cmd = sc->sc_cmd->cmd; sc->sc_cur_cmd->len = sc->sc_cmd->len; memcpy(sc->sc_cur_cmd->data, sc->sc_cmd->data, sizeof(sc->sc_cmd->data)); wakeup(sc->sc_cur_cmd); sc->sc_cur_cmd = NULL; if (sc->sc_u3) powerpc_pow_enabled = 1; done: /* Queue next command if one is pending */ if (STAILQ_FIRST(&sc->sc_cmdq) != NULL) { sc->sc_cur_cmd = STAILQ_FIRST(&sc->sc_cmdq); STAILQ_REMOVE_HEAD(&sc->sc_cmdq, cmd_q); smu_send_cmd(smu, sc->sc_cur_cmd); } mtx_unlock(&sc->sc_mtx); } static int smu_run_cmd(device_t dev, struct smu_cmd *cmd, int wait) { struct smu_softc *sc; uint8_t cmd_code; int error; sc = device_get_softc(dev); cmd_code = cmd->cmd; mtx_lock(&sc->sc_mtx); if (sc->sc_cur_cmd != NULL) { STAILQ_INSERT_TAIL(&sc->sc_cmdq, cmd, cmd_q); } else smu_send_cmd(dev, cmd); mtx_unlock(&sc->sc_mtx); if (!wait) return (0); if (sc->sc_doorbellirqid < 0) { /* Poll if the IRQ has not been set up yet */ do { DELAY(50); smu_doorbell_intr(dev); } while (sc->sc_cur_cmd != NULL); } else { /* smu_doorbell_intr will wake us when the command is ACK'ed */ error = tsleep(cmd, 0, "smu", 800 * hz / 1000); if (error != 0) smu_doorbell_intr(dev); /* One last chance */ if (error != 0) { mtx_lock(&sc->sc_mtx); if (cmd->cmd == cmd_code) { /* Never processed */ /* Abort this command if we timed out */ if (sc->sc_cur_cmd == cmd) sc->sc_cur_cmd = NULL; else STAILQ_REMOVE(&sc->sc_cmdq, cmd, smu_cmd, cmd_q); mtx_unlock(&sc->sc_mtx); return (error); } error = 0; mtx_unlock(&sc->sc_mtx); } } /* SMU acks the command by inverting the command bits */ if (cmd->cmd == ((~cmd_code) & 0xff)) error = 0; else error = EIO; return (error); } static int smu_get_datablock(device_t dev, int8_t id, uint8_t *buf, size_t len) { struct smu_cmd cmd; uint8_t addr[4]; cmd.cmd = SMU_PARTITION; cmd.len = 2; cmd.data[0] = SMU_PARTITION_LATEST; cmd.data[1] = id; smu_run_cmd(dev, 
&cmd, 1); addr[0] = addr[1] = 0; addr[2] = cmd.data[0]; addr[3] = cmd.data[1]; cmd.cmd = SMU_MISC; cmd.len = 7; cmd.data[0] = SMU_MISC_GET_DATA; cmd.data[1] = sizeof(addr); memcpy(&cmd.data[2], addr, sizeof(addr)); cmd.data[6] = len; smu_run_cmd(dev, &cmd, 1); memcpy(buf, cmd.data, len); return (0); } static void smu_slew_cpu_voltage(device_t dev, int to) { struct smu_cmd cmd; cmd.cmd = SMU_POWER; cmd.len = 8; cmd.data[0] = 'V'; cmd.data[1] = 'S'; cmd.data[2] = 'L'; cmd.data[3] = 'E'; cmd.data[4] = 'W'; cmd.data[5] = 0xff; cmd.data[6] = 1; cmd.data[7] = to; smu_run_cmd(dev, &cmd, 1); } static void smu_cpufreq_pre_change(device_t dev, const struct cf_level *level) { /* * Make sure the CPU voltage is raised before we raise * the clock. */ if (level->rel_set[0].freq == 10000 /* max */) smu_slew_cpu_voltage(dev, 0); } static void smu_cpufreq_post_change(device_t dev, const struct cf_level *level) { /* We are safe to reduce CPU voltage after a downward transition */ if (level->rel_set[0].freq < 10000 /* max */) smu_slew_cpu_voltage(dev, 1); /* XXX: 1/4 voltage for 970MP? 
*/ } /* Routines for probing the SMU doorbell GPIO */ static int doorbell_probe(device_t dev); static int doorbell_attach(device_t dev); static device_method_t doorbell_methods[] = { /* Device interface */ DEVMETHOD(device_probe, doorbell_probe), DEVMETHOD(device_attach, doorbell_attach), { 0, 0 }, }; static driver_t doorbell_driver = { "smudoorbell", doorbell_methods, 0 }; static devclass_t doorbell_devclass; DRIVER_MODULE(smudoorbell, macgpio, doorbell_driver, doorbell_devclass, 0, 0); static int doorbell_probe(device_t dev) { const char *name = ofw_bus_get_name(dev); if (strcmp(name, "smu-doorbell") != 0) return (ENXIO); device_set_desc(dev, "SMU Doorbell GPIO"); device_quiet(dev); return (0); } static int doorbell_attach(device_t dev) { smu_doorbell = dev; return (0); } /* * Sensor and fan management */ static int smu_fan_check_old_style(struct smu_fan *fan) { device_t smu = fan->dev; struct smu_softc *sc = device_get_softc(smu); struct smu_cmd cmd; int error; if (sc->old_style_fans != -1) return (sc->old_style_fans); /* * Apple has two fan control mechanisms. We can't distinguish * them except by seeing if the new one fails. If the new one * fails, use the old one. */ cmd.cmd = SMU_FAN; cmd.len = 2; cmd.data[0] = 0x31; cmd.data[1] = fan->reg; do { error = smu_run_cmd(smu, &cmd, 1); } while (error == EWOULDBLOCK); sc->old_style_fans = (error != 0); return (sc->old_style_fans); } static int smu_fan_set_rpm(struct smu_fan *fan, int rpm) { device_t smu = fan->dev; struct smu_cmd cmd; int error; cmd.cmd = SMU_FAN; error = EIO; /* Clamp to allowed range */ rpm = max(fan->fan.min_rpm, rpm); rpm = min(fan->fan.max_rpm, rpm); smu_fan_check_old_style(fan); if (!fan->old_style) { cmd.len = 4; cmd.data[0] = 0x30; cmd.data[1] = fan->reg; cmd.data[2] = (rpm >> 8) & 0xff; cmd.data[3] = rpm & 0xff; error = smu_run_cmd(smu, &cmd, 1); if (error && error != EWOULDBLOCK) fan->old_style = 1; } else { cmd.len = 14; cmd.data[0] = 0x00; /* RPM fan. 
*/ cmd.data[1] = 1 << fan->reg; cmd.data[2 + 2*fan->reg] = (rpm >> 8) & 0xff; cmd.data[3 + 2*fan->reg] = rpm & 0xff; error = smu_run_cmd(smu, &cmd, 1); } if (error == 0) fan->setpoint = rpm; return (error); } static int smu_fan_read_rpm(struct smu_fan *fan) { device_t smu = fan->dev; struct smu_cmd cmd; int rpm, error; smu_fan_check_old_style(fan); if (!fan->old_style) { cmd.cmd = SMU_FAN; cmd.len = 2; cmd.data[0] = 0x31; cmd.data[1] = fan->reg; error = smu_run_cmd(smu, &cmd, 1); if (error && error != EWOULDBLOCK) fan->old_style = 1; rpm = (cmd.data[0] << 8) | cmd.data[1]; } if (fan->old_style) { cmd.cmd = SMU_FAN; cmd.len = 1; cmd.data[0] = SMU_RPM_STATUS; error = smu_run_cmd(smu, &cmd, 1); if (error) return (error); rpm = (cmd.data[fan->reg*2+1] << 8) | cmd.data[fan->reg*2+2]; } return (rpm); } static int smu_fan_set_pwm(struct smu_fan *fan, int pwm) { device_t smu = fan->dev; struct smu_cmd cmd; int error; cmd.cmd = SMU_FAN; error = EIO; /* Clamp to allowed range */ pwm = max(fan->fan.min_rpm, pwm); pwm = min(fan->fan.max_rpm, pwm); /* * Apple has two fan control mechanisms. We can't distinguish * them except by seeing if the new one fails. If the new one * fails, use the old one. */ if (!fan->old_style) { cmd.len = 4; cmd.data[0] = 0x30; cmd.data[1] = fan->reg; cmd.data[2] = (pwm >> 8) & 0xff; cmd.data[3] = pwm & 0xff; error = smu_run_cmd(smu, &cmd, 1); if (error && error != EWOULDBLOCK) fan->old_style = 1; } if (fan->old_style) { cmd.len = 14; cmd.data[0] = 0x10; /* PWM fan. 
*/ cmd.data[1] = 1 << fan->reg; cmd.data[2 + 2*fan->reg] = (pwm >> 8) & 0xff; cmd.data[3 + 2*fan->reg] = pwm & 0xff; error = smu_run_cmd(smu, &cmd, 1); } if (error == 0) fan->setpoint = pwm; return (error); } static int smu_fan_read_pwm(struct smu_fan *fan, int *pwm, int *rpm) { device_t smu = fan->dev; struct smu_cmd cmd; int error; if (!fan->old_style) { cmd.cmd = SMU_FAN; cmd.len = 2; cmd.data[0] = 0x31; cmd.data[1] = fan->reg; error = smu_run_cmd(smu, &cmd, 1); if (error && error != EWOULDBLOCK) fan->old_style = 1; *rpm = (cmd.data[0] << 8) | cmd.data[1]; } if (fan->old_style) { cmd.cmd = SMU_FAN; cmd.len = 1; cmd.data[0] = SMU_PWM_STATUS; error = smu_run_cmd(smu, &cmd, 1); if (error) return (error); *rpm = (cmd.data[fan->reg*2+1] << 8) | cmd.data[fan->reg*2+2]; } if (fan->old_style) { cmd.cmd = SMU_FAN; cmd.len = 14; cmd.data[0] = SMU_PWM_SETPOINT; cmd.data[1] = 1 << fan->reg; error = smu_run_cmd(smu, &cmd, 1); if (error) return (error); *pwm = cmd.data[fan->reg*2+2]; } return (0); } static int smu_fanrpm_sysctl(SYSCTL_HANDLER_ARGS) { device_t smu; struct smu_softc *sc; struct smu_fan *fan; int pwm = 0, rpm, error = 0; smu = arg1; sc = device_get_softc(smu); fan = &sc->sc_fans[arg2 & 0xff]; if (fan->type == SMU_FAN_RPM) { rpm = smu_fan_read_rpm(fan); if (rpm < 0) return (rpm); error = sysctl_handle_int(oidp, &rpm, 0, req); } else { error = smu_fan_read_pwm(fan, &pwm, &rpm); if (error < 0) return (EIO); switch (arg2 & 0xff00) { case SMU_PWM_SYSCTL_PWM: error = sysctl_handle_int(oidp, &pwm, 0, req); break; case SMU_PWM_SYSCTL_RPM: error = sysctl_handle_int(oidp, &rpm, 0, req); break; default: /* This should never happen */ return (EINVAL); }; } /* We can only read the RPM from a PWM controlled fan, so return. 
*/ if ((arg2 & 0xff00) == SMU_PWM_SYSCTL_RPM) return (0); if (error || !req->newptr) return (error); sc->sc_lastuserchange = time_uptime; if (fan->type == SMU_FAN_RPM) return (smu_fan_set_rpm(fan, rpm)); else return (smu_fan_set_pwm(fan, pwm)); } static void smu_fill_fan_prop(device_t dev, phandle_t child, int id) { struct smu_fan *fan; struct smu_softc *sc; char type[32]; sc = device_get_softc(dev); fan = &sc->sc_fans[id]; OF_getprop(child, "device_type", type, sizeof(type)); /* We have either RPM or PWM controlled fans. */ if (strcmp(type, "fan-rpm-control") == 0) fan->type = SMU_FAN_RPM; else fan->type = SMU_FAN_PWM; fan->dev = dev; fan->old_style = 0; OF_getprop(child, "reg", &fan->reg, sizeof(cell_t)); OF_getprop(child, "min-value", &fan->fan.min_rpm, sizeof(int)); OF_getprop(child, "max-value", &fan->fan.max_rpm, sizeof(int)); OF_getprop(child, "zone", &fan->fan.zone, sizeof(int)); if (OF_getprop(child, "unmanaged-value", &fan->fan.default_rpm, sizeof(int)) != sizeof(int)) fan->fan.default_rpm = fan->fan.max_rpm; OF_getprop(child, "location", fan->fan.name, sizeof(fan->fan.name)); if (fan->type == SMU_FAN_RPM) fan->setpoint = smu_fan_read_rpm(fan); else smu_fan_read_pwm(fan, &fan->setpoint, &fan->rpm); } /* On the first call count the number of fans. In the second call, * after allocating the fan struct, fill the properties of the fans. */ static int smu_count_fans(device_t dev) { struct smu_softc *sc; phandle_t child, node, root; int nfans = 0; node = ofw_bus_get_node(dev); sc = device_get_softc(dev); /* First find the fanroots and count the number of fans. */ for (root = OF_child(node); root != 0; root = OF_peer(root)) { char name[32]; memset(name, 0, sizeof(name)); OF_getprop(root, "name", name, sizeof(name)); if (strncmp(name, "rpm-fans", 9) == 0 || strncmp(name, "pwm-fans", 9) == 0 || strncmp(name, "fans", 5) == 0) for (child = OF_child(root); child != 0; child = OF_peer(child)) { nfans++; /* When allocated, fill the fan properties. 
*/ if (sc->sc_fans != NULL) { smu_fill_fan_prop(dev, child, nfans - 1); } } } if (nfans == 0) { device_printf(dev, "WARNING: No fans detected!\n"); return (0); } return (nfans); } static void smu_attach_fans(device_t dev, phandle_t fanroot) { struct smu_fan *fan; struct smu_softc *sc; struct sysctl_oid *oid, *fanroot_oid; struct sysctl_ctx_list *ctx; char sysctl_name[32]; int i, j; sc = device_get_softc(dev); /* Get the number of fans. */ sc->sc_nfans = smu_count_fans(dev); if (sc->sc_nfans == 0) return; /* Now we're able to allocate memory for the fans struct. */ sc->sc_fans = malloc(sc->sc_nfans * sizeof(struct smu_fan), M_SMU, M_WAITOK | M_ZERO); /* Now fill in the properties. */ smu_count_fans(dev); /* Register fans with pmac_thermal */ for (i = 0; i < sc->sc_nfans; i++) pmac_thermal_fan_register(&sc->sc_fans[i].fan); ctx = device_get_sysctl_ctx(dev); fanroot_oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fans", CTLFLAG_RD, 0, "SMU Fan Information"); /* Add sysctls */ for (i = 0; i < sc->sc_nfans; i++) { fan = &sc->sc_fans[i]; for (j = 0; j < strlen(fan->fan.name); j++) { sysctl_name[j] = tolower(fan->fan.name[j]); if (isspace(sysctl_name[j])) sysctl_name[j] = '_'; } sysctl_name[j] = 0; if (fan->type == SMU_FAN_RPM) { oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(fanroot_oid), OID_AUTO, sysctl_name, CTLFLAG_RD, 0, "Fan Information"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "minrpm", CTLTYPE_INT | CTLFLAG_RD, - &fan->fan.min_rpm, sizeof(int), + "minrpm", CTLFLAG_RD, + &fan->fan.min_rpm, 0, "Minimum allowed RPM"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "maxrpm", CTLTYPE_INT | CTLFLAG_RD, - &fan->fan.max_rpm, sizeof(int), + "maxrpm", CTLFLAG_RD, + &fan->fan.max_rpm, 0, "Maximum allowed RPM"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "rpm",CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, i, smu_fanrpm_sysctl, "I", "Fan RPM"); fan->fan.read = (int (*)(struct pmac_fan *))smu_fan_read_rpm; 
fan->fan.set = (int (*)(struct pmac_fan *, int))smu_fan_set_rpm; } else { oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(fanroot_oid), OID_AUTO, sysctl_name, CTLFLAG_RD, 0, "Fan Information"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "minpwm", CTLTYPE_INT | CTLFLAG_RD, - &fan->fan.min_rpm, sizeof(int), + "minpwm", CTLFLAG_RD, + &fan->fan.min_rpm, 0, "Minimum allowed PWM in %"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "maxpwm", CTLTYPE_INT | CTLFLAG_RD, - &fan->fan.max_rpm, sizeof(int), + "maxpwm", CTLFLAG_RD, + &fan->fan.max_rpm, 0, "Maximum allowed PWM in %"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "pwm",CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, SMU_PWM_SYSCTL_PWM | i, smu_fanrpm_sysctl, "I", "Fan PWM in %"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "rpm",CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, SMU_PWM_SYSCTL_RPM | i, smu_fanrpm_sysctl, "I", "Fan RPM"); fan->fan.read = NULL; fan->fan.set = (int (*)(struct pmac_fan *, int))smu_fan_set_pwm; } if (bootverbose) device_printf(dev, "Fan: %s type: %d\n", fan->fan.name, fan->type); } } static int smu_sensor_read(struct smu_sensor *sens) { device_t smu = sens->dev; struct smu_cmd cmd; struct smu_softc *sc; int64_t value; int error; cmd.cmd = SMU_ADC; cmd.len = 1; cmd.data[0] = sens->reg; error = 0; error = smu_run_cmd(smu, &cmd, 1); if (error != 0) return (-1); sc = device_get_softc(smu); value = (cmd.data[0] << 8) | cmd.data[1]; switch (sens->type) { case SMU_TEMP_SENSOR: value *= sc->sc_cpu_diode_scale; value >>= 3; value += ((int64_t)sc->sc_cpu_diode_offset) << 9; value <<= 1; /* Convert from 16.16 fixed point degC into integer 0.1 K. */ value = 10*(value >> 16) + ((10*(value & 0xffff)) >> 16) + 2732; break; case SMU_VOLTAGE_SENSOR: value *= sc->sc_cpu_volt_scale; value += sc->sc_cpu_volt_offset; value <<= 4; /* Convert from 16.16 fixed point V into mV. 
*/ value *= 15625; value /= 1024; value /= 1000; break; case SMU_CURRENT_SENSOR: value *= sc->sc_cpu_curr_scale; value += sc->sc_cpu_curr_offset; value <<= 4; /* Convert from 16.16 fixed point A into mA. */ value *= 15625; value /= 1024; value /= 1000; break; case SMU_POWER_SENSOR: value *= sc->sc_slots_pow_scale; value += sc->sc_slots_pow_offset; value <<= 4; /* Convert from 16.16 fixed point W into mW. */ value *= 15625; value /= 1024; value /= 1000; break; } return (value); } static int smu_sensor_sysctl(SYSCTL_HANDLER_ARGS) { device_t smu; struct smu_softc *sc; struct smu_sensor *sens; int value, error; smu = arg1; sc = device_get_softc(smu); sens = &sc->sc_sensors[arg2]; value = smu_sensor_read(sens); if (value < 0) return (EBUSY); error = sysctl_handle_int(oidp, &value, 0, req); return (error); } static void smu_attach_sensors(device_t dev, phandle_t sensroot) { struct smu_sensor *sens; struct smu_softc *sc; struct sysctl_oid *sensroot_oid; struct sysctl_ctx_list *ctx; phandle_t child; char type[32]; int i; sc = device_get_softc(dev); sc->sc_nsensors = 0; for (child = OF_child(sensroot); child != 0; child = OF_peer(child)) sc->sc_nsensors++; if (sc->sc_nsensors == 0) { device_printf(dev, "WARNING: No sensors detected!\n"); return; } sc->sc_sensors = malloc(sc->sc_nsensors * sizeof(struct smu_sensor), M_SMU, M_WAITOK | M_ZERO); sens = sc->sc_sensors; sc->sc_nsensors = 0; ctx = device_get_sysctl_ctx(dev); sensroot_oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "sensors", CTLFLAG_RD, 0, "SMU Sensor Information"); for (child = OF_child(sensroot); child != 0; child = OF_peer(child)) { char sysctl_name[40], sysctl_desc[40]; const char *units; sens->dev = dev; OF_getprop(child, "device_type", type, sizeof(type)); if (strcmp(type, "current-sensor") == 0) { sens->type = SMU_CURRENT_SENSOR; units = "mA"; } else if (strcmp(type, "temp-sensor") == 0) { sens->type = SMU_TEMP_SENSOR; units = "C"; } else if (strcmp(type, "voltage-sensor") 
== 0) { sens->type = SMU_VOLTAGE_SENSOR; units = "mV"; } else if (strcmp(type, "power-sensor") == 0) { sens->type = SMU_POWER_SENSOR; units = "mW"; } else { continue; } OF_getprop(child, "reg", &sens->reg, sizeof(cell_t)); OF_getprop(child, "zone", &sens->therm.zone, sizeof(int)); OF_getprop(child, "location", sens->therm.name, sizeof(sens->therm.name)); for (i = 0; i < strlen(sens->therm.name); i++) { sysctl_name[i] = tolower(sens->therm.name[i]); if (isspace(sysctl_name[i])) sysctl_name[i] = '_'; } sysctl_name[i] = 0; sprintf(sysctl_desc,"%s (%s)", sens->therm.name, units); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(sensroot_oid), OID_AUTO, sysctl_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, sc->sc_nsensors, smu_sensor_sysctl, (sens->type == SMU_TEMP_SENSOR) ? "IK" : "I", sysctl_desc); if (sens->type == SMU_TEMP_SENSOR) { /* Make up some numbers */ sens->therm.target_temp = 500 + 2732; /* 50 C */ sens->therm.max_temp = 900 + 2732; /* 90 C */ sens->therm.read = (int (*)(struct pmac_therm *))smu_sensor_read; pmac_thermal_sensor_register(&sens->therm); } sens++; sc->sc_nsensors++; } } static void smu_set_sleepled(void *xdev, int onoff) { static struct smu_cmd cmd; device_t smu = xdev; cmd.cmd = SMU_MISC; cmd.len = 3; cmd.data[0] = SMU_MISC_LED_CTRL; cmd.data[1] = 0; cmd.data[2] = onoff; smu_run_cmd(smu, &cmd, 0); } static int smu_server_mode(SYSCTL_HANDLER_ARGS) { struct smu_cmd cmd; u_int server_mode; device_t smu = arg1; int error; cmd.cmd = SMU_POWER_EVENTS; cmd.len = 1; cmd.data[0] = SMU_PWR_GET_POWERUP; error = smu_run_cmd(smu, &cmd, 1); if (error) return (error); server_mode = (cmd.data[1] & SMU_WAKEUP_AC_INSERT) ? 
1 : 0; error = sysctl_handle_int(oidp, &server_mode, 0, req); if (error || !req->newptr) return (error); if (server_mode == 1) cmd.data[0] = SMU_PWR_SET_POWERUP; else if (server_mode == 0) cmd.data[0] = SMU_PWR_CLR_POWERUP; else return (EINVAL); cmd.len = 3; cmd.data[1] = 0; cmd.data[2] = SMU_WAKEUP_AC_INSERT; return (smu_run_cmd(smu, &cmd, 1)); } static void smu_shutdown(void *xdev, int howto) { device_t smu = xdev; struct smu_cmd cmd; cmd.cmd = SMU_POWER; if (howto & RB_HALT) strcpy(cmd.data, "SHUTDOWN"); else strcpy(cmd.data, "RESTART"); cmd.len = strlen(cmd.data); smu_run_cmd(smu, &cmd, 1); for (;;); } static int smu_gettime(device_t dev, struct timespec *ts) { struct smu_cmd cmd; struct clocktime ct; cmd.cmd = SMU_RTC; cmd.len = 1; cmd.data[0] = SMU_RTC_GET; if (smu_run_cmd(dev, &cmd, 1) != 0) return (ENXIO); ct.nsec = 0; ct.sec = bcd2bin(cmd.data[0]); ct.min = bcd2bin(cmd.data[1]); ct.hour = bcd2bin(cmd.data[2]); ct.dow = bcd2bin(cmd.data[3]); ct.day = bcd2bin(cmd.data[4]); ct.mon = bcd2bin(cmd.data[5]); ct.year = bcd2bin(cmd.data[6]) + 2000; return (clock_ct_to_ts(&ct, ts)); } static int smu_settime(device_t dev, struct timespec *ts) { static struct smu_cmd cmd; struct clocktime ct; cmd.cmd = SMU_RTC; cmd.len = 8; cmd.data[0] = SMU_RTC_SET; clock_ts_to_ct(ts, &ct); cmd.data[1] = bin2bcd(ct.sec); cmd.data[2] = bin2bcd(ct.min); cmd.data[3] = bin2bcd(ct.hour); cmd.data[4] = bin2bcd(ct.dow); cmd.data[5] = bin2bcd(ct.day); cmd.data[6] = bin2bcd(ct.mon); cmd.data[7] = bin2bcd(ct.year - 2000); return (smu_run_cmd(dev, &cmd, 0)); } /* SMU I2C Interface */ static int smuiic_probe(device_t dev); static int smuiic_attach(device_t dev); static int smuiic_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs); static phandle_t smuiic_get_node(device_t bus, device_t dev); static device_method_t smuiic_methods[] = { /* device interface */ DEVMETHOD(device_probe, smuiic_probe), DEVMETHOD(device_attach, smuiic_attach), /* iicbus interface */ DEVMETHOD(iicbus_callback, 
iicbus_null_callback), DEVMETHOD(iicbus_transfer, smuiic_transfer), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, smuiic_get_node), { 0, 0 } }; struct smuiic_softc { struct mtx sc_mtx; volatile int sc_iic_inuse; int sc_busno; }; static driver_t smuiic_driver = { "iichb", smuiic_methods, sizeof(struct smuiic_softc) }; static devclass_t smuiic_devclass; DRIVER_MODULE(smuiic, smu, smuiic_driver, smuiic_devclass, 0, 0); static void smu_attach_i2c(device_t smu, phandle_t i2croot) { phandle_t child; device_t cdev; struct ofw_bus_devinfo *dinfo; char name[32]; for (child = OF_child(i2croot); child != 0; child = OF_peer(child)) { if (OF_getprop(child, "name", name, sizeof(name)) <= 0) continue; if (strcmp(name, "i2c-bus") != 0 && strcmp(name, "i2c") != 0) continue; dinfo = malloc(sizeof(struct ofw_bus_devinfo), M_SMU, M_WAITOK | M_ZERO); if (ofw_bus_gen_setup_devinfo(dinfo, child) != 0) { free(dinfo, M_SMU); continue; } cdev = device_add_child(smu, NULL, -1); if (cdev == NULL) { device_printf(smu, "<%s>: device_add_child failed\n", dinfo->obd_name); ofw_bus_gen_destroy_devinfo(dinfo); free(dinfo, M_SMU); continue; } device_set_ivars(cdev, dinfo); } } static int smuiic_probe(device_t dev) { const char *name; name = ofw_bus_get_name(dev); if (name == NULL) return (ENXIO); if (strcmp(name, "i2c-bus") == 0 || strcmp(name, "i2c") == 0) { device_set_desc(dev, "SMU I2C controller"); return (0); } return (ENXIO); } static int smuiic_attach(device_t dev) { struct smuiic_softc *sc = device_get_softc(dev); mtx_init(&sc->sc_mtx, "smuiic", NULL, MTX_DEF); sc->sc_iic_inuse = 0; /* Get our bus number */ OF_getprop(ofw_bus_get_node(dev), "reg", &sc->sc_busno, sizeof(sc->sc_busno)); /* Add the IIC bus layer */ device_add_child(dev, "iicbus", -1); return (bus_generic_attach(dev)); } static int smuiic_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs) { struct smuiic_softc *sc = device_get_softc(dev); struct smu_cmd cmd; int i, j, error; mtx_lock(&sc->sc_mtx); while 
(sc->sc_iic_inuse) mtx_sleep(sc, &sc->sc_mtx, 0, "smuiic", 100); sc->sc_iic_inuse = 1; error = 0; for (i = 0; i < nmsgs; i++) { cmd.cmd = SMU_I2C; cmd.data[0] = sc->sc_busno; if (msgs[i].flags & IIC_M_NOSTOP) cmd.data[1] = SMU_I2C_COMBINED; else cmd.data[1] = SMU_I2C_SIMPLE; cmd.data[2] = msgs[i].slave; if (msgs[i].flags & IIC_M_RD) cmd.data[2] |= 1; if (msgs[i].flags & IIC_M_NOSTOP) { KASSERT(msgs[i].len < 4, ("oversize I2C combined message")); cmd.data[3] = min(msgs[i].len, 3); memcpy(&cmd.data[4], msgs[i].buf, min(msgs[i].len, 3)); i++; /* Advance to next part of message */ } else { cmd.data[3] = 0; memset(&cmd.data[4], 0, 3); } cmd.data[7] = msgs[i].slave; if (msgs[i].flags & IIC_M_RD) cmd.data[7] |= 1; cmd.data[8] = msgs[i].len; if (msgs[i].flags & IIC_M_RD) { memset(&cmd.data[9], 0xff, msgs[i].len); cmd.len = 9; } else { memcpy(&cmd.data[9], msgs[i].buf, msgs[i].len); cmd.len = 9 + msgs[i].len; } mtx_unlock(&sc->sc_mtx); smu_run_cmd(device_get_parent(dev), &cmd, 1); mtx_lock(&sc->sc_mtx); for (j = 0; j < 10; j++) { cmd.cmd = SMU_I2C; cmd.len = 1; cmd.data[0] = 0; memset(&cmd.data[1], 0xff, msgs[i].len); mtx_unlock(&sc->sc_mtx); smu_run_cmd(device_get_parent(dev), &cmd, 1); mtx_lock(&sc->sc_mtx); if (!(cmd.data[0] & 0x80)) break; mtx_sleep(sc, &sc->sc_mtx, 0, "smuiic", 10); } if (cmd.data[0] & 0x80) { error = EIO; msgs[i].len = 0; goto exit; } memcpy(msgs[i].buf, &cmd.data[1], msgs[i].len); msgs[i].len = cmd.len - 1; } exit: sc->sc_iic_inuse = 0; mtx_unlock(&sc->sc_mtx); wakeup(sc); return (error); } static phandle_t smuiic_get_node(device_t bus, device_t dev) { return (ofw_bus_get_node(bus)); } Index: stable/9/sys/powerpc/powerpc/busdma_machdep.c =================================================================== --- stable/9/sys/powerpc/powerpc/busdma_machdep.c (revision 273911) +++ stable/9/sys/powerpc/powerpc/busdma_machdep.c (revision 273912) @@ -1,1186 +1,1186 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* * From amd64/busdma_machdep.c, r204214 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iommu_if.h" #define MAX_BPAGES MIN(8192, physmem/40) struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_size_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; struct bounce_zone *bounce_zone; device_t iommu; void *iommu_cookie; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ bus_addr_t dataaddr; /* client physical address */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dma_segment_t *segments; int nsegs; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) 
links; int contigalloc; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (dmat->filter == NULL && dmat->iommu == NULL && paddr > dmat->lowaddr && paddr <= dmat->highaddr) retval = 1; if (dmat->filter == NULL && (paddr & (dmat->alignment - 1)) != 0) retval = 1; if (dmat->filter != NULL && (*dmat->filter)(dmat->filterarg, paddr) != 0) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. 
It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { panic("driver error: busdma dflt_lock called"); } #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_size_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Basic sanity checking */ if (boundary != 0 && boundary < maxsegsz) maxsegsz = boundary; if (maxsegsz == 0) { return (EINVAL); } /* Return a NULL tag on failure */ *dmat = NULL; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, 
newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); newtag->iommu = parent->iommu; newtag->iommu_cookie = parent->iommu_cookie; } if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) && newtag->iommu == NULL) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } if (error != 0) { free(newtag, M_DEVBUF); } else { *dmat = newtag; } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. 
*/ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { int error; error = 0; *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, M_NOWAIT | M_ZERO); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) return (error); } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. 
*/ if (dmat->alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem -atop(dmat->lowaddr)); if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } else { error = 0; } } bz->map_count++; } (*mapp)->nsegs = 0; (*mapp)->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, M_NOWAIT); if ((*mapp)->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (dmat->flags & BUS_DMA_COULD_BOUNCE) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; } free(map->segments, M_DEVBUF); free(map, M_DEVBUF); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. 
*/
/*
 * BUS_DMA_NOWAIT selects a non-sleeping allocation and BUS_DMA_ZERO asks
 * for zero-filled memory.  On success 0 is returned, *vaddr holds the
 * kernel virtual address of the memory and *mapp the new map.  If the map
 * or the memory cannot be allocated ENOMEM is returned, the map (if any)
 * is destroyed again and both *vaddr and *mapp are NULL.
 */
int
bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
		 bus_dmamap_t *mapp)
{
	vm_memattr_t attr;
	int mflags;

	if (flags & BUS_DMA_NOWAIT)
		mflags = M_NOWAIT;
	else
		mflags = M_WAITOK;

	/*
	 * The map is dereferenced further down and again by
	 * bus_dmamem_free(), so a map that could not be allocated has to
	 * fail the request here.  Any other error from bus_dmamap_create()
	 * is ignored, as it always was.
	 */
	(void)bus_dmamap_create(dmat, flags, mapp);
	if (*mapp == NULL) {
		*vaddr = NULL;
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
		    __func__, dmat, dmat->flags, ENOMEM);
		return (ENOMEM);
	}

	if (flags & BUS_DMA_ZERO)
		mflags |= M_ZERO;
#ifdef NOTYET
	if (flags & BUS_DMA_NOCACHE)
		attr = VM_MEMATTR_UNCACHEABLE;
	else
#endif
		attr = VM_MEMATTR_DEFAULT;

	/*
	 * XXX:
	 * (dmat->alignment < dmat->maxsize) is just a quick hack; the exact
	 * alignment guarantees of malloc need to be nailed down, and the
	 * code below should be rewritten to take that into account.
	 *
	 * In the meantime, we'll warn the user if malloc gets it wrong.
	 */
	if ((dmat->maxsize <= PAGE_SIZE) &&
	   (dmat->alignment < dmat->maxsize) &&
	    dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags);
	} else {
		/*
		 * XXX Use Contigmalloc until it is merged into this facility
		 *     and handles multi-seg allocations.  Nobody is doing
		 *     multi-seg allocations yet though.
		 * XXX Certain AGP hardware does.
		 */
		*vaddr = (void *)kmem_alloc_contig(kernel_map, dmat->maxsize,
		    mflags, 0ul, dmat->lowaddr, dmat->alignment ?
		    dmat->alignment : 1ul, dmat->boundary, attr);
		/* Remembered so bus_dmamem_free() picks the matching free. */
		(*mapp)->contigalloc = 1;
	}
	if (*vaddr == NULL) {
		/* Do not leak the map created above. */
		bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
		    __func__, dmat, dmat->flags, ENOMEM);
		return (ENOMEM);
	} else if (dmat->alignment > 1 &&
	    (vtophys(*vaddr) & (dmat->alignment - 1)) != 0) {
		/*
		 * Only meaningful for a real alignment: with an alignment
		 * of 0 the mask would be all ones and the warning would
		 * fire for every allocation.
		 */
		printf("bus_dmamem_alloc failed to align memory properly.\n");
	}
	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
	    __func__, dmat, dmat->flags, 0);
	return (0);
}

/*
 * Free a piece of memory and its associated dmamap, that was allocated
 * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
*/ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { if (!map->contigalloc) free(vaddr, M_DEVBUF); else kmem_free(kernel_map, (vm_offset_t)vaddr, dmat->maxsize); bus_dmamap_destroy(dmat, map); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { bus_size_t sg_len; sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (run_filter(dmat, paddr) != 0) { sg_len = roundup2(sg_len, dmat->alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", 
map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. 
*/ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_size_t sgsize; int error; if (segs == NULL) segs = map->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; vm_offset_t vaddr; int error; if (segs == NULL) segs = map->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } vaddr = (vm_offset_t)buf; while (buflen > 0) { bus_size_t max_sgsize; /* * Get the physical address for this segment. */ if (pmap == kernel_pmap) curaddr = pmap_kextract(vaddr); else curaddr = pmap_extract(pmap, vaddr); /* * Compute the segment size, and adjust counts. 
*/ max_sgsize = MIN(buflen, dmat->maxsegsz); sgsize = PAGE_SIZE - ((vm_offset_t)curaddr & PAGE_MASK); if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } void __bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if (dmat->flags & BUS_DMA_COULD_BOUNCE) { map->dmat = dmat; map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { map->nsegs = nsegs; if (segs != NULL) memcpy(map->segments, segs, map->nsegs*sizeof(segs[0])); if (dmat->iommu != NULL) IOMMU_MAP(dmat->iommu, map->segments, &map->nsegs, dmat->lowaddr, dmat->highaddr, dmat->alignment, dmat->boundary, dmat->iommu_cookie); if (segs != NULL) memcpy(segs, map->segments, map->nsegs*sizeof(segs[0])); else segs = map->segments; return (segs); } /* * Release the mapping held by map. */ void _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; if (dmat->iommu) { IOMMU_UNMAP(dmat->iommu, map->segments, map->nsegs, dmat->iommu_cookie); map->nsegs = 0; } while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } } void _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { /* * Handle data bouncing. 
We might also * want to add support for invalidating * the caches on broken hardware */ /* Trace arguments must follow the format string: tag, tag flags, op. */ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->flags, op); if (op & BUS_DMASYNC_PREWRITE) { /* Copy client data into every bounce page before the device reads it. */ while (bpage != NULL) { if (bpage->datavaddr != 0) bcopy((void *)bpage->datavaddr, (void *)bpage->vaddr, bpage->datacount); else physcopyout(bpage->dataaddr, (void *)bpage->vaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { /* * Restart from the head of the list: the PREWRITE loop above * leaves bpage at NULL, which would otherwise skip this copy. */ bpage = STAILQ_FIRST(&map->bpages); while (bpage != NULL) { if (bpage->datavaddr != 0) bcopy((void *)bpage->vaddr, (void *)bpage->datavaddr, bpage->datacount); else physcopyin((void *)bpage->vaddr, bpage->dataaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } } /* * SYSINIT hook: reset the global bounce page counter and initialise the * zone, waiting and callback lists together with bounce_lock. */ static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); /* Accessor: sysctl context list owned by a bounce zone. */ static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } /* Accessor: top-level sysctl node of a bounce zone. */ static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } /* * Attach dmat to a bounce zone. An existing zone is reused when its * alignment is at least dmat->alignment and its lowaddr does not exceed * dmat->lowaddr; otherwise a new zone is allocated and registered. * Returns 0 on success or ENOMEM when the zone cannot be allocated. */ static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), + SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "alignment", CTLFLAG_RD, &bz->alignment, 0, ""); + "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count 
= 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } /* * Reserve free pages of the tag's bounce zone for map; bounce_lock must be * held. Returns the number of pages the map still lacks (0 when fully * reserved). With commit == 0 nothing is reserved unless the whole * outstanding need can be met. */ static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } /* * Take one reserved page from the tag's bounce zone, record the client * buffer (vaddr, addr, size) in it, append it to map->bpages and return its * bus address. Panics when the map has no outstanding need, no reserved * page, or the zone's page list is empty. */ static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; /* Distinct message so a missing reservation is not reported as "no need". */ if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't have any reserved pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved.
*/ bpage->vaddr |= vaddr & PAGE_MASK; bpage->busaddr |= vaddr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->dataaddr = addr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } int bus_dma_tag_set_iommu(bus_dma_tag_t tag, struct device *iommu, void *cookie) { tag->iommu = iommu; tag->iommu_cookie = cookie; return (0); } Index: stable/9/sys/powerpc/powerpc/cpu.c =================================================================== --- stable/9/sys/powerpc/powerpc/cpu.c (revision 273911) +++ stable/9/sys/powerpc/powerpc/cpu.c 
(revision 273912) @@ -1,591 +1,591 @@ /*- * Copyright (c) 2001 Matt Thomas. * Copyright (c) 2001 Tsubai Masanari. * Copyright (c) 1998, 1999, 2001 Internet Research Institute, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by * Internet Research Institute, Inc. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2003 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * from $NetBSD: cpu_subr.c,v 1.1 2003/02/03 17:10:09 matt Exp $ * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void cpu_6xx_setup(int cpuid, uint16_t vers); static void cpu_e500_setup(int cpuid, uint16_t vers); static void cpu_970_setup(int cpuid, uint16_t vers); int powerpc_pow_enabled; void (*cpu_idle_hook)(void) = NULL; static void cpu_idle_60x(void); static void cpu_idle_e500(void); struct cputab { const char *name; uint16_t version; uint16_t revfmt; int features; /* Do not include PPC_FEATURE_32 or * PPC_FEATURE_HAS_MMU */ void (*cpu_setup)(int cpuid, uint16_t vers); }; #define REVFMT_MAJMIN 1 /* %u.%u */ #define REVFMT_HEX 2 /* 0x%04x */ #define REVFMT_DEC 3 /* %u */ static const struct cputab models[] = { { "Motorola PowerPC 601", MPC601, REVFMT_DEC, PPC_FEATURE_HAS_FPU | PPC_FEATURE_UNIFIED_CACHE, cpu_6xx_setup }, { "Motorola PowerPC 602", MPC602, REVFMT_DEC, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 603", MPC603, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 603e", MPC603e, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 603ev", MPC603ev, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 604", MPC604, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 604ev", MPC604ev, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 620", MPC620, REVFMT_HEX, PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, NULL }, { "Motorola PowerPC 750", MPC750, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "IBM PowerPC 750FX", IBM750FX, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "IBM PowerPC 970", IBM970, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_970_setup }, { "IBM PowerPC 970FX", IBM970FX, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_970_setup }, { "IBM PowerPC 
970GX", IBM970GX, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_970_setup }, { "IBM PowerPC 970MP", IBM970MP, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_970_setup }, { "Motorola PowerPC 7400", MPC7400, REVFMT_MAJMIN, PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 7410", MPC7410, REVFMT_MAJMIN, PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 7450", MPC7450, REVFMT_MAJMIN, PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 7455", MPC7455, REVFMT_MAJMIN, PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 7457", MPC7457, REVFMT_MAJMIN, PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 7447A", MPC7447A, REVFMT_MAJMIN, PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 7448", MPC7448, REVFMT_MAJMIN, PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 8240", MPC8240, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Motorola PowerPC 8245", MPC8245, REVFMT_MAJMIN, PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, { "Freescale e500v1 core", FSL_E500v1, REVFMT_MAJMIN, 0, cpu_e500_setup }, { "Freescale e500v2 core", FSL_E500v2, REVFMT_MAJMIN, 0, cpu_e500_setup }, { "IBM Cell Broadband Engine", IBMCELLBE, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, NULL}, { "Unknown PowerPC CPU", 0, REVFMT_HEX, 0, NULL }, }; static void cpu_6xx_print_cacheinfo(u_int, uint16_t); static int cpu_feature_bit(SYSCTL_HANDLER_ARGS); static char model[64]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, model, 0, ""); int cpu_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU; -SYSCTL_OPAQUE(_hw, OID_AUTO, cpu_features, CTLTYPE_INT | CTLFLAG_RD, +SYSCTL_OPAQUE(_hw, OID_AUTO, cpu_features, CTLFLAG_RD, &cpu_features, sizeof(cpu_features), "IX", "PowerPC CPU features"); 
/* Provide some user-friendly aliases for bits in cpu_features */ SYSCTL_PROC(_hw, OID_AUTO, floatingpoint, CTLTYPE_INT | CTLFLAG_RD, 0, PPC_FEATURE_HAS_FPU, cpu_feature_bit, "I", "Floating point instructions executed in hardware"); SYSCTL_PROC(_hw, OID_AUTO, altivec, CTLTYPE_INT | CTLFLAG_RD, 0, PPC_FEATURE_HAS_ALTIVEC, cpu_feature_bit, "I", "CPU supports Altivec"); void cpu_setup(u_int cpuid) { u_int pvr, maj, min; uint16_t vers, rev, revfmt; uint64_t cps; const struct cputab *cp; const char *name; pvr = mfpvr(); vers = pvr >> 16; rev = pvr; switch (vers) { case MPC7410: min = (pvr >> 0) & 0xff; maj = min <= 4 ? 1 : 2; break; case FSL_E500v1: case FSL_E500v2: maj = (pvr >> 4) & 0xf; min = (pvr >> 0) & 0xf; break; default: maj = (pvr >> 8) & 0xf; min = (pvr >> 0) & 0xf; } for (cp = models; cp->version != 0; cp++) { if (cp->version == vers) break; } revfmt = cp->revfmt; name = cp->name; if (rev == MPC750 && pvr == 15) { name = "Motorola MPC755"; revfmt = REVFMT_HEX; } strncpy(model, name, sizeof(model) - 1); printf("cpu%d: %s revision ", cpuid, name); switch (revfmt) { case REVFMT_MAJMIN: printf("%u.%u", maj, min); break; case REVFMT_HEX: printf("0x%04x", rev); break; case REVFMT_DEC: printf("%u", rev); break; } if (cpu_est_clockrate(0, &cps) == 0) printf(", %jd.%02jd MHz", cps / 1000000, (cps / 10000) % 100); printf("\n"); cpu_features |= cp->features; printf("cpu%d: Features %b\n", cpuid, cpu_features, PPC_FEATURE_BITMASK); /* * Configure CPU */ if (cp->cpu_setup != NULL) cp->cpu_setup(cpuid, vers); } /* Get current clock frequency for the given cpu id. 
*/ /* * The rate is measured by selecting the cycle event (PMCN_CYCLES or * PMC970N_CYCLES) on PMC1, waiting DELAY(1000) with PSL_EE cleared in the * MSR, and scaling the counter value into *cps. Returns 0 on success or * ENXIO for CPU versions without a known counter setup; the saved MSR is * restored on every return path. cpu_id is not used. */ int cpu_est_clockrate(int cpu_id, uint64_t *cps) { uint16_t vers; register_t msr; vers = mfpvr() >> 16; msr = mfmsr(); mtmsr(msr & ~PSL_EE); switch (vers) { case MPC7450: case MPC7455: case MPC7457: case MPC750: case IBM750FX: case MPC7400: case MPC7410: case MPC7447A: case MPC7448: mtspr(SPR_MMCR0, SPR_MMCR0_FC); mtspr(SPR_PMC1, 0); mtspr(SPR_MMCR0, SPR_MMCR0_PMC1SEL(PMCN_CYCLES)); DELAY(1000); *cps = (mfspr(SPR_PMC1) * 1000) + 4999; mtspr(SPR_MMCR0, SPR_MMCR0_FC); mtmsr(msr); return (0); case IBM970: case IBM970FX: case IBM970MP: isync(); mtspr(SPR_970MMCR0, SPR_MMCR0_FC); isync(); mtspr(SPR_970MMCR1, 0); mtspr(SPR_970MMCRA, 0); mtspr(SPR_970PMC1, 0); mtspr(SPR_970MMCR0, SPR_970MMCR0_PMC1SEL(PMC970N_CYCLES)); isync(); DELAY(1000); powerpc_sync(); mtspr(SPR_970MMCR0, SPR_MMCR0_FC); *cps = (mfspr(SPR_970PMC1) * 1000) + 4999; mtmsr(msr); return (0); } /* Unsupported CPU: put the MSR back so PSL_EE is not left cleared. */ mtmsr(msr); return (ENXIO); } /* * Program HID0 for the given CPU version: choose a power-saving mode and * enable the per-model cache and branch features selected below. */ void cpu_6xx_setup(int cpuid, uint16_t vers) { register_t hid0, pvr; const char *bitmask; hid0 = mfspr(SPR_HID0); pvr = mfpvr(); /* * Configure power-saving mode. */ switch (vers) { case MPC603: case MPC603e: case MPC603ev: case MPC604ev: case MPC750: case IBM750FX: case MPC7400: case MPC7410: case MPC8240: case MPC8245: /* Select DOZE mode. */ hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP); hid0 |= HID0_DOZE | HID0_DPM; powerpc_pow_enabled = 1; break; case MPC7448: case MPC7447A: case MPC7457: case MPC7455: case MPC7450: /* Enable the 7450 branch caches */ hid0 |= HID0_SGE | HID0_BTIC; hid0 |= HID0_LRSTK | HID0_FOLD | HID0_BHT; /* Disable BTIC on 7450 Rev 2.0 or earlier and on 7457 */ if (((pvr >> 16) == MPC7450 && (pvr & 0xFFFF) <= 0x0200) || (pvr >> 16) == MPC7457) hid0 &= ~HID0_BTIC; /* Select NAP mode. */ hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP); hid0 |= HID0_NAP | HID0_DPM; powerpc_pow_enabled = 1; break; default: /* No power-saving mode is available. */ ; } switch (vers) { case IBM750FX: case MPC750: hid0 &= ~HID0_DBP; /* XXX correct?
*/ hid0 |= HID0_EMCP | HID0_BTIC | HID0_SGE | HID0_BHT; break; case MPC7400: case MPC7410: hid0 &= ~HID0_SPD; hid0 |= HID0_EMCP | HID0_BTIC | HID0_SGE | HID0_BHT; hid0 |= HID0_EIEC; break; } mtspr(SPR_HID0, hid0); if (bootverbose) cpu_6xx_print_cacheinfo(cpuid, vers); /* Pick the %b bit-name string that matches this model's HID0 layout. */ switch (vers) { case MPC7447A: case MPC7448: case MPC7450: case MPC7455: case MPC7457: bitmask = HID0_7450_BITMASK; break; default: bitmask = HID0_BITMASK; break; } printf("cpu%d: HID0 %b\n", cpuid, (int)hid0, bitmask); /* Install the idle hook only if none has been set yet. */ if (cpu_idle_hook == NULL) cpu_idle_hook = cpu_idle_60x; } /* * Print the L1 cache enable bits from HID0 and, when L2CR reports the L2 * cache enabled, a model-dependent description of the L2 (and, for the * 7450 family, L3) cache. */ static void cpu_6xx_print_cacheinfo(u_int cpuid, uint16_t vers) { register_t hid; hid = mfspr(SPR_HID0); printf("cpu%u: ", cpuid); printf("L1 I-cache %sabled, ", (hid & HID0_ICE) ? "en" : "dis"); printf("L1 D-cache %sabled\n", (hid & HID0_DCE) ? "en" : "dis"); printf("cpu%u: ", cpuid); if (mfspr(SPR_L2CR) & L2CR_L2E) { switch (vers) { case MPC7450: case MPC7455: case MPC7457: printf("256KB L2 cache, "); if (mfspr(SPR_L3CR) & L3CR_L3E) printf("%cMB L3 backside cache", mfspr(SPR_L3CR) & L3CR_L3SIZ ? '2' : '1'); else printf("L3 cache disabled"); printf("\n"); break; case IBM750FX: printf("512KB L2 cache\n"); break; default: switch (mfspr(SPR_L2CR) & L2CR_L2SIZ) { case L2SIZ_256K: printf("256KB "); break; case L2SIZ_512K: printf("512KB "); break; case L2SIZ_1M: printf("1MB "); break; } printf("write-%s", (mfspr(SPR_L2CR) & L2CR_L2WT) ? "through" : "back"); if (mfspr(SPR_L2CR) & L2CR_L2PE) printf(", with parity"); printf(" backside cache\n"); break; } } else printf("L2 cache disabled\n"); } /* * e500 setup: select DOZE as the HID0 power-management mode, report HID0 * and install cpu_idle_e500 as the idle hook if none is set. */ static void cpu_e500_setup(int cpuid, uint16_t vers) { register_t hid0; hid0 = mfspr(SPR_HID0); /* Program power-management mode.
*/ hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP); hid0 |= HID0_DOZE; mtspr(SPR_HID0, hid0); printf("cpu%d: HID0 %b\n", cpuid, (int)hid0, HID0_E500_BITMASK); if (cpu_idle_hook == NULL) cpu_idle_hook = cpu_idle_e500; } static void cpu_970_setup(int cpuid, uint16_t vers) { #ifdef AIM uint32_t hid0_hi, hid0_lo; __asm __volatile ("mfspr %0,%2; clrldi %1,%0,32; srdi %0,%0,32;" : "=r" (hid0_hi), "=r" (hid0_lo) : "K" (SPR_HID0)); /* Configure power-saving mode */ switch (vers) { case IBM970MP: hid0_hi |= (HID0_DEEPNAP | HID0_NAP | HID0_DPM); hid0_hi &= ~HID0_DOZE; break; default: hid0_hi |= (HID0_NAP | HID0_DPM); hid0_hi &= ~(HID0_DOZE | HID0_DEEPNAP); break; } powerpc_pow_enabled = 1; __asm __volatile (" \ sync; isync; \ sldi %0,%0,32; or %0,%0,%1; \ mtspr %2, %0; \ mfspr %0, %2; mfspr %0, %2; mfspr %0, %2; \ mfspr %0, %2; mfspr %0, %2; mfspr %0, %2; \ sync; isync" :: "r" (hid0_hi), "r"(hid0_lo), "K" (SPR_HID0)); __asm __volatile ("mfspr %0,%1; srdi %0,%0,32;" : "=r" (hid0_hi) : "K" (SPR_HID0)); printf("cpu%d: HID0 %b\n", cpuid, (int)(hid0_hi), HID0_970_BITMASK); #endif cpu_idle_hook = cpu_idle_60x; } static int cpu_feature_bit(SYSCTL_HANDLER_ARGS) { int result; result = (cpu_features & arg2) ? 
1 : 0; return (sysctl_handle_int(oidp, &result, 0, req)); } void cpu_idle(int busy) { #ifdef INVARIANTS if ((mfmsr() & PSL_EE) != PSL_EE) { struct thread *td = curthread; printf("td msr %#lx\n", (u_long)td->td_md.md_saved_msr); panic("ints disabled in idleproc!"); } #endif CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); if (cpu_idle_hook != NULL) { if (!busy) { critical_enter(); cpu_idleclock(); } cpu_idle_hook(); if (!busy) { cpu_activeclock(); critical_exit(); } } CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { return (0); } static void cpu_idle_60x(void) { register_t msr; uint16_t vers; if (!powerpc_pow_enabled) return; msr = mfmsr(); vers = mfpvr() >> 16; #ifdef AIM switch (vers) { case IBM970: case IBM970FX: case IBM970MP: case MPC7447A: case MPC7448: case MPC7450: case MPC7455: case MPC7457: __asm __volatile("\ dssall; sync; mtmsr %0; isync" :: "r"(msr | PSL_POW)); break; default: powerpc_sync(); mtmsr(msr | PSL_POW); isync(); break; } #endif } static void cpu_idle_e500(void) { register_t msr; msr = mfmsr(); #ifdef E500 /* Freescale E500 core RM section 6.4.1. */ __asm __volatile("msync; mtmsr %0; isync" :: "r" (msr | PSL_WE)); #endif } Index: stable/9/sys/sys/sysctl.h =================================================================== --- stable/9/sys/sys/sysctl.h (revision 273911) +++ stable/9/sys/sys/sysctl.h (revision 273912) @@ -1,807 +1,879 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sysctl.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _SYS_SYSCTL_H_ #define _SYS_SYSCTL_H_ #include struct thread; /* * Definitions for sysctl call. The sysctl call uses a hierarchical name * for objects that can be examined or modified. The name is expressed as * a sequence of integers. Like a file path name, the meaning of each * component depends on its place in the hierarchy. The top-level and kern * identifiers are defined here, and other identifiers are defined in the * respective subsystem header files. */ #define CTL_MAXNAME 24 /* largest number of components supported */ /* * Each subsystem defined by sysctl defines a list of variables * for that subsystem. Each name is either a node with further * levels defined below it, or it is a leaf of some particular * type given below. 
Each sysctl level defines a set of name/type * pairs to be used by sysctl(8) in manipulating the subsystem. */ struct ctlname { char *ctl_name; /* subsystem name */ int ctl_type; /* type of name */ }; #define CTLTYPE 0xf /* Mask for the type */ #define CTLTYPE_NODE 1 /* name is a node */ #define CTLTYPE_INT 2 /* name describes an integer */ #define CTLTYPE_STRING 3 /* name describes a string */ #define CTLTYPE_S64 4 /* name describes a signed 64-bit number */ #define CTLTYPE_OPAQUE 5 /* name describes a structure */ #define CTLTYPE_STRUCT CTLTYPE_OPAQUE /* name describes a structure */ #define CTLTYPE_UINT 6 /* name describes an unsigned integer */ #define CTLTYPE_LONG 7 /* name describes a long */ #define CTLTYPE_ULONG 8 /* name describes an unsigned long */ #define CTLTYPE_U64 9 /* name describes an unsigned 64-bit number */ #define CTLFLAG_RD 0x80000000 /* Allow reads of variable */ #define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */ #define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR) #define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */ #define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */ #define CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */ #define CTLFLAG_DYN 0x02000000 /* Dynamic oid - can be freed */ #define CTLFLAG_SKIP 0x01000000 /* Skip this sysctl when listing */ #define CTLMASK_SECURE 0x00F00000 /* Secure level */ #define CTLFLAG_TUN 0x00080000 /* Tunable variable */ #define CTLFLAG_RDTUN (CTLFLAG_RD|CTLFLAG_TUN) #define CTLFLAG_RWTUN (CTLFLAG_RW|CTLFLAG_TUN) #define CTLFLAG_MPSAFE 0x00040000 /* Handler is MP safe */ #define CTLFLAG_VNET 0x00020000 /* Prisons with vnet can fiddle */ #define CTLFLAG_DYING 0x00010000 /* oid is being removed */ #define CTLFLAG_CAPRD 0x00008000 /* Can be read in capability mode */ #define CTLFLAG_CAPWR 0x00004000 /* Can be written in capability mode */ #define CTLFLAG_CAPRW (CTLFLAG_CAPRD|CTLFLAG_CAPWR) /* * Secure level. Note that CTLFLAG_SECURE == CTLFLAG_SECURE1. 
* * Secure when the securelevel is raised to at least N. */ #define CTLSHIFT_SECURE 20 #define CTLFLAG_SECURE1 (CTLFLAG_SECURE | (0 << CTLSHIFT_SECURE)) #define CTLFLAG_SECURE2 (CTLFLAG_SECURE | (1 << CTLSHIFT_SECURE)) #define CTLFLAG_SECURE3 (CTLFLAG_SECURE | (2 << CTLSHIFT_SECURE)) /* * USE THIS instead of a hardwired number from the categories below * to get dynamically assigned sysctl entries using the linker-set * technology. This is the way nearly all new sysctl variables should * be implemented. * e.g. SYSCTL_INT(_parent, OID_AUTO, name, CTLFLAG_RW, &variable, 0, ""); */ #define OID_AUTO (-1) /* * The starting number for dynamically-assigned entries. WARNING! * ALL static sysctl entries should have numbers LESS than this! */ #define CTL_AUTO_START 0x100 #ifdef _KERNEL #include -#define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \ +#ifdef KLD_MODULE +/* XXX allow overspecification of type in external kernel modules */ +#define SYSCTL_CT_ASSERT_MASK CTLTYPE +#else +#define SYSCTL_CT_ASSERT_MASK 0 +#endif + +#define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \ intptr_t arg2, struct sysctl_req *req /* definitions for sysctl_req 'lock' member */ #define REQ_UNWIRED 1 #define REQ_WIRED 2 /* definitions for sysctl_req 'flags' member */ #if defined(__amd64__) || defined(__ia64__) || defined(__powerpc64__) #define SCTL_MASK32 1 /* 32 bit emulation */ #endif /* * This describes the access space for a sysctl request. This is needed * so that we can use the interface from the kernel or from user-space. */ struct sysctl_req { struct thread *td; /* used for access checking */ int lock; /* wiring state */ void *oldptr; size_t oldlen; size_t oldidx; int (*oldfunc)(struct sysctl_req *, const void *, size_t); void *newptr; size_t newlen; size_t newidx; int (*newfunc)(struct sysctl_req *, void *, size_t); size_t validlen; int flags; }; SLIST_HEAD(sysctl_oid_list, sysctl_oid); /* * This describes one "oid" in the MIB tree. 
Potentially more nodes can * be hidden behind it, expanded by the handler. */ struct sysctl_oid { struct sysctl_oid_list *oid_parent; SLIST_ENTRY(sysctl_oid) oid_link; int oid_number; u_int oid_kind; void *oid_arg1; intptr_t oid_arg2; const char *oid_name; int (*oid_handler)(SYSCTL_HANDLER_ARGS); const char *oid_fmt; int oid_refcnt; u_int oid_running; const char *oid_descr; }; #define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l) #define SYSCTL_OUT(r, p, l) (r->oldfunc)(r, p, l) int sysctl_handle_int(SYSCTL_HANDLER_ARGS); int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS); int sysctl_handle_long(SYSCTL_HANDLER_ARGS); int sysctl_handle_64(SYSCTL_HANDLER_ARGS); int sysctl_handle_string(SYSCTL_HANDLER_ARGS); int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS); /* * These functions are used to add/remove an oid from the mib. */ void sysctl_register_oid(struct sysctl_oid *oidp); void sysctl_unregister_oid(struct sysctl_oid *oidp); /* Declare a static oid to allow child oids to be added to it. */ #define SYSCTL_DECL(name) \ extern struct sysctl_oid_list sysctl_##name##_children /* Hide these in macros */ #define SYSCTL_CHILDREN(oid_ptr) (struct sysctl_oid_list *) \ (oid_ptr)->oid_arg1 #define SYSCTL_CHILDREN_SET(oid_ptr, val) \ (oid_ptr)->oid_arg1 = (val); #define SYSCTL_STATIC_CHILDREN(oid_name) \ (&sysctl_##oid_name##_children) /* === Structs and macros related to context handling === */ /* All dynamically created sysctls can be tracked in a context list. */ struct sysctl_ctx_entry { struct sysctl_oid *entry; TAILQ_ENTRY(sysctl_ctx_entry) link; }; TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); #define SYSCTL_NODE_CHILDREN(parent, name) \ sysctl_##parent##_##name##_children -/* - * These macros provide type safety for sysctls. SYSCTL_ALLOWED_TYPES() - * defines a transparent union of the allowed types. 
SYSCTL_ASSERT_TYPE() - * and SYSCTL_ADD_ASSERT_TYPE() use the transparent union to assert that - * the pointer matches the allowed types. - * - * The allow_0 member allows a literal 0 to be passed for ptr. - */ -#define SYSCTL_ALLOWED_TYPES(type, decls) \ - union sysctl_##type { \ - long allow_0; \ - decls \ - } __attribute__((__transparent_union__)); \ - \ - static inline void * \ - __sysctl_assert_##type(union sysctl_##type ptr) \ - { \ - return (ptr.a); \ - } \ - struct __hack - -SYSCTL_ALLOWED_TYPES(INT, int *a; ); -SYSCTL_ALLOWED_TYPES(UINT, unsigned int *a; ); -SYSCTL_ALLOWED_TYPES(LONG, long *a; ); -SYSCTL_ALLOWED_TYPES(ULONG, unsigned long *a; ); -SYSCTL_ALLOWED_TYPES(INT64, int64_t *a; long long *b; ); -SYSCTL_ALLOWED_TYPES(UINT64, uint64_t *a; unsigned long long *b; ); - -#ifdef notyet -#define SYSCTL_ADD_ASSERT_TYPE(type, ptr) \ - __sysctl_assert_ ## type (ptr) -#define SYSCTL_ASSERT_TYPE(type, ptr, parent, name) \ - _SYSCTL_ASSERT_TYPE(type, ptr, __LINE__, parent##_##name) -#else -#define SYSCTL_ADD_ASSERT_TYPE(type, ptr) ptr -#define SYSCTL_ASSERT_TYPE(type, ptr, parent, name) -#endif -#define _SYSCTL_ASSERT_TYPE(t, p, l, id) \ - __SYSCTL_ASSERT_TYPE(t, p, l, id) -#define __SYSCTL_ASSERT_TYPE(type, ptr, line, id) \ - static inline void \ - sysctl_assert_##line##_##id(void) \ - { \ - (void)__sysctl_assert_##type(ptr); \ - } \ - struct __hack - #ifndef NO_SYSCTL_DESCR #define __DESCR(d) d #else #define __DESCR(d) "" #endif /* This constructs a "raw" MIB oid. 
*/ #define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ static struct sysctl_oid sysctl__##parent##_##name = { \ &sysctl_##parent##_children, { NULL }, nbr, kind, \ a1, a2, #name, handler, fmt, 0, 0, __DESCR(descr) }; \ DATA_SET(sysctl_set, sysctl__##parent##_##name) #define SYSCTL_ADD_OID(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ sysctl_add_oid(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, __DESCR(descr)) +/* This constructs a root node from which other nodes can hang. */ +#define SYSCTL_ROOT_NODE(nbr, name, access, handler, descr) \ + SYSCTL_NODE(, nbr, name, access, handler, descr) + /* This constructs a node from which other oids can hang. */ #define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \ struct sysctl_oid_list SYSCTL_NODE_CHILDREN(parent, name); \ SYSCTL_OID(parent, nbr, name, CTLTYPE_NODE|(access), \ - (void*)&SYSCTL_NODE_CHILDREN(parent, name), 0, handler, "N", descr) + (void*)&SYSCTL_NODE_CHILDREN(parent, name), 0, handler, "N", descr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE) -#define SYSCTL_ADD_NODE(ctx, parent, nbr, name, access, handler, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_NODE|(access), \ - NULL, 0, handler, "N", __DESCR(descr)) +#define SYSCTL_ADD_ROOT_NODE(ctx, nbr, name, access, handler, descr) \ + SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(), nbr, name, access, handler, descr) +#define SYSCTL_ADD_NODE(ctx, parent, nbr, name, access, handler, descr) \ +({ \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_NODE); \ + sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_NODE|(access), \ + NULL, 0, handler, "N", __DESCR(descr)); \ +}) + /* Oid for a string. len can be 0 to indicate '\0' termination. 
*/ -#define SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \ - arg, len, sysctl_handle_string, "A", descr) +#define SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \ + SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \ + arg, len, sysctl_handle_string, "A", descr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING) -#define SYSCTL_ADD_STRING(ctx, parent, nbr, name, access, arg, len, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access), \ - arg, len, sysctl_handle_string, "A", __DESCR(descr)) +#define SYSCTL_ADD_STRING(ctx, parent, nbr, name, access, arg, len, descr) \ +({ \ + char *__arg = (arg); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_STRING); \ + sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access), \ + __arg, len, sysctl_handle_string, "A", __DESCR(descr)); \ +}) -/* Oid for an int. If ptr is NULL, val is returned. */ -#define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_ASSERT_TYPE(INT, ptr, parent, name); \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \ - ptr, val, sysctl_handle_int, "I", descr) +/* Oid for an int. If ptr is SYSCTL_NULL_INT_PTR, val is returned. 
*/ +#define SYSCTL_NULL_INT_PTR ((int *)NULL) +#define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_int, "I", descr); \ + CTASSERT((((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT) && \ + sizeof(int) == sizeof(*(ptr))) #define SYSCTL_ADD_INT(ctx, parent, nbr, name, access, ptr, val, descr) \ +({ \ + int *__ptr = (ptr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_INT); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_INT | CTLFLAG_MPSAFE | (access), \ - SYSCTL_ADD_ASSERT_TYPE(INT, ptr), val, \ - sysctl_handle_int, "I", __DESCR(descr)) + __ptr, val, sysctl_handle_int, "I", __DESCR(descr)); \ +}) /* Oid for an unsigned int. If ptr is NULL, val is returned. */ -#define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_ASSERT_TYPE(UINT, ptr, parent, name); \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \ - ptr, val, sysctl_handle_int, "IU", descr) +#define SYSCTL_NULL_UINT_PTR ((unsigned *)NULL) +#define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_int, "IU", descr); \ + CTASSERT((((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_UINT) && \ + sizeof(unsigned) == sizeof(*(ptr))) #define SYSCTL_ADD_UINT(ctx, parent, nbr, name, access, ptr, val, descr) \ +({ \ + unsigned *__ptr = (ptr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_UINT); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \ - SYSCTL_ADD_ASSERT_TYPE(UINT, ptr), val, \ - sysctl_handle_int, "IU", __DESCR(descr)) + __ptr, val, sysctl_handle_int, "IU", __DESCR(descr)); \ +}) /* Oid for a long. The pointer must be non NULL. 
*/ -#define SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_ASSERT_TYPE(LONG, ptr, parent, name); \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_LONG | CTLFLAG_MPSAFE | (access), \ - ptr, val, sysctl_handle_long, "L", descr) +#define SYSCTL_NULL_LONG_PTR ((long *)NULL) +#define SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_LONG | CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_long, "L", descr); \ + CTASSERT((((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_LONG) && \ + sizeof(long) == sizeof(*(ptr))) #define SYSCTL_ADD_LONG(ctx, parent, nbr, name, access, ptr, descr) \ +({ \ + long *__ptr = (ptr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_LONG); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_LONG | CTLFLAG_MPSAFE | (access), \ - SYSCTL_ADD_ASSERT_TYPE(LONG, ptr), 0, \ - sysctl_handle_long, "L", __DESCR(descr)) + __ptr, 0, sysctl_handle_long, "L", __DESCR(descr)); \ +}) /* Oid for an unsigned long. The pointer must be non NULL. 
*/ +#define SYSCTL_NULL_ULONG_PTR ((unsigned long *)NULL) #define SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_ASSERT_TYPE(ULONG, ptr, parent, name); \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_ULONG | CTLFLAG_MPSAFE | (access), \ - ptr, val, sysctl_handle_long, "LU", descr) + ptr, val, sysctl_handle_long, "LU", descr); \ + CTASSERT((((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_ULONG) && \ + sizeof(unsigned long) == sizeof(*(ptr))) #define SYSCTL_ADD_ULONG(ctx, parent, nbr, name, access, ptr, descr) \ +({ \ + unsigned long *__ptr = (ptr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_ULONG); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_ULONG | CTLFLAG_MPSAFE | (access), \ - SYSCTL_ADD_ASSERT_TYPE(ULONG, ptr), 0, \ - sysctl_handle_long, "LU", __DESCR(descr)) + __ptr, 0, sysctl_handle_long, "LU", __DESCR(descr)); \ +}) /* Oid for a quad. The pointer must be non NULL. */ -#define SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_ASSERT_TYPE(INT64, ptr, parent, name); \ - SYSCTL_OID(parent, nbr, name, \ - CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \ - ptr, val, sysctl_handle_64, "Q", descr) +#define SYSCTL_NULL_QUAD_PTR ((int64_t *)NULL) +#define SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \ + ptr, val, sysctl_handle_64, "Q", descr); \ + CTASSERT((((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64) && \ + sizeof(int64_t) == sizeof(*(ptr))) #define SYSCTL_ADD_QUAD(ctx, parent, nbr, name, access, ptr, descr) \ +({ \ + int64_t *__ptr = (ptr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_S64 | CTLFLAG_MPSAFE | (access), \ - SYSCTL_ADD_ASSERT_TYPE(INT64, ptr), 0, \ - sysctl_handle_64, "Q", __DESCR(descr)) + __ptr, 0, 
sysctl_handle_64, "Q", __DESCR(descr)); \ +}) +#define SYSCTL_NULL_UQUAD_PTR ((uint64_t *)NULL) #define SYSCTL_UQUAD(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_ASSERT_TYPE(UINT64, ptr, parent, name); \ SYSCTL_OID(parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ - ptr, val, sysctl_handle_64, "QU", descr) + ptr, val, sysctl_handle_64, "QU", descr); \ + CTASSERT((((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64) && \ + sizeof(uint64_t) == sizeof(*(ptr))) #define SYSCTL_ADD_UQUAD(ctx, parent, nbr, name, access, ptr, descr) \ +({ \ + uint64_t *__ptr = (ptr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_U64); \ sysctl_add_oid(ctx, parent, nbr, name, \ CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ + __ptr, 0, sysctl_handle_64, "QU", __DESCR(descr)); \ +}) + +/* Oid for a 64-bit unsigned counter(9). The pointer must be non NULL. */ +#define SYSCTL_COUNTER_U64(parent, nbr, name, access, ptr, descr) \ + SYSCTL_ASSERT_TYPE(UINT64, ptr, parent, name); \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ + ptr, 0, sysctl_handle_counter_u64, "QU", descr) + +#define SYSCTL_ADD_COUNTER_U64(ctx, parent, nbr, name, access, ptr, descr)\ + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ SYSCTL_ADD_ASSERT_TYPE(UINT64, ptr), 0, \ - sysctl_handle_64, "QU", __DESCR(descr)) + sysctl_handle_counter_u64, "QU", __DESCR(descr)) +/* Oid for a CPU dependant variable */ +#define SYSCTL_ADD_UAUTO(ctx, parent, nbr, name, access, ptr, descr) \ +({ \ + struct sysctl_oid *__ret; \ + CTASSERT((sizeof(uint64_t) == sizeof(*(ptr)) || \ + sizeof(unsigned) == sizeof(*(ptr))) && \ + ((access) & CTLTYPE) == 0); \ + if (sizeof(uint64_t) == sizeof(*(ptr))) { \ + __ret = sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_U64 | CTLFLAG_MPSAFE | (access), \ + (ptr), 0, sysctl_handle_64, "QU", \ + __DESCR(descr)); \ + } else { \ + __ret = 
sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_UINT | CTLFLAG_MPSAFE | (access), \ + (ptr), 0, sysctl_handle_int, "IU", \ + __DESCR(descr)); \ + } \ + __ret; \ +}) + /* Oid for an opaque object. Specified by a pointer and a length. */ -#define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \ - ptr, len, sysctl_handle_opaque, fmt, descr) +#define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \ + SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \ + ptr, len, sysctl_handle_opaque, fmt, descr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE) -#define SYSCTL_ADD_OPAQUE(ctx, parent, nbr, name, access, ptr, len, fmt, descr)\ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access), \ - ptr, len, sysctl_handle_opaque, fmt, __DESCR(descr)) +#define SYSCTL_ADD_OPAQUE(ctx, parent, nbr, name, access, ptr, len, fmt, descr) \ +({ \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE); \ + sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access), \ + ptr, len, sysctl_handle_opaque, fmt, __DESCR(descr)); \ +}) /* Oid for a struct. Specified by a pointer and a type. 
*/ -#define SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \ - ptr, sizeof(struct type), sysctl_handle_opaque, \ - "S," #type, descr) +#define SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) \ + SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \ + ptr, sizeof(struct type), sysctl_handle_opaque, \ + "S," #type, descr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE) #define SYSCTL_ADD_STRUCT(ctx, parent, nbr, name, access, ptr, type, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access), \ - ptr, sizeof(struct type), sysctl_handle_opaque, "S," #type, __DESCR(descr)) +({ \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_OPAQUE); \ + sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access), \ + (ptr), sizeof(struct type), \ + sysctl_handle_opaque, "S," #type, __DESCR(descr)); \ +}) /* Oid for a procedure. Specified by a pointer and an arg. */ #define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ - CTASSERT(((access) & CTLTYPE) != 0); \ - SYSCTL_OID(parent, nbr, name, (access), \ - ptr, arg, handler, fmt, descr) + SYSCTL_OID(parent, nbr, name, (access), \ + ptr, arg, handler, fmt, descr); \ + CTASSERT(((access) & CTLTYPE) != 0) #define SYSCTL_ADD_PROC(ctx, parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ - sysctl_add_oid(ctx, parent, nbr, name, (access), \ - ptr, arg, handler, fmt, __DESCR(descr)) +({ \ + CTASSERT(((access) & CTLTYPE) != 0); \ + sysctl_add_oid(ctx, parent, nbr, name, (access), \ + (ptr), (arg), (handler), (fmt), __DESCR(descr)); \ +}) /* * A macro to generate a read-only sysctl to indicate the presense of optional * kernel features. 
*/ #define FEATURE(name, desc) \ SYSCTL_INT(_kern_features, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_CAPRD, \ - 0, 1, desc) + SYSCTL_NULL_INT_PTR, 1, desc) #endif /* _KERNEL */ /* * Top-level identifiers */ #define CTL_UNSPEC 0 /* unused */ #define CTL_KERN 1 /* "high kernel": proc, limits */ #define CTL_VM 2 /* virtual memory */ #define CTL_VFS 3 /* filesystem, mount type is next */ #define CTL_NET 4 /* network, see socket.h */ #define CTL_DEBUG 5 /* debugging parameters */ #define CTL_HW 6 /* generic cpu/io */ #define CTL_MACHDEP 7 /* machine dependent */ #define CTL_USER 8 /* user-level */ #define CTL_P1003_1B 9 /* POSIX 1003.1B */ #define CTL_MAXID 10 /* number of valid top-level ids */ #define CTL_NAMES { \ { 0, 0 }, \ { "kern", CTLTYPE_NODE }, \ { "vm", CTLTYPE_NODE }, \ { "vfs", CTLTYPE_NODE }, \ { "net", CTLTYPE_NODE }, \ { "debug", CTLTYPE_NODE }, \ { "hw", CTLTYPE_NODE }, \ { "machdep", CTLTYPE_NODE }, \ { "user", CTLTYPE_NODE }, \ { "p1003_1b", CTLTYPE_NODE }, \ } /* * CTL_KERN identifiers */ #define KERN_OSTYPE 1 /* string: system version */ #define KERN_OSRELEASE 2 /* string: system release */ #define KERN_OSREV 3 /* int: system revision */ #define KERN_VERSION 4 /* string: compile time info */ #define KERN_MAXVNODES 5 /* int: max vnodes */ #define KERN_MAXPROC 6 /* int: max processes */ #define KERN_MAXFILES 7 /* int: max open files */ #define KERN_ARGMAX 8 /* int: max arguments to exec */ #define KERN_SECURELVL 9 /* int: system security level */ #define KERN_HOSTNAME 10 /* string: hostname */ #define KERN_HOSTID 11 /* int: host identifier */ #define KERN_CLOCKRATE 12 /* struct: struct clockrate */ #define KERN_VNODE 13 /* struct: vnode structures */ #define KERN_PROC 14 /* struct: process entries */ #define KERN_FILE 15 /* struct: file entries */ #define KERN_PROF 16 /* node: kernel profiling info */ #define KERN_POSIX1 17 /* int: POSIX.1 version */ #define KERN_NGROUPS 18 /* int: # of supplemental group ids */ #define KERN_JOB_CONTROL 19 /* int: is job 
control available */ #define KERN_SAVED_IDS 20 /* int: saved set-user/group-ID */ #define KERN_BOOTTIME 21 /* struct: time kernel was booted */ #define KERN_NISDOMAINNAME 22 /* string: YP domain name */ #define KERN_UPDATEINTERVAL 23 /* int: update process sleep time */ #define KERN_OSRELDATE 24 /* int: kernel release date */ #define KERN_NTP_PLL 25 /* node: NTP PLL control */ #define KERN_BOOTFILE 26 /* string: name of booted kernel */ #define KERN_MAXFILESPERPROC 27 /* int: max open files per proc */ #define KERN_MAXPROCPERUID 28 /* int: max processes per uid */ #define KERN_DUMPDEV 29 /* struct cdev *: device to dump on */ #define KERN_IPC 30 /* node: anything related to IPC */ #define KERN_DUMMY 31 /* unused */ #define KERN_PS_STRINGS 32 /* int: address of PS_STRINGS */ #define KERN_USRSTACK 33 /* int: address of USRSTACK */ #define KERN_LOGSIGEXIT 34 /* int: do we log sigexit procs? */ #define KERN_IOV_MAX 35 /* int: value of UIO_MAXIOV */ #define KERN_HOSTUUID 36 /* string: host UUID identifier */ #define KERN_ARND 37 /* int: from arc4rand() */ #define KERN_MAXID 38 /* number of valid kern ids */ #define CTL_KERN_NAMES { \ { 0, 0 }, \ { "ostype", CTLTYPE_STRING }, \ { "osrelease", CTLTYPE_STRING }, \ { "osrevision", CTLTYPE_INT }, \ { "version", CTLTYPE_STRING }, \ { "maxvnodes", CTLTYPE_INT }, \ { "maxproc", CTLTYPE_INT }, \ { "maxfiles", CTLTYPE_INT }, \ { "argmax", CTLTYPE_INT }, \ { "securelevel", CTLTYPE_INT }, \ { "hostname", CTLTYPE_STRING }, \ { "hostid", CTLTYPE_UINT }, \ { "clockrate", CTLTYPE_STRUCT }, \ { "vnode", CTLTYPE_STRUCT }, \ { "proc", CTLTYPE_STRUCT }, \ { "file", CTLTYPE_STRUCT }, \ { "profiling", CTLTYPE_NODE }, \ { "posix1version", CTLTYPE_INT }, \ { "ngroups", CTLTYPE_INT }, \ { "job_control", CTLTYPE_INT }, \ { "saved_ids", CTLTYPE_INT }, \ { "boottime", CTLTYPE_STRUCT }, \ { "nisdomainname", CTLTYPE_STRING }, \ { "update", CTLTYPE_INT }, \ { "osreldate", CTLTYPE_INT }, \ { "ntp_pll", CTLTYPE_NODE }, \ { "bootfile", CTLTYPE_STRING }, 
\ { "maxfilesperproc", CTLTYPE_INT }, \ { "maxprocperuid", CTLTYPE_INT }, \ { "ipc", CTLTYPE_NODE }, \ { "dummy", CTLTYPE_INT }, \ { "ps_strings", CTLTYPE_INT }, \ { "usrstack", CTLTYPE_INT }, \ { "logsigexit", CTLTYPE_INT }, \ { "iov_max", CTLTYPE_INT }, \ { "hostuuid", CTLTYPE_STRING }, \ { "arc4rand", CTLTYPE_OPAQUE }, \ } /* * CTL_VFS identifiers */ #define CTL_VFS_NAMES { \ { "vfsconf", CTLTYPE_STRUCT }, \ } /* * KERN_PROC subtypes */ #define KERN_PROC_ALL 0 /* everything */ #define KERN_PROC_PID 1 /* by process id */ #define KERN_PROC_PGRP 2 /* by process group id */ #define KERN_PROC_SESSION 3 /* by session of pid */ #define KERN_PROC_TTY 4 /* by controlling tty */ #define KERN_PROC_UID 5 /* by effective uid */ #define KERN_PROC_RUID 6 /* by real uid */ #define KERN_PROC_ARGS 7 /* get/set arguments/proctitle */ #define KERN_PROC_PROC 8 /* only return procs */ #define KERN_PROC_SV_NAME 9 /* get syscall vector name */ #define KERN_PROC_RGID 10 /* by real group id */ #define KERN_PROC_GID 11 /* by effective group id */ #define KERN_PROC_PATHNAME 12 /* path to executable */ #define KERN_PROC_OVMMAP 13 /* Old VM map entries for process */ #define KERN_PROC_OFILEDESC 14 /* Old file descriptors for process */ #define KERN_PROC_KSTACK 15 /* Kernel stacks for process */ #define KERN_PROC_INC_THREAD 0x10 /* * modifier for pid, pgrp, tty, * uid, ruid, gid, rgid and proc * This effectively uses 16-31 */ #define KERN_PROC_VMMAP 32 /* VM map entries for process */ #define KERN_PROC_FILEDESC 33 /* File descriptors for process */ #define KERN_PROC_GROUPS 34 /* process groups */ #define KERN_PROC_ENV 35 /* get environment */ #define KERN_PROC_AUXV 36 /* get ELF auxiliary vector */ #define KERN_PROC_RLIMIT 37 /* process resource limits */ #define KERN_PROC_PS_STRINGS 38 /* get ps_strings location */ #define KERN_PROC_UMASK 39 /* process umask */ #define KERN_PROC_OSREL 40 /* osreldate for process binary */ #define KERN_PROC_SIGTRAMP 41 /* signal trampoline location */ /* * 
KERN_IPC identifiers */ #define KIPC_MAXSOCKBUF 1 /* int: max size of a socket buffer */ #define KIPC_SOCKBUF_WASTE 2 /* int: wastage factor in sockbuf */ #define KIPC_SOMAXCONN 3 /* int: max length of connection q */ #define KIPC_MAX_LINKHDR 4 /* int: max length of link header */ #define KIPC_MAX_PROTOHDR 5 /* int: max length of network header */ #define KIPC_MAX_HDR 6 /* int: max total length of headers */ #define KIPC_MAX_DATALEN 7 /* int: max length of data? */ /* * CTL_HW identifiers */ #define HW_MACHINE 1 /* string: machine class */ #define HW_MODEL 2 /* string: specific machine model */ #define HW_NCPU 3 /* int: number of cpus */ #define HW_BYTEORDER 4 /* int: machine byte order */ #define HW_PHYSMEM 5 /* int: total memory */ #define HW_USERMEM 6 /* int: non-kernel memory */ #define HW_PAGESIZE 7 /* int: software page size */ #define HW_DISKNAMES 8 /* strings: disk drive names */ #define HW_DISKSTATS 9 /* struct: diskstats[] */ #define HW_FLOATINGPT 10 /* int: has HW floating point? 
*/ #define HW_MACHINE_ARCH 11 /* string: machine architecture */ #define HW_REALMEM 12 /* int: 'real' memory */ #define HW_MAXID 13 /* number of valid hw ids */ #define CTL_HW_NAMES { \ { 0, 0 }, \ { "machine", CTLTYPE_STRING }, \ { "model", CTLTYPE_STRING }, \ { "ncpu", CTLTYPE_INT }, \ { "byteorder", CTLTYPE_INT }, \ { "physmem", CTLTYPE_ULONG }, \ { "usermem", CTLTYPE_ULONG }, \ { "pagesize", CTLTYPE_INT }, \ { "disknames", CTLTYPE_STRUCT }, \ { "diskstats", CTLTYPE_STRUCT }, \ { "floatingpoint", CTLTYPE_INT }, \ { "machine_arch", CTLTYPE_STRING }, \ { "realmem", CTLTYPE_ULONG }, \ } /* * CTL_USER definitions */ #define USER_CS_PATH 1 /* string: _CS_PATH */ #define USER_BC_BASE_MAX 2 /* int: BC_BASE_MAX */ #define USER_BC_DIM_MAX 3 /* int: BC_DIM_MAX */ #define USER_BC_SCALE_MAX 4 /* int: BC_SCALE_MAX */ #define USER_BC_STRING_MAX 5 /* int: BC_STRING_MAX */ #define USER_COLL_WEIGHTS_MAX 6 /* int: COLL_WEIGHTS_MAX */ #define USER_EXPR_NEST_MAX 7 /* int: EXPR_NEST_MAX */ #define USER_LINE_MAX 8 /* int: LINE_MAX */ #define USER_RE_DUP_MAX 9 /* int: RE_DUP_MAX */ #define USER_POSIX2_VERSION 10 /* int: POSIX2_VERSION */ #define USER_POSIX2_C_BIND 11 /* int: POSIX2_C_BIND */ #define USER_POSIX2_C_DEV 12 /* int: POSIX2_C_DEV */ #define USER_POSIX2_CHAR_TERM 13 /* int: POSIX2_CHAR_TERM */ #define USER_POSIX2_FORT_DEV 14 /* int: POSIX2_FORT_DEV */ #define USER_POSIX2_FORT_RUN 15 /* int: POSIX2_FORT_RUN */ #define USER_POSIX2_LOCALEDEF 16 /* int: POSIX2_LOCALEDEF */ #define USER_POSIX2_SW_DEV 17 /* int: POSIX2_SW_DEV */ #define USER_POSIX2_UPE 18 /* int: POSIX2_UPE */ #define USER_STREAM_MAX 19 /* int: POSIX2_STREAM_MAX */ #define USER_TZNAME_MAX 20 /* int: POSIX2_TZNAME_MAX */ #define USER_MAXID 21 /* number of valid user ids */ #define CTL_USER_NAMES { \ { 0, 0 }, \ { "cs_path", CTLTYPE_STRING }, \ { "bc_base_max", CTLTYPE_INT }, \ { "bc_dim_max", CTLTYPE_INT }, \ { "bc_scale_max", CTLTYPE_INT }, \ { "bc_string_max", CTLTYPE_INT }, \ { "coll_weights_max", CTLTYPE_INT }, 
\ { "expr_nest_max", CTLTYPE_INT }, \ { "line_max", CTLTYPE_INT }, \ { "re_dup_max", CTLTYPE_INT }, \ { "posix2_version", CTLTYPE_INT }, \ { "posix2_c_bind", CTLTYPE_INT }, \ { "posix2_c_dev", CTLTYPE_INT }, \ { "posix2_char_term", CTLTYPE_INT }, \ { "posix2_fort_dev", CTLTYPE_INT }, \ { "posix2_fort_run", CTLTYPE_INT }, \ { "posix2_localedef", CTLTYPE_INT }, \ { "posix2_sw_dev", CTLTYPE_INT }, \ { "posix2_upe", CTLTYPE_INT }, \ { "stream_max", CTLTYPE_INT }, \ { "tzname_max", CTLTYPE_INT }, \ } #define CTL_P1003_1B_ASYNCHRONOUS_IO 1 /* boolean */ #define CTL_P1003_1B_MAPPED_FILES 2 /* boolean */ #define CTL_P1003_1B_MEMLOCK 3 /* boolean */ #define CTL_P1003_1B_MEMLOCK_RANGE 4 /* boolean */ #define CTL_P1003_1B_MEMORY_PROTECTION 5 /* boolean */ #define CTL_P1003_1B_MESSAGE_PASSING 6 /* boolean */ #define CTL_P1003_1B_PRIORITIZED_IO 7 /* boolean */ #define CTL_P1003_1B_PRIORITY_SCHEDULING 8 /* boolean */ #define CTL_P1003_1B_REALTIME_SIGNALS 9 /* boolean */ #define CTL_P1003_1B_SEMAPHORES 10 /* boolean */ #define CTL_P1003_1B_FSYNC 11 /* boolean */ #define CTL_P1003_1B_SHARED_MEMORY_OBJECTS 12 /* boolean */ #define CTL_P1003_1B_SYNCHRONIZED_IO 13 /* boolean */ #define CTL_P1003_1B_TIMERS 14 /* boolean */ #define CTL_P1003_1B_AIO_LISTIO_MAX 15 /* int */ #define CTL_P1003_1B_AIO_MAX 16 /* int */ #define CTL_P1003_1B_AIO_PRIO_DELTA_MAX 17 /* int */ #define CTL_P1003_1B_DELAYTIMER_MAX 18 /* int */ #define CTL_P1003_1B_MQ_OPEN_MAX 19 /* int */ #define CTL_P1003_1B_PAGESIZE 20 /* int */ #define CTL_P1003_1B_RTSIG_MAX 21 /* int */ #define CTL_P1003_1B_SEM_NSEMS_MAX 22 /* int */ #define CTL_P1003_1B_SEM_VALUE_MAX 23 /* int */ #define CTL_P1003_1B_SIGQUEUE_MAX 24 /* int */ #define CTL_P1003_1B_TIMER_MAX 25 /* int */ #define CTL_P1003_1B_MAXID 26 #define CTL_P1003_1B_NAMES { \ { 0, 0 }, \ { "asynchronous_io", CTLTYPE_INT }, \ { "mapped_files", CTLTYPE_INT }, \ { "memlock", CTLTYPE_INT }, \ { "memlock_range", CTLTYPE_INT }, \ { "memory_protection", CTLTYPE_INT }, \ { 
"message_passing", CTLTYPE_INT }, \ { "prioritized_io", CTLTYPE_INT }, \ { "priority_scheduling", CTLTYPE_INT }, \ { "realtime_signals", CTLTYPE_INT }, \ { "semaphores", CTLTYPE_INT }, \ { "fsync", CTLTYPE_INT }, \ { "shared_memory_objects", CTLTYPE_INT }, \ { "synchronized_io", CTLTYPE_INT }, \ { "timers", CTLTYPE_INT }, \ { "aio_listio_max", CTLTYPE_INT }, \ { "aio_max", CTLTYPE_INT }, \ { "aio_prio_delta_max", CTLTYPE_INT }, \ { "delaytimer_max", CTLTYPE_INT }, \ { "mq_open_max", CTLTYPE_INT }, \ { "pagesize", CTLTYPE_INT }, \ { "rtsig_max", CTLTYPE_INT }, \ { "nsems_max", CTLTYPE_INT }, \ { "sem_value_max", CTLTYPE_INT }, \ { "sigqueue_max", CTLTYPE_INT }, \ { "timer_max", CTLTYPE_INT }, \ } #ifdef _KERNEL /* * Declare some common oids. */ extern struct sysctl_oid_list sysctl__children; SYSCTL_DECL(_kern); SYSCTL_DECL(_kern_features); SYSCTL_DECL(_kern_ipc); SYSCTL_DECL(_kern_proc); SYSCTL_DECL(_kern_sched); SYSCTL_DECL(_kern_sched_stats); SYSCTL_DECL(_sysctl); SYSCTL_DECL(_vm); SYSCTL_DECL(_vm_stats); SYSCTL_DECL(_vm_stats_misc); SYSCTL_DECL(_vfs); SYSCTL_DECL(_net); SYSCTL_DECL(_debug); SYSCTL_DECL(_debug_sizeof); SYSCTL_DECL(_dev); SYSCTL_DECL(_hw); SYSCTL_DECL(_hw_bus); SYSCTL_DECL(_hw_bus_devices); SYSCTL_DECL(_hw_bus_info); SYSCTL_DECL(_machdep); SYSCTL_DECL(_user); SYSCTL_DECL(_compat); SYSCTL_DECL(_regression); SYSCTL_DECL(_security); SYSCTL_DECL(_security_bsd); extern char machine[]; extern char osrelease[]; extern char ostype[]; extern char kern_ident[]; /* Dynamic oid handling */ struct sysctl_oid *sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int nbr, const char *name, int kind, void *arg1, intptr_t arg2, int (*handler) (SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr); int sysctl_remove_name(struct sysctl_oid *parent, const char *name, int del, int recurse); void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name); int sysctl_move_oid(struct sysctl_oid *oidp, struct sysctl_oid_list *parent); int 
sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse); int sysctl_ctx_init(struct sysctl_ctx_list *clist); int sysctl_ctx_free(struct sysctl_ctx_list *clist); struct sysctl_ctx_entry *sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); struct sysctl_ctx_entry *sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); int sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); int kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags); int kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags); int userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval, int flags); int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req); void sysctl_lock(void); void sysctl_unlock(void); int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len); struct sbuf; struct sbuf *sbuf_new_for_sysctl(struct sbuf *, char *, int, struct sysctl_req *); #else /* !_KERNEL */ #include __BEGIN_DECLS int sysctl(const int *, u_int, void *, size_t *, const void *, size_t); int sysctlbyname(const char *, void *, size_t *, const void *, size_t); int sysctlnametomib(const char *, int *, size_t *); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_SYSCTL_H_ */ Index: stable/9/sys/sys/systm.h =================================================================== --- stable/9/sys/sys/systm.h (revision 273911) +++ stable/9/sys/sys/systm.h (revision 273912) @@ -1,423 +1,421 @@ /*- * Copyright (c) 1982, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)systm.h 8.7 (Berkeley) 3/29/95 * $FreeBSD$ */ #ifndef _SYS_SYSTM_H_ #define _SYS_SYSTM_H_ #include #include #include #include #include #include /* for people using printf mainly */ extern int cold; /* nonzero if we are doing a cold boot */ extern int rebooting; /* kern_reboot() has been called. */ extern const char *panicstr; /* panic message */ extern char version[]; /* system version */ extern char compiler_version[]; /* compiler version */ extern char copyright[]; /* system copyright */ extern int kstack_pages; /* number of kernel stack pages */ extern u_long pagesizes[]; /* supported page sizes */ extern long physmem; /* physical memory */ extern long realmem; /* 'real' memory */ extern char *rootdevnames[2]; /* names of possible root devices */ extern int boothowto; /* reboot flags, from console subsystem */ extern int bootverbose; /* nonzero to print verbose messages */ extern int maxusers; /* system tune hint */ extern int ngroups_max; /* max # of supplemental groups */ extern int vm_guest; /* Running as virtual machine guest? */ /* * Detected virtual machine guest types. The intention is to expand * and/or add to the VM_GUEST_VM type if specific VM functionality is * ever implemented (e.g. vendor-specific paravirtualization features). */ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN }; #ifdef INVARIANTS /* The option is always available */ #define KASSERT(exp,msg) do { \ if (__predict_false(!(exp))) \ panic msg; \ } while (0) #define VNASSERT(exp, vp, msg) do { \ if (__predict_false(!(exp))) { \ vn_printf(vp, "VNASSERT failed\n"); \ panic msg; \ } \ } while (0) #else #define KASSERT(exp,msg) do { \ } while (0) #define VNASSERT(exp, vp, msg) do { \ } while (0) #endif -#ifndef CTASSERT /* Allow lint to override */ -#define CTASSERT(x) _CTASSERT(x, __LINE__) -#define _CTASSERT(x, y) __CTASSERT(x, y) -#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 
1 : -1] +#ifndef CTASSERT /* Allow lint to override */ +#define CTASSERT(x) _Static_assert(x, "compile-time assertion failed") #endif /* * Assert that a pointer can be loaded from memory atomically. * * This assertion enforces stronger alignment than necessary. For example, * on some architectures, atomicity for unaligned loads will depend on * whether or not the load spans multiple cache lines. */ #define ASSERT_ATOMIC_LOAD_PTR(var, msg) \ KASSERT(sizeof(var) == sizeof(void *) && \ ((uintptr_t)&(var) & (sizeof(void *) - 1)) == 0, msg) /* * If we have already panic'd and this is the thread that called * panic(), then don't block on any mutexes but silently succeed. * Otherwise, the kernel will deadlock since the scheduler isn't * going to run the thread that holds any lock we need. */ #define SCHEDULER_STOPPED() __predict_false(curthread->td_stopsched) /* * XXX the hints declarations are even more misplaced than most declarations * in this file, since they are needed in one file (per arch) and only used * in two files. * XXX most of these variables should be const. */ extern int osreldate; extern int envmode; extern int hintmode; /* 0 = off. 1 = config, 2 = fallback */ extern int dynamic_kenv; extern struct mtx kenv_lock; extern char *kern_envp; extern char static_env[]; extern char static_hints[]; /* by config for now */ extern char **kenvp; extern const void *zero_region; /* address space maps to a zeroed page */ extern int unmapped_buf_allowed; extern int iosize_max_clamp; extern int devfs_iosize_max_clamp; #define IOSIZE_MAX (iosize_max_clamp ? INT_MAX : SSIZE_MAX) #define DEVFS_IOSIZE_MAX (devfs_iosize_max_clamp ? INT_MAX : SSIZE_MAX) /* * General function declarations. 
*/ struct inpcb; struct lock_object; struct malloc_type; struct mtx; struct proc; struct socket; struct thread; struct tty; struct ucred; struct uio; struct _jmp_buf; struct trapframe; int setjmp(struct _jmp_buf *) __returns_twice; void longjmp(struct _jmp_buf *, int) __dead2; int dumpstatus(vm_offset_t addr, off_t count); int nullop(void); int eopnotsupp(void); int ureadc(int, struct uio *); void hashdestroy(void *, struct malloc_type *, u_long); void *hashinit(int count, struct malloc_type *type, u_long *hashmask); void *hashinit_flags(int count, struct malloc_type *type, u_long *hashmask, int flags); #define HASH_NOWAIT 0x00000001 #define HASH_WAITOK 0x00000002 void *phashinit(int count, struct malloc_type *type, u_long *nentries); void g_waitidle(void); void panic(const char *, ...) __dead2 __printflike(1, 2); void cpu_boot(int); void cpu_flush_dcache(void *, size_t); void cpu_rootconf(void); void critical_enter(void); void critical_exit(void); void init_param1(void); void init_param2(long physpages); void init_static_kenv(char *, size_t); void tablefull(const char *); int kvprintf(char const *, void (*)(int, void*), void *, int, __va_list) __printflike(1, 0); void log(int, const char *, ...) __printflike(2, 3); void log_console(struct uio *); int printf(const char *, ...) __printflike(1, 2); int snprintf(char *, size_t, const char *, ...) __printflike(3, 4); int sprintf(char *buf, const char *, ...) __printflike(2, 3); int uprintf(const char *, ...) __printflike(1, 2); int vprintf(const char *, __va_list) __printflike(1, 0); int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0); int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0); int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0); int ttyprintf(struct tty *, const char *, ...) __printflike(2, 3); int sscanf(const char *, char const *, ...) 
__nonnull(1) __nonnull(2); int vsscanf(const char *, char const *, __va_list) __nonnull(1) __nonnull(2); long strtol(const char *, char **, int) __nonnull(1); u_long strtoul(const char *, char **, int) __nonnull(1); quad_t strtoq(const char *, char **, int) __nonnull(1); u_quad_t strtouq(const char *, char **, int) __nonnull(1); void tprintf(struct proc *p, int pri, const char *, ...) __printflike(3, 4); void hexdump(const void *ptr, int length, const char *hdr, int flags); #define HD_COLUMN_MASK 0xff #define HD_DELIM_MASK 0xff00 #define HD_OMIT_COUNT (1 << 16) #define HD_OMIT_HEX (1 << 17) #define HD_OMIT_CHARS (1 << 18) #define ovbcopy(f, t, l) bcopy((f), (t), (l)) void bcopy(const void *from, void *to, size_t len) __nonnull(1) __nonnull(2); void bzero(void *buf, size_t len) __nonnull(1); void *memcpy(void *to, const void *from, size_t len) __nonnull(1) __nonnull(2); void *memmove(void *dest, const void *src, size_t n) __nonnull(1) __nonnull(2); int copystr(const void * __restrict kfaddr, void * __restrict kdaddr, size_t len, size_t * __restrict lencopied) __nonnull(1) __nonnull(2); int copyinstr(const void * __restrict udaddr, void * __restrict kaddr, size_t len, size_t * __restrict lencopied) __nonnull(1) __nonnull(2); int copyin(const void * __restrict udaddr, void * __restrict kaddr, size_t len) __nonnull(1) __nonnull(2); int copyin_nofault(const void * __restrict udaddr, void * __restrict kaddr, size_t len) __nonnull(1) __nonnull(2); int copyout(const void * __restrict kaddr, void * __restrict udaddr, size_t len) __nonnull(1) __nonnull(2); int copyout_nofault(const void * __restrict kaddr, void * __restrict udaddr, size_t len) __nonnull(1) __nonnull(2); int fubyte(const void *base); long fuword(const void *base); int fuword16(void *base); int32_t fuword32(const void *base); int64_t fuword64(const void *base); int subyte(void *base, int byte); int suword(void *base, long word); int suword16(void *base, int word); int suword32(void *base, int32_t word); int 
suword64(void *base, int64_t word); uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval); u_long casuword(volatile u_long *p, u_long oldval, u_long newval); void realitexpire(void *); int sysbeep(int hertz, int period); void hardclock(int usermode, uintfptr_t pc); void hardclock_cnt(int cnt, int usermode); void hardclock_cpu(int usermode); void hardclock_sync(int cpu); void softclock(void *); void statclock(int usermode); void statclock_cnt(int cnt, int usermode); void profclock(int usermode, uintfptr_t pc); void profclock_cnt(int cnt, int usermode, uintfptr_t pc); int hardclockintr(void); void startprofclock(struct proc *); void stopprofclock(struct proc *); void cpu_startprofclock(void); void cpu_stopprofclock(void); void cpu_idleclock(void); void cpu_activeclock(void); extern int cpu_can_deep_sleep; extern int cpu_disable_deep_sleep; int cr_cansee(struct ucred *u1, struct ucred *u2); int cr_canseesocket(struct ucred *cred, struct socket *so); int cr_canseeinpcb(struct ucred *cred, struct inpcb *inp); char *getenv(const char *name); void freeenv(char *env); int getenv_int(const char *name, int *data); int getenv_uint(const char *name, unsigned int *data); int getenv_long(const char *name, long *data); int getenv_ulong(const char *name, unsigned long *data); int getenv_string(const char *name, char *data, int size); int getenv_quad(const char *name, quad_t *data); int setenv(const char *name, const char *value); int unsetenv(const char *name); int testenv(const char *name); typedef uint64_t (cpu_tick_f)(void); void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var); extern cpu_tick_f *cpu_ticks; uint64_t cpu_tickrate(void); uint64_t cputick2usec(uint64_t tick); #ifdef APM_FIXUP_CALLTODO struct timeval; void adjust_timeout_calltodo(struct timeval *time_change); #endif /* APM_FIXUP_CALLTODO */ #include /* Initialize the world */ void consinit(void); void cpu_initclocks(void); void cpu_initclocks_bsp(void); void 
cpu_initclocks_ap(void); void usrinfoinit(void); /* Finalize the world */ void kern_reboot(int) __dead2; void shutdown_nice(int); /* Timeouts */ typedef void timeout_t(void *); /* timeout function type */ #define CALLOUT_HANDLE_INITIALIZER(handle) \ { NULL } void callout_handle_init(struct callout_handle *); struct callout_handle timeout(timeout_t *, void *, int); void untimeout(timeout_t *, void *, struct callout_handle); caddr_t kern_timeout_callwheel_alloc(caddr_t v); void kern_timeout_callwheel_init(void); /* Stubs for obsolete functions that used to be for interrupt management */ static __inline void spl0(void) { return; } static __inline intrmask_t splbio(void) { return 0; } static __inline intrmask_t splcam(void) { return 0; } static __inline intrmask_t splclock(void) { return 0; } static __inline intrmask_t splhigh(void) { return 0; } static __inline intrmask_t splimp(void) { return 0; } static __inline intrmask_t splnet(void) { return 0; } static __inline intrmask_t splsoftcam(void) { return 0; } static __inline intrmask_t splsoftclock(void) { return 0; } static __inline intrmask_t splsofttty(void) { return 0; } static __inline intrmask_t splsoftvm(void) { return 0; } static __inline intrmask_t splsofttq(void) { return 0; } static __inline intrmask_t splstatclock(void) { return 0; } static __inline intrmask_t spltty(void) { return 0; } static __inline intrmask_t splvm(void) { return 0; } static __inline void splx(intrmask_t ipl __unused) { return; } /* * Common `proc' functions are declared here so that proc.h can be included * less often. 
*/ int _sleep(void *chan, struct lock_object *lock, int pri, const char *wmesg, int timo) __nonnull(1); #define msleep(chan, mtx, pri, wmesg, timo) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo)) int msleep_spin(void *chan, struct mtx *mtx, const char *wmesg, int timo) __nonnull(1); int pause(const char *wmesg, int timo); #define tsleep(chan, pri, wmesg, timo) \ _sleep((chan), NULL, (pri), (wmesg), (timo)) void wakeup(void *chan) __nonnull(1); void wakeup_one(void *chan) __nonnull(1); /* * Common `struct cdev *' stuff are declared here to avoid #include poisoning */ struct cdev; dev_t dev2udev(struct cdev *x); const char *devtoname(struct cdev *cdev); int poll_no_poll(int events); /* XXX: Should be void nanodelay(u_int nsec); */ void DELAY(int usec); /* Root mount holdback API */ struct root_hold_token; struct root_hold_token *root_mount_hold(const char *identifier); void root_mount_rel(struct root_hold_token *h); void root_mount_wait(void); int root_mounted(void); /* * Unit number allocation API. (kern/subr_unit.c) */ struct unrhdr; struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex); void delete_unrhdr(struct unrhdr *uh); void clean_unrhdr(struct unrhdr *uh); void clean_unrhdrl(struct unrhdr *uh); int alloc_unr(struct unrhdr *uh); int alloc_unr_specific(struct unrhdr *uh, u_int item); int alloc_unrl(struct unrhdr *uh); void free_unr(struct unrhdr *uh, u_int item); /* * Population count algorithm using SWAR approach * - "SIMD Within A Register". 
*/ static __inline uint32_t bitcount32(uint32_t x) { x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); x = (x + (x >> 4)) & 0x0f0f0f0f; x = (x + (x >> 8)); x = (x + (x >> 16)) & 0x000000ff; return (x); } static __inline uint16_t bitcount16(uint32_t x) { x = (x & 0x5555) + ((x & 0xaaaa) >> 1); x = (x & 0x3333) + ((x & 0xcccc) >> 2); x = (x + (x >> 4)) & 0x0f0f; x = (x + (x >> 8)) & 0x00ff; return (x); } #endif /* !_SYS_SYSTM_H_ */ Index: stable/9/sys/sys =================================================================== --- stable/9/sys/sys (revision 273911) +++ stable/9/sys/sys (revision 273912) Property changes on: stable/9/sys/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/sys:r228478,263710,273377-273378,273423,273455,273899 Index: stable/9/sys/vm/memguard.c =================================================================== --- stable/9/sys/vm/memguard.c (revision 273911) +++ stable/9/sys/vm/memguard.c (revision 273912) @@ -1,469 +1,469 @@ /*- * Copyright (c) 2005, Bosko Milekic . * Copyright (c) 2010 Isilon Systems, Inc. (http://www.isilon.com/) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * MemGuard is a simple replacement allocator for debugging only * which provides ElectricFence-style memory barrier protection on * objects being allocated, and is used to detect tampering-after-free * scenarios. * * See the memguard(9) man page for more information on using MemGuard. */ #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static SYSCTL_NODE(_vm, OID_AUTO, memguard, CTLFLAG_RW, NULL, "MemGuard data"); /* * The vm_memguard_divisor variable controls how much of kmem_map should be * reserved for MemGuard. */ static u_int vm_memguard_divisor; SYSCTL_UINT(_vm_memguard, OID_AUTO, divisor, CTLFLAG_RDTUN, &vm_memguard_divisor, 0, "(kmem_size/memguard_divisor) == memguard submap size"); /* * Short description (ks_shortdesc) of memory type to monitor. */ static char vm_memguard_desc[128] = ""; static struct malloc_type *vm_memguard_mtype = NULL; TUNABLE_STR("vm.memguard.desc", vm_memguard_desc, sizeof(vm_memguard_desc)); static int memguard_sysctl_desc(SYSCTL_HANDLER_ARGS) { char desc[sizeof(vm_memguard_desc)]; int error; strlcpy(desc, vm_memguard_desc, sizeof(desc)); error = sysctl_handle_string(oidp, desc, sizeof(desc), req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&malloc_mtx); /* * If mtp is NULL, it will be initialized in memguard_cmp(). 
*/ vm_memguard_mtype = malloc_desc2type(desc); strlcpy(vm_memguard_desc, desc, sizeof(vm_memguard_desc)); mtx_unlock(&malloc_mtx); return (error); } SYSCTL_PROC(_vm_memguard, OID_AUTO, desc, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, memguard_sysctl_desc, "A", "Short description of memory type to monitor"); static vm_map_t memguard_map = NULL; static vm_offset_t memguard_cursor; static vm_size_t memguard_mapsize; static vm_size_t memguard_physlimit; static u_long memguard_wasted; static u_long memguard_wrap; static u_long memguard_succ; static u_long memguard_fail_kva; static u_long memguard_fail_pgs; SYSCTL_ULONG(_vm_memguard, OID_AUTO, cursor, CTLFLAG_RD, &memguard_cursor, 0, "MemGuard cursor"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, mapsize, CTLFLAG_RD, &memguard_mapsize, 0, "MemGuard private vm_map size"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, phys_limit, CTLFLAG_RD, &memguard_physlimit, 0, "Limit on MemGuard memory consumption"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, wasted, CTLFLAG_RD, &memguard_wasted, 0, "Excess memory used through page promotion"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, wrapcnt, CTLFLAG_RD, &memguard_wrap, 0, "MemGuard cursor wrap count"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, numalloc, CTLFLAG_RD, &memguard_succ, 0, "Count of successful MemGuard allocations"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, fail_kva, CTLFLAG_RD, &memguard_fail_kva, 0, "MemGuard failures due to lack of KVA"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, fail_pgs, CTLFLAG_RD, &memguard_fail_pgs, 0, "MemGuard failures due to lack of pages"); #define MG_GUARD 0x001 #define MG_ALLLARGE 0x002 static int memguard_options = MG_GUARD; TUNABLE_INT("vm.memguard.options", &memguard_options); SYSCTL_INT(_vm_memguard, OID_AUTO, options, CTLFLAG_RW, &memguard_options, 0, "MemGuard options:\n" "\t0x001 - add guard pages around each allocation\n" "\t0x002 - always use MemGuard for allocations over a page"); static u_int memguard_minsize; static u_long memguard_minsize_reject; 
SYSCTL_UINT(_vm_memguard, OID_AUTO, minsize, CTLFLAG_RW, &memguard_minsize, 0, "Minimum size for page promotion"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, minsize_reject, CTLFLAG_RD, &memguard_minsize_reject, 0, "# times rejected for size"); static u_int memguard_frequency; static u_long memguard_frequency_hits; TUNABLE_INT("vm.memguard.frequency", &memguard_frequency); SYSCTL_UINT(_vm_memguard, OID_AUTO, frequency, CTLFLAG_RW, &memguard_frequency, 0, "Times in 100000 that MemGuard will randomly run"); SYSCTL_ULONG(_vm_memguard, OID_AUTO, frequency_hits, CTLFLAG_RD, &memguard_frequency_hits, 0, "# times MemGuard randomly chose"); /* * Return a fudged value to be used for vm_kmem_size for allocating * the kmem_map. The memguard memory will be a submap. */ unsigned long memguard_fudge(unsigned long km_size, const struct vm_map *parent_map) { u_long mem_pgs, parent_size; vm_memguard_divisor = 10; TUNABLE_INT_FETCH("vm.memguard.divisor", &vm_memguard_divisor); parent_size = vm_map_max(parent_map) - vm_map_min(parent_map) + PAGE_SIZE; /* Pick a conservative value if provided value sucks. */ if ((vm_memguard_divisor <= 0) || ((parent_size / vm_memguard_divisor) == 0)) vm_memguard_divisor = 10; /* * Limit consumption of physical pages to * 1/vm_memguard_divisor of system memory. If the KVA is * smaller than this then the KVA limit comes into play first. * This prevents memguard's page promotions from completely * using up memory, since most malloc(9) calls are sub-page. */ mem_pgs = cnt.v_page_count; memguard_physlimit = (mem_pgs / vm_memguard_divisor) * PAGE_SIZE; /* * We want as much KVA as we can take safely. Use at most our * allotted fraction of the parent map's size. Limit this to * twice the physical memory to avoid using too much memory as * pagetable pages (size must be multiple of PAGE_SIZE). 
*/ memguard_mapsize = round_page(parent_size / vm_memguard_divisor); if (memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs) memguard_mapsize = mem_pgs * 2 * PAGE_SIZE; if (km_size + memguard_mapsize > parent_size) memguard_mapsize = 0; return (km_size + memguard_mapsize); } /* * Initialize the MemGuard mock allocator. All objects from MemGuard come * out of a single VM map (contiguous chunk of address space). */ void memguard_init(vm_map_t parent_map) { vm_offset_t base, limit; memguard_map = kmem_suballoc(parent_map, &base, &limit, memguard_mapsize, FALSE); memguard_map->system_map = 1; KASSERT(memguard_mapsize == limit - base, ("Expected %lu, got %lu", (u_long)memguard_mapsize, (u_long)(limit - base))); memguard_cursor = base; printf("MEMGUARD DEBUGGING ALLOCATOR INITIALIZED:\n"); printf("\tMEMGUARD map base: 0x%lx\n", (u_long)base); printf("\tMEMGUARD map limit: 0x%lx\n", (u_long)limit); printf("\tMEMGUARD map size: %jd KBytes\n", (uintmax_t)memguard_mapsize >> 10); } /* * Run things that can't be done as early as memguard_init(). */ static void memguard_sysinit(void) { struct sysctl_oid_list *parent; parent = SYSCTL_STATIC_CHILDREN(_vm_memguard); - SYSCTL_ADD_ULONG(NULL, parent, OID_AUTO, "mapstart", CTLFLAG_RD, + SYSCTL_ADD_UAUTO(NULL, parent, OID_AUTO, "mapstart", CTLFLAG_RD, &memguard_map->min_offset, "MemGuard KVA base"); - SYSCTL_ADD_ULONG(NULL, parent, OID_AUTO, "maplimit", CTLFLAG_RD, + SYSCTL_ADD_UAUTO(NULL, parent, OID_AUTO, "maplimit", CTLFLAG_RD, &memguard_map->max_offset, "MemGuard KVA end"); - SYSCTL_ADD_ULONG(NULL, parent, OID_AUTO, "mapused", CTLFLAG_RD, + SYSCTL_ADD_UAUTO(NULL, parent, OID_AUTO, "mapused", CTLFLAG_RD, &memguard_map->size, "MemGuard KVA used"); } SYSINIT(memguard, SI_SUB_KLD, SI_ORDER_ANY, memguard_sysinit, NULL); /* * v2sizep() converts a virtual address of the first page allocated for * an item to a pointer to u_long recording the size of the original * allocation request. 
* * This routine is very similar to those defined by UMA in uma_int.h. * The difference is that this routine stores the originally allocated * size in one of the page's fields that is unused when the page is * wired rather than the object field, which is used. */ static u_long * v2sizep(vm_offset_t va) { vm_paddr_t pa; struct vm_page *p; pa = pmap_kextract(va); if (pa == 0) panic("MemGuard detected double-free of %p", (void *)va); p = PHYS_TO_VM_PAGE(pa); KASSERT(p->wire_count != 0 && p->queue == PQ_NONE, ("MEMGUARD: Expected wired page %p in vtomgfifo!", p)); return ((u_long *)&p->pageq.tqe_next); } /* * Allocate a single object of specified size with specified flags * (either M_WAITOK or M_NOWAIT). */ void * memguard_alloc(unsigned long req_size, int flags) { vm_offset_t addr; u_long size_p, size_v; int do_guard, rv; size_p = round_page(req_size); if (size_p == 0) return (NULL); /* * To ensure there are holes on both sides of the allocation, * request 2 extra pages of KVA. We will only actually add a * vm_map_entry and get pages for the original request. Save * the value of memguard_options so we have a consistent * value. */ size_v = size_p; do_guard = (memguard_options & MG_GUARD) != 0; if (do_guard) size_v += 2 * PAGE_SIZE; vm_map_lock(memguard_map); /* * When we pass our memory limit, reject sub-page allocations. * Page-size and larger allocations will use the same amount * of physical memory whether we allocate or hand off to * uma_large_alloc(), so keep those. */ if (memguard_map->size >= memguard_physlimit && req_size < PAGE_SIZE) { addr = (vm_offset_t)NULL; memguard_fail_pgs++; goto out; } /* * Keep a moving cursor so we don't recycle KVA as long as * possible. It's not perfect, since we don't know in what * order previous allocations will be free'd, but it's simple * and fast, and requires O(1) additional storage if guard * pages are not used. * * XXX This scheme will lead to greater fragmentation of the * map, unless vm_map_findspace() is tweaked. 
*/ for (;;) { rv = vm_map_findspace(memguard_map, memguard_cursor, size_v, &addr); if (rv == KERN_SUCCESS) break; /* * The map has no space. This may be due to * fragmentation, or because the cursor is near the * end of the map. */ if (memguard_cursor == vm_map_min(memguard_map)) { memguard_fail_kva++; addr = (vm_offset_t)NULL; goto out; } memguard_wrap++; memguard_cursor = vm_map_min(memguard_map); } if (do_guard) addr += PAGE_SIZE; rv = kmem_back(memguard_map, addr, size_p, flags); if (rv != KERN_SUCCESS) { memguard_fail_pgs++; addr = (vm_offset_t)NULL; goto out; } memguard_cursor = addr + size_p; *v2sizep(trunc_page(addr)) = req_size; memguard_succ++; if (req_size < PAGE_SIZE) { memguard_wasted += (PAGE_SIZE - req_size); if (do_guard) { /* * Align the request to 16 bytes, and return * an address near the end of the page, to * better detect array overrun. */ req_size = roundup2(req_size, 16); addr += (PAGE_SIZE - req_size); } } out: vm_map_unlock(memguard_map); return ((void *)addr); } int is_memguard_addr(void *addr) { vm_offset_t a = (vm_offset_t)(uintptr_t)addr; return (a >= memguard_map->min_offset && a < memguard_map->max_offset); } /* * Free specified single object. */ void memguard_free(void *ptr) { vm_offset_t addr; u_long req_size, size; char *temp; int i; addr = trunc_page((uintptr_t)ptr); req_size = *v2sizep(addr); size = round_page(req_size); /* * Page should not be guarded right now, so force a write. * The purpose of this is to increase the likelihood of * catching a double-free, but not necessarily a * tamper-after-free (the second thread freeing might not * write before freeing, so this forces it to and, * subsequently, trigger a fault). */ temp = ptr; for (i = 0; i < size; i += PAGE_SIZE) temp[i] = 'M'; /* * This requires carnal knowledge of the implementation of * kmem_free(), but since we've already replaced kmem_malloc() * above, it's not really any worse. 
We want to use the * vm_map lock to serialize updates to memguard_wasted, since * we had the lock at increment. */ vm_map_lock(memguard_map); if (req_size < PAGE_SIZE) memguard_wasted -= (PAGE_SIZE - req_size); (void)vm_map_delete(memguard_map, addr, addr + size); vm_map_unlock(memguard_map); } /* * Re-allocate an allocation that was originally guarded. */ void * memguard_realloc(void *addr, unsigned long size, struct malloc_type *mtp, int flags) { void *newaddr; u_long old_size; /* * Allocate the new block. Force the allocation to be guarded * as the original may have been guarded through random * chance, and that should be preserved. */ if ((newaddr = memguard_alloc(size, flags)) == NULL) return (NULL); /* Copy over original contents. */ old_size = *v2sizep(trunc_page((uintptr_t)addr)); bcopy(addr, newaddr, min(size, old_size)); memguard_free(addr); return (newaddr); } int memguard_cmp(struct malloc_type *mtp, unsigned long size) { if (size < memguard_minsize) { memguard_minsize_reject++; return (0); } if ((memguard_options & MG_ALLLARGE) != 0 && size >= PAGE_SIZE) return (1); if (memguard_frequency > 0 && (random() % 100000) < memguard_frequency) { memguard_frequency_hits++; return (1); } #if 1 /* * The safest way of comparison is to always compare the short description * string of the memory type, but it is also the slowest way. */ return (strcmp(mtp->ks_shortdesc, vm_memguard_desc) == 0); #else /* * If we compare pointers, there are two possible problems: * 1. Memory type was unloaded and new memory type was allocated at the * same address. * 2. Memory type was unloaded and loaded again, but allocated at a * different address.
*/ if (vm_memguard_mtype != NULL) return (mtp == vm_memguard_mtype); if (strcmp(mtp->ks_shortdesc, vm_memguard_desc) == 0) { vm_memguard_mtype = mtp; return (1); } return (0); #endif } Index: stable/9/sys =================================================================== --- stable/9/sys (revision 273911) +++ stable/9/sys (revision 273912) Property changes on: stable/9/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,2 ## Merged /head/sys:r228478,263710,273377-273378,273423,273455,273899 Merged /projects/pf/head/sys:r251993