Index: projects/pnfs-planb-server/sys/amd64/amd64/machdep.c =================================================================== --- projects/pnfs-planb-server/sys/amd64/amd64/machdep.c (revision 334966) +++ projects/pnfs-planb-server/sys/amd64/amd64/machdep.c (revision 334967) @@ -1,2647 +1,2650 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" +#include "opt_pci.h" #include "opt_platform.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_ATPIC #include #else #include #endif #include #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); /* * The PTI trampoline stack needs enough space for a hardware trapframe and a * couple of scratch registers, as well as the trapframe left behind after an * iret fault. */ CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) - offsetof(struct pti_frame, pti_rip)); extern u_int64_t hammer_time(u_int64_t, u_int64_t); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Preload data parse function */ static caddr_t native_parse_preload_data(u_int64_t); /* Native function to fetch and parse the e820 map */ static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); /* Default init_ops implementation. */ struct init_ops init_ops = { .parse_preload_data = native_parse_preload_data, .early_clock_source_init = i8254_init, .early_delay = i8254_delay, .parse_memmap = native_parse_memmap, #ifdef SMP .mp_bootaddress = mp_bootaddress, .start_all_aps = native_start_all_aps, #endif +#ifdef DEV_PCI .msi_init = msi_init, +#endif }; /* * Physical address of the EFI System Table. Stashed from the metadata hints * passed into the kernel and used by the EFI code to call runtime services. */ vm_paddr_t efi_systbl_phys; /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2) #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; struct mtx dt_lock; /* lock for GDT and LDT */ void (*vmm_resume_p)(void); static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. 
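 * Concretely, the code below clears bit 3 (LEGACY_USB_EN, mask 0x8) in the
 * 32-bit register at I/O port ICH_SMI_EN (ICH_PMBASE + 0x30), leaving all
 * other SMI enable bits untouched.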
*/ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_free_count())) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct pcb *pcb; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; pcb = td->td_pcb; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); update_pcb_bases(pcb); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, sizeof(sf.sf_uc.uc_mcontext.mc_spare)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. 
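 * The frame goes either on the alternate signal stack, or on the normal
 * user stack; in the latter case it is placed 128 bytes below %rsp so the
 * amd64 ABI red zone of the interrupted code is left intact.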
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned long)sp & ~0x3Ful); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = p->p_sysent->sv_sigcode_base; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct pcb *pcb; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; long rflags; int cs, error, ret; ksiginfo_t ksi; pcb = td->td_pcb; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) { uprintf("pid %d (%s): sigreturn copyin failed\n", p->p_pid, td->td_name); return (error); } ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(rflags, regs->tf_rflags)) { uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid, td->td_name, rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
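 * CS_SECURE() only verifies that the requested privilege level is user
 * (ISPL(cs) == SEL_UPL); everything else about the selector is left for
 * the hardware to reject, as described above.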
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) { uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); update_pcb_bases(pcb); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs; struct pcb *pcb; register_t saved_rflags; regs = td->td_frame; pcb = td->td_pcb; if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); update_pcb_bases(pcb); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; saved_rflags = regs->tf_rflags & PSL_T; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; regs->tf_rdi = stack; /* argv */ regs->tf_rflags = PSL_USER | saved_rflags; regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { register_t cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them. 
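 * CR0_WP makes supervisor-mode writes honor page-level write protection,
 * and CR0_AM enables alignment checking (in combination with EFLAGS.AC)
 * for user-mode code.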
*/ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); } /* * Initialize amd64 and configure to run kernel */ /* * Initialize segments & interrupt table */ struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); static char mce0_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); static char dbg0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; /* * Software prototypes -- in more palatable form. * * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same * slots as corresponding segments for i386 kernel. */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GNULL2_SEL 1 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUFS32_SEL 2 32 bit %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS32_SEL 3 32 bit %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 8 64 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1, .ssd_type = SDT_SYSTSS, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Actually, the TSS is a system descriptor which is double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 LDT Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 12 LDT Descriptor, double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; void setidt(int idx, inthand_t *func, int typ, int dpl, int ist) { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl =
dpl; ip->gd_p = 1; ip->gd_hioffset = ((uintptr_t)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), IDTVEC(div_pti), IDTVEC(bpt_pti), IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti), IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti), IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti), IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti), IDTVEC(xmm_pti), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti), #endif IDTVEC(fast_syscall), IDTVEC(fast_syscall32), IDTVEC(fast_syscall_pti); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { struct { uint16_t limit; uint64_t base; } __packed idtr, gdtr; uint16_t ldt, tr; __asm __volatile("sidt %0" : "=m" (idtr)); db_printf("idtr\t0x%016lx/%04x\n", (u_long)idtr.base, (u_int)idtr.limit); __asm __volatile("sgdt %0" : "=m" (gdtr)); db_printf("gdtr\t0x%016lx/%04x\n", (u_long)gdtr.base, (u_int)gdtr.limit); __asm __volatile("sldt %0" : "=r" (ldt)); db_printf("ldtr\t0x%04x\n", ldt); __asm __volatile("str %0" : "=r" (tr)); db_printf("tr\t0x%04x\n", tr); db_printf("cr0\t0x%016lx\n", rcr0()); db_printf("cr2\t0x%016lx\n", rcr2()); db_printf("cr3\t0x%016lx\n", rcr3()); db_printf("cr4\t0x%016lx\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016lx\n", rxcr(0)); db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t%016lx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT)); db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%016lx\n", rdr0()); db_printf("dr1\t0x%016lx\n", rdr1()); db_printf("dr2\t0x%016lx\n", rdr2()); db_printf("dr3\t0x%016lx\n", rdr3()); db_printf("dr6\t0x%016lx\n", rdr6()); db_printf("dr7\t0x%016lx\n", rdr7()); } #endif void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct 
soft_segment_descriptor *ssd; struct system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } #if !defined(DEV_ATPIC) && defined(DEV_ISA) #include #include /* * Return a bitmap of the current interrupt requests. This is 8259-specific * and is only suitable for use at probe time. * This is only here to pacify sio. It is NOT FATAL if this doesn't work. * It shouldn't be here. There should probably be an APIC centric * implementation in the apic driver code, if at all. */ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } #endif u_int basemem; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. * * NB: physmap_idx points to the next free slot. */ insert_idx = physmap_idx; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } void bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, vm_paddr_t *physmap, int *physmap_idx) { struct bios_smap *smap, *smapend; smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) continue; if (!add_physmap_entry(smap->base, smap->length, physmap, physmap_idx)) break; } } static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, int *physmap_idx) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode", "PersistentMemory" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. 
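 * The map is a packed array of descriptors, each efihdr->descriptor_size
 * bytes long (possibly larger than sizeof(struct efi_md)), so it must be
 * walked with efi_next_descriptor() rather than plain pointer arithmetic.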
*/ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type < nitems(types)) type = types[p->md_type]; else type = "<INVALID>"; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_NV) printf("NV "); if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) printf("MORE_RELIABLE "); if (p->md_attr & EFI_MD_ATTR_RO) printf("RO "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idx)) break; } } static char bootmethod[16] = ""; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); static void native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) { struct bios_smap *smap; struct efi_map_header *efihdr; u_int32_t size; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. */ efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); smap = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (efihdr == NULL && smap == NULL) panic("No BIOS smap or EFI map info from loader!"); if (efihdr != NULL) { add_efi_map_entries(efihdr, physmap, physmap_idx); strlcpy(bootmethod, "UEFI", sizeof(bootmethod)); } else { size = *((u_int32_t *)smap - 1); bios_add_smap_entries(smap, size, physmap, physmap_idx); strlcpy(bootmethod, "BIOS", sizeof(bootmethod)); } } #define PAGES_PER_GB (1024 * 1024 * 1024 / PAGE_SIZE) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t.
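 * physmap[] holds consecutive [start, end) pairs; for example (values
 * purely illustrative), a machine with two usable ranges might end up with
 *   physmap[0] = 0x0000000000001000, physmap[1] = 0x000000000009f000,
 *   physmap[2] = 0x0000000000100000, physmap[3] = 0x00000000cfe00000.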
*/ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx, da_indx; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; u_long physmem_start, physmem_tunable, memtest; pt_entry_t *pte; quad_t dcons_addr, dcons_size; int page_counter; bzero(physmap, sizeof(physmap)); physmap_idx = 0; init_ops.parse_memmap(kmdp, physmap, &physmap_idx); physmap_idx -= 2; /* * Find the 'base memory' segment for SMP */ basemem = 0; for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] <= 0xA0000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0 || basemem > 640) { if (bootverbose) printf( "Memory map doesn't contain a basemem segment, faking it"); basemem = 640; } /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * The boot memory test is disabled by default, as it takes a * significant amount of time on large-memory systems, and is * unfriendly to virtual machines as it unnecessarily touches all * pages. * * A general name is used as the code may be extended to support * additional tests beyond the current "page present" test. */ memtest = 0; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); /* * Don't allow MAXMEM or hw.physmem to extend the amount of memory * in the system. */ if (Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * Make hole for "AP -> long mode" bootstrap code. The * mp_bootaddress vector is only available when the kernel * is configured to support APs and APs for the system start * in real mode (e.g. SMP bare metal). */ if (init_ops.mp_bootaddress) init_ops.mp_bootaddress(physmap, &physmap_idx); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. * * XXX Some BIOSes corrupt low 64KB between suspend and resume. * By default, mask off the first 16 pages unless we appear to be * running in a VM. */ physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); if (physmap[0] < physmem_start) { if (physmem_start < PAGE_SIZE) physmap[0] = PAGE_SIZE; else if (physmem_start >= physmap[1]) physmap[0] = round_page(physmap[1] - PAGE_SIZE); else physmap[0] = round_page(physmem_start); } pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ page_counter = 0; if (memtest != 0) printf("Testing system memory"); for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available.
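 * The range [kernphys, first) covers the loaded kernel image plus the
 * early allocations done in hammer_time() (thread0's kernel stack and the
 * dynamic per-CPU area) before the VM system could track them.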
*/ if (pa >= (vm_paddr_t)kernphys && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * Print a "." every GB to show we're making * progress. */ page_counter++; if ((page_counter % PAGES_PER_GB) == 0) printf("."); /* * map page into kernel: valid, read/write, non-cacheable */ *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one past the last valid * address, making the range >= start and < end. * If we're also doing a speculative memory * test and we are at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); if (memtest != 0) printf("\n"); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer.
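 * The pages just trimmed from the top of the last phys_avail chunk are
 * addressed through the direct map, so no kernel VA allocation is needed.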
*/ msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]); } static caddr_t native_parse_preload_data(u_int64_t modulep) { caddr_t kmdp; char *envp; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); if (envp != NULL) envp += KERNBASE; init_static_kenv(envp, 0); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); return (kmdp); } static void amd64_kdb_init(void) { kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* Set up the fast syscall stuff */ void amd64_conf_fast_syscall(void) { uint64_t msr; msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) : (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D); } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; struct nmi_pcpu *np; struct xstate_hdr *xhdr; u_int64_t rsp0; char *env; size_t kstack0_sz; int late_console; TSRAW(&thread0, TS_ENTER, __func__, NULL); kmdp = init_ops.parse_preload_data(modulep); identify_cpu1(); identify_hypervisor(); /* * hw.cpu_stdext_disable is ignored by the call; it will be * re-evaluated by the below call to finishidentcpu(). */ identify_cpu2(); link_elf_ireloc(kmdp); /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); /* Init basic tunables, hz etc */ init_param1(); thread0.td_kstack = physfree + KERNBASE; thread0.td_kstack_pages = kstack_pages; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; bzero((void *)thread0.td_kstack, kstack0_sz); physfree += kstack0_sz; /* * make gdt memory segments */ for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); dpcpu_init((void *)(physfree + KERNBASE), 0); physfree += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* Non-late cninit() and printf() can be moved up to here. */ PCPU_SET(tssp, &common_tss[0]); PCPU_SET(commontssp, &common_tss[0]); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); /* * Initialize mutexes.
* * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ pti = pti_get_default(); TUNABLE_INT_FETCH("vm.pmap.pti", &pti); for (x = 0; x < NIDT; x++) setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3); setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) : &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); #endif #ifdef XENHVM setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) : &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); /* * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4) * transition). * Once bootblocks have updated, we can test directly for * efi_systbl != NULL here... */ if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP) != NULL) vty_set_preferred(VTY_VT); finishidentcpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* * NMI stack, runs on ist2. The pcpu pointer is stored just * above the start of the ist2 stack. */ np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist2 = (long) np; /* * MC# stack, runs on ist3. The pcpu pointer is stored just * above the start of the ist3 stack. */ np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist3 = (long) np; /* * DB# stack, runs on ist4. 
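 * As with the NMI and MC# stacks above, a struct nmi_pcpu placed at the
 * top of the stack preserves the pcpu pointer, since the handler cannot
 * trust the %gs base on entry.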
*/ np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist4 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); amd64_conf_fast_syscall(); /* * Temporarily forge some valid pointer to PCB, for exception * handlers. It is reinitialized properly below after FPU is * set up. Also set up td_critnest to short-cut the page * fault handler. */ cpu_max_ext_state_size = sizeof(struct savefpu); thread0.td_pcb = get_pcb_td(&thread0); thread0.td_critnest = 1; /* * The console and kdb should be initialized even earlier than here, * but some console drivers don't work until after getmemsize(). * Default to late console initialization to support these drivers. * This loses mainly printf()s in getmemsize() and early debugging. */ late_console = 1; TUNABLE_INT_FETCH("debug.late_console", &late_console); if (!late_console) { cninit(); amd64_kdb_init(); } getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured, and u/iom is accessible */ if (late_console) cninit(); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); #endif #else #error "have you forgotten the isa device?"; #endif if (late_console) amd64_kdb_init(); msgbufinit(msgbufp, msgbufsize); fpuinit(); /* * Set up thread0 pcb after fpuinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } /* make an initial tss so cpu can get interrupt stack on syscall!
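 * tss_rsp0 and the pcpu rsp0 copy set below must agree: the CPU loads
 * rsp0 from the TSS on a user-to-kernel transition, while the fast
 * syscall path uses the pcpu copy, and both point at the 16-byte aligned
 * base of thread0's PCB, below which the kernel stack grows.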
*/ rsp0 = (vm_offset_t)thread0.td_pcb; /* Ensure the stack is aligned to 16 bytes */ rsp0 &= ~0xFul; common_tss[0].tss_rsp0 = rsp0; PCPU_SET(rsp0, rsp0); PCPU_SET(pti_rsp0, ((vm_offset_t)PCPU_PTR(pti_stack) + PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful); PCPU_SET(curpcb, thread0.td_pcb); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); _ufssel = GSEL(GUFS32_SEL, SEL_UPL); _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_frame = &proc0_tf; env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif thread0.td_critnest = 0; TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable); TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable); TSEXIT(); /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); static int efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct efi_map_header *efihdr; caddr_t kmdp; uint32_t efisize; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr == NULL) return (0); efisize = *((uint32_t *)efihdr - 1); return (SYSCTL_OUT(req, efihdr, efisize)); } SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map"); void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; critical_enter(); } else td->td_md.md_spinlock_count++; } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { critical_exit(); intr_restore(flags); } } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. 
The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_r12 = tf->tf_r12; pcb->pcb_r13 = tf->tf_r13; pcb->pcb_r14 = tf->tf_r14; pcb->pcb_r15 = tf->tf_r15; pcb->pcb_rbp = tf->tf_rbp; pcb->pcb_rbx = tf->tf_rbx; pcb->pcb_rip = tf->tf_rip; pcb->pcb_rsp = tf->tf_rsp; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } int ptrace_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if ((td->td_frame->tf_rflags & PSL_T) == 0) { td->td_frame->tf_rflags |= PSL_T; td->td_dbgflags |= TDB_STEP; } return (0); } int ptrace_clear_single_step(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); td->td_frame->tf_rflags &= ~PSL_T; td->td_dbgflags &= ~TDB_STEP; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; tp = td->td_frame; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; if (tp->tf_flags & TF_HASSEGS) { regs->r_ds = tp->tf_ds; regs->r_es = tp->tf_es; regs->r_fs = tp->tf_fs; regs->r_gs = tp->tf_gs; } else { regs->r_ds = 0; regs->r_es = 0; regs->r_fs = 0; regs->r_gs = 0; } return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; register_t rflags; tp = td->td_frame; rflags = regs->r_rflags & 0xffffffff; if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; if (0) { /* XXXKIB */ tp->tf_ds = regs->r_ds; tp->tf_es = regs->r_es; tp->tf_fs = regs->r_fs; tp->tf_gs = regs->r_gs; tp->tf_flags = TF_HASSEGS; } set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } /* XXX check all this stuff! 
*/ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); fpugetregs(td); fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); fpuuserinited(td); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; struct trapframe *tp; pcb = td->td_pcb; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; mcp->mc_rflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; mcp->mc_rflags &= ~PSL_C; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_ds = tp->tf_ds; mcp->mc_es = tp->tf_es; mcp->mc_fs = tp->tf_fs; mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); update_pcb_bases(pcb); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
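 * The merge below makes that explicit:
 *   rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
 *       (tp->tf_rflags & ~PSL_USERCHANGE);
 * i.e. only user-changeable bits are taken from the supplied context.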
*/ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tp; char *xfpustate; long rflags; int ret; pcb = td->td_pcb; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; tp->tf_r14 = mcp->mc_r14; tp->tf_r13 = mcp->mc_r13; tp->tf_r12 = mcp->mc_r12; tp->tf_r11 = mcp->mc_r11; tp->tf_r10 = mcp->mc_r10; tp->tf_r9 = mcp->mc_r9; tp->tf_r8 = mcp->mc_r8; tp->tf_rdi = mcp->mc_rdi; tp->tf_rsi = mcp->mc_rsi; tp->tf_rbp = mcp->mc_rbp; tp->tf_rbx = mcp->mc_rbx; tp->tf_rdx = mcp->mc_rdx; tp->tf_rcx = mcp->mc_rcx; tp->tf_rax = mcp->mc_rax; tp->tf_rip = mcp->mc_rip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; tp->tf_flags = mcp->mc_flags; if (tp->tf_flags & TF_HASSEGS) { tp->tf_ds = mcp->mc_ds; tp->tf_es = mcp->mc_es; tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } set_pcb_flags(pcb, PCB_FULL_IRET); if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; mcp->mc_ownedfp = fpugetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(struct savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, xfpusave_len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); if (PCPU_GET(fpcurthread) == td) fpudrop(); /* * XXX force a full drop of the fpu. The above only drops it if we * owned it. * * XXX I don't much like fpugetuserregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of fpugetuserregs()... perhaps we just * have too many layers.
*/ clear_pcb_flags(curthread->td_pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[8] = 0; dbregs->dr[9] = 0; dbregs->dr[10] = 0; dbregs->dr[11] = 0; dbregs->dr[12] = 0; dbregs->dr[13] = 0; dbregs->dr[14] = 0; dbregs->dr[15] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP or a general protection fault right here. * Upper bits of dr6 and dr7 must not be set */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (td->td_frame->tf_cs == _ucode32sel && DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) return (EINVAL); } if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || (dbregs->dr[7] & 0xffffffff00000000ul) != 0) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; set_pcb_flags(pcb, PCB_DBREGS); } return (0); } void reset_dbregs(void) { load_dr7(0); /* Turn off the control bits first */ load_dr0(0); load_dr1(0); load_dr2(0); load_dr3(0); load_dr6(0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
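A hedged userland sketch of how the debug-register paths below are reached: arming a hardware breakpoint in a stopped child with ptrace(2) lands in set_dbregs(), which is what enforces the DR7 validation and the VM_MAXUSER_ADDRESS checks. DR7 bit positions are written out by hand here rather than relying on particular macro names.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <machine/reg.h>
#include <err.h>
#include <stdint.h>

static void
arm_exec_breakpoint(pid_t pid, uintptr_t addr)
{
	struct dbreg db;

	if (ptrace(PT_GETDBREGS, pid, (caddr_t)&db, 0) == -1)
		err(1, "PT_GETDBREGS");
	db.dr[0] = addr;		/* must be below VM_MAXUSER_ADDRESS */
	db.dr[7] &= ~(0xfUL << 16);	/* R/W0 = execute, LEN0 = 1 byte */
	db.dr[7] |= 0x1;		/* L0: locally enable slot 0 */
	if (ptrace(PT_SETDBREGS, pid, (caddr_t)&db, 0) == -1)
		err(1, "PT_SETDBREGS");
}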
*/ int user_dbreg_trap(register_t dr6) { u_int64_t dr7; u_int64_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; bp = dr6 & DBREG_DR6_BMASK; if (bp == 0) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } /* * The pcb_flags is only modified by current thread, or by other threads * when current thread is stopped. However, current thread may change it * from the interrupt context in cpu_switch(), or in the trap handler. * When we read-modify-write pcb_flags from C sources, compiler may generate * code that is not atomic regarding the interrupt handler. If a trap or * interrupt happens and any flag is modified from the handler, it can be * clobbered with the cached value later. Therefore, we implement setting * and clearing flags with single-instruction functions, which do not race * with possible modification of the flags from the trap or interrupt context, * because traps and interrupts are executed only on instruction boundary. */ void set_pcb_flags_raw(struct pcb *pcb, const u_int flags) { __asm __volatile("orl %1,%0" : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) : "cc", "memory"); } /* * The support for RDFSBASE, WRFSBASE and similar instructions for %gs * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into * pcb if user space modified the bases. We must save on the context * switch or if the return to usermode happens through the doreti. * * Tracking of both events is performed by the pcb flag PCB_FULL_IRET, * which have a consequence that the base MSRs must be saved each time * the PCB_FULL_IRET flag is set. We disable interrupts to sync with * context switches. */ void set_pcb_flags(struct pcb *pcb, const u_int flags) { register_t r; if (curpcb == pcb && (flags & PCB_FULL_IRET) != 0 && (pcb->pcb_flags & PCB_FULL_IRET) == 0 && (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) { r = intr_disable(); if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) { if (rfs() == _ufssel) pcb->pcb_fsbase = rdfsbase(); if (rgs() == _ugssel) pcb->pcb_gsbase = rdmsr(MSR_KGSBASE); } set_pcb_flags_raw(pcb, flags); intr_restore(r); } else { set_pcb_flags_raw(pcb, flags); } } void clear_pcb_flags(struct pcb *pcb, const u_int flags) { __asm __volatile("andl %1,%0" : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) : "cc", "memory"); } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. 
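The pcb_flags comment above states the key constraint: a C-level "flags |= bits" may compile to separate load, OR, and store instructions, and an interrupt taken between them can lose a concurrent update. A self-contained sketch of the same memory-destination OR idiom:

#include <stdint.h>

static inline void
or_flags_intr_safe(volatile uint32_t *flags, uint32_t bits)
{
	/*
	 * A single instruction: an interrupt can only observe the flags
	 * entirely before or entirely after the update, never mid-RMW.
	 */
	__asm__ __volatile__("orl %1,%0"
	    : "+m" (*flags)
	    : "ir" (bits)
	    : "cc", "memory");
}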
*/ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ Index: projects/pnfs-planb-server/sys/amd64/amd64/mp_machdep.c =================================================================== --- projects/pnfs-planb-server/sys/amd64/amd64/mp_machdep.c (revision 334966) +++ projects/pnfs-planb-server/sys/amd64/amd64/mp_machdep.c (revision 334967) @@ -1,675 +1,675 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1996, by Steve Passe * Copyright (c) 2003, by Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_sched.h" #include "opt_smp.h" #include #include #include #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) #define GiB(v) (v ## ULL << 30) extern struct pcpu __pcpu[]; /* Temporary variables for init_secondary() */ char *doublefault_stack; char *mce_stack; char *nmi_stack; char *dbg_stack; /* * Local data and functions. */ static int start_ap(int apic_id); /* * Calculate usable address in base memory for AP trampoline code. */ void mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx) { unsigned int i; bool allocated; alloc_ap_trampoline(physmap, physmap_idx); allocated = false; for (i = *physmap_idx; i <= *physmap_idx; i -= 2) { /* * Find a memory region big enough below the 4GB * boundary to store the initial page tables. Region * must be mapped by the direct map. * * Note that it needs to be aligned to a page * boundary. 
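A self-contained rendering of the physmap scan that follows, with local types, to make the loop termination explicit: the kernel iterates i = idx, idx - 2, ..., 0 over (start, end) pairs, and the final unsigned i -= 2 wraps above idx, ending the loop.

#include <stdbool.h>
#include <stdint.h>

#define	PAGE_SZ		4096UL
#define	GIB4		(4ULL << 30)
#define	round_pg(x)	(((x) + PAGE_SZ - 1) & ~(PAGE_SZ - 1))

static bool
find_low_pages(const uint64_t *map, unsigned idx, uint64_t *out)
{
	unsigned i;

	/* idx indexes the start of the last pair; unsigned wrap ends it. */
	for (i = idx; i <= idx; i -= 2) {
		if (map[i] >= GIB4 ||
		    map[i + 1] - round_pg(map[i]) < PAGE_SZ * 3)
			continue;
		*out = round_pg(map[i]);	/* three pages fit here */
		return (true);
	}
	return (false);
}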
*/ if (physmap[i] >= GiB(4) || physmap[i + 1] - round_page(physmap[i]) < PAGE_SIZE * 3 || - physmap[i + 1] > Maxmem) + atop(physmap[i + 1]) > Maxmem) continue; allocated = true; mptramp_pagetables = round_page(physmap[i]); physmap[i] = round_page(physmap[i]) + (PAGE_SIZE * 3); if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) { memmove(&physmap[i], &physmap[i + 2], sizeof(*physmap) * (*physmap_idx - i + 2)); *physmap_idx -= 2; } break; } if (!allocated) { mptramp_pagetables = trunc_page(boot_address) - (PAGE_SIZE * 3); if (bootverbose) printf( "Cannot find enough space for the initial AP page tables, placing them at %#x", mptramp_pagetables); } } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ if (pmap_pcid_enabled) { if (invpcid_works) { setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_invpcid_pti_pti) : IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) : IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) : IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0); } else { setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) : IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) : IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) : IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0); } } else { setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); } /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) : IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) : IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. */ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ init_ops.start_all_aps(); set_interrupt_apic_ids(); } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; struct nmi_pcpu *np; u_int64_t cr0; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; /* Set by the startup code for us to use */ cpu = bootAP; /* Init tss */ common_tss[cpu] = common_tss[0]; common_tss[cpu].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; /* The NMI stack runs on IST2. 
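The IST setup below uses a small idiom worth spelling out: a struct nmi_pcpu is parked at the very top of each interrupt-stack page and the stack grows down beneath it, so the handler can always recover its per-CPU pointer at a fixed offset from the stack base. A generic sketch with illustrative names:

#include <stdint.h>

#define	PAGE_SZ	4096

struct stack_top {			/* stands in for struct nmi_pcpu */
	uintptr_t np_pcpu;		/* saved per-CPU pointer */
};

static struct stack_top *
stack_top_ref(char *stack_page)
{
	/* One struct below the page end; SP starts just under it. */
	return (((struct stack_top *)&stack_page[PAGE_SZ]) - 1);
}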
*/ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist2 = (long) np; /* The MC# stack runs on IST3. */ np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist3 = (long) np; /* The DB# stack runs on IST4. */ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist4 = (long) np; /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1)) ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]); } ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]); ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; ap_gdt.rd_base = (long) &gdt[NGDT * cpu]; lgdt(&ap_gdt); /* does magic intra-segment return */ /* Get per-cpu data */ pc = &__pcpu[cpu]; /* prime data page for it to use */ pcpu_init(pc, cpu, sizeof(struct pcpu)); dpcpu_init(dpcpu, cpu); pc->pc_apic_id = cpu_apic_ids[cpu]; pc->pc_prvspace = pc; pc->pc_curthread = 0; pc->pc_tssp = &common_tss[cpu]; pc->pc_commontssp = &common_tss[cpu]; pc->pc_rsp0 = 0; pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack + PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful); pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]; pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL]; pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GUSERLDT_SEL]; pc->pc_curpmap = kernel_pmap; pc->pc_pcid_gen = 1; pc->pc_pcid_next = PMAP_PCID_KERN + 1; common_tss[cpu].tss_rsp0 = 0; /* Save the per-cpu pointer for use by the NMI handler. */ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; np->np_pcpu = (register_t) pc; /* Save the per-cpu pointer for use by the MC# handler. */ np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; np->np_pcpu = (register_t) pc; /* Save the per-cpu pointer for use by the DB# handler. */ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; np->np_pcpu = (register_t) pc; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ fix_cpuid(); lidt(&r_idt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); amd64_conf_fast_syscall(); /* signal our startup to the BSP. */ mp_naps++; /* Spin until the BSP releases the AP's. 
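A C11 rendering of the release/acquire handshake the APs perform below while spinning on aps_ready; _mm_pause() stands in for the kernel's ia32_pause().

#include <stdatomic.h>
#include <immintrin.h>

static _Atomic int ready;

static void
ap_wait(void)
{
	/* Pairs with the release store in bsp_release(). */
	while (atomic_load_explicit(&ready, memory_order_acquire) == 0)
		_mm_pause();	/* ease off the sibling hyperthread */
}

static void
bsp_release(void)
{
	atomic_store_explicit(&ready, 1, memory_order_release);
}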
*/ while (atomic_load_acq_int(&aps_ready) == 0) ia32_pause(); init_secondary_tail(); } /******************************************************************* * local functions and data */ /* * start each AP in our list */ int native_start_all_aps(void) { u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; int apic_id, cpu, i; u_char mpbiosreason; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* copy the AP 1st level boot code */ bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size); /* Locate the page tables, they'll be below the trampoline */ pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables); pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { /* Each slot of the level 4 pages points to the same level 3 page */ pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); pt4[i] |= PG_V | PG_RW | PG_U; /* Each slot of the level 3 pages points to the same level 2 page */ pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); pt3[i] |= PG_V | PG_RW | PG_U; /* The level 2 page slots are mapped with 2MB pages for 1GB. */ pt2[i] = i * (2 * 1024 * 1024); pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; } /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, kstack_pages * PAGE_SIZE, M_WAITOK | M_ZERO); doublefault_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); mce_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 8; bootAP = cpu; /* attempt to start the Application Processor */ if (!start_ap(apic_id)) { /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); /* number of APs actually started */ return mp_naps; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. 
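The page-table construction in native_start_all_aps() above reads as the following standalone sketch: every level-4 slot aliases one level-3 page, every level-3 slot aliases one level-2 page, and that level-2 page maps the low 1 GiB with 2 MiB pages, so the trampoline's physical %rip is mapped no matter which slot decodes it. Flag values mirror PG_V/PG_RW/PG_PS (the committed code also sets PG_U).

#include <stdint.h>

#define	PG_V	0x001
#define	PG_RW	0x002
#define	PG_PS	0x080
#define	NPTE	512

static void
build_identity_1g(uint64_t *pt4, uint64_t *pt3, uint64_t *pt2,
    uint64_t pt3_pa, uint64_t pt2_pa)
{
	int i;

	for (i = 0; i < NPTE; i++) {
		pt4[i] = pt3_pa | PG_V | PG_RW;	/* all L4 -> same L3 */
		pt3[i] = pt2_pa | PG_V | PG_RW;	/* all L3 -> same L2 */
		pt2[i] = (uint64_t)i * (2 * 1024 * 1024) |
		    PG_V | PG_RW | PG_PS;	/* 2 MiB identity map */
	}
}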
*/ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } void invltlb_invpcid_handler(void) { struct invpcid_descr d; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB : INVPCID_CTX); PCPU_SET(smp_tlb_done, generation); } void invltlb_invpcid_pti_handler(void) { struct invpcid_descr d; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; d.pad = 0; d.addr = 0; if (smp_tlb_pmap == kernel_pmap) { /* * This invalidation actually needs to clear kernel * mappings from the TLB in the current pmap, but * since we were asked for the flush in the kernel * pmap, achieve it by performing global flush. */ invpcid(&d, INVPCID_CTXGLOB); } else { invpcid(&d, INVPCID_CTX); d.pcid |= PMAP_PCID_USER_PT; invpcid(&d, INVPCID_CTX); } PCPU_SET(smp_tlb_done, generation); } void invltlb_pcid_handler(void) { uint64_t kcr3, ucr3; uint32_t generation, pcid; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; /* Overlap with serialization */ if (smp_tlb_pmap == kernel_pmap) { invltlb_glob(); } else { /* * The current pmap might not be equal to * smp_tlb_pmap. The clearing of the pm_gen in * pmap_invalidate_all() takes care of TLB * invalidation when switching to the pmap on this * CPU. 
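For reference, a minimal sketch of the INVPCID plumbing the shootdown handlers here rely on; the descriptor layout and inline assembly follow the architectural definition, with the type encodings written out locally.

#include <stdint.h>

struct invpcid_descr {
	uint64_t	pcid:12;
	uint64_t	pad:52;
	uint64_t	addr;
};

#define	INVPCID_ADDR	0	/* one address in one PCID */
#define	INVPCID_CTX	1	/* all non-global entries for one PCID */
#define	INVPCID_CTXGLOB	2	/* everything, including global entries */

static inline void
invpcid_sketch(const struct invpcid_descr *d, unsigned long type)
{
	__asm__ __volatile__("invpcid (%0),%1"
	    : : "r" (d), "r" (type) : "memory");
}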
*/ if (PCPU_GET(curpmap) == smp_tlb_pmap) { pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; kcr3 = smp_tlb_pmap->pm_cr3 | pcid; ucr3 = smp_tlb_pmap->pm_ucr3; if (ucr3 != PMAP_NO_CR3) { ucr3 |= PMAP_PCID_USER_PT | pcid; pmap_pti_pcid_invalidate(ucr3, kcr3); } else load_cr3(kcr3); } } PCPU_SET(smp_tlb_done, generation); } void invlpg_invpcid_handler(void) { struct invpcid_descr d; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_pg[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; /* Overlap with serialization */ invlpg(smp_tlb_addr1); if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) { d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid | PMAP_PCID_USER_PT; d.pad = 0; d.addr = smp_tlb_addr1; invpcid(&d, INVPCID_ADDR); } PCPU_SET(smp_tlb_done, generation); } void invlpg_pcid_handler(void) { uint64_t kcr3, ucr3; uint32_t generation; uint32_t pcid; #ifdef COUNT_XINVLTLB_HITS xhits_pg[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; /* Overlap with serialization */ invlpg(smp_tlb_addr1); if (smp_tlb_pmap == PCPU_GET(curpmap) && (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) { pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE; ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1); } PCPU_SET(smp_tlb_done, generation); } void invlrng_invpcid_handler(void) { struct invpcid_descr d; vm_offset_t addr, addr2; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_rng[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ addr = smp_tlb_addr1; addr2 = smp_tlb_addr2; generation = smp_tlb_generation; /* Overlap with serialization */ do { invlpg(addr); addr += PAGE_SIZE; } while (addr < addr2); if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) { d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid | PMAP_PCID_USER_PT; d.pad = 0; d.addr = smp_tlb_addr1; do { invpcid(&d, INVPCID_ADDR); d.addr += PAGE_SIZE; } while (d.addr < addr2); } PCPU_SET(smp_tlb_done, generation); } void invlrng_pcid_handler(void) { vm_offset_t addr, addr2; uint64_t kcr3, ucr3; uint32_t generation; uint32_t pcid; #ifdef COUNT_XINVLTLB_HITS xhits_rng[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ addr = smp_tlb_addr1; addr2 = smp_tlb_addr2; generation = smp_tlb_generation; /* Overlap with serialization */ do { invlpg(addr); addr += PAGE_SIZE; } while (addr < addr2); if (smp_tlb_pmap == PCPU_GET(curpmap) && (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) { pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE; ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2); } PCPU_SET(smp_tlb_done, generation); } Index: projects/pnfs-planb-server/sys/arm64/arm64/cpu_errata.c =================================================================== --- projects/pnfs-planb-server/sys/arm64/arm64/cpu_errata.c (revision 334966) +++ projects/pnfs-planb-server/sys/arm64/arm64/cpu_errata.c (revision 334967) @@ -1,103 +1,99 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018 Andrew Turner * All rights reserved. 
* * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include -#ifdef DEV_PSCI #include -#endif typedef void (cpu_quirk_install)(void); struct cpu_quirks { cpu_quirk_install *quirk_install; u_int midr_mask; u_int midr_value; }; static cpu_quirk_install install_psci_bp_hardening; static struct cpu_quirks cpu_quirks[] = { { .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0), .quirk_install = install_psci_bp_hardening, }, { .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0), .quirk_install = install_psci_bp_hardening, }, { .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0), .quirk_install = install_psci_bp_hardening, }, { .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0), .quirk_install = install_psci_bp_hardening, }, }; static void install_psci_bp_hardening(void) { -#ifdef DEV_PSCI PCPU_SET(bp_harden, psci_get_version); -#endif } void install_cpu_errata(void) { u_int midr; size_t i; midr = get_midr(); for (i = 0; i < nitems(cpu_quirks); i++) { if ((midr & cpu_quirks[i].midr_mask) == cpu_quirks[i].midr_value) { cpu_quirks[i].quirk_install(); } } } Index: projects/pnfs-planb-server/sys/arm64/arm64/exception.S =================================================================== --- projects/pnfs-planb-server/sys/arm64/arm64/exception.S (revision 334966) +++ projects/pnfs-planb-server/sys/arm64/arm64/exception.S (revision 334967) @@ -1,229 +1,230 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #include __FBSDID("$FreeBSD$"); #include "assym.inc" .text .macro save_registers el .if \el == 1 mov x18, sp sub sp, sp, #128 .endif sub sp, sp, #(TF_SIZE + 16) stp x29, x30, [sp, #(TF_SIZE)] stp x28, x29, [sp, #(TF_X + 28 * 8)] stp x26, x27, [sp, #(TF_X + 26 * 8)] stp x24, x25, [sp, #(TF_X + 24 * 8)] stp x22, x23, [sp, #(TF_X + 22 * 8)] stp x20, x21, [sp, #(TF_X + 20 * 8)] stp x18, x19, [sp, #(TF_X + 18 * 8)] stp x16, x17, [sp, #(TF_X + 16 * 8)] stp x14, x15, [sp, #(TF_X + 14 * 8)] stp x12, x13, [sp, #(TF_X + 12 * 8)] stp x10, x11, [sp, #(TF_X + 10 * 8)] stp x8, x9, [sp, #(TF_X + 8 * 8)] stp x6, x7, [sp, #(TF_X + 6 * 8)] stp x4, x5, [sp, #(TF_X + 4 * 8)] stp x2, x3, [sp, #(TF_X + 2 * 8)] stp x0, x1, [sp, #(TF_X + 0 * 8)] mrs x10, elr_el1 mrs x11, spsr_el1 mrs x12, esr_el1 .if \el == 0 mrs x18, sp_el0 .endif str x10, [sp, #(TF_ELR)] stp w11, w12, [sp, #(TF_SPSR)] stp x18, lr, [sp, #(TF_SP)] mrs x18, tpidr_el1 add x29, sp, #(TF_SIZE) .endm .macro restore_registers el .if \el == 1 msr daifset, #2 /* * Disable interrupts, x18 may change in the interrupt exception * handler. For EL0 exceptions, do_ast already did this. */ .endif ldp x18, lr, [sp, #(TF_SP)] ldp x10, x11, [sp, #(TF_ELR)] .if \el == 0 msr sp_el0, x18 .endif msr spsr_el1, x11 msr elr_el1, x10 ldp x0, x1, [sp, #(TF_X + 0 * 8)] ldp x2, x3, [sp, #(TF_X + 2 * 8)] ldp x4, x5, [sp, #(TF_X + 4 * 8)] ldp x6, x7, [sp, #(TF_X + 6 * 8)] ldp x8, x9, [sp, #(TF_X + 8 * 8)] ldp x10, x11, [sp, #(TF_X + 10 * 8)] ldp x12, x13, [sp, #(TF_X + 12 * 8)] ldp x14, x15, [sp, #(TF_X + 14 * 8)] ldp x16, x17, [sp, #(TF_X + 16 * 8)] .if \el == 0 /* * We only restore the callee saved registers when returning to * userland as they may have been updated by a system call or signal. 
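The do_ast macro defined a little further below implements the classic "test with interrupts disabled, loop if work appeared" pattern; in C it would read roughly as follows. Types and helper names here are illustrative, not kernel interfaces.

#define	TDF_ASTPENDING	0x0100	/* illustrative flag values */
#define	TDF_NEEDRESCHED	0x0200

struct thread_sketch {
	volatile unsigned td_flags;
};

void	intr_disable_sketch(void);
void	intr_enable_sketch(void);
void	ast_sketch(struct thread_sketch *);

static void
do_ast_sketch(struct thread_sketch *td)
{
	for (;;) {
		intr_disable_sketch();
		if ((td->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) == 0)
			return;	/* leave with interrupts masked for eret */
		intr_enable_sketch();
		ast_sketch(td);	/* may reschedule or deliver signals */
	}
}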
*/ ldp x18, x19, [sp, #(TF_X + 18 * 8)] ldp x20, x21, [sp, #(TF_X + 20 * 8)] ldp x22, x23, [sp, #(TF_X + 22 * 8)] ldp x24, x25, [sp, #(TF_X + 24 * 8)] ldp x26, x27, [sp, #(TF_X + 26 * 8)] ldp x28, x29, [sp, #(TF_X + 28 * 8)] .else ldr x29, [sp, #(TF_X + 29 * 8)] .endif .if \el == 0 add sp, sp, #(TF_SIZE + 16) .else mov sp, x18 mrs x18, tpidr_el1 .endif .endm .macro do_ast mrs x19, daif /* Make sure the IRQs are enabled before calling ast() */ bic x19, x19, #PSR_I 1: /* Disable interrupts */ msr daifset, #2 /* Read the current thread flags */ ldr x1, [x18, #PC_CURTHREAD] /* Load curthread */ ldr x2, [x1, #TD_FLAGS] /* Check if we have either bits set */ mov x3, #((TDF_ASTPENDING|TDF_NEEDRESCHED) >> 8) lsl x3, x3, #8 and x2, x2, x3 cbz x2, 2f /* Restore interrupts */ msr daif, x19 /* handle the ast */ mov x0, sp bl _C_LABEL(ast) /* Re-check for new ast scheduled */ b 1b 2: .endm ENTRY(handle_el1h_sync) save_registers 1 ldr x0, [x18, #PC_CURTHREAD] mov x1, sp bl do_el1h_sync restore_registers 1 eret END(handle_el1h_sync) ENTRY(handle_el1h_irq) save_registers 1 mov x0, sp bl intr_irq_handler restore_registers 1 eret END(handle_el1h_irq) -ENTRY(handle_el1h_error) - brk 0xf13 -END(handle_el1h_error) - ENTRY(handle_el0_sync) save_registers 0 ldr x0, [x18, #PC_CURTHREAD] mov x1, sp str x1, [x0, #TD_FRAME] bl do_el0_sync do_ast restore_registers 0 eret END(handle_el0_sync) ENTRY(handle_el0_irq) save_registers 0 mov x0, sp bl intr_irq_handler do_ast restore_registers 0 eret END(handle_el0_irq) -ENTRY(handle_el0_error) +ENTRY(handle_serror) save_registers 0 mov x0, sp - bl do_el0_error - brk 0xf23 - 1: b 1b -END(handle_el0_error) +1: bl do_serror + b 1b +END(handle_serror) +ENTRY(handle_empty_exception) + save_registers 0 + mov x0, sp +1: bl unhandled_exception + b 1b +END(handle_unhandled_exception) + .macro vempty .align 7 - brk 0xfff - 1: b 1b + b handle_empty_exception .endm .macro vector name .align 7 b handle_\name .endm .align 11 .globl exception_vectors exception_vectors: vempty /* Synchronous EL1t */ vempty /* IRQ EL1t */ vempty /* FIQ EL1t */ vempty /* Error EL1t */ vector el1h_sync /* Synchronous EL1h */ vector el1h_irq /* IRQ EL1h */ vempty /* FIQ EL1h */ - vector el1h_error /* Error EL1h */ + vector serror /* Error EL1h */ vector el0_sync /* Synchronous 64-bit EL0 */ vector el0_irq /* IRQ 64-bit EL0 */ vempty /* FIQ 64-bit EL0 */ - vector el0_error /* Error 64-bit EL0 */ + vector serror /* Error 64-bit EL0 */ vector el0_sync /* Synchronous 32-bit EL0 */ vector el0_irq /* IRQ 32-bit EL0 */ vempty /* FIQ 32-bit EL0 */ - vector el0_error /* Error 32-bit EL0 */ + vector serror /* Error 32-bit EL0 */ Index: projects/pnfs-planb-server/sys/arm64/arm64/trap.c =================================================================== --- projects/pnfs-planb-server/sys/arm64/arm64/trap.c (revision 334966) +++ projects/pnfs-planb-server/sys/arm64/arm64/trap.c (revision 334967) @@ -1,486 +1,512 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif #ifdef VFP #include #endif #ifdef KDB #include #endif #ifdef DDB #include #endif extern register_t fsu_intr_fault; /* Called from exception.S */ void do_el1h_sync(struct thread *, struct trapframe *); void do_el0_sync(struct thread *, struct trapframe *); void do_el0_error(struct trapframe *); +void do_serror(struct trapframe *); +void unhandled_exception(struct trapframe *); + static void print_registers(struct trapframe *frame); int (*dtrace_invop_jump_addr)(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int nap; nap = 8; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; sa->code = td->td_frame->tf_x[8]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); if (sa->narg > nap) panic("ARM64TODO: Could we have more than 8 args?"); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void svc_handler(struct thread *td, struct trapframe *frame) { int error; if ((frame->tf_esr & ESR_ELx_ISS_MASK) == 0) { error = syscallenter(td); syscallret(td, error); } else { call_trapsignal(td, SIGILL, ILL_ILLOPN, (void *)frame->tf_elr); userret(td, frame); } } static void data_abort(struct thread *td, struct trapframe *frame, uint64_t esr, uint64_t far, int lower) { struct vm_map *map; struct proc *p; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; int error, sig, ucode; #ifdef KDB bool handled; #endif /* * According to the ARMv8-A rev. A.g, B2.10.5 "Load-Exclusive * and Store-Exclusive instruction usage restrictions", state * of the exclusive monitors after data abort exception is unknown. 
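Context for the clrex() call that opens data_abort() below: FreeBSD reads the quoted ARMv8 rule as meaning a fault can strike between a load-exclusive and its paired store-exclusive, leaving the monitor in a state that could let a later store-exclusive spuriously succeed. A sketch of the instruction pair being protected (arm64 inline assembly):

#include <stdint.h>

static inline void
clrex_sketch(void)
{
	__asm__ __volatile__("clrex" ::: "memory");
}

/* Returns 1 if the exclusive store succeeded, 0 if it must be retried. */
static inline int
store_exclusive_sketch(uint64_t *p, uint64_t v)
{
	uint64_t old;
	uint32_t fail;

	__asm__ __volatile__(
	    "ldxr	%0, [%2]\n\t"
	    "stxr	%w1, %3, [%2]"
	    : "=&r" (old), "=&r" (fail)
	    : "r" (p), "r" (v)
	    : "memory");
	(void)old;
	return (fail == 0);
}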
*/ clrex(); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif pcb = td->td_pcb; p = td->td_proc; if (lower) map = &p->p_vmspace->vm_map; else { /* The top bit tells us which range to use */ if (far >= VM_MAXUSER_ADDRESS) { map = kernel_map; } else { map = &p->p_vmspace->vm_map; if (map == NULL) map = kernel_map; } } /* * The call to pmap_fault can be dangerous when coming from the * kernel as it may be not be able to lock the pmap to check if * the address is now valid. Because of this we filter the cases * when we are not going to see superpage activity. */ if (!lower) { /* * We may fault in a DMAP region due to a superpage being * unmapped when the access took place. */ if (map == kernel_map && !VIRT_IN_DMAP(far)) goto no_pmap_fault; /* * We can also fault in the userspace handling functions, * e.g. copyin. In these cases we will have set a fault * handler so we can check if this is set before calling * pmap_fault. */ if (map != kernel_map && pcb->pcb_onfault == 0) goto no_pmap_fault; } if (pmap_fault(map->pmap, esr, far) == KERN_SUCCESS) return; no_pmap_fault: KASSERT(td->td_md.md_spinlock_count == 0, ("data abort with spinlock held")); if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); panic("data abort in critical section or under mutex"); } va = trunc_page(far); ftype = ((esr >> 6) & 1) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ; /* Fault in the page. */ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (error != KERN_SUCCESS) { if (lower) { sig = SIGSEGV; if (error == KERN_PROTECTION_FAILURE) ucode = SEGV_ACCERR; else ucode = SEGV_MAPERR; call_trapsignal(td, sig, ucode, (void *)far); } else { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != 0) { frame->tf_x[0] = error; frame->tf_elr = pcb->pcb_onfault; return; } printf("Fatal data abort:\n"); print_registers(frame); printf(" far: %16lx\n", far); printf(" esr: %.8lx\n", esr); #ifdef KDB if (debugger_on_panic) { kdb_why = KDB_WHY_TRAP; handled = kdb_trap(ESR_ELx_EXCEPTION(esr), 0, frame); kdb_why = KDB_WHY_UNSET; if (handled) return; } #endif panic("vm_fault failed: %lx", frame->tf_elr); } } if (lower) userret(td, frame); } static void print_registers(struct trapframe *frame) { u_int reg; for (reg = 0; reg < nitems(frame->tf_x); reg++) { printf(" %sx%d: %16lx\n", (reg < 10) ? 
" " : "", reg, frame->tf_x[reg]); } printf(" sp: %16lx\n", frame->tf_sp); printf(" lr: %16lx\n", frame->tf_lr); printf(" elr: %16lx\n", frame->tf_elr); printf("spsr: %8x\n", frame->tf_spsr); } void do_el1h_sync(struct thread *td, struct trapframe *frame) { struct trapframe *oframe; uint32_t exception; uint64_t esr, far; /* Read the esr register to get the exception details */ esr = frame->tf_esr; exception = ESR_ELx_EXCEPTION(esr); #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR4(KTR_TRAP, "do_el1_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, frame->tf_elr, frame); oframe = td->td_frame; switch (exception) { case EXCP_BRK: case EXCP_WATCHPT_EL1: case EXCP_SOFTSTP_EL1: break; default: td->td_frame = frame; break; } switch(exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP if ((td->td_pcb->pcb_fpflags & PCB_FP_KERN) != 0) { vfp_restore_state(); } else #endif { print_registers(frame); printf(" esr: %.8lx\n", esr); panic("VFP exception in the kernel"); } break; case EXCP_INSN_ABORT: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); intr_enable(); data_abort(td, frame, esr, far, 0); break; case EXCP_BRK: #ifdef KDTRACE_HOOKS if ((esr & ESR_ELx_ISS_MASK) == 0x40d && \ dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(frame); break; } #endif #ifdef KDB kdb_trap(exception, 0, (td->td_frame != NULL) ? td->td_frame : frame); #else panic("No debugger in kernel.\n"); #endif frame->tf_elr += 4; break; case EXCP_WATCHPT_EL1: case EXCP_SOFTSTP_EL1: #ifdef KDB kdb_trap(exception, 0, (td->td_frame != NULL) ? td->td_frame : frame); #else panic("No debugger in kernel.\n"); #endif break; case EXCP_UNKNOWN: if (undef_insn(1, frame)) break; /* FALLTHROUGH */ default: print_registers(frame); panic("Unknown kernel exception %x esr_el1 %lx\n", exception, esr); } td->td_frame = oframe; } void do_el0_sync(struct thread *td, struct trapframe *frame) { pcpu_bp_harden bp_harden; uint32_t exception; uint64_t esr, far; /* Check we have a sane environment when entering from userland */ KASSERT((uintptr_t)get_pcpu() >= VM_MIN_KERNEL_ADDRESS, ("Invalid pcpu address from userland: %p (tpidr %lx)", get_pcpu(), READ_SPECIALREG(tpidr_el1))); esr = frame->tf_esr; exception = ESR_ELx_EXCEPTION(esr); switch (exception) { case EXCP_INSN_ABORT_L: far = READ_SPECIALREG(far_el1); /* * Userspace may be trying to train the branch predictor to * attack the kernel. If we are on a CPU affected by this * call the handler to clear the branch predictor state. 
*/ if (far > VM_MAXUSER_ADDRESS) { bp_harden = PCPU_GET(bp_harden); if (bp_harden != NULL) bp_harden(); } break; case EXCP_UNKNOWN: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: far = READ_SPECIALREG(far_el1); break; } intr_enable(); CTR4(KTR_TRAP, "do_el0_sync: curthread: %p, esr %lx, elr: %lx, frame: %p", td, esr, frame->tf_elr, frame); switch(exception) { case EXCP_FP_SIMD: case EXCP_TRAP_FP: #ifdef VFP vfp_restore_state(); #else panic("VFP exception in userland"); #endif break; case EXCP_SVC32: case EXCP_SVC64: svc_handler(td, frame); break; case EXCP_INSN_ABORT_L: case EXCP_DATA_ABORT_L: case EXCP_DATA_ABORT: data_abort(td, frame, esr, far, 1); break; case EXCP_UNKNOWN: if (!undef_insn(0, frame)) call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)far); userret(td, frame); break; case EXCP_SP_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_sp); userret(td, frame); break; case EXCP_PC_ALIGN: call_trapsignal(td, SIGBUS, BUS_ADRALN, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_BRK: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_MSR: call_trapsignal(td, SIGILL, ILL_PRVOPC, (void *)frame->tf_elr); userret(td, frame); break; case EXCP_SOFTSTP_EL0: td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; WRITE_SPECIALREG(MDSCR_EL1, READ_SPECIALREG(MDSCR_EL1) & ~DBG_MDSCR_SS); call_trapsignal(td, SIGTRAP, TRAP_TRACE, (void *)frame->tf_elr); userret(td, frame); break; default: call_trapsignal(td, SIGBUS, BUS_OBJERR, (void *)frame->tf_elr); userret(td, frame); break; } KASSERT((td->td_pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Kernel VFP flags set while entering userspace")); KASSERT( td->td_pcb->pcb_fpusaved == &td->td_pcb->pcb_fpustate, ("Kernel VFP state in use when entering userspace")); } +/* + * TODO: We will need to handle these later when we support ARMv8.2 RAS. + */ void -do_el0_error(struct trapframe *frame) +do_serror(struct trapframe *frame) { + uint64_t esr, far; - panic("ARM64TODO: do_el0_error"); + far = READ_SPECIALREG(far_el1); + esr = frame->tf_esr; + + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("Unhandled System Error"); } +void +unhandled_exception(struct trapframe *frame) +{ + uint64_t esr, far; + + far = READ_SPECIALREG(far_el1); + esr = frame->tf_esr; + + print_registers(frame); + printf(" far: %16lx\n", far); + printf(" esr: %.8lx\n", esr); + panic("Unhandled exception"); +} Index: projects/pnfs-planb-server/sys/arm64/arm64/vm_machdep.c =================================================================== --- projects/pnfs-planb-server/sys/arm64/arm64/vm_machdep.c (revision 334966) +++ projects/pnfs-planb-server/sys/arm64/arm64/vm_machdep.c (revision 334967) @@ -1,276 +1,272 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif -#ifdef DEV_PSCI #include -#endif /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; if ((flags & RFPROC) == 0) return; if (td1 == curthread) { /* * Save the tpidr_el0 and the vfp state, these normally happen * in cpu_switch, but if userland changes these then forks * this may not have happened. */ td1->td_pcb->pcb_tpidr_el0 = READ_SPECIALREG(tpidr_el0); #ifdef VFP if ((td1->td_pcb->pcb_fpflags & PCB_FP_STARTED) != 0) vfp_save_state(td1, td1->td_pcb); #endif } pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); td2->td_proc->p_md.md_l0addr = vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l0); tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); tf->tf_x[0] = 0; tf->tf_x[1] = 0; tf->tf_spsr = 0; td2->td_frame = tf; /* Set the return value registers for fork() */ td2->td_pcb->pcb_x[8] = (uintptr_t)fork_return; td2->td_pcb->pcb_x[9] = (uintptr_t)td2; td2->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame; td2->td_pcb->pcb_fpusaved = &td2->td_pcb->pcb_fpustate; td2->td_pcb->pcb_vfpcpu = UINT_MAX; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_daif = 0; } void cpu_reset(void) { -#ifdef DEV_PSCI psci_reset(); -#endif printf("cpu_reset failed"); while(1) __asm volatile("wfi" ::: "memory"); } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; /* carry bit */ break; case ERESTART: frame->tf_elr -= 4; break; case EJUSTRETURN: break; default: frame->tf_spsr |= PSR_C; /* carry bit */ frame->tf_x[0] = error; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. 
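The error convention implemented by cpu_set_syscall_retval() above is worth stating directly: x0 carries either the result or an errno value, and the PSR carry bit tells the libc stub which one it got. A decoder sketch, with the carry bit written out numerically rather than via PSR_C:

#include <stdint.h>

#define	PSR_C_SKETCH	0x20000000UL	/* SPSR carry bit, as PSR_C */

static long
decode_syscall_return(uint64_t x0, uint64_t spsr)
{
	if (spsr & PSR_C_SKETCH)
		return (-(long)x0);	/* failure: x0 holds the errno */
	return ((long)x0);		/* success: x0 holds the result */
}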
*/ void cpu_copy_thread(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_x[8] = (uintptr_t)fork_return; td->td_pcb->pcb_x[9] = (uintptr_t)td; td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; td->td_pcb->pcb_fpusaved = &td->td_pcb->pcb_fpustate; td->td_pcb->pcb_vfpcpu = UINT_MAX; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = 0; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size); tf->tf_elr = (register_t)entry; tf->tf_x[0] = (register_t)arg; } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; pcb->pcb_tpidr_el0 = (register_t)tls_base; if (td == curthread) WRITE_SPECIALREG(tpidr_el0, tls_base); return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)STACKALIGN( td->td_pcb - 1); } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_x[8] = (uintptr_t)func; td->td_pcb->pcb_x[9] = (uintptr_t)arg; td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; td->td_pcb->pcb_fpusaved = &td->td_pcb->pcb_fpustate; td->td_pcb->pcb_vfpcpu = UINT_MAX; } void cpu_exit(struct thread *td) { } void swi_vm(void *v) { if (busdma_swi_pending != 0) busdma_swi(); } Index: projects/pnfs-planb-server/sys/arm64/conf/GENERIC =================================================================== --- projects/pnfs-planb-server/sys/arm64/conf/GENERIC (revision 334966) +++ projects/pnfs-planb-server/sys/arm64/conf/GENERIC (revision 334967) @@ -1,265 +1,264 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/arm64 # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu ARM64 ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption #options VIMAGE # Subsystem virtualization, e.g. 
VNET options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_HHOOK # hhook(9) framework for TCP options TCP_OFFLOAD # TCP offload options TCP_RFC7413 # TCP Fast Open options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_PART_GPT # GUID Partition Tables. options GEOM_RAID # Soft RAID functionality. options GEOM_LABEL # Provides labelization options COMPAT_FREEBSD32 # Incomplete, but used by cloudabi32.ko. options COMPAT_FREEBSD11 # Compatible with FreeBSD11 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks options VFP # Floating-point support options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits options SMP options INTRNG # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. #options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options ALT_BREAK_TO_DEBUGGER # Enter debugger on keyboard escape sequence options USB_DEBUG # enable debug msgs # Kernel dump features. 
options EKCD # Support for encrypted kernel dumps options GZIO # gzip-compressed kernel and user dumps options ZSTDIO # zstd-compressed kernel and user dumps options NETDUMP # netdump(4) client support # SoC support options SOC_ALLWINNER_A64 options SOC_ALLWINNER_H5 options SOC_CAVM_THUNDERX options SOC_HISI_HI6220 options SOC_BRCM_BCM2837 options SOC_ROCKCHIP_RK3328 # Annapurna Alpine drivers device al_ccu # Alpine Cache Coherency Unit device al_nb_service # Alpine North Bridge Service device al_iofic # I/O Fabric Interrupt Controller device al_serdes # Serializer/Deserializer device al_udma # Universal DMA # Qualcomm Snapdragon drivers device qcom_gcc # Global Clock Controller # VirtIO support device virtio device virtio_pci device virtio_mmio device virtio_blk device vtnet # CPU frequency control device cpufreq # Bus drivers device pci device al_pci # Annapurna Alpine PCI-E options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support # Ethernet NICs device mdio device mii device miibus # MII bus support device awg # Allwinner EMAC Gigabit Ethernet device axgbe # AMD Opteron A1100 integrated NIC device em # Intel PRO/1000 Gigabit Ethernet Family device ix # Intel 10Gb Ethernet Family device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device neta # Marvell Armada 370/38x/XP/3700 NIC device smc # SMSC LAN91C111 device vnic # Cavium ThunderX NIC device al_eth # Annapurna Alpine Ethernet NIC # Block devices device ahci device scbus device da # ATA/SCSI peripherals device pass # Passthrough device (direct ATA/SCSI access) # MMC/SD/SDIO Card slot support device sdhci device aw_mmc # Allwinner SD/MMC controller device mmc # mmc/sd bus device mmcsd # mmc/sd flash cards device dwmmc # Serial (COM) ports device uart # Generic UART driver device uart_msm # Qualcomm MSM UART driver device uart_mvebu # Armada 3700 UART driver device uart_ns8250 # ns8250-type UART driver device uart_snps device pl011 # USB support device aw_ehci # Allwinner EHCI USB interface (USB 2.0) device aw_usbphy # Allwinner USB PHY device dwcotg # DWC OTG controller device ohci # OHCI USB interface device ehci # EHCI USB interface (USB 2.0) device ehci_mv # Marvell EHCI USB interface device xhci # XHCI PCI->USB interface (USB 3.0) device xhci_mv # Marvell XHCI USB interface device usb # USB Bus (required) device ukbd # Keyboard device umass # Disks/Mass storage - Requires scbus and da # USB ethernet support device smcphy device smsc # GPIO device aw_gpio # Allwinner GPIO controller device gpio device gpioled device fdt_pinctrl # I2C device aw_rsb # Allwinner Reduced Serial Bus device bcm2835_bsc # Broadcom BCM283x I2C bus device iicbus device iic device twsi # Allwinner I2C controller # Clock and reset controllers device aw_ccu # Allwinner clock controller # Interrupt controllers device aw_nmi # Allwinner NMI support # Real-time clock support device aw_rtc # Allwinner Real-time Clock device mv_rtc # Marvell Real-time Clock # Watchdog controllers device aw_wdog # Allwinner Watchdog # Power management controllers device axp81x # X-Powers AXP81x PMIC # EFUSE device aw_sid # Allwinner Secure ID EFUSE # Thermal sensors device aw_thermal # Allwinner Thermal Sensor Controller # SPI device spibus device bcm2835_spi # Broadcom BCM283x SPI bus # Console device vt device kbdmux # Pseudo devices. device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. 
device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module -device psci # Support for ARM PSCI options EFIRT # EFI Runtime Services # EXT_RESOURCES pseudo devices options EXT_RESOURCES device clk device phy device hwreset device regulator device syscon # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # Chip-specific errata options THUNDERX_PASS_1_1_ERRATA options FDT device acpi # The crypto framework is required by IPSEC device crypto # Required by IPSEC Index: projects/pnfs-planb-server/sys/compat/linuxkpi/common/include/asm/uaccess.h =================================================================== --- projects/pnfs-planb-server/sys/compat/linuxkpi/common/include/asm/uaccess.h (revision 334966) +++ projects/pnfs-planb-server/sys/compat/linuxkpi/common/include/asm/uaccess.h (revision 334967) @@ -1,55 +1,68 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _ASM_UACCESS_H_ #define _ASM_UACCESS_H_ #include static inline long copy_to_user(void *to, const void *from, unsigned long n) { if (linux_copyout(from, to, n) != 0) return n; return 0; } #define __copy_to_user(...) copy_to_user(__VA_ARGS__) static inline long copy_from_user(void *to, const void *from, unsigned long n) { if (linux_copyin(from, to, n) != 0) return n; return 0; } #define __copy_from_user(...) copy_from_user(__VA_ARGS__) #define __copy_in_user(...) 
copy_from_user(__VA_ARGS__) +#define user_access_begin() do { } while (0) +#define user_access_end() do { } while (0) + +#define unsafe_get_user(x, ptr, err) do { \ + if (unlikely(__get_user(x, ptr))) \ + goto err; \ +} while (0) + +#define unsafe_put_user(x, ptr, err) do { \ + if (unlikely(__put_user(x, ptr))) \ + goto err; \ +} while (0) + #endif /* _ASM_UACCESS_H_ */ Index: projects/pnfs-planb-server/sys/compat/linuxkpi/common/include/linux/kernel.h =================================================================== --- projects/pnfs-planb-server/sys/compat/linuxkpi/common/include/linux/kernel.h (revision 334966) +++ projects/pnfs-planb-server/sys/compat/linuxkpi/common/include/linux/kernel.h (revision 334967) @@ -1,460 +1,502 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * Copyright (c) 2014-2015 François Tigeot * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
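To illustrate the unsafe_get_user()/unsafe_put_user() shims added above: they expand to a plain __get_user()/__put_user() plus a goto on fault, and user_access_begin()/user_access_end() are no-ops in this implementation. A minimal, hypothetical caller (the function name and parameter are illustrative, not part of this change):

static long
fetch_user_int_sketch(const int __user *uptr)
{
	int v;

	user_access_begin();
	unsafe_get_user(v, uptr, fault);	/* jumps to "fault" on error */
	user_access_end();
	return (v);
fault:
	user_access_end();
	return (-EFAULT);
}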
* * $FreeBSD$ */ #ifndef _LINUX_KERNEL_H_ #define _LINUX_KERNEL_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include + #include +#include #include #define KERN_CONT "" #define KERN_EMERG "<0>" #define KERN_ALERT "<1>" #define KERN_CRIT "<2>" #define KERN_ERR "<3>" #define KERN_WARNING "<4>" #define KERN_NOTICE "<5>" #define KERN_INFO "<6>" #define KERN_DEBUG "<7>" #define U8_MAX ((u8)~0U) #define S8_MAX ((s8)(U8_MAX >> 1)) #define S8_MIN ((s8)(-S8_MAX - 1)) #define U16_MAX ((u16)~0U) #define S16_MAX ((s16)(U16_MAX >> 1)) #define S16_MIN ((s16)(-S16_MAX - 1)) #define U32_MAX ((u32)~0U) #define S32_MAX ((s32)(U32_MAX >> 1)) #define S32_MIN ((s32)(-S32_MAX - 1)) #define U64_MAX ((u64)~0ULL) #define S64_MAX ((s64)(U64_MAX >> 1)) #define S64_MIN ((s64)(-S64_MAX - 1)) #define S8_C(x) x #define U8_C(x) x ## U #define S16_C(x) x #define U16_C(x) x ## U #define S32_C(x) x #define U32_C(x) x ## U #define S64_C(x) x ## LL #define U64_C(x) x ## ULL #define BUILD_BUG() do { CTASSERT(0); } while (0) #define BUILD_BUG_ON(x) CTASSERT(!(x)) #define BUILD_BUG_ON_MSG(x, msg) BUILD_BUG_ON(x) #define BUILD_BUG_ON_NOT_POWER_OF_2(x) BUILD_BUG_ON(!powerof2(x)) #define BUILD_BUG_ON_INVALID(expr) while (0) { (void)(expr); } #define BUG() panic("BUG at %s:%d", __FILE__, __LINE__) #define BUG_ON(cond) do { \ if (cond) { \ panic("BUG ON %s failed at %s:%d", \ __stringify(cond), __FILE__, __LINE__); \ } \ } while (0) #define WARN_ON(cond) ({ \ bool __ret = (cond); \ if (__ret) { \ printf("WARNING %s failed at %s:%d\n", \ __stringify(cond), __FILE__, __LINE__); \ } \ unlikely(__ret); \ }) #define WARN_ON_SMP(cond) WARN_ON(cond) #define WARN_ON_ONCE(cond) ({ \ static bool __warn_on_once; \ bool __ret = (cond); \ if (__ret && !__warn_on_once) { \ __warn_on_once = 1; \ printf("WARNING %s failed at %s:%d\n", \ __stringify(cond), __FILE__, __LINE__); \ } \ unlikely(__ret); \ }) #define oops_in_progress SCHEDULER_STOPPED() #undef ALIGN #define ALIGN(x, y) roundup2((x), (y)) #undef PTR_ALIGN #define PTR_ALIGN(p, a) ((__typeof(p))ALIGN((uintptr_t)(p), (a))) #define DIV_ROUND_UP(x, n) howmany(x, n) #define DIV_ROUND_UP_ULL(x, n) DIV_ROUND_UP((unsigned long long)(x), (n)) #define FIELD_SIZEOF(t, f) sizeof(((t *)0)->f) #define printk(...) printf(__VA_ARGS__) #define vprintk(f, a) vprintf(f, a) struct va_format { const char *fmt; va_list *va; }; static inline int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) { ssize_t ssize = size; int i; i = vsnprintf(buf, size, fmt, args); return ((i >= ssize) ? (ssize - 1) : i); } static inline int scnprintf(char *buf, size_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); i = vscnprintf(buf, size, fmt, args); va_end(args); return (i); } /* * The "pr_debug()" and "pr_devel()" macros should produce zero code * unless DEBUG is defined: */ #ifdef DEBUG #define pr_debug(fmt, ...) \ log(LOG_DEBUG, fmt, ##__VA_ARGS__) #define pr_devel(fmt, ...) \ log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ ({ if (0) log(LOG_DEBUG, fmt, ##__VA_ARGS__); 0; }) #define pr_devel(fmt, ...) \ ({ if (0) log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__); 0; }) #endif #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #define printk_once(...) 
do { \ static bool __print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk(__VA_ARGS__); \ } \ } while (0) /* * Log a one-time message (analogous to WARN_ONCE() et al): */ #define log_once(level,...) do { \ static bool __log_once; \ \ if (unlikely(!__log_once)) { \ __log_once = true; \ log(level, __VA_ARGS__); \ } \ } while (0) #define pr_emerg(fmt, ...) \ log(LOG_EMERG, pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert(fmt, ...) \ log(LOG_ALERT, pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit(fmt, ...) \ log(LOG_CRIT, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ log(LOG_ERR, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) \ log(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn(...) \ pr_warning(__VA_ARGS__) #define pr_warn_once(fmt, ...) \ log_once(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice(fmt, ...) \ log(LOG_NOTICE, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ log(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ log_once(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__) #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) #define pr_warn_ratelimited(...) do { \ static linux_ratelimit_t __ratelimited; \ if (linux_ratelimited(&__ratelimited)) \ pr_warning(__VA_ARGS__); \ } while (0) #ifndef WARN #define WARN(condition, ...) ({ \ bool __ret_warn_on = (condition); \ if (unlikely(__ret_warn_on)) \ pr_warning(__VA_ARGS__); \ unlikely(__ret_warn_on); \ }) #endif #ifndef WARN_ONCE #define WARN_ONCE(condition, ...) ({ \ bool __ret_warn_on = (condition); \ if (unlikely(__ret_warn_on)) \ pr_warn_once(__VA_ARGS__); \ unlikely(__ret_warn_on); \ }) #endif #define container_of(ptr, type, member) \ ({ \ const __typeof(((type *)0)->member) *__p = (ptr); \ (type *)((uintptr_t)__p - offsetof(type, member)); \ }) #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define u64_to_user_ptr(val) ((void *)(uintptr_t)(val)) static inline unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { return (strtouq(cp, endp, base)); } static inline long long simple_strtoll(const char *cp, char **endp, unsigned int base) { return (strtoq(cp, endp, base)); } static inline unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) { return (strtoul(cp, endp, base)); } static inline long simple_strtol(const char *cp, char **endp, unsigned int base) { return (strtol(cp, endp, base)); } static inline int kstrtoul(const char *cp, unsigned int base, unsigned long *res) { char *end; *res = strtoul(cp, &end, base); /* skip newline character, if any */ if (*end == '\n') end++; if (*cp == 0 || *end != 0) return (-EINVAL); return (0); } static inline int kstrtol(const char *cp, unsigned int base, long *res) { char *end; *res = strtol(cp, &end, base); /* skip newline character, if any */ if (*end == '\n') end++; if (*cp == 0 || *end != 0) return (-EINVAL); return (0); } static inline int kstrtoint(const char *cp, unsigned int base, int *res) { char *end; long temp; *res = temp = strtol(cp, &end, base); /* skip newline character, if any */ if (*end == '\n') end++; if (*cp == 0 || *end != 0) return (-EINVAL); if (temp != (int)temp) return (-ERANGE); return (0); } static inline int kstrtouint(const char *cp, unsigned int base, unsigned int *res) { char *end; unsigned long temp; *res = temp = strtoul(cp, &end, base); /* skip newline character, if any */ if (*end == '\n') end++; if (*cp == 0 || *end != 0) return (-EINVAL); if (temp != (unsigned int)temp) return (-ERANGE); return (0); } static inline int 
kstrtou32(const char *cp, unsigned int base, u32 *res) { char *end; unsigned long temp; *res = temp = strtoul(cp, &end, base); /* skip newline character, if any */ if (*end == '\n') end++; if (*cp == 0 || *end != 0) return (-EINVAL); if (temp != (u32)temp) return (-ERANGE); return (0); +} + +static inline int +kstrtobool(const char *s, bool *res) +{ + int len; + + if (s == NULL || (len = strlen(s)) == 0 || res == NULL) + return (-EINVAL); + + /* skip newline character, if any */ + if (s[len - 1] == '\n') + len--; + + if (len == 1 && strchr("yY1", s[0]) != NULL) + *res = true; + else if (len == 1 && strchr("nN0", s[0]) != NULL) + *res = false; + else if (strncasecmp("on", s, len) == 0) + *res = true; + else if (strncasecmp("off", s, len) == 0) + *res = false; + else + return (-EINVAL); + + return (0); +} + +static inline int +kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + char buf[8] = {}; + + if (count > (sizeof(buf) - 1)) + count = (sizeof(buf) - 1); + + if (copy_from_user(buf, s, count)) + return (-EFAULT); + + return (kstrtobool(buf, res)); } #define min(x, y) ((x) < (y) ? (x) : (y)) #define max(x, y) ((x) > (y) ? (x) : (y)) #define min3(a, b, c) min(a, min(b,c)) #define max3(a, b, c) max(a, max(b,c)) #define min_t(type, x, y) ({ \ type __min1 = (x); \ type __min2 = (y); \ __min1 < __min2 ? __min1 : __min2; }) #define max_t(type, x, y) ({ \ type __max1 = (x); \ type __max2 = (y); \ __max1 > __max2 ? __max1 : __max2; }) #define clamp_t(type, _x, min, max) min_t(type, max_t(type, _x, min), max) #define clamp(x, lo, hi) min( max(x,lo), hi) #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be * as wide as the result!), and we want to evaluate the macro * arguments just once each. */ #define __round_mask(x, y) ((__typeof__(x))((y)-1)) #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define smp_processor_id() PCPU_GET(cpuid) #define num_possible_cpus() mp_ncpus #define num_online_cpus() mp_ncpus #if defined(__i386__) || defined(__amd64__) extern bool linux_cpu_has_clflush; #define cpu_has_clflush linux_cpu_has_clflush #endif typedef struct pm_message { int event; } pm_message_t; /* Swap values of a and b */ #define swap(a, b) do { \ typeof(a) _swap_tmp = a; \ a = b; \ b = _swap_tmp; \ } while (0) #define DIV_ROUND_CLOSEST(x, divisor) (((x) + ((divisor) / 2)) / (divisor)) #define DIV_ROUND_CLOSEST_ULL(x, divisor) ({ \ __typeof(divisor) __d = (divisor); \ unsigned long long __ret = (x) + (__d) / 2; \ __ret /= __d; \ __ret; \ }) static inline uintmax_t mult_frac(uintmax_t x, uintmax_t multiplier, uintmax_t divisor) { uintmax_t q = (x / divisor); uintmax_t r = (x % divisor); return ((q * multiplier) + ((r * multiplier) / divisor)); } static inline int64_t abs64(int64_t x) { return (x < 0 ? 
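A quick usage sketch for the kstrtobool() shim added in this hunk (the input strings and the caller are illustrative only):

static void
kstrtobool_example(void)
{
	bool v;

	/* "y", "Y", "1", "on" parse as true; "n", "N", "0", "off" as false */
	if (kstrtobool("on\n", &v) == 0)
		printf("parsed: %d\n", v);	/* trailing newline is skipped */

	/* anything else is rejected */
	if (kstrtobool("maybe", &v) == -EINVAL)
		printf("rejected\n");
}

kstrtobool_from_user() performs the same parse after copying at most seven bytes from userspace, so longer writes are truncated before parsing rather than rejected.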
-x : x); } typedef struct linux_ratelimit { struct timeval lasttime; int counter; } linux_ratelimit_t; static inline bool linux_ratelimited(linux_ratelimit_t *rl) { return (ppsratecheck(&rl->lasttime, &rl->counter, 1)); } #endif /* _LINUX_KERNEL_H_ */ Index: projects/pnfs-planb-server/sys/conf/files.arm64 =================================================================== --- projects/pnfs-planb-server/sys/conf/files.arm64 (revision 334966) +++ projects/pnfs-planb-server/sys/conf/files.arm64 (revision 334967) @@ -1,256 +1,256 @@ # $FreeBSD$ cloudabi32_vdso.o optional compat_cloudabi32 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_armv6_on_64bit.S" \ compile-with "${CC} -x assembler-with-cpp -m32 -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_armv6_on_64bit.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi32_vdso.o" # cloudabi32_vdso_blob.o optional compat_cloudabi32 \ dependency "cloudabi32_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi32_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi32_vdso_blob.o" # cloudabi64_vdso.o optional compat_cloudabi64 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_aarch64.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_aarch64.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi64_vdso.o" # cloudabi64_vdso_blob.o optional compat_cloudabi64 \ dependency "cloudabi64_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi64_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi64_vdso_blob.o" # # Allwinner common files arm/allwinner/a10_ehci.c optional ehci aw_ehci fdt arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt arm/allwinner/aw_mmc.c optional mmc aw_mmc fdt arm/allwinner/aw_nmi.c optional aw_nmi fdt \ compile-with "${NORMAL_C} -I$S/gnu/dts/include" arm/allwinner/aw_rsb.c optional aw_rsb fdt arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_sid.c optional aw_sid fdt arm/allwinner/aw_spi.c optional aw_spi fdt arm/allwinner/aw_thermal.c optional aw_thermal fdt arm/allwinner/aw_usbphy.c optional ehci aw_usbphy fdt arm/allwinner/aw_wdog.c optional aw_wdog fdt arm/allwinner/axp81x.c optional axp81x fdt arm/allwinner/if_awg.c optional awg ext_resources syscon fdt # Allwinner clock driver arm/allwinner/clkng/aw_ccung.c optional aw_ccu fdt arm/allwinner/clkng/aw_clk_nkmp.c optional aw_ccu fdt arm/allwinner/clkng/aw_clk_nm.c optional aw_ccu fdt arm/allwinner/clkng/aw_clk_prediv_mux.c optional aw_ccu fdt arm/allwinner/clkng/ccu_a64.c optional soc_allwinner_a64 aw_ccu fdt arm/allwinner/clkng/ccu_h3.c optional soc_allwinner_h5 aw_ccu fdt arm/allwinner/clkng/ccu_sun8i_r.c optional aw_ccu fdt # Allwinner padconf files arm/allwinner/a64/a64_padconf.c optional soc_allwinner_a64 fdt arm/allwinner/a64/a64_r_padconf.c optional soc_allwinner_a64 fdt arm/allwinner/h3/h3_padconf.c optional soc_allwinner_h5 fdt arm/allwinner/h3/h3_r_padconf.c optional soc_allwinner_h5 fdt arm/annapurna/alpine/alpine_ccu.c optional al_ccu fdt arm/annapurna/alpine/alpine_nb_service.c optional al_nb_service fdt arm/annapurna/alpine/alpine_pci.c optional al_pci fdt arm/annapurna/alpine/alpine_pci_msix.c optional al_pci fdt arm/annapurna/alpine/alpine_serdes.c optional al_serdes fdt \ no-depend \ compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} 
-I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}" arm/arm/generic_timer.c standard arm/arm/gic.c standard arm/arm/gic_acpi.c optional acpi arm/arm/gic_fdt.c optional fdt arm/arm/pmu.c standard arm/arm/physmem.c standard arm/broadcom/bcm2835/bcm2835_audio.c optional sound vchiq fdt \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" arm/broadcom/bcm2835/bcm2835_bsc.c optional bcm2835_bsc soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_cpufreq.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_dma.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_fbd.c optional vt soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_ft5406.c optional evdev bcm2835_ft5406 soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_gpio.c optional gpio soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_intr.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_mbox.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_rng.c optional random soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_sdhci.c optional sdhci soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_spi.c optional bcm2835_spi soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_vcio.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2835_wdog.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm2836.c optional soc_brcm_bcm2837 fdt arm/broadcom/bcm2835/bcm283x_dwc_fdt.c optional dwcotg fdt soc_brcm_bcm2837 arm/mv/armada38x/armada38x_rtc.c optional mv_rtc fdt arm64/acpica/acpi_machdep.c optional acpi arm64/acpica/OsdEnvironment.c optional acpi arm64/acpica/acpi_wakeup.c optional acpi arm64/acpica/pci_cfgreg.c optional acpi pci arm64/arm64/autoconf.c standard arm64/arm64/bus_machdep.c standard arm64/arm64/bus_space_asm.S standard arm64/arm64/busdma_bounce.c standard arm64/arm64/busdma_machdep.c standard arm64/arm64/bzero.S standard arm64/arm64/clock.c standard arm64/arm64/copyinout.S standard arm64/arm64/copystr.c standard arm64/arm64/cpu_errata.c standard arm64/arm64/cpufunc_asm.S standard arm64/arm64/db_disasm.c optional ddb arm64/arm64/db_interface.c optional ddb arm64/arm64/db_trace.c optional ddb arm64/arm64/debug_monitor.c optional ddb arm64/arm64/disassem.c optional ddb arm64/arm64/dump_machdep.c standard arm64/arm64/efirt_machdep.c optional efirt arm64/arm64/elf32_machdep.c optional compat_freebsd32 arm64/arm64/elf_machdep.c standard arm64/arm64/exception.S standard arm64/arm64/freebsd32_machdep.c optional compat_freebsd32 arm64/arm64/gicv3_its.c optional intrng fdt arm64/arm64/gic_v3.c standard arm64/arm64/gic_v3_acpi.c optional acpi arm64/arm64/gic_v3_fdt.c optional fdt arm64/arm64/identcpu.c standard arm64/arm64/in_cksum.c optional inet | inet6 arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/mem.c standard arm64/arm64/memcpy.S standard arm64/arm64/memmove.S standard arm64/arm64/minidump_machdep.c standard arm64/arm64/mp_machdep.c optional smp arm64/arm64/nexus.c standard arm64/arm64/ofw_machdep.c optional fdt arm64/arm64/pmap.c standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/support.S standard arm64/arm64/swtch.S standard arm64/arm64/sys_machdep.c standard arm64/arm64/trap.c standard arm64/arm64/uio_machdep.c standard arm64/arm64/uma_machdep.c standard arm64/arm64/undefined.c standard arm64/arm64/unwind.c optional ddb | kdtrace_hooks | stack arm64/arm64/vfp.c standard arm64/arm64/vm_machdep.c standard arm64/cavium/thunder_pcie_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_pem.c 
optional soc_cavm_thunderx pci arm64/cavium/thunder_pcie_pem_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_common.c optional soc_cavm_thunderx pci arm64/cloudabi32/cloudabi32_sysvec.c optional compat_cloudabi32 arm64/cloudabi64/cloudabi64_sysvec.c optional compat_cloudabi64 arm64/coresight/coresight.c standard arm64/coresight/coresight_if.m standard arm64/coresight/coresight-cmd.c standard arm64/coresight/coresight-cpu-debug.c standard arm64/coresight/coresight-dynamic-replicator.c standard arm64/coresight/coresight-etm4x.c standard arm64/coresight/coresight-funnel.c standard arm64/coresight/coresight-tmc.c standard arm64/qualcomm/qcom_gcc.c optional qcom_gcc fdt contrib/vchiq/interface/compat/vchi_bsd.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_arm.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_connected.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_core.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_shim.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" contrib/vchiq/interface/vchiq_arm/vchiq_util.c optional vchiq soc_brcm_bcm2837 \ compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq" crypto/armv8/armv8_crypto.c optional armv8crypto armv8_crypto_wrap.o optional armv8crypto \ dependency "$S/crypto/armv8/armv8_crypto_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -march=armv8-a+crypto ${.IMPSRC}" \ no-implicit-rule \ clean "armv8_crypto_wrap.o" crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/acpica/acpi_bus_if.m optional acpi dev/acpica/acpi_if.m optional acpi dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/ahci/ahci_generic.c optional ahci dev/axgbe/if_axgbe.c optional axgbe dev/axgbe/xgbe-desc.c optional axgbe dev/axgbe/xgbe-dev.c optional axgbe dev/axgbe/xgbe-drv.c optional axgbe dev/axgbe/xgbe-mdio.c optional axgbe dev/cpufreq/cpufreq_dt.c optional cpufreq fdt dev/iicbus/twsi/a10_twsi.c optional twsi fdt dev/iicbus/twsi/twsi.c optional twsi fdt dev/hwpmc/hwpmc_arm64.c optional hwpmc dev/hwpmc/hwpmc_arm64_md.c optional hwpmc dev/mbox/mbox_if.m optional soc_brcm_bcm2837 dev/mmc/host/dwmmc.c optional dwmmc fdt dev/mmc/host/dwmmc_hisi.c optional dwmmc fdt soc_hisi_hi6220 dev/mmc/host/dwmmc_rockchip.c optional dwmmc fdt soc_rockchip_rk3328 
dev/neta/if_mvneta_fdt.c optional neta fdt dev/neta/if_mvneta.c optional neta mdio mii dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofwpci.c optional fdt pci dev/pci/pci_host_generic.c optional pci dev/pci/pci_host_generic_acpi.c optional pci acpi dev/pci/pci_host_generic_fdt.c optional pci fdt -dev/psci/psci.c optional psci -dev/psci/psci_arm64.S optional psci +dev/psci/psci.c standard +dev/psci/psci_arm64.S standard dev/uart/uart_cpu_arm64.c optional uart dev/uart/uart_dev_pl011.c optional uart pl011 dev/usb/controller/dwc_otg_hisi.c optional dwcotg fdt soc_hisi_hi6220 dev/usb/controller/ehci_mv.c optional ehci_mv fdt dev/usb/controller/generic_ehci.c optional ehci acpi dev/usb/controller/generic_ohci.c optional ohci fdt dev/usb/controller/generic_usb_if.m optional ohci fdt dev/usb/controller/xhci_mv.c optional xhci_mv fdt dev/vnic/mrml_bridge.c optional vnic fdt dev/vnic/nic_main.c optional vnic pci dev/vnic/nicvf_main.c optional vnic pci pci_iov dev/vnic/nicvf_queues.c optional vnic pci pci_iov dev/vnic/thunder_bgx_fdt.c optional vnic fdt dev/vnic/thunder_bgx.c optional vnic pci dev/vnic/thunder_mdio_fdt.c optional vnic fdt dev/vnic/thunder_mdio.c optional vnic dev/vnic/lmac_if.m optional inet | inet6 | vnic kern/kern_clocksource.c standard kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_devmap.c standard kern/subr_intr.c optional intrng libkern/bcmp.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memset.c standard libkern/arm64/crc32c_armv8.S standard cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/aarch64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/aarch64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/aarch64/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" arm64/rockchip/rk_grf.c optional fdt soc_rockchip_rk3328 arm64/rockchip/rk_pinctrl.c optional fdt soc_rockchip_rk3328 arm64/rockchip/rk_gpio.c optional fdt soc_rockchip_rk3328 arm64/rockchip/clk/rk_cru.c optional fdt soc_rockchip_rk3328 arm64/rockchip/clk/rk_clk_armclk.c optional fdt soc_rockchip_rk3328 arm64/rockchip/clk/rk_clk_composite.c optional fdt soc_rockchip_rk3328 arm64/rockchip/clk/rk_clk_gate.c optional fdt soc_rockchip_rk3328 arm64/rockchip/clk/rk_clk_mux.c optional fdt soc_rockchip_rk3328 arm64/rockchip/clk/rk_clk_pll.c optional fdt soc_rockchip_rk3328 arm64/rockchip/clk/rk3328_cru.c optional fdt soc_rockchip_rk3328 Index: projects/pnfs-planb-server/sys/conf/options.arm64 =================================================================== --- projects/pnfs-planb-server/sys/conf/options.arm64 (revision 334966) +++ projects/pnfs-planb-server/sys/conf/options.arm64 (revision 334967) @@ -1,25 +1,22 @@ # $FreeBSD$ ARM64 opt_global.h INTRNG opt_global.h SOCDEV_PA opt_global.h SOCDEV_VA opt_global.h THUNDERX_PASS_1_1_ERRATA opt_global.h VFP opt_global.h # Binary compatibility COMPAT_FREEBSD32 opt_global.h # EFI Runtime services support EFIRT opt_efirt.h -# Devices -DEV_PSCI opt_platform.h - # SoC Support SOC_ALLWINNER_A64 opt_soc.h SOC_ALLWINNER_H5 opt_soc.h SOC_BRCM_BCM2837 opt_soc.h SOC_CAVM_THUNDERX opt_soc.h SOC_HISI_HI6220 opt_soc.h SOC_ROCKCHIP_RK3328 opt_soc.h Index: projects/pnfs-planb-server/sys/dev/atkbdc/psm.c =================================================================== --- projects/pnfs-planb-server/sys/dev/atkbdc/psm.c 
(revision 334966) +++ projects/pnfs-planb-server/sys/dev/atkbdc/psm.c (revision 334967) @@ -1,7310 +1,7292 @@ /*- * Copyright (c) 1992, 1993 Erik Forsberg. * Copyright (c) 1996, 1997 Kazutaka YOKOTA. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL I BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Ported to 386bsd Oct 17, 1992 * Sandi Donno, Computer Science, University of Cape Town, South Africa * Please send bug reports to sandi@cs.uct.ac.za * * Thanks are also due to Rick Macklem, rick@snowhite.cis.uoguelph.ca - * although I was only partially successful in getting the alpha release * of his "driver for the Logitech and ATI Inport Bus mice for use with * 386bsd and the X386 port" to work with my Microsoft mouse, I nevertheless * found his code to be an invaluable reference when porting this driver * to 386bsd. * * Further modifications for latest 386BSD+patchkit and port to NetBSD, * Andrew Herbert - 8 June 1993 * * Cloned from the Microsoft Bus Mouse driver, also by Erik Forsberg, by * Andrew Herbert - 12 June 1993 * * Modified for PS/2 mouse by Charles Hannum * - 13 June 1993 * * Modified for PS/2 AUX mouse by Shoji Yuen * - 24 October 1993 * * Hardware access routines and probe logic rewritten by * Kazutaka Yokota * - 3, 14, 22 October 1996. * - 12 November 1996. IOCTLs and rearranging `psmread', `psmioctl'... * - 14, 30 November 1996. Uses `kbdio.c'. * - 13 December 1996. Uses queuing version of `kbdio.c'. * - January/February 1997. Tweaked probe logic for * HiNote UltraII/Latitude/Armada laptops. * - 30 July 1997. Added APM support. * - 5 March 1997. Defined driver configuration flags (PSM_CONFIG_XXX). * Improved sync check logic. * Vendor specific support routines. */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_psm.h" #include "opt_evdev.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #endif #ifdef EVDEV_SUPPORT #include #include #endif #include #include /* * Driver specific options: the following options may be set by * `options' statements in the kernel configuration file. 
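For example, a kernel configuration file could override the defaults that follow (the values here are purely illustrative):

	options PSM_DEBUG=2		# logging level 2: verbose
	options PSM_TAP_THRESHOLD=20	# tap pressure threshold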
*/

/* debugging */
#ifndef PSM_DEBUG
#define PSM_DEBUG	0	/*
				 * logging: 0: none, 1: brief, 2: verbose
				 * 3: sync errors, 4: all packets
				 */
#endif

#define VLOG(level, args) do {	\
	if (verbose >= level)	\
		log args;	\
} while (0)

#ifndef PSM_INPUT_TIMEOUT
#define PSM_INPUT_TIMEOUT	2000000	/* 2 sec */
#endif

#ifndef PSM_TAP_TIMEOUT
#define PSM_TAP_TIMEOUT		125000
#endif

#ifndef PSM_TAP_THRESHOLD
#define PSM_TAP_THRESHOLD	25
#endif

/* end of driver specific options */

#define PSMCPNP_DRIVER_NAME	"psmcpnp"

struct psmcpnp_softc {
	enum {
		PSMCPNP_GENERIC,
		PSMCPNP_FORCEPAD,
		PSMCPNP_HPSYN81,
	} type;		/* Based on PnP ID */
};

/* input queue */
#define PSM_BUFSIZE		960
#define PSM_SMALLBUFSIZE	240

/* operation levels */
#define PSM_LEVEL_BASE		0
#define PSM_LEVEL_STANDARD	1
#define PSM_LEVEL_NATIVE	2
#define PSM_LEVEL_MIN		PSM_LEVEL_BASE
#define PSM_LEVEL_MAX		PSM_LEVEL_NATIVE

/* Logitech PS2++ protocol */
#define MOUSE_PS2PLUS_CHECKBITS(b)	\
    ((((b[2] & 0x03) << 2) | 0x02) == (b[1] & 0x0f))
#define MOUSE_PS2PLUS_PACKET_TYPE(b)	\
    (((b[0] & 0x30) >> 2) | ((b[1] & 0x30) >> 4))

/* ring buffer */
typedef struct ringbuf {
	int	count;	/* # of valid elements in the buffer */
	int	head;	/* head pointer */
	int	tail;	/* tail pointer */
	u_char	buf[PSM_BUFSIZE];
} ringbuf_t;

/* data buffer */
typedef struct packetbuf {
	u_char	ipacket[16];	/* interim input buffer */
	int	inputbytes;	/* # of bytes in the input buffer */
} packetbuf_t;

#ifndef PSM_PACKETQUEUE
#define PSM_PACKETQUEUE	128
#endif

/*
 * Typical bezel limits. Taken from 'Synaptics
 * PS/2 TouchPad Interfacing Guide' p.3.2.3.
 */
#define SYNAPTICS_DEFAULT_MAX_X	5472
#define SYNAPTICS_DEFAULT_MAX_Y	4448
#define SYNAPTICS_DEFAULT_MIN_X	1472
#define SYNAPTICS_DEFAULT_MIN_Y	1408

typedef struct synapticsinfo {
	struct sysctl_ctx_list	sysctl_ctx;
	struct sysctl_oid	*sysctl_tree;
	int	directional_scrolls;
	int	two_finger_scroll;
	int	min_pressure;
	int	max_pressure;
	int	max_width;
	int	margin_top;
	int	margin_right;
	int	margin_bottom;
	int	margin_left;
	int	na_top;
	int	na_right;
	int	na_bottom;
	int	na_left;
	int	window_min;
	int	window_max;
	int	multiplicator;
	int	weight_current;
	int	weight_previous;
	int	weight_previous_na;
	int	weight_len_squared;
	int	div_min;
	int	div_max;
	int	div_max_na;
	int	div_len;
	int	tap_max_delta;
	int	tap_min_queue;
	int	taphold_timeout;
	int	vscroll_ver_area;
	int	vscroll_hor_area;
	int	vscroll_min_delta;
	int	vscroll_div_min;
	int	vscroll_div_max;
	int	touchpad_off;
	int	softbuttons_y;
	int	softbutton2_x;
	int	softbutton3_x;
	int	max_x;
	int	max_y;
} synapticsinfo_t;

typedef struct synapticspacket {
	int	x;
	int	y;
} synapticspacket_t;

#define SYNAPTICS_PACKETQUEUE 10
#define SYNAPTICS_QUEUE_CURSOR(x)	\
    (x + SYNAPTICS_PACKETQUEUE) % SYNAPTICS_PACKETQUEUE

#define SYNAPTICS_VERSION_GE(synhw, major, minor)	\
    ((synhw).infoMajor > (major) ||	\
     ((synhw).infoMajor == (major) && (synhw).infoMinor >= (minor)))

typedef struct smoother {
	synapticspacket_t	queue[SYNAPTICS_PACKETQUEUE];
	int	queue_len;
	int	queue_cursor;
	int	start_x;
	int	start_y;
	int	avg_dx;
	int	avg_dy;
	int	squelch_x;
	int	squelch_y;
	int	is_fuzzy;
	int	active;
} smoother_t;

typedef struct gesture {
	int	window_min;
	int	fingers_nb;
	int	tap_button;
	int	in_taphold;
	int	in_vscroll;
	int	zmax;			/* maximum pressure value */
	struct timeval	taptimeout;	/* tap timeout for touchpads */
} gesture_t;

enum {
	TRACKPOINT_SYSCTL_SENSITIVITY,
	TRACKPOINT_SYSCTL_NEGATIVE_INERTIA,
	TRACKPOINT_SYSCTL_UPPER_PLATEAU,
	TRACKPOINT_SYSCTL_BACKUP_RANGE,
	TRACKPOINT_SYSCTL_DRAG_HYSTERESIS,
	TRACKPOINT_SYSCTL_MINIMUM_DRAG,
	TRACKPOINT_SYSCTL_UP_THRESHOLD,
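For reference, a minimal sketch of how a fixed-capacity queue like the ringbuf_t above is typically advanced; the helper names are hypothetical and this is not the driver's actual enqueue code:

static void
rb_put_sketch(ringbuf_t *rb, u_char c)
{
	rb->buf[rb->tail] = c;
	rb->tail = (rb->tail + 1) % PSM_BUFSIZE;	/* wrap around */
	if (rb->count < PSM_BUFSIZE)
		rb->count++;
	else
		rb->head = (rb->head + 1) % PSM_BUFSIZE; /* drop the oldest */
}

static int
rb_get_sketch(ringbuf_t *rb)
{
	int c;

	if (rb->count == 0)
		return (-1);			/* queue is empty */
	c = rb->buf[rb->head];
	rb->head = (rb->head + 1) % PSM_BUFSIZE;
	rb->count--;
	return (c);
}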
TRACKPOINT_SYSCTL_THRESHOLD, TRACKPOINT_SYSCTL_JENKS_CURVATURE, TRACKPOINT_SYSCTL_Z_TIME, TRACKPOINT_SYSCTL_PRESS_TO_SELECT, TRACKPOINT_SYSCTL_SKIP_BACKUPS }; typedef struct trackpointinfo { struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; int sensitivity; int inertia; int uplateau; int reach; int draghys; int mindrag; int upthresh; int threshold; int jenks; int ztime; int pts; int skipback; } trackpointinfo_t; typedef struct finger { int x; int y; int p; int w; int flags; } finger_t; #define PSM_FINGERS 2 /* # of processed fingers */ #define PSM_FINGER_IS_PEN (1<<0) #define PSM_FINGER_FUZZY (1<<1) #define PSM_FINGER_DEFAULT_P tap_threshold #define PSM_FINGER_DEFAULT_W 1 #define PSM_FINGER_IS_SET(f) ((f).x != -1 && (f).y != -1 && (f).p != 0) #define PSM_FINGER_RESET(f) do { \ (f) = (finger_t) { .x = -1, .y = -1, .p = 0, .w = 0, .flags = 0 }; \ } while (0) typedef struct elantechhw { int hwversion; int fwversion; int sizex; int sizey; int dpmmx; int dpmmy; int ntracesx; int ntracesy; int dptracex; int dptracey; int issemimt; int isclickpad; int hascrc; int hastrackpoint; int haspressure; } elantechhw_t; /* minimum versions supported by this driver */ #define ELANTECH_HW_IS_V1(fwver) ((fwver) < 0x020030 || (fwver) == 0x020600) #define ELANTECH_MAGIC(magic) \ ((magic)[0] == 0x3c && (magic)[1] == 0x03 && \ ((magic)[2] == 0xc8 || (magic)[2] == 0x00)) #define ELANTECH_FW_ID 0x00 #define ELANTECH_FW_VERSION 0x01 #define ELANTECH_CAPABILITIES 0x02 #define ELANTECH_SAMPLE 0x03 #define ELANTECH_RESOLUTION 0x04 #define ELANTECH_REG_READ 0x10 #define ELANTECH_REG_WRITE 0x11 #define ELANTECH_REG_RDWR 0x00 #define ELANTECH_CUSTOM_CMD 0xf8 #ifdef EVDEV_SUPPORT #define ELANTECH_MAX_FINGERS 5 #else #define ELANTECH_MAX_FINGERS PSM_FINGERS #endif #define ELANTECH_FINGER_MAX_P 255 #define ELANTECH_FINGER_MAX_W 15 #define ELANTECH_FINGER_SET_XYP(pb) (finger_t) { \ .x = (((pb)->ipacket[1] & 0x0f) << 8) | (pb)->ipacket[2], \ .y = (((pb)->ipacket[4] & 0x0f) << 8) | (pb)->ipacket[5], \ .p = ((pb)->ipacket[1] & 0xf0) | (((pb)->ipacket[4] >> 4) & 0x0f), \ .w = PSM_FINGER_DEFAULT_W, \ .flags = 0 \ } enum { ELANTECH_PKT_NOP, ELANTECH_PKT_TRACKPOINT, ELANTECH_PKT_V2_COMMON, ELANTECH_PKT_V2_2FINGER, ELANTECH_PKT_V3, ELANTECH_PKT_V4_STATUS, ELANTECH_PKT_V4_HEAD, ELANTECH_PKT_V4_MOTION }; #define ELANTECH_PKT_IS_TRACKPOINT(pb) (((pb)->ipacket[3] & 0x0f) == 0x06) #define ELANTECH_PKT_IS_DEBOUNCE(pb, hwversion) ((hwversion) == 4 ? 0 : \ (pb)->ipacket[0] == ((hwversion) == 2 ? 0x84 : 0xc4) && \ (pb)->ipacket[1] == 0xff && (pb)->ipacket[2] == 0xff && \ (pb)->ipacket[3] == 0x02 && (pb)->ipacket[4] == 0xff && \ (pb)->ipacket[5] == 0xff) #define ELANTECH_PKT_IS_V2(pb) \ (((pb)->ipacket[0] & 0x0c) == 0x04 && ((pb)->ipacket[3] & 0x0f) == 0x02) #define ELANTECH_PKT_IS_V3_HEAD(pb, hascrc) ((hascrc) ? \ ((pb)->ipacket[3] & 0x09) == 0x08 : \ ((pb)->ipacket[0] & 0x0c) == 0x04 && ((pb)->ipacket[3] & 0xcf) == 0x02) #define ELANTECH_PKT_IS_V3_TAIL(pb, hascrc) ((hascrc) ? \ ((pb)->ipacket[3] & 0x09) == 0x09 : \ ((pb)->ipacket[0] & 0x0c) == 0x0c && ((pb)->ipacket[3] & 0xce) == 0x0c) #define ELANTECH_PKT_IS_V4(pb, hascrc) ((hascrc) ? 
\
    ((pb)->ipacket[3] & 0x08) == 0x00 :	\
    ((pb)->ipacket[0] & 0x0c) == 0x04 && ((pb)->ipacket[3] & 0x1c) == 0x10)

typedef struct elantechaction {
	finger_t	fingers[ELANTECH_MAX_FINGERS];
	int		mask;
	int		mask_v4wait;
} elantechaction_t;

/* driver control block */
struct psm_softc {		/* Driver status information */
	int		unit;
	struct selinfo	rsel;		/* Process selecting for Input */
	u_char		state;		/* Mouse driver state */
	int		config;		/* driver configuration flags */
	int		flags;		/* other flags */
	KBDC		kbdc;		/* handle to access kbd controller */
	struct resource	*intr;		/* IRQ resource */
	void		*ih;		/* interrupt handle */
	mousehw_t	hw;		/* hardware information */
	synapticshw_t	synhw;		/* Synaptics hardware information */
	synapticsinfo_t	syninfo;	/* Synaptics configuration */
	smoother_t	smoother[PSM_FINGERS];	/* Motion smoothing */
	gesture_t	gesture;	/* Gesture context */
	elantechhw_t	elanhw;		/* Elantech hardware information */
	elantechaction_t elanaction;	/* Elantech action context */
	int		tphw;		/* TrackPoint hardware information */
	trackpointinfo_t tpinfo;	/* TrackPoint configuration */
	mousemode_t	mode;		/* operation mode */
	mousemode_t	dflt_mode;	/* default operation mode */
	mousestatus_t	status;		/* accumulated mouse movement */
	ringbuf_t	queue;		/* mouse status queue */
	packetbuf_t	pqueue[PSM_PACKETQUEUE]; /* mouse data queue */
	int		pqueue_start;	/* start of data in queue */
	int		pqueue_end;	/* end of data in queue */
	int		button;		/* the latest button state */
	int		xold;		/* previous absolute X position */
	int		yold;		/* previous absolute Y position */
	int		xaverage;	/* average X position */
	int		yaverage;	/* average Y position */
	int		squelch;	/* level to filter movement at low speed */
	int		syncerrors;	/* # of bytes discarded to synchronize */
	int		pkterrors;	/* # of packets failed during quarantine. */
	int		fpcount;	/* forcePad valid packet counter */
	struct timeval	inputtimeout;
	struct timeval	lastsoftintr;	/* time of last soft interrupt */
	struct timeval	lastinputerr;	/* time last sync error happened */
	struct timeval	idletimeout;
	packetbuf_t	idlepacket;	/* packet to send after idle timeout */
	int		watchdog;	/* watchdog timer flag */
	struct callout	callout;	/* watchdog timer call out */
	struct callout	softcallout;	/* buffer timer call out */
	struct cdev	*dev;
	struct cdev	*bdev;
	int		lasterr;
	int		cmdcount;
	struct sigio	*async;		/* Processes waiting for SIGIO */
	int		extended_buttons;
#ifdef EVDEV_SUPPORT
	struct evdev_dev *evdev_a;	/* Absolute reporting device */
	struct evdev_dev *evdev_r;	/* Relative reporting device */
#endif
};
static devclass_t psm_devclass;

/* driver state flags (state) */
#define PSM_VALID		0x80
#define PSM_OPEN		1	/* Device is open */
#define PSM_ASLP		2	/* Waiting for mouse data */
#define PSM_SOFTARMED		4	/* Software interrupt armed */
#define PSM_NEED_SYNCBITS	8	/* Set syncbits using next data pkt */
#define PSM_EV_OPEN_R		0x10	/* Relative evdev device is open */
#define PSM_EV_OPEN_A		0x20	/* Absolute evdev device is open */

/* driver configuration flags (config) */
#define PSM_CONFIG_RESOLUTION	0x000f	/* resolution */
#define PSM_CONFIG_ACCEL	0x00f0	/* acceleration factor */
#define PSM_CONFIG_NOCHECKSYNC	0x0100	/* disable sync.
test */ #define PSM_CONFIG_NOIDPROBE 0x0200 /* disable mouse model probe */ #define PSM_CONFIG_NORESET 0x0400 /* don't reset the mouse */ #define PSM_CONFIG_FORCETAP 0x0800 /* assume `tap' action exists */ #define PSM_CONFIG_IGNPORTERROR 0x1000 /* ignore error in aux port test */ #define PSM_CONFIG_HOOKRESUME 0x2000 /* hook the system resume event */ #define PSM_CONFIG_INITAFTERSUSPEND 0x4000 /* init the device at the resume event */ #define PSM_CONFIG_FLAGS \ (PSM_CONFIG_RESOLUTION | \ PSM_CONFIG_ACCEL | \ PSM_CONFIG_NOCHECKSYNC | \ PSM_CONFIG_NOIDPROBE | \ PSM_CONFIG_NORESET | \ PSM_CONFIG_FORCETAP | \ PSM_CONFIG_IGNPORTERROR | \ PSM_CONFIG_HOOKRESUME | \ PSM_CONFIG_INITAFTERSUSPEND) /* other flags (flags) */ #define PSM_FLAGS_FINGERDOWN 0x0001 /* VersaPad finger down */ #define kbdcp(p) ((atkbdc_softc_t *)(p)) #define ALWAYS_RESTORE_CONTROLLER(kbdc) !(kbdcp(kbdc)->quirks \ & KBDC_QUIRK_KEEP_ACTIVATED) /* Tunables */ static int tap_enabled = -1; static int verbose = PSM_DEBUG; static int synaptics_support = 0; static int trackpoint_support = 0; static int elantech_support = 0; /* for backward compatibility */ #define OLD_MOUSE_GETHWINFO _IOR('M', 1, old_mousehw_t) #define OLD_MOUSE_GETMODE _IOR('M', 2, old_mousemode_t) #define OLD_MOUSE_SETMODE _IOW('M', 3, old_mousemode_t) typedef struct old_mousehw { int buttons; int iftype; int type; int hwid; } old_mousehw_t; typedef struct old_mousemode { int protocol; int rate; int resolution; int accelfactor; } old_mousemode_t; #define SYN_OFFSET(field) offsetof(struct psm_softc, syninfo.field) enum { SYNAPTICS_SYSCTL_MIN_PRESSURE = SYN_OFFSET(min_pressure), SYNAPTICS_SYSCTL_MAX_PRESSURE = SYN_OFFSET(max_pressure), SYNAPTICS_SYSCTL_MAX_WIDTH = SYN_OFFSET(max_width), SYNAPTICS_SYSCTL_MARGIN_TOP = SYN_OFFSET(margin_top), SYNAPTICS_SYSCTL_MARGIN_RIGHT = SYN_OFFSET(margin_right), SYNAPTICS_SYSCTL_MARGIN_BOTTOM = SYN_OFFSET(margin_bottom), SYNAPTICS_SYSCTL_MARGIN_LEFT = SYN_OFFSET(margin_left), SYNAPTICS_SYSCTL_NA_TOP = SYN_OFFSET(na_top), SYNAPTICS_SYSCTL_NA_RIGHT = SYN_OFFSET(na_right), SYNAPTICS_SYSCTL_NA_BOTTOM = SYN_OFFSET(na_bottom), SYNAPTICS_SYSCTL_NA_LEFT = SYN_OFFSET(na_left), SYNAPTICS_SYSCTL_WINDOW_MIN = SYN_OFFSET(window_min), SYNAPTICS_SYSCTL_WINDOW_MAX = SYN_OFFSET(window_max), SYNAPTICS_SYSCTL_MULTIPLICATOR = SYN_OFFSET(multiplicator), SYNAPTICS_SYSCTL_WEIGHT_CURRENT = SYN_OFFSET(weight_current), SYNAPTICS_SYSCTL_WEIGHT_PREVIOUS = SYN_OFFSET(weight_previous), SYNAPTICS_SYSCTL_WEIGHT_PREVIOUS_NA = SYN_OFFSET(weight_previous_na), SYNAPTICS_SYSCTL_WEIGHT_LEN_SQUARED = SYN_OFFSET(weight_len_squared), SYNAPTICS_SYSCTL_DIV_MIN = SYN_OFFSET(div_min), SYNAPTICS_SYSCTL_DIV_MAX = SYN_OFFSET(div_max), SYNAPTICS_SYSCTL_DIV_MAX_NA = SYN_OFFSET(div_max_na), SYNAPTICS_SYSCTL_DIV_LEN = SYN_OFFSET(div_len), SYNAPTICS_SYSCTL_TAP_MAX_DELTA = SYN_OFFSET(tap_max_delta), SYNAPTICS_SYSCTL_TAP_MIN_QUEUE = SYN_OFFSET(tap_min_queue), SYNAPTICS_SYSCTL_TAPHOLD_TIMEOUT = SYN_OFFSET(taphold_timeout), SYNAPTICS_SYSCTL_VSCROLL_HOR_AREA = SYN_OFFSET(vscroll_hor_area), SYNAPTICS_SYSCTL_VSCROLL_VER_AREA = SYN_OFFSET(vscroll_ver_area), SYNAPTICS_SYSCTL_VSCROLL_MIN_DELTA = SYN_OFFSET(vscroll_min_delta), SYNAPTICS_SYSCTL_VSCROLL_DIV_MIN = SYN_OFFSET(vscroll_div_min), SYNAPTICS_SYSCTL_VSCROLL_DIV_MAX = SYN_OFFSET(vscroll_div_max), SYNAPTICS_SYSCTL_TOUCHPAD_OFF = SYN_OFFSET(touchpad_off), SYNAPTICS_SYSCTL_SOFTBUTTONS_Y = SYN_OFFSET(softbuttons_y), SYNAPTICS_SYSCTL_SOFTBUTTON2_X = SYN_OFFSET(softbutton2_x), SYNAPTICS_SYSCTL_SOFTBUTTON3_X = SYN_OFFSET(softbutton3_x), }; /* 
packet formatting function */ typedef int packetfunc_t(struct psm_softc *, u_char *, int *, int, mousestatus_t *); /* function prototypes */ static void psmidentify(driver_t *, device_t); static int psmprobe(device_t); static int psmattach(device_t); static int psmdetach(device_t); static int psmresume(device_t); static d_open_t psm_cdev_open; static d_close_t psm_cdev_close; static d_read_t psmread; static d_write_t psmwrite; static d_ioctl_t psmioctl; static d_poll_t psmpoll; static int psmopen(struct psm_softc *); static int psmclose(struct psm_softc *); #ifdef EVDEV_SUPPORT static evdev_open_t psm_ev_open_r; static evdev_close_t psm_ev_close_r; static evdev_open_t psm_ev_open_a; static evdev_close_t psm_ev_close_a; #endif static int enable_aux_dev(KBDC); static int disable_aux_dev(KBDC); static int get_mouse_status(KBDC, int *, int, int); static int get_aux_id(KBDC); static int set_mouse_sampling_rate(KBDC, int); static int set_mouse_scaling(KBDC, int); static int set_mouse_resolution(KBDC, int); static int set_mouse_mode(KBDC); static int get_mouse_buttons(KBDC); static int is_a_mouse(int); static void recover_from_error(KBDC); static int restore_controller(KBDC, int); static int doinitialize(struct psm_softc *, mousemode_t *); static int doopen(struct psm_softc *, int); static int reinitialize(struct psm_softc *, int); static char *model_name(int); static void psmsoftintr(void *); static void psmsoftintridle(void *); static void psmintr(void *); static void psmtimeout(void *); static int timeelapsed(const struct timeval *, int, int, const struct timeval *); static void dropqueue(struct psm_softc *); static void flushpackets(struct psm_softc *); static void proc_mmanplus(struct psm_softc *, packetbuf_t *, mousestatus_t *, int *, int *, int *); static int proc_synaptics(struct psm_softc *, packetbuf_t *, mousestatus_t *, int *, int *, int *); static void proc_versapad(struct psm_softc *, packetbuf_t *, mousestatus_t *, int *, int *, int *); static int proc_elantech(struct psm_softc *, packetbuf_t *, mousestatus_t *, int *, int *, int *); static int psmpalmdetect(struct psm_softc *, finger_t *, int); static void psmgestures(struct psm_softc *, finger_t *, int, mousestatus_t *); static void psmsmoother(struct psm_softc *, finger_t *, int, mousestatus_t *, int *, int *); static int tame_mouse(struct psm_softc *, packetbuf_t *, mousestatus_t *, u_char *); /* vendor specific features */ enum probearg { PROBE, REINIT }; typedef int probefunc_t(struct psm_softc *, enum probearg); static int mouse_id_proc1(KBDC, int, int, int *); static int mouse_ext_command(KBDC, int); static probefunc_t enable_groller; static probefunc_t enable_gmouse; static probefunc_t enable_aglide; static probefunc_t enable_kmouse; static probefunc_t enable_msexplorer; static probefunc_t enable_msintelli; static probefunc_t enable_4dmouse; static probefunc_t enable_4dplus; static probefunc_t enable_mmanplus; static probefunc_t enable_synaptics; static probefunc_t enable_trackpoint; static probefunc_t enable_versapad; static probefunc_t enable_elantech; static void set_trackpoint_parameters(struct psm_softc *sc); static void synaptics_passthrough_on(struct psm_softc *sc); static void synaptics_passthrough_off(struct psm_softc *sc); static int synaptics_preferred_mode(struct psm_softc *sc); static void synaptics_set_mode(struct psm_softc *sc, int mode_byte); static struct { int model; u_char syncmask; int packetsize; probefunc_t *probefunc; } vendortype[] = { /* * WARNING: the order of probe is very important. 
Don't mess with it
 * unless you know what you are doing.
 */
	{ MOUSE_MODEL_NET,		/* Genius NetMouse */
	  0x08, MOUSE_PS2INTELLI_PACKETSIZE, enable_gmouse },
	{ MOUSE_MODEL_NETSCROLL,	/* Genius NetScroll */
	  0xc8, 6, enable_groller },
	{ MOUSE_MODEL_MOUSEMANPLUS,	/* Logitech MouseMan+ */
	  0x08, MOUSE_PS2_PACKETSIZE, enable_mmanplus },
	{ MOUSE_MODEL_EXPLORER,		/* Microsoft IntelliMouse Explorer */
	  0x08, MOUSE_PS2INTELLI_PACKETSIZE, enable_msexplorer },
	{ MOUSE_MODEL_4D,		/* A4 Tech 4D Mouse */
	  0x08, MOUSE_4D_PACKETSIZE, enable_4dmouse },
	{ MOUSE_MODEL_4DPLUS,		/* A4 Tech 4D+ Mouse */
	  0xc8, MOUSE_4DPLUS_PACKETSIZE, enable_4dplus },
	{ MOUSE_MODEL_SYNAPTICS,	/* Synaptics Touchpad */
	  0xc0, MOUSE_SYNAPTICS_PACKETSIZE, enable_synaptics },
	{ MOUSE_MODEL_ELANTECH,		/* Elantech Touchpad */
	  0x04, MOUSE_ELANTECH_PACKETSIZE, enable_elantech },
	{ MOUSE_MODEL_INTELLI,		/* Microsoft IntelliMouse */
	  0x08, MOUSE_PS2INTELLI_PACKETSIZE, enable_msintelli },
	{ MOUSE_MODEL_GLIDEPOINT,	/* ALPS GlidePoint */
	  0xc0, MOUSE_PS2_PACKETSIZE, enable_aglide },
	{ MOUSE_MODEL_THINK,		/* Kensington ThinkingMouse */
	  0x80, MOUSE_PS2_PACKETSIZE, enable_kmouse },
	{ MOUSE_MODEL_VERSAPAD,		/* Interlink electronics VersaPad */
	  0xe8, MOUSE_PS2VERSA_PACKETSIZE, enable_versapad },
	{ MOUSE_MODEL_TRACKPOINT,	/* IBM/Lenovo TrackPoint */
	  0xc0, MOUSE_PS2_PACKETSIZE, enable_trackpoint },
	{ MOUSE_MODEL_GENERIC,
	  0xc0, MOUSE_PS2_PACKETSIZE, NULL },
};
#define GENERIC_MOUSE_ENTRY	(nitems(vendortype) - 1)

/* device driver declaration */
static device_method_t psm_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	psmidentify),
	DEVMETHOD(device_probe,		psmprobe),
	DEVMETHOD(device_attach,	psmattach),
	DEVMETHOD(device_detach,	psmdetach),
	DEVMETHOD(device_resume,	psmresume),

	{ 0, 0 }
};

static driver_t psm_driver = {
	PSM_DRIVER_NAME,
	psm_methods,
	sizeof(struct psm_softc),
};

static struct cdevsw psm_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	psm_cdev_open,
	.d_close =	psm_cdev_close,
	.d_read =	psmread,
	.d_write =	psmwrite,
	.d_ioctl =	psmioctl,
	.d_poll =	psmpoll,
	.d_name =	PSM_DRIVER_NAME,
};

#ifdef EVDEV_SUPPORT
static const struct evdev_methods psm_ev_methods_r = {
	.ev_open = psm_ev_open_r,
	.ev_close = psm_ev_close_r,
};
static const struct evdev_methods psm_ev_methods_a = {
	.ev_open = psm_ev_open_a,
	.ev_close = psm_ev_close_a,
};
#endif

/* device I/O routines */
static int
enable_aux_dev(KBDC kbdc)
{
	int res;

	res = send_aux_command(kbdc, PSMC_ENABLE_DEV);
	VLOG(2, (LOG_DEBUG, "psm: ENABLE_DEV return code:%04x\n", res));

	return (res == PSM_ACK);
}

static int
disable_aux_dev(KBDC kbdc)
{
	int res;

	res = send_aux_command(kbdc, PSMC_DISABLE_DEV);
	VLOG(2, (LOG_DEBUG, "psm: DISABLE_DEV return code:%04x\n", res));

	return (res == PSM_ACK);
}

static int
get_mouse_status(KBDC kbdc, int *status, int flag, int len)
{
	int cmd;
	int res;
	int i;

	switch (flag) {
	case 0:
	default:
		cmd = PSMC_SEND_DEV_STATUS;
		break;
	case 1:
		cmd = PSMC_SEND_DEV_DATA;
		break;
	}
	empty_aux_buffer(kbdc, 5);
	res = send_aux_command(kbdc, cmd);
	VLOG(2, (LOG_DEBUG, "psm: SEND_AUX_DEV_%s return code:%04x\n",
	    (flag == 1) ? "DATA" : "STATUS", res));
	if (res != PSM_ACK)
		return (0);

	for (i = 0; i < len; ++i) {
		status[i] = read_aux_data(kbdc);
		if (status[i] < 0)
			break;
	}
	if (len >= 3) {
		for (; i < 3; ++i)
			status[i] = 0;
		VLOG(1, (LOG_DEBUG, "psm: %s %02x %02x %02x\n",
		    (flag == 1) ? "data" : "status", status[0], status[1], status[2]));
	}

	return (i);
}

static int
get_aux_id(KBDC kbdc)
{
	int res;
	int id;

	empty_aux_buffer(kbdc, 5);
	res = send_aux_command(kbdc, PSMC_SEND_DEV_ID);
	VLOG(2, (LOG_DEBUG, "psm: SEND_DEV_ID return code:%04x\n", res));
	if (res != PSM_ACK)
		return (-1);

	/* 10ms delay */
	DELAY(10000);

	id = read_aux_data(kbdc);
	VLOG(2, (LOG_DEBUG, "psm: device ID: %04x\n", id));

	return (id);
}

static int
set_mouse_sampling_rate(KBDC kbdc, int rate)
{
	int res;

	res = send_aux_command_and_data(kbdc, PSMC_SET_SAMPLING_RATE, rate);
	VLOG(2, (LOG_DEBUG, "psm: SET_SAMPLING_RATE (%d) %04x\n", rate, res));

	return ((res == PSM_ACK) ? rate : -1);
}

static int
set_mouse_scaling(KBDC kbdc, int scale)
{
	int res;

	switch (scale) {
	case 1:
	default:
		scale = PSMC_SET_SCALING11;
		break;
	case 2:
		scale = PSMC_SET_SCALING21;
		break;
	}
	res = send_aux_command(kbdc, scale);
	VLOG(2, (LOG_DEBUG, "psm: SET_SCALING%s return code:%04x\n",
	    (scale == PSMC_SET_SCALING21) ? "21" : "11", res));

	return (res == PSM_ACK);
}

/* `val' must be 0 through PSMD_MAX_RESOLUTION */
static int
set_mouse_resolution(KBDC kbdc, int val)
{
	int res;

	res = send_aux_command_and_data(kbdc, PSMC_SET_RESOLUTION, val);
	VLOG(2, (LOG_DEBUG, "psm: SET_RESOLUTION (%d) %04x\n", val, res));

	return ((res == PSM_ACK) ? val : -1);
}

/*
 * NOTE: once `set_mouse_mode()' is called, the mouse device must be
 * re-enabled by calling `enable_aux_dev()'
 */
static int
set_mouse_mode(KBDC kbdc)
{
	int res;

	res = send_aux_command(kbdc, PSMC_SET_STREAM_MODE);
	VLOG(2, (LOG_DEBUG, "psm: SET_STREAM_MODE return code:%04x\n", res));

	return (res == PSM_ACK);
}

static int
get_mouse_buttons(KBDC kbdc)
{
	int c = 2;		/* assume two buttons by default */
	int status[3];

	/*
	 * NOTE: a special sequence to obtain Logitech Mouse specific
	 * information: set resolution to 25 ppi, set scaling to 1:1, set
	 * scaling to 1:1, set scaling to 1:1. Then the second byte of the
	 * mouse status bytes is the number of available buttons.
	 * Some manufacturers also support this sequence.
	 */
	if (set_mouse_resolution(kbdc, PSMD_RES_LOW) != PSMD_RES_LOW)
		return (c);
	if (set_mouse_scaling(kbdc, 1) && set_mouse_scaling(kbdc, 1) &&
	    set_mouse_scaling(kbdc, 1) &&
	    get_mouse_status(kbdc, status, 0, 3) >= 3 && status[1] != 0)
		return (status[1]);
	return (c);
}

/* misc subroutines */
/*
 * Someday, I will get the complete list of valid pointing devices and
 * their IDs...
XXX */ static int is_a_mouse(int id) { #if 0 static int valid_ids[] = { PSM_MOUSE_ID, /* mouse */ PSM_BALLPOINT_ID, /* ballpoint device */ PSM_INTELLI_ID, /* Intellimouse */ PSM_EXPLORER_ID, /* Intellimouse Explorer */ -1 /* end of table */ }; int i; for (i = 0; valid_ids[i] >= 0; ++i) if (valid_ids[i] == id) return (TRUE); return (FALSE); #else return (TRUE); #endif } static char * model_name(int model) { static struct { int model_code; char *model_name; } models[] = { { MOUSE_MODEL_NETSCROLL, "NetScroll" }, { MOUSE_MODEL_NET, "NetMouse/NetScroll Optical" }, { MOUSE_MODEL_GLIDEPOINT, "GlidePoint" }, { MOUSE_MODEL_THINK, "ThinkingMouse" }, { MOUSE_MODEL_INTELLI, "IntelliMouse" }, { MOUSE_MODEL_MOUSEMANPLUS, "MouseMan+" }, { MOUSE_MODEL_VERSAPAD, "VersaPad" }, { MOUSE_MODEL_EXPLORER, "IntelliMouse Explorer" }, { MOUSE_MODEL_4D, "4D Mouse" }, { MOUSE_MODEL_4DPLUS, "4D+ Mouse" }, { MOUSE_MODEL_SYNAPTICS, "Synaptics Touchpad" }, { MOUSE_MODEL_TRACKPOINT, "IBM/Lenovo TrackPoint" }, { MOUSE_MODEL_ELANTECH, "Elantech Touchpad" }, { MOUSE_MODEL_GENERIC, "Generic PS/2 mouse" }, { MOUSE_MODEL_UNKNOWN, "Unknown" }, }; int i; for (i = 0; models[i].model_code != MOUSE_MODEL_UNKNOWN; ++i) if (models[i].model_code == model) break; return (models[i].model_name); } static void recover_from_error(KBDC kbdc) { /* discard anything left in the output buffer */ empty_both_buffers(kbdc, 10); #if 0 /* * NOTE: KBDC_RESET_KBD may not restore the communication between the * keyboard and the controller. */ reset_kbd(kbdc); #else /* * NOTE: somehow diagnostic and keyboard port test commands bring the * keyboard back. */ if (!test_controller(kbdc)) log(LOG_ERR, "psm: keyboard controller failed.\n"); /* if there isn't a keyboard in the system, the following error is OK */ if (test_kbd_port(kbdc) != 0) VLOG(1, (LOG_ERR, "psm: keyboard port failed.\n")); #endif } static int restore_controller(KBDC kbdc, int command_byte) { empty_both_buffers(kbdc, 10); if (!set_controller_command_byte(kbdc, 0xff, command_byte)) { log(LOG_ERR, "psm: failed to restore the keyboard controller " "command byte.\n"); empty_both_buffers(kbdc, 10); return (FALSE); } else { empty_both_buffers(kbdc, 10); return (TRUE); } } /* * Re-initialize the aux port and device. The aux port must be enabled * and its interrupt must be disabled before calling this routine. * The aux device will be disabled before returning. * The keyboard controller must be locked via `kbdc_lock()' before * calling this routine. */ static int doinitialize(struct psm_softc *sc, mousemode_t *mode) { KBDC kbdc = sc->kbdc; int stat[3]; int i; switch((i = test_aux_port(kbdc))) { case 1: /* ignore these errors */ case 2: case 3: case PSM_ACK: if (verbose) log(LOG_DEBUG, "psm%d: strange result for test aux port (%d).\n", sc->unit, i); /* FALLTHROUGH */ case 0: /* no error */ break; case -1: /* time out */ default: /* error */ recover_from_error(kbdc); if (sc->config & PSM_CONFIG_IGNPORTERROR) break; log(LOG_ERR, "psm%d: the aux port is not functioning (%d).\n", sc->unit, i); return (FALSE); } if (sc->config & PSM_CONFIG_NORESET) { /* * Don't try to reset the pointing device. It may possibly * be left in the unknown state, though... */ } else { /* * NOTE: some controllers appears to hang the `keyboard' when * the aux port doesn't exist and `PSMC_RESET_DEV' is issued. 
*/ if (!reset_aux_dev(kbdc)) { recover_from_error(kbdc); log(LOG_ERR, "psm%d: failed to reset the aux device.\n", sc->unit); return (FALSE); } } /* * both the aux port and the aux device are functioning; see * if the device can be enabled. */ if (!enable_aux_dev(kbdc) || !disable_aux_dev(kbdc)) { log(LOG_ERR, "psm%d: failed to enable the aux device.\n", sc->unit); return (FALSE); } empty_both_buffers(kbdc, 10); /* remove stray data if any */ /* Re-enable the mouse. */ for (i = 0; vendortype[i].probefunc != NULL; ++i) if (vendortype[i].model == sc->hw.model) (*vendortype[i].probefunc)(sc, REINIT); /* set mouse parameters */ if (mode != (mousemode_t *)NULL) { if (mode->rate > 0) mode->rate = set_mouse_sampling_rate(kbdc, mode->rate); if (mode->resolution >= 0) mode->resolution = set_mouse_resolution(kbdc, mode->resolution); set_mouse_scaling(kbdc, 1); set_mouse_mode(kbdc); } /* Record sync on the next data packet we see. */ sc->flags |= PSM_NEED_SYNCBITS; /* just check the status of the mouse */ if (get_mouse_status(kbdc, stat, 0, 3) < 3) log(LOG_DEBUG, "psm%d: failed to get status (doinitialize).\n", sc->unit); return (TRUE); } static int doopen(struct psm_softc *sc, int command_byte) { int stat[3]; /* * FIXME: Synaptics TouchPad seems to go back to Relative Mode for * no obvious reason. Thus we check the current mode and restore the * Absolute Mode if it was cleared. * * The previous hack at the end of psmprobe() wasn't efficient when * moused(8) was restarted. * * A Reset (FF) or Set Defaults (F6) command would clear the * Absolute Mode bit. But a verbose boot or debug.psm.loglevel=5 * doesn't show any evidence of such a command. */ if (sc->hw.model == MOUSE_MODEL_SYNAPTICS) { mouse_ext_command(sc->kbdc, 1); get_mouse_status(sc->kbdc, stat, 0, 3); if ((SYNAPTICS_VERSION_GE(sc->synhw, 7, 5) || stat[1] == 0x46 || stat[1] == 0x47) && stat[2] == 0x40) { synaptics_set_mode(sc, synaptics_preferred_mode(sc)); VLOG(5, (LOG_DEBUG, "psm%d: Synaptics Absolute Mode " "hopefully restored\n", sc->unit)); } } /* * A user may want to disable tap and drag gestures on a Synaptics * TouchPad when it operates in Relative Mode. */ if (sc->hw.model == MOUSE_MODEL_GENERIC) { if (tap_enabled > 0) { VLOG(2, (LOG_DEBUG, "psm%d: enable tap and drag gestures\n", sc->unit)); synaptics_set_mode(sc, synaptics_preferred_mode(sc)); } else if (tap_enabled == 0) { VLOG(2, (LOG_DEBUG, "psm%d: disable tap and drag gestures\n", sc->unit)); synaptics_set_mode(sc, synaptics_preferred_mode(sc)); } } /* enable the mouse device */ if (!enable_aux_dev(sc->kbdc)) { /* MOUSE ERROR: failed to enable the mouse because: * 1) the mouse is faulty, * 2) the mouse has been removed(!?) * In the latter case, the keyboard may have hung and needs * the recovery procedure... */ recover_from_error(sc->kbdc); #if 0 /* FIXME: we could reset the mouse here and try to enable * it again. But it will take a long time and it's not a good * idea to disable the keyboard that long...
*/ if (!doinitialize(sc, &sc->mode) || !enable_aux_dev(sc->kbdc)) { recover_from_error(sc->kbdc); #else { #endif restore_controller(sc->kbdc, command_byte); /* mark this device as no longer available */ sc->state &= ~PSM_VALID; log(LOG_ERR, "psm%d: failed to enable the device (doopen).\n", sc->unit); return (EIO); } } if (get_mouse_status(sc->kbdc, stat, 0, 3) < 3) log(LOG_DEBUG, "psm%d: failed to get status (doopen).\n", sc->unit); /* enable the aux port and interrupt */ if (!set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), (command_byte & KBD_KBD_CONTROL_BITS) | KBD_ENABLE_AUX_PORT | KBD_ENABLE_AUX_INT)) { /* CONTROLLER ERROR */ disable_aux_dev(sc->kbdc); restore_controller(sc->kbdc, command_byte); log(LOG_ERR, "psm%d: failed to enable the aux interrupt (doopen).\n", sc->unit); return (EIO); } /* start the watchdog timer */ sc->watchdog = FALSE; callout_reset(&sc->callout, hz * 2, psmtimeout, sc); return (0); } static int reinitialize(struct psm_softc *sc, int doinit) { int err; int c; int s; /* don't let anybody mess with the aux device */ if (!kbdc_lock(sc->kbdc, TRUE)) return (EIO); s = spltty(); /* block our watchdog timer */ sc->watchdog = FALSE; callout_stop(&sc->callout); /* save the current controller command byte */ empty_both_buffers(sc->kbdc, 10); c = get_controller_command_byte(sc->kbdc); VLOG(2, (LOG_DEBUG, "psm%d: current command byte: %04x (reinitialize).\n", sc->unit, c)); /* enable the aux port but disable the aux interrupt and the keyboard */ if ((c == -1) || !set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT | KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { /* CONTROLLER ERROR */ splx(s); kbdc_lock(sc->kbdc, FALSE); log(LOG_ERR, "psm%d: unable to set the command byte (reinitialize).\n", sc->unit); return (EIO); } /* flush any data */ if (sc->state & PSM_VALID) { /* this may fail; but never mind... */ disable_aux_dev(sc->kbdc); empty_aux_buffer(sc->kbdc, 10); } flushpackets(sc); sc->syncerrors = 0; sc->pkterrors = 0; memset(&sc->lastinputerr, 0, sizeof(sc->lastinputerr)); /* try to detect the aux device; are you still there? */ err = 0; if (doinit) { if (doinitialize(sc, &sc->mode)) { /* yes */ sc->state |= PSM_VALID; } else { /* the device has gone! */ restore_controller(sc->kbdc, c); sc->state &= ~PSM_VALID; log(LOG_ERR, "psm%d: the aux device has gone!
(reinitialize).\n", sc->unit); err = ENXIO; } } splx(s); /* restore the driver state */ if ((sc->state & (PSM_OPEN | PSM_EV_OPEN_R | PSM_EV_OPEN_A)) && (err == 0)) { /* enable the aux device and the port again */ err = doopen(sc, c); if (err != 0) log(LOG_ERR, "psm%d: failed to enable the device " "(reinitialize).\n", sc->unit); } else { /* restore the keyboard port and disable the aux port */ if (!set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), (c & KBD_KBD_CONTROL_BITS) | KBD_DISABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { /* CONTROLLER ERROR */ log(LOG_ERR, "psm%d: failed to disable the aux port " "(reinitialize).\n", sc->unit); err = EIO; } } kbdc_lock(sc->kbdc, FALSE); return (err); } /* psm driver entry points */ static void psmidentify(driver_t *driver, device_t parent) { device_t psmc; device_t psm; u_long irq; int unit; unit = device_get_unit(parent); /* always add at least one child */ psm = BUS_ADD_CHILD(parent, KBDC_RID_AUX, driver->name, unit); if (psm == NULL) return; irq = bus_get_resource_start(psm, SYS_RES_IRQ, KBDC_RID_AUX); if (irq > 0) return; /* * If the PS/2 mouse device has already been reported by ACPI or * PnP BIOS, obtain the IRQ resource from it. * (See psmcpnp_attach() below.) */ psmc = device_find_child(device_get_parent(parent), PSMCPNP_DRIVER_NAME, unit); if (psmc == NULL) return; irq = bus_get_resource_start(psmc, SYS_RES_IRQ, 0); if (irq <= 0) return; bus_delete_resource(psmc, SYS_RES_IRQ, 0); bus_set_resource(psm, SYS_RES_IRQ, KBDC_RID_AUX, irq, 1); } #define endprobe(v) do { \ if (bootverbose) \ --verbose; \ kbdc_set_device_mask(sc->kbdc, mask); \ kbdc_lock(sc->kbdc, FALSE); \ return (v); \ } while (0) static int psmprobe(device_t dev) { int unit = device_get_unit(dev); struct psm_softc *sc = device_get_softc(dev); int stat[3]; int command_byte; int mask; int rid; int i; #if 0 kbdc_debug(TRUE); #endif /* see if IRQ is available */ rid = KBDC_RID_AUX; sc->intr = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->intr == NULL) { if (bootverbose) device_printf(dev, "unable to allocate IRQ\n"); return (ENXIO); } bus_release_resource(dev, SYS_RES_IRQ, rid, sc->intr); sc->unit = unit; sc->kbdc = atkbdc_open(device_get_unit(device_get_parent(dev))); sc->config = device_get_flags(dev) & PSM_CONFIG_FLAGS; /* XXX: for backward compatibility */ #if defined(PSM_HOOKRESUME) || defined(PSM_HOOKAPM) sc->config |= #ifdef PSM_RESETAFTERSUSPEND PSM_CONFIG_INITAFTERSUSPEND; #else PSM_CONFIG_HOOKRESUME; #endif #endif /* PSM_HOOKRESUME | PSM_HOOKAPM */ sc->flags = 0; if (bootverbose) ++verbose; device_set_desc(dev, "PS/2 Mouse"); if (!kbdc_lock(sc->kbdc, TRUE)) { printf("psm%d: unable to lock the controller.\n", unit); if (bootverbose) --verbose; return (ENXIO); } /* * NOTE: two bits in the command byte controls the operation of the * aux port (mouse port): the aux port disable bit (bit 5) and the aux * port interrupt (IRQ 12) enable bit (bit 2). 
*/ /* discard anything left after the keyboard initialization */ empty_both_buffers(sc->kbdc, 10); /* save the current command byte; it will be used later */ mask = kbdc_get_device_mask(sc->kbdc) & ~KBD_AUX_CONTROL_BITS; command_byte = get_controller_command_byte(sc->kbdc); if (verbose) printf("psm%d: current command byte:%04x\n", unit, command_byte); if (command_byte == -1) { /* CONTROLLER ERROR */ printf("psm%d: unable to get the current command byte value.\n", unit); endprobe(ENXIO); } /* * disable the keyboard port while probing the aux port, which must be * enabled during this routine */ if (!set_controller_command_byte(sc->kbdc, KBD_KBD_CONTROL_BITS | KBD_AUX_CONTROL_BITS, KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT | KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { /* * this is CONTROLLER ERROR; I don't know how to recover * from this error... */ if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) restore_controller(sc->kbdc, command_byte); printf("psm%d: unable to set the command byte.\n", unit); endprobe(ENXIO); } write_controller_command(sc->kbdc, KBDC_ENABLE_AUX_PORT); /* * NOTE: `test_aux_port()' is designed to return with zero if the aux * port exists and is functioning. However, some controllers appear * to respond with zero even when the aux port doesn't exist. (It may * be that this is only the case when the controller DOES have the aux * port but the port is not wired on the motherboard.) The keyboard * controllers without the port, such as the original AT, are * supposed to return with an error code or simply time out. In any * case, we have to continue probing the port even when the controller * passes this test. * * XXX: some controllers erroneously return the error code 1, 2 or 3 * when they have a perfectly functional aux port. We have to ignore * this error code. Even if the controller HAS an error with the aux * port, it will be detected later... * XXX: another incompatible controller returns PSM_ACK (0xfa)... */ switch ((i = test_aux_port(sc->kbdc))) { case 1: /* ignore these errors */ case 2: case 3: case PSM_ACK: if (verbose) printf("psm%d: strange result for test aux port " "(%d).\n", unit, i); /* FALLTHROUGH */ case 0: /* no error */ break; case -1: /* time out */ default: /* error */ recover_from_error(sc->kbdc); if (sc->config & PSM_CONFIG_IGNPORTERROR) break; if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) restore_controller(sc->kbdc, command_byte); if (verbose) printf("psm%d: the aux port is not functioning (%d).\n", unit, i); endprobe(ENXIO); } if (sc->config & PSM_CONFIG_NORESET) { /* * Don't try to reset the pointing device. It may possibly be * left in an unknown state, though... */ } else { /* * NOTE: some controllers appear to hang the `keyboard' when * the aux port doesn't exist and `PSMC_RESET_DEV' is issued. * * Attempt to reset the controller twice -- this helps * pierce through some KVM switches. The second reset * is non-fatal. */ if (!reset_aux_dev(sc->kbdc)) { recover_from_error(sc->kbdc); if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) restore_controller(sc->kbdc, command_byte); if (verbose) printf("psm%d: failed to reset the aux " "device.\n", unit); endprobe(ENXIO); } else if (!reset_aux_dev(sc->kbdc)) { recover_from_error(sc->kbdc); if (verbose >= 2) printf("psm%d: failed to reset the aux device " "(2).\n", unit); } } /* * both the aux port and the aux device are functioning, see if the * device can be enabled. NOTE: when enabled, the device will start * sending data; we shall immediately disable the device once we know * the device can be enabled.
*/ if (!enable_aux_dev(sc->kbdc) || !disable_aux_dev(sc->kbdc)) { /* MOUSE ERROR */ recover_from_error(sc->kbdc); if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) restore_controller(sc->kbdc, command_byte); if (verbose) printf("psm%d: failed to enable the aux device.\n", unit); endprobe(ENXIO); } /* save the default values after reset */ if (get_mouse_status(sc->kbdc, stat, 0, 3) >= 3) { sc->dflt_mode.rate = sc->mode.rate = stat[2]; sc->dflt_mode.resolution = sc->mode.resolution = stat[1]; } else { sc->dflt_mode.rate = sc->mode.rate = -1; sc->dflt_mode.resolution = sc->mode.resolution = -1; } /* hardware information */ sc->hw.iftype = MOUSE_IF_PS2; /* verify the device is a mouse */ sc->hw.hwid = get_aux_id(sc->kbdc); if (!is_a_mouse(sc->hw.hwid)) { if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) restore_controller(sc->kbdc, command_byte); if (verbose) printf("psm%d: unknown device type (%d).\n", unit, sc->hw.hwid); endprobe(ENXIO); } switch (sc->hw.hwid) { case PSM_BALLPOINT_ID: sc->hw.type = MOUSE_TRACKBALL; break; case PSM_MOUSE_ID: case PSM_INTELLI_ID: case PSM_EXPLORER_ID: case PSM_4DMOUSE_ID: case PSM_4DPLUS_ID: sc->hw.type = MOUSE_MOUSE; break; default: sc->hw.type = MOUSE_UNKNOWN; break; } if (sc->config & PSM_CONFIG_NOIDPROBE) { sc->hw.buttons = 2; i = GENERIC_MOUSE_ENTRY; } else { /* # of buttons */ sc->hw.buttons = get_mouse_buttons(sc->kbdc); /* other parameters */ for (i = 0; vendortype[i].probefunc != NULL; ++i) if ((*vendortype[i].probefunc)(sc, PROBE)) { if (verbose >= 2) printf("psm%d: found %s\n", unit, model_name(vendortype[i].model)); break; } } sc->hw.model = vendortype[i].model; sc->dflt_mode.level = PSM_LEVEL_BASE; sc->dflt_mode.packetsize = MOUSE_PS2_PACKETSIZE; sc->dflt_mode.accelfactor = (sc->config & PSM_CONFIG_ACCEL) >> 4; if (sc->config & PSM_CONFIG_NOCHECKSYNC) sc->dflt_mode.syncmask[0] = 0; else sc->dflt_mode.syncmask[0] = vendortype[i].syncmask; if (sc->config & PSM_CONFIG_FORCETAP) sc->dflt_mode.syncmask[0] &= ~MOUSE_PS2_TAP; sc->dflt_mode.syncmask[1] = 0; /* syncbits */ sc->mode = sc->dflt_mode; sc->mode.packetsize = vendortype[i].packetsize; /* set mouse parameters */ #if 0 /* * A version of Logitech FirstMouse+ won't report wheel movement, * if SET_DEFAULTS is sent... Don't use this command. * This fix was found by Takashi Nishida. */ i = send_aux_command(sc->kbdc, PSMC_SET_DEFAULTS); if (verbose >= 2) printf("psm%d: SET_DEFAULTS return code:%04x\n", unit, i); #endif if (sc->config & PSM_CONFIG_RESOLUTION) sc->mode.resolution = set_mouse_resolution(sc->kbdc, (sc->config & PSM_CONFIG_RESOLUTION) - 1); else if (sc->mode.resolution >= 0) sc->mode.resolution = set_mouse_resolution(sc->kbdc, sc->dflt_mode.resolution); if (sc->mode.rate > 0) sc->mode.rate = set_mouse_sampling_rate(sc->kbdc, sc->dflt_mode.rate); set_mouse_scaling(sc->kbdc, 1); /* Record sync on the next data packet we see. */ sc->flags |= PSM_NEED_SYNCBITS; /* just check the status of the mouse */ /* * NOTE: XXX there are some arcane controller/mouse combinations out * there, which hung the controller unless there is data transmission * after ACK from the mouse. */ if (get_mouse_status(sc->kbdc, stat, 0, 3) < 3) printf("psm%d: failed to get status.\n", unit); else { /* * When in its native mode, some mice operate with different * default parameters than in the PS/2 compatible mode. */ sc->dflt_mode.rate = sc->mode.rate = stat[2]; sc->dflt_mode.resolution = sc->mode.resolution = stat[1]; } /* disable the aux port for now... 
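* doopen() will enable the port and its interrupt again when the device is actually opened; the probe should leave the hardware quiescent.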
*/ if (!set_controller_command_byte(sc->kbdc, KBD_KBD_CONTROL_BITS | KBD_AUX_CONTROL_BITS, (command_byte & KBD_KBD_CONTROL_BITS) | KBD_DISABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { /* * this is CONTROLLER ERROR; I don't know the proper way to * recover from this error... */ if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc)) restore_controller(sc->kbdc, command_byte); printf("psm%d: unable to set the command byte.\n", unit); endprobe(ENXIO); } /* done */ kbdc_set_device_mask(sc->kbdc, mask | KBD_AUX_CONTROL_BITS); kbdc_lock(sc->kbdc, FALSE); return (0); } #ifdef EVDEV_SUPPORT /* Values are taken from Linux drivers for userland software compatibility */ #define PS2_MOUSE_VENDOR 0x0002 #define PS2_MOUSE_GENERIC_PRODUCT 0x0001 #define PS2_MOUSE_SYNAPTICS_NAME "SynPS/2 Synaptics TouchPad" #define PS2_MOUSE_SYNAPTICS_PRODUCT 0x0007 #define PS2_MOUSE_TRACKPOINT_NAME "TPPS/2 IBM TrackPoint" #define PS2_MOUSE_TRACKPOINT_PRODUCT 0x000A #define PS2_MOUSE_ELANTECH_NAME "ETPS/2 Elantech Touchpad" #define PS2_MOUSE_ELANTECH_ST_NAME "ETPS/2 Elantech TrackPoint" #define PS2_MOUSE_ELANTECH_PRODUCT 0x000E #define ABSINFO_END { ABS_CNT, 0, 0, 0 } static void psm_support_abs_bulk(struct evdev_dev *evdev, const uint16_t info[][4]) { size_t i; for (i = 0; info[i][0] != ABS_CNT; i++) evdev_support_abs(evdev, info[i][0], 0, info[i][1], info[i][2], 0, 0, info[i][3]); } static void psm_push_mt_finger(struct psm_softc *sc, int id, const finger_t *f) { int y = sc->synhw.minimumYCoord + sc->synhw.maximumYCoord - f->y; evdev_push_abs(sc->evdev_a, ABS_MT_SLOT, id); evdev_push_abs(sc->evdev_a, ABS_MT_TRACKING_ID, id); evdev_push_abs(sc->evdev_a, ABS_MT_POSITION_X, f->x); evdev_push_abs(sc->evdev_a, ABS_MT_POSITION_Y, y); evdev_push_abs(sc->evdev_a, ABS_MT_PRESSURE, f->p); } static void psm_push_st_finger(struct psm_softc *sc, const finger_t *f) { int y = sc->synhw.minimumYCoord + sc->synhw.maximumYCoord - f->y; evdev_push_abs(sc->evdev_a, ABS_X, f->x); evdev_push_abs(sc->evdev_a, ABS_Y, y); evdev_push_abs(sc->evdev_a, ABS_PRESSURE, f->p); if (sc->synhw.capPalmDetect) evdev_push_abs(sc->evdev_a, ABS_TOOL_WIDTH, f->w); } static void psm_release_mt_slot(struct evdev_dev *evdev, int32_t slot) { evdev_push_abs(evdev, ABS_MT_SLOT, slot); evdev_push_abs(evdev, ABS_MT_TRACKING_ID, -1); } static int psm_register(device_t dev, int model_code) { struct psm_softc *sc = device_get_softc(dev); struct evdev_dev *evdev_r; int error, i, nbuttons, nwheels, product; bool is_pointing_stick; const char *name; name = model_name(model_code); nbuttons = sc->hw.buttons; product = PS2_MOUSE_GENERIC_PRODUCT; nwheels = 0; is_pointing_stick = false; switch (model_code) { case MOUSE_MODEL_TRACKPOINT: name = PS2_MOUSE_TRACKPOINT_NAME; product = PS2_MOUSE_TRACKPOINT_PRODUCT; nbuttons = 3; is_pointing_stick = true; break; case MOUSE_MODEL_ELANTECH: name = PS2_MOUSE_ELANTECH_ST_NAME; product = PS2_MOUSE_ELANTECH_PRODUCT; nbuttons = 3; is_pointing_stick = true; break; case MOUSE_MODEL_MOUSEMANPLUS: case MOUSE_MODEL_4D: nwheels = 2; break; case MOUSE_MODEL_EXPLORER: case MOUSE_MODEL_INTELLI: case MOUSE_MODEL_NET: case MOUSE_MODEL_NETSCROLL: case MOUSE_MODEL_4DPLUS: nwheels = 1; break; } evdev_r = evdev_alloc(); evdev_set_name(evdev_r, name); evdev_set_phys(evdev_r, device_get_nameunit(dev)); evdev_set_id(evdev_r, BUS_I8042, PS2_MOUSE_VENDOR, product, 0); evdev_set_methods(evdev_r, sc, &psm_ev_methods_r); evdev_support_prop(evdev_r, INPUT_PROP_POINTER); if (is_pointing_stick) evdev_support_prop(evdev_r, INPUT_PROP_POINTING_STICK); evdev_support_event(evdev_r, EV_SYN); 
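/* Declare the rest of the relative device's capabilities: X/Y motion, one or two wheels depending on the model, and BTN_MOUSE + n for each button, mirroring the IDs and names borrowed from Linux above. */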
evdev_support_event(evdev_r, EV_KEY); evdev_support_event(evdev_r, EV_REL); evdev_support_rel(evdev_r, REL_X); evdev_support_rel(evdev_r, REL_Y); switch (nwheels) { case 2: evdev_support_rel(evdev_r, REL_HWHEEL); /* FALLTHROUGH */ case 1: evdev_support_rel(evdev_r, REL_WHEEL); } for (i = 0; i < nbuttons; i++) evdev_support_key(evdev_r, BTN_MOUSE + i); error = evdev_register_mtx(evdev_r, &Giant); if (error) evdev_free(evdev_r); else sc->evdev_r = evdev_r; return (error); } static int psm_register_synaptics(device_t dev) { struct psm_softc *sc = device_get_softc(dev); const uint16_t synaptics_absinfo_st[][4] = { { ABS_X, sc->synhw.minimumXCoord, sc->synhw.maximumXCoord, sc->synhw.infoXupmm }, { ABS_Y, sc->synhw.minimumYCoord, sc->synhw.maximumYCoord, sc->synhw.infoYupmm }, { ABS_PRESSURE, 0, ELANTECH_FINGER_MAX_P, 0 }, ABSINFO_END, }; const uint16_t synaptics_absinfo_mt[][4] = { { ABS_MT_SLOT, 0, PSM_FINGERS-1, 0}, { ABS_MT_TRACKING_ID, -1, PSM_FINGERS-1, 0}, { ABS_MT_POSITION_X, sc->synhw.minimumXCoord, sc->synhw.maximumXCoord, sc->synhw.infoXupmm }, { ABS_MT_POSITION_Y, sc->synhw.minimumYCoord, sc->synhw.maximumYCoord, sc->synhw.infoYupmm }, { ABS_MT_PRESSURE, 0, ELANTECH_FINGER_MAX_P, 0 }, ABSINFO_END, }; struct evdev_dev *evdev_a; int error, i, guest_model; evdev_a = evdev_alloc(); evdev_set_name(evdev_a, PS2_MOUSE_SYNAPTICS_NAME); evdev_set_phys(evdev_a, device_get_nameunit(dev)); evdev_set_id(evdev_a, BUS_I8042, PS2_MOUSE_VENDOR, PS2_MOUSE_SYNAPTICS_PRODUCT, 0); evdev_set_methods(evdev_a, sc, &psm_ev_methods_a); evdev_support_event(evdev_a, EV_SYN); evdev_support_event(evdev_a, EV_KEY); evdev_support_event(evdev_a, EV_ABS); evdev_support_prop(evdev_a, INPUT_PROP_POINTER); if (sc->synhw.capAdvancedGestures) evdev_support_prop(evdev_a, INPUT_PROP_SEMI_MT); if (sc->synhw.capClickPad) evdev_support_prop(evdev_a, INPUT_PROP_BUTTONPAD); evdev_support_key(evdev_a, BTN_TOUCH); evdev_support_nfingers(evdev_a, 3); psm_support_abs_bulk(evdev_a, synaptics_absinfo_st); if (sc->synhw.capAdvancedGestures || sc->synhw.capReportsV) psm_support_abs_bulk(evdev_a, synaptics_absinfo_mt); if (sc->synhw.capPalmDetect) evdev_support_abs(evdev_a, ABS_TOOL_WIDTH, 0, 0, 15, 0, 0, 0); evdev_support_key(evdev_a, BTN_LEFT); if (!sc->synhw.capClickPad) { evdev_support_key(evdev_a, BTN_RIGHT); if (sc->synhw.capExtended && sc->synhw.capMiddle) evdev_support_key(evdev_a, BTN_MIDDLE); } if (sc->synhw.capExtended && sc->synhw.capFourButtons) { evdev_support_key(evdev_a, BTN_BACK); evdev_support_key(evdev_a, BTN_FORWARD); } if (sc->synhw.capExtended && (sc->synhw.nExtendedButtons > 0)) for (i = 0; i < sc->synhw.nExtendedButtons; i++) evdev_support_key(evdev_a, BTN_0 + i); error = evdev_register_mtx(evdev_a, &Giant); if (!error && sc->synhw.capPassthrough) { guest_model = sc->tpinfo.sysctl_tree != NULL ? 
MOUSE_MODEL_TRACKPOINT : MOUSE_MODEL_GENERIC; error = psm_register(dev, guest_model); } if (error) evdev_free(evdev_a); else sc->evdev_a = evdev_a; return (error); } static int psm_register_elantech(device_t dev) { struct psm_softc *sc = device_get_softc(dev); const uint16_t elantech_absinfo[][4] = { { ABS_X, 0, sc->elanhw.sizex, sc->elanhw.dpmmx }, { ABS_Y, 0, sc->elanhw.sizey, sc->elanhw.dpmmy }, { ABS_PRESSURE, 0, ELANTECH_FINGER_MAX_P, 0 }, { ABS_TOOL_WIDTH, 0, ELANTECH_FINGER_MAX_W, 0 }, { ABS_MT_SLOT, 0, ELANTECH_MAX_FINGERS - 1, 0 }, { ABS_MT_TRACKING_ID, -1, ELANTECH_MAX_FINGERS - 1, 0 }, { ABS_MT_POSITION_X, 0, sc->elanhw.sizex, sc->elanhw.dpmmx }, { ABS_MT_POSITION_Y, 0, sc->elanhw.sizey, sc->elanhw.dpmmy }, { ABS_MT_PRESSURE, 0, ELANTECH_FINGER_MAX_P, 0 }, { ABS_MT_TOUCH_MAJOR, 0, ELANTECH_FINGER_MAX_W * sc->elanhw.dptracex, 0 }, ABSINFO_END, }; struct evdev_dev *evdev_a; int error; evdev_a = evdev_alloc(); evdev_set_name(evdev_a, PS2_MOUSE_ELANTECH_NAME); evdev_set_phys(evdev_a, device_get_nameunit(dev)); evdev_set_id(evdev_a, BUS_I8042, PS2_MOUSE_VENDOR, PS2_MOUSE_ELANTECH_PRODUCT, 0); evdev_set_methods(evdev_a, sc, &psm_ev_methods_a); evdev_support_event(evdev_a, EV_SYN); evdev_support_event(evdev_a, EV_KEY); evdev_support_event(evdev_a, EV_ABS); evdev_support_prop(evdev_a, INPUT_PROP_POINTER); if (sc->elanhw.issemimt) evdev_support_prop(evdev_a, INPUT_PROP_SEMI_MT); if (sc->elanhw.isclickpad) evdev_support_prop(evdev_a, INPUT_PROP_BUTTONPAD); evdev_support_key(evdev_a, BTN_TOUCH); evdev_support_nfingers(evdev_a, ELANTECH_MAX_FINGERS); evdev_support_key(evdev_a, BTN_LEFT); if (!sc->elanhw.isclickpad) evdev_support_key(evdev_a, BTN_RIGHT); psm_support_abs_bulk(evdev_a, elantech_absinfo); error = evdev_register_mtx(evdev_a, &Giant); if (!error && sc->elanhw.hastrackpoint) error = psm_register(dev, MOUSE_MODEL_ELANTECH); if (error) evdev_free(evdev_a); else sc->evdev_a = evdev_a; return (error); } #endif static int psmattach(device_t dev) { int unit = device_get_unit(dev); struct psm_softc *sc = device_get_softc(dev); int error; int rid; /* Setup initial state */ sc->state = PSM_VALID; callout_init(&sc->callout, 0); callout_init(&sc->softcallout, 0); /* Setup our interrupt handler */ rid = KBDC_RID_AUX; sc->intr = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->intr == NULL) return (ENXIO); error = bus_setup_intr(dev, sc->intr, INTR_TYPE_TTY, NULL, psmintr, sc, &sc->ih); if (error) { bus_release_resource(dev, SYS_RES_IRQ, rid, sc->intr); return (error); } /* Done */ sc->dev = make_dev(&psm_cdevsw, 0, 0, 0, 0666, "psm%d", unit); sc->dev->si_drv1 = sc; sc->bdev = make_dev(&psm_cdevsw, 0, 0, 0, 0666, "bpsm%d", unit); sc->bdev->si_drv1 = sc; #ifdef EVDEV_SUPPORT switch (sc->hw.model) { case MOUSE_MODEL_SYNAPTICS: error = psm_register_synaptics(dev); break; case MOUSE_MODEL_ELANTECH: error = psm_register_elantech(dev); break; default: error = psm_register(dev, sc->hw.model); } if (error) return (error); #endif /* Some touchpad devices need full reinitialization after suspend. 
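* The switch below flags the models known to lose their extended or absolute mode across a suspend/resume cycle; PSM_CONFIG_INITAFTERSUSPEND makes the resume path (not shown here) run the full doinitialize() sequence again.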
*/ switch (sc->hw.model) { case MOUSE_MODEL_SYNAPTICS: case MOUSE_MODEL_GLIDEPOINT: case MOUSE_MODEL_VERSAPAD: case MOUSE_MODEL_ELANTECH: sc->config |= PSM_CONFIG_INITAFTERSUSPEND; break; default: if (sc->synhw.infoMajor >= 4 || sc->tphw > 0) sc->config |= PSM_CONFIG_INITAFTERSUSPEND; break; } /* Elantech trackpad`s sync bit differs from touchpad`s one */ if (sc->hw.model == MOUSE_MODEL_ELANTECH && (sc->elanhw.hascrc || sc->elanhw.hastrackpoint)) { sc->config |= PSM_CONFIG_NOCHECKSYNC; sc->flags &= ~PSM_NEED_SYNCBITS; } if (!verbose) printf("psm%d: model %s, device ID %d\n", unit, model_name(sc->hw.model), sc->hw.hwid & 0x00ff); else { printf("psm%d: model %s, device ID %d-%02x, %d buttons\n", unit, model_name(sc->hw.model), sc->hw.hwid & 0x00ff, sc->hw.hwid >> 8, sc->hw.buttons); printf("psm%d: config:%08x, flags:%08x, packet size:%d\n", unit, sc->config, sc->flags, sc->mode.packetsize); printf("psm%d: syncmask:%02x, syncbits:%02x%s\n", unit, sc->mode.syncmask[0], sc->mode.syncmask[1], sc->config & PSM_CONFIG_NOCHECKSYNC ? " (sync not checked)" : ""); } if (bootverbose) --verbose; return (0); } static int psmdetach(device_t dev) { struct psm_softc *sc; int rid; sc = device_get_softc(dev); if (sc->state & PSM_OPEN) return (EBUSY); #ifdef EVDEV_SUPPORT evdev_free(sc->evdev_r); evdev_free(sc->evdev_a); #endif rid = KBDC_RID_AUX; bus_teardown_intr(dev, sc->intr, sc->ih); bus_release_resource(dev, SYS_RES_IRQ, rid, sc->intr); destroy_dev(sc->dev); destroy_dev(sc->bdev); callout_drain(&sc->callout); callout_drain(&sc->softcallout); return (0); } #ifdef EVDEV_SUPPORT static int psm_ev_open_r(struct evdev_dev *evdev, void *ev_softc) { struct psm_softc *sc = (struct psm_softc *)ev_softc; int err = 0; /* Get device data */ if ((sc->state & PSM_VALID) == 0) { /* the device is no longer valid/functioning */ return (ENXIO); } if (!(sc->state & (PSM_OPEN | PSM_EV_OPEN_A))) err = psmopen(sc); if (err == 0) sc->state |= PSM_EV_OPEN_R; return (err); } static void psm_ev_close_r(struct evdev_dev *evdev, void *ev_softc) { struct psm_softc *sc = (struct psm_softc *)ev_softc; sc->state &= ~PSM_EV_OPEN_R; if (sc->state & (PSM_OPEN | PSM_EV_OPEN_A)) return; if (sc->state & PSM_VALID) psmclose(sc); } static int psm_ev_open_a(struct evdev_dev *evdev, void *ev_softc) { struct psm_softc *sc = (struct psm_softc *)ev_softc; int err = 0; /* Get device data */ if ((sc->state & PSM_VALID) == 0) { /* the device is no longer valid/functioning */ return (ENXIO); } if (!(sc->state & (PSM_OPEN | PSM_EV_OPEN_R))) err = psmopen(sc); if (err == 0) sc->state |= PSM_EV_OPEN_A; return (err); } static void psm_ev_close_a(struct evdev_dev *evdev, void *ev_softc) { struct psm_softc *sc = (struct psm_softc *)ev_softc; sc->state &= ~PSM_EV_OPEN_A; if (sc->state & (PSM_OPEN | PSM_EV_OPEN_R)) return; if (sc->state & PSM_VALID) psmclose(sc); } #endif static int psm_cdev_open(struct cdev *dev, int flag, int fmt, struct thread *td) { struct psm_softc *sc; int err = 0; /* Get device data */ sc = dev->si_drv1; if ((sc == NULL) || (sc->state & PSM_VALID) == 0) { /* the device is no longer valid/functioning */ return (ENXIO); } /* Disallow multiple opens */ if (sc->state & PSM_OPEN) return (EBUSY); device_busy(devclass_get_device(psm_devclass, sc->unit)); #ifdef EVDEV_SUPPORT /* Already opened by evdev */ if (!(sc->state & (PSM_EV_OPEN_R | PSM_EV_OPEN_A))) #endif err = psmopen(sc); if (err == 0) sc->state |= PSM_OPEN; else device_unbusy(devclass_get_device(psm_devclass, sc->unit)); return (err); } static int psm_cdev_close(struct cdev *dev, int 
flag, int fmt, struct thread *td) { struct psm_softc *sc; int err = 0; /* Get device data */ sc = dev->si_drv1; if ((sc == NULL) || (sc->state & PSM_VALID) == 0) { /* the device is no longer valid/functioning */ return (ENXIO); } #ifdef EVDEV_SUPPORT /* Still opened by evdev */ if (!(sc->state & (PSM_EV_OPEN_R | PSM_EV_OPEN_A))) #endif err = psmclose(sc); if (err == 0) { sc->state &= ~PSM_OPEN; /* clean up any sigio requests */ if (sc->async != NULL) { funsetown(&sc->async); sc->async = NULL; } device_unbusy(devclass_get_device(psm_devclass, sc->unit)); } return (err); } static int psmopen(struct psm_softc *sc) { int command_byte; int err; int s; /* Initialize state */ sc->mode.level = sc->dflt_mode.level; sc->mode.protocol = sc->dflt_mode.protocol; sc->watchdog = FALSE; sc->async = NULL; /* flush the event queue */ sc->queue.count = 0; sc->queue.head = 0; sc->queue.tail = 0; sc->status.flags = 0; sc->status.button = 0; sc->status.obutton = 0; sc->status.dx = 0; sc->status.dy = 0; sc->status.dz = 0; sc->button = 0; sc->pqueue_start = 0; sc->pqueue_end = 0; /* empty input buffer */ flushpackets(sc); sc->syncerrors = 0; sc->pkterrors = 0; /* don't let timeout routines in the keyboard driver poll the kbdc */ if (!kbdc_lock(sc->kbdc, TRUE)) return (EIO); /* save the current controller command byte */ s = spltty(); command_byte = get_controller_command_byte(sc->kbdc); /* enable the aux port and temporarily disable the keyboard */ if (command_byte == -1 || !set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT | KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { /* CONTROLLER ERROR; do you know how to get out of this? */ kbdc_lock(sc->kbdc, FALSE); splx(s); log(LOG_ERR, "psm%d: unable to set the command byte (psmopen).\n", sc->unit); return (EIO); } /* * Now that the keyboard controller is told not to generate * the keyboard and mouse interrupts, call `splx()' to allow * the other tty interrupts. The clock interrupt may also occur, * but timeout routines will be blocked by the poll flag set * via `kbdc_lock()' */ splx(s); /* enable the mouse device */ err = doopen(sc, command_byte); /* done */ kbdc_lock(sc->kbdc, FALSE); return (err); } static int psmclose(struct psm_softc *sc) { int stat[3]; int command_byte; int s; /* don't let timeout routines in the keyboard driver poll the kbdc */ if (!kbdc_lock(sc->kbdc, TRUE)) return (EIO); /* save the current controller command byte */ s = spltty(); command_byte = get_controller_command_byte(sc->kbdc); if (command_byte == -1) { kbdc_lock(sc->kbdc, FALSE); splx(s); return (EIO); } /* disable the aux interrupt and temporarily disable the keyboard */ if (!set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT | KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { log(LOG_ERR, "psm%d: failed to disable the aux int (psmclose).\n", sc->unit); /* CONTROLLER ERROR; * NOTE: we shall force our way through, because the only * ill effect we shall see is that we may not be able * to read ACK from the mouse, and it doesn't matter much * so long as the mouse will accept the DISABLE command.
*/ } splx(s); /* stop the watchdog timer */ callout_stop(&sc->callout); /* remove anything left in the output buffer */ empty_aux_buffer(sc->kbdc, 10); /* disable the aux device, port and interrupt */ if (sc->state & PSM_VALID) { if (!disable_aux_dev(sc->kbdc)) { /* MOUSE ERROR; * NOTE: we don't return (error) and continue, * pretending we have successfully disabled the device. * It's OK because the interrupt routine will discard * any data from the mouse hereafter. */ log(LOG_ERR, "psm%d: failed to disable the device (psmclose).\n", sc->unit); } if (get_mouse_status(sc->kbdc, stat, 0, 3) < 3) log(LOG_DEBUG, "psm%d: failed to get status (psmclose).\n", sc->unit); } if (!set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), (command_byte & KBD_KBD_CONTROL_BITS) | KBD_DISABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { /* * CONTROLLER ERROR; * we shall ignore this error; see the above comment. */ log(LOG_ERR, "psm%d: failed to disable the aux port (psmclose).\n", sc->unit); } /* remove anything left in the output buffer */ empty_aux_buffer(sc->kbdc, 10); /* close is almost always successful */ kbdc_lock(sc->kbdc, FALSE); return (0); } static int tame_mouse(struct psm_softc *sc, packetbuf_t *pb, mousestatus_t *status, u_char *buf) { static u_char butmapps2[8] = { 0, MOUSE_PS2_BUTTON1DOWN, MOUSE_PS2_BUTTON2DOWN, MOUSE_PS2_BUTTON1DOWN | MOUSE_PS2_BUTTON2DOWN, MOUSE_PS2_BUTTON3DOWN, MOUSE_PS2_BUTTON1DOWN | MOUSE_PS2_BUTTON3DOWN, MOUSE_PS2_BUTTON2DOWN | MOUSE_PS2_BUTTON3DOWN, MOUSE_PS2_BUTTON1DOWN | MOUSE_PS2_BUTTON2DOWN | MOUSE_PS2_BUTTON3DOWN, }; static u_char butmapmsc[8] = { MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON2UP | MOUSE_MSC_BUTTON3UP, MOUSE_MSC_BUTTON2UP | MOUSE_MSC_BUTTON3UP, MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON3UP, MOUSE_MSC_BUTTON3UP, MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON2UP, MOUSE_MSC_BUTTON2UP, MOUSE_MSC_BUTTON1UP, 0, }; int mapped; int i; if (sc->mode.level == PSM_LEVEL_BASE) { mapped = status->button & ~MOUSE_BUTTON4DOWN; if (status->button & MOUSE_BUTTON4DOWN) mapped |= MOUSE_BUTTON1DOWN; status->button = mapped; buf[0] = MOUSE_PS2_SYNC | butmapps2[mapped & MOUSE_STDBUTTONS]; i = imax(imin(status->dx, 255), -256); if (i < 0) buf[0] |= MOUSE_PS2_XNEG; buf[1] = i; i = imax(imin(status->dy, 255), -256); if (i < 0) buf[0] |= MOUSE_PS2_YNEG; buf[2] = i; return (MOUSE_PS2_PACKETSIZE); } else if (sc->mode.level == PSM_LEVEL_STANDARD) { buf[0] = MOUSE_MSC_SYNC | butmapmsc[status->button & MOUSE_STDBUTTONS]; i = imax(imin(status->dx, 255), -256); buf[1] = i >> 1; buf[3] = i - buf[1]; i = imax(imin(status->dy, 255), -256); buf[2] = i >> 1; buf[4] = i - buf[2]; i = imax(imin(status->dz, 127), -128); buf[5] = (i >> 1) & 0x7f; buf[6] = (i - (i >> 1)) & 0x7f; buf[7] = (~status->button >> 3) & 0x7f; return (MOUSE_SYS_PACKETSIZE); } return (pb->inputbytes); } static int psmread(struct cdev *dev, struct uio *uio, int flag) { struct psm_softc *sc = dev->si_drv1; u_char buf[PSM_SMALLBUFSIZE]; int error = 0; int s; int l; if ((sc->state & PSM_VALID) == 0) return (EIO); /* block until mouse activity occurred */ s = spltty(); while (sc->queue.count <= 0) { if (dev != sc->bdev) { splx(s); return (EWOULDBLOCK); } sc->state |= PSM_ASLP; error = tsleep(sc, PZERO | PCATCH, "psmrea", 0); sc->state &= ~PSM_ASLP; if (error) { splx(s); return (error); } else if ((sc->state & PSM_VALID) == 0) { /* the device disappeared! 
*/ splx(s); return (EIO); } } splx(s); /* copy data to the user land */ while ((sc->queue.count > 0) && (uio->uio_resid > 0)) { s = spltty(); l = imin(sc->queue.count, uio->uio_resid); if (l > sizeof(buf)) l = sizeof(buf); if (l > sizeof(sc->queue.buf) - sc->queue.head) { bcopy(&sc->queue.buf[sc->queue.head], &buf[0], sizeof(sc->queue.buf) - sc->queue.head); bcopy(&sc->queue.buf[0], &buf[sizeof(sc->queue.buf) - sc->queue.head], l - (sizeof(sc->queue.buf) - sc->queue.head)); } else bcopy(&sc->queue.buf[sc->queue.head], &buf[0], l); sc->queue.count -= l; sc->queue.head = (sc->queue.head + l) % sizeof(sc->queue.buf); splx(s); error = uiomove(buf, l, uio); if (error) break; } return (error); } static int block_mouse_data(struct psm_softc *sc, int *c) { int s; if (!kbdc_lock(sc->kbdc, TRUE)) return (EIO); s = spltty(); *c = get_controller_command_byte(sc->kbdc); if ((*c == -1) || !set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT | KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { /* this is CONTROLLER ERROR */ splx(s); kbdc_lock(sc->kbdc, FALSE); return (EIO); } /* * The device may be in the middle of status data transmission. * The transmission will be interrupted, thus, incomplete status * data must be discarded. Although the aux interrupt is disabled * at the keyboard controller level, at most one aux interrupt * may have already been pending and a data byte is in the * output buffer; throw it away. Note that the second argument * to `empty_aux_buffer()' is zero, so that the call will just * flush the internal queue. * `psmintr()' will be invoked after `splx()' if an interrupt is * pending; it will see no data and returns immediately. */ empty_aux_buffer(sc->kbdc, 0); /* flush the queue */ read_aux_data_no_wait(sc->kbdc); /* throw away data if any */ flushpackets(sc); splx(s); return (0); } static void dropqueue(struct psm_softc *sc) { sc->queue.count = 0; sc->queue.head = 0; sc->queue.tail = 0; if ((sc->state & PSM_SOFTARMED) != 0) { sc->state &= ~PSM_SOFTARMED; callout_stop(&sc->softcallout); } sc->pqueue_start = sc->pqueue_end; } static void flushpackets(struct psm_softc *sc) { dropqueue(sc); bzero(&sc->pqueue, sizeof(sc->pqueue)); } static int unblock_mouse_data(struct psm_softc *sc, int c) { int error = 0; /* * We may have seen a part of status data during `set_mouse_XXX()'. * they have been queued; flush it. */ empty_aux_buffer(sc->kbdc, 0); /* restore ports and interrupt */ if (!set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), c & (KBD_KBD_CONTROL_BITS | KBD_AUX_CONTROL_BITS))) { /* * CONTROLLER ERROR; this is serious, we may have * been left with the inaccessible keyboard and * the disabled mouse interrupt. 
*/ error = EIO; } kbdc_lock(sc->kbdc, FALSE); return (error); } static int psmwrite(struct cdev *dev, struct uio *uio, int flag) { struct psm_softc *sc = dev->si_drv1; u_char buf[PSM_SMALLBUFSIZE]; int error = 0, i, l; if ((sc->state & PSM_VALID) == 0) return (EIO); if (sc->mode.level < PSM_LEVEL_NATIVE) return (ENODEV); /* copy data from the user land */ while (uio->uio_resid > 0) { l = imin(PSM_SMALLBUFSIZE, uio->uio_resid); error = uiomove(buf, l, uio); if (error) break; for (i = 0; i < l; i++) { VLOG(4, (LOG_DEBUG, "psm: cmd 0x%x\n", buf[i])); if (!write_aux_command(sc->kbdc, buf[i])) { VLOG(2, (LOG_DEBUG, "psm: cmd 0x%x failed.\n", buf[i])); return (reinitialize(sc, FALSE)); } } } return (error); } static int psmioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct psm_softc *sc = dev->si_drv1; mousemode_t mode; mousestatus_t status; -#if (defined(MOUSE_GETVARS)) - mousevar_t *var; -#endif mousedata_t *data; int stat[3]; int command_byte; int error = 0; int s; /* Perform IOCTL command */ switch (cmd) { case OLD_MOUSE_GETHWINFO: s = spltty(); ((old_mousehw_t *)addr)->buttons = sc->hw.buttons; ((old_mousehw_t *)addr)->iftype = sc->hw.iftype; ((old_mousehw_t *)addr)->type = sc->hw.type; ((old_mousehw_t *)addr)->hwid = sc->hw.hwid & 0x00ff; splx(s); break; case MOUSE_GETHWINFO: s = spltty(); *(mousehw_t *)addr = sc->hw; if (sc->mode.level == PSM_LEVEL_BASE) ((mousehw_t *)addr)->model = MOUSE_MODEL_GENERIC; splx(s); break; case MOUSE_SYN_GETHWINFO: s = spltty(); if (sc->synhw.infoMajor >= 4) *(synapticshw_t *)addr = sc->synhw; else error = EINVAL; splx(s); break; case OLD_MOUSE_GETMODE: s = spltty(); switch (sc->mode.level) { case PSM_LEVEL_BASE: ((old_mousemode_t *)addr)->protocol = MOUSE_PROTO_PS2; break; case PSM_LEVEL_STANDARD: ((old_mousemode_t *)addr)->protocol = MOUSE_PROTO_SYSMOUSE; break; case PSM_LEVEL_NATIVE: ((old_mousemode_t *)addr)->protocol = MOUSE_PROTO_PS2; break; } ((old_mousemode_t *)addr)->rate = sc->mode.rate; ((old_mousemode_t *)addr)->resolution = sc->mode.resolution; ((old_mousemode_t *)addr)->accelfactor = sc->mode.accelfactor; splx(s); break; case MOUSE_GETMODE: s = spltty(); *(mousemode_t *)addr = sc->mode; if ((sc->flags & PSM_NEED_SYNCBITS) != 0) { ((mousemode_t *)addr)->syncmask[0] = 0; ((mousemode_t *)addr)->syncmask[1] = 0; } ((mousemode_t *)addr)->resolution = MOUSE_RES_LOW - sc->mode.resolution; switch (sc->mode.level) { case PSM_LEVEL_BASE: ((mousemode_t *)addr)->protocol = MOUSE_PROTO_PS2; ((mousemode_t *)addr)->packetsize = MOUSE_PS2_PACKETSIZE; break; case PSM_LEVEL_STANDARD: ((mousemode_t *)addr)->protocol = MOUSE_PROTO_SYSMOUSE; ((mousemode_t *)addr)->packetsize = MOUSE_SYS_PACKETSIZE; ((mousemode_t *)addr)->syncmask[0] = MOUSE_SYS_SYNCMASK; ((mousemode_t *)addr)->syncmask[1] = MOUSE_SYS_SYNC; break; case PSM_LEVEL_NATIVE: /* FIXME: this isn't quite correct... XXX */ ((mousemode_t *)addr)->protocol = MOUSE_PROTO_PS2; break; } splx(s); break; case OLD_MOUSE_SETMODE: case MOUSE_SETMODE: if (cmd == OLD_MOUSE_SETMODE) { mode.rate = ((old_mousemode_t *)addr)->rate; /* * resolution old I/F new I/F * default 0 0 * low 1 -2 * medium low 2 -3 * medium high 3 -4 * high 4 -5 */ if (((old_mousemode_t *)addr)->resolution > 0) mode.resolution = -((old_mousemode_t *)addr)->resolution - 1; else mode.resolution = 0; mode.accelfactor = ((old_mousemode_t *)addr)->accelfactor; mode.level = -1; } else mode = *(mousemode_t *)addr; /* adjust and validate parameters. 
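* A sketch of the rules applied below: rate 0 selects the default, rate -1 keeps the current setting and other negative rates are rejected; a resolution given in counts per inch is bucketed into MOUSE_RES_LOW through MOUSE_RES_HIGH.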
*/ if (mode.rate > UCHAR_MAX) return (EINVAL); if (mode.rate == 0) mode.rate = sc->dflt_mode.rate; else if (mode.rate == -1) /* don't change the current setting */ ; else if (mode.rate < 0) return (EINVAL); if (mode.resolution >= UCHAR_MAX) return (EINVAL); if (mode.resolution >= 200) mode.resolution = MOUSE_RES_HIGH; else if (mode.resolution >= 100) mode.resolution = MOUSE_RES_MEDIUMHIGH; else if (mode.resolution >= 50) mode.resolution = MOUSE_RES_MEDIUMLOW; else if (mode.resolution > 0) mode.resolution = MOUSE_RES_LOW; if (mode.resolution == MOUSE_RES_DEFAULT) mode.resolution = sc->dflt_mode.resolution; else if (mode.resolution == -1) /* don't change the current setting */ ; else if (mode.resolution < 0) /* MOUSE_RES_LOW/MEDIUM/HIGH */ mode.resolution = MOUSE_RES_LOW - mode.resolution; if (mode.level == -1) /* don't change the current setting */ mode.level = sc->mode.level; else if ((mode.level < PSM_LEVEL_MIN) || (mode.level > PSM_LEVEL_MAX)) return (EINVAL); if (mode.accelfactor == -1) /* don't change the current setting */ mode.accelfactor = sc->mode.accelfactor; else if (mode.accelfactor < 0) return (EINVAL); /* don't allow anybody to poll the keyboard controller */ error = block_mouse_data(sc, &command_byte); if (error) return (error); /* set mouse parameters */ if (mode.rate > 0) mode.rate = set_mouse_sampling_rate(sc->kbdc, mode.rate); if (mode.resolution >= 0) mode.resolution = set_mouse_resolution(sc->kbdc, mode.resolution); set_mouse_scaling(sc->kbdc, 1); get_mouse_status(sc->kbdc, stat, 0, 3); s = spltty(); sc->mode.rate = mode.rate; sc->mode.resolution = mode.resolution; sc->mode.accelfactor = mode.accelfactor; sc->mode.level = mode.level; splx(s); unblock_mouse_data(sc, command_byte); break; case MOUSE_GETLEVEL: *(int *)addr = sc->mode.level; break; case MOUSE_SETLEVEL: if ((*(int *)addr < PSM_LEVEL_MIN) || (*(int *)addr > PSM_LEVEL_MAX)) return (EINVAL); sc->mode.level = *(int *)addr; break; case MOUSE_GETSTATUS: s = spltty(); status = sc->status; sc->status.flags = 0; sc->status.obutton = sc->status.button; sc->status.button = 0; sc->status.dx = 0; sc->status.dy = 0; sc->status.dz = 0; splx(s); *(mousestatus_t *)addr = status; break; - -#if (defined(MOUSE_GETVARS)) - case MOUSE_GETVARS: - var = (mousevar_t *)addr; - bzero(var, sizeof(*var)); - s = spltty(); - var->var[0] = MOUSE_VARS_PS2_SIG; - var->var[1] = sc->config; - var->var[2] = sc->flags; - splx(s); - break; - - case MOUSE_SETVARS: - return (ENODEV); -#endif /* MOUSE_GETVARS */ case MOUSE_READSTATE: case MOUSE_READDATA: data = (mousedata_t *)addr; if (data->len > sizeof(data->buf)/sizeof(data->buf[0])) return (EINVAL); error = block_mouse_data(sc, &command_byte); if (error) return (error); if ((data->len = get_mouse_status(sc->kbdc, data->buf, (cmd == MOUSE_READDATA) ? 
1 : 0, data->len)) <= 0) error = EIO; unblock_mouse_data(sc, command_byte); break; #if (defined(MOUSE_SETRESOLUTION)) case MOUSE_SETRESOLUTION: mode.resolution = *(int *)addr; if (mode.resolution >= UCHAR_MAX) return (EINVAL); else if (mode.resolution >= 200) mode.resolution = MOUSE_RES_HIGH; else if (mode.resolution >= 100) mode.resolution = MOUSE_RES_MEDIUMHIGH; else if (mode.resolution >= 50) mode.resolution = MOUSE_RES_MEDIUMLOW; else if (mode.resolution > 0) mode.resolution = MOUSE_RES_LOW; if (mode.resolution == MOUSE_RES_DEFAULT) mode.resolution = sc->dflt_mode.resolution; else if (mode.resolution == -1) mode.resolution = sc->mode.resolution; else if (mode.resolution < 0) /* MOUSE_RES_LOW/MEDIUM/HIGH */ mode.resolution = MOUSE_RES_LOW - mode.resolution; error = block_mouse_data(sc, &command_byte); if (error) return (error); sc->mode.resolution = set_mouse_resolution(sc->kbdc, mode.resolution); if (sc->mode.resolution != mode.resolution) error = EIO; unblock_mouse_data(sc, command_byte); break; #endif /* MOUSE_SETRESOLUTION */ #if (defined(MOUSE_SETRATE)) case MOUSE_SETRATE: mode.rate = *(int *)addr; if (mode.rate > UCHAR_MAX) return (EINVAL); if (mode.rate == 0) mode.rate = sc->dflt_mode.rate; else if (mode.rate < 0) mode.rate = sc->mode.rate; error = block_mouse_data(sc, &command_byte); if (error) return (error); sc->mode.rate = set_mouse_sampling_rate(sc->kbdc, mode.rate); if (sc->mode.rate != mode.rate) error = EIO; unblock_mouse_data(sc, command_byte); break; #endif /* MOUSE_SETRATE */ #if (defined(MOUSE_SETSCALING)) case MOUSE_SETSCALING: if ((*(int *)addr <= 0) || (*(int *)addr > 2)) return (EINVAL); error = block_mouse_data(sc, &command_byte); if (error) return (error); if (!set_mouse_scaling(sc->kbdc, *(int *)addr)) error = EIO; unblock_mouse_data(sc, command_byte); break; #endif /* MOUSE_SETSCALING */ #if (defined(MOUSE_GETHWID)) case MOUSE_GETHWID: error = block_mouse_data(sc, &command_byte); if (error) return (error); sc->hw.hwid &= ~0x00ff; sc->hw.hwid |= get_aux_id(sc->kbdc); *(int *)addr = sc->hw.hwid & 0x00ff; unblock_mouse_data(sc, command_byte); break; #endif /* MOUSE_GETHWID */ case FIONBIO: case FIOASYNC: break; case FIOSETOWN: error = fsetown(*(int *)addr, &sc->async); break; case FIOGETOWN: *(int *)addr = fgetown(&sc->async); break; default: return (ENOTTY); } return (error); } static void psmtimeout(void *arg) { struct psm_softc *sc; int s; sc = (struct psm_softc *)arg; s = spltty(); if (sc->watchdog && kbdc_lock(sc->kbdc, TRUE)) { VLOG(6, (LOG_DEBUG, "psm%d: lost interrupt?\n", sc->unit)); psmintr(sc); kbdc_lock(sc->kbdc, FALSE); } sc->watchdog = TRUE; splx(s); callout_reset(&sc->callout, hz, psmtimeout, sc); } /* Add all sysctls under the debug.psm and hw.psm nodes */ static SYSCTL_NODE(_debug, OID_AUTO, psm, CTLFLAG_RD, 0, "ps/2 mouse"); static SYSCTL_NODE(_hw, OID_AUTO, psm, CTLFLAG_RD, 0, "ps/2 mouse"); SYSCTL_INT(_debug_psm, OID_AUTO, loglevel, CTLFLAG_RWTUN, &verbose, 0, "Verbosity level"); static int psmhz = 20; SYSCTL_INT(_debug_psm, OID_AUTO, hz, CTLFLAG_RW, &psmhz, 0, "Frequency of the softcallout (in hz)"); static int psmerrsecs = 2; SYSCTL_INT(_debug_psm, OID_AUTO, errsecs, CTLFLAG_RW, &psmerrsecs, 0, "Number of seconds during which packets will be dropped after a sync error"); static int psmerrusecs = 0; SYSCTL_INT(_debug_psm, OID_AUTO, errusecs, CTLFLAG_RW, &psmerrusecs, 0, "Microseconds to add to psmerrsecs"); static int psmsecs = 0; SYSCTL_INT(_debug_psm, OID_AUTO, secs, CTLFLAG_RW, &psmsecs, 0, "Max number of seconds between soft interrupts");
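/* The debug.psm.* knobs above and below are plain sysctls, so they can be inspected or changed at runtime; e.g. a hypothetical tuning session: `sysctl debug.psm.loglevel=2' to raise verbosity, then `sysctl debug.psm.hz=40' to speed up the softcallout. */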
static int psmusecs = 500000; SYSCTL_INT(_debug_psm, OID_AUTO, usecs, CTLFLAG_RW, &psmusecs, 0, "Microseconds to add to psmsecs"); static int pkterrthresh = 2; SYSCTL_INT(_debug_psm, OID_AUTO, pkterrthresh, CTLFLAG_RW, &pkterrthresh, 0, "Number of error packets allowed before reinitializing the mouse"); SYSCTL_INT(_hw_psm, OID_AUTO, tap_enabled, CTLFLAG_RWTUN, &tap_enabled, 0, "Enable tap and drag gestures"); static int tap_threshold = PSM_TAP_THRESHOLD; SYSCTL_INT(_hw_psm, OID_AUTO, tap_threshold, CTLFLAG_RW, &tap_threshold, 0, "Button tap threshold"); static int tap_timeout = PSM_TAP_TIMEOUT; SYSCTL_INT(_hw_psm, OID_AUTO, tap_timeout, CTLFLAG_RW, &tap_timeout, 0, "Tap timeout for touchpads"); /* Tunables */ SYSCTL_INT(_hw_psm, OID_AUTO, synaptics_support, CTLFLAG_RDTUN, &synaptics_support, 0, "Enable support for Synaptics touchpads"); SYSCTL_INT(_hw_psm, OID_AUTO, trackpoint_support, CTLFLAG_RDTUN, &trackpoint_support, 0, "Enable support for IBM/Lenovo TrackPoint"); SYSCTL_INT(_hw_psm, OID_AUTO, elantech_support, CTLFLAG_RDTUN, &elantech_support, 0, "Enable support for Elantech touchpads"); static void psmintr(void *arg) { struct psm_softc *sc = arg; struct timeval now; int c; packetbuf_t *pb; /* read until there is nothing to read */ while((c = read_aux_data_no_wait(sc->kbdc)) != -1) { pb = &sc->pqueue[sc->pqueue_end]; /* discard the byte if the device is not open */ if (!(sc->state & (PSM_OPEN | PSM_EV_OPEN_R | PSM_EV_OPEN_A))) continue; getmicrouptime(&now); if ((pb->inputbytes > 0) && timevalcmp(&now, &sc->inputtimeout, >)) { VLOG(3, (LOG_DEBUG, "psmintr: delay too long; " "resetting byte count\n")); pb->inputbytes = 0; sc->syncerrors = 0; sc->pkterrors = 0; } sc->inputtimeout.tv_sec = PSM_INPUT_TIMEOUT / 1000000; sc->inputtimeout.tv_usec = PSM_INPUT_TIMEOUT % 1000000; timevaladd(&sc->inputtimeout, &now); pb->ipacket[pb->inputbytes++] = c; if (sc->mode.level == PSM_LEVEL_NATIVE) { VLOG(4, (LOG_DEBUG, "psmintr: %02x\n", pb->ipacket[0])); sc->syncerrors = 0; sc->pkterrors = 0; goto next; } else { if (pb->inputbytes < sc->mode.packetsize) continue; VLOG(4, (LOG_DEBUG, "psmintr: %02x %02x %02x %02x %02x %02x\n", pb->ipacket[0], pb->ipacket[1], pb->ipacket[2], pb->ipacket[3], pb->ipacket[4], pb->ipacket[5])); } c = pb->ipacket[0]; if ((sc->flags & PSM_NEED_SYNCBITS) != 0) { sc->mode.syncmask[1] = (c & sc->mode.syncmask[0]); sc->flags &= ~PSM_NEED_SYNCBITS; VLOG(2, (LOG_DEBUG, "psmintr: Sync bytes now %04x,%04x\n", sc->mode.syncmask[0], sc->mode.syncmask[1])); } else if ((sc->config & PSM_CONFIG_NOCHECKSYNC) == 0 && (c & sc->mode.syncmask[0]) != sc->mode.syncmask[1]) { VLOG(3, (LOG_DEBUG, "psmintr: out of sync " "(%04x != %04x) %d cmds since last error.\n", c & sc->mode.syncmask[0], sc->mode.syncmask[1], sc->cmdcount - sc->lasterr)); sc->lasterr = sc->cmdcount; /* * The sync byte test is a weak measure of packet * validity. Conservatively discard any input yet * to be seen by userland when we detect a sync * error since there is a good chance some of * the queued packets have undetected errors. */ dropqueue(sc); if (sc->syncerrors == 0) sc->pkterrors++; ++sc->syncerrors; sc->lastinputerr = now; if (sc->syncerrors >= sc->mode.packetsize * 2 || sc->pkterrors >= pkterrthresh) { /* * If we've failed to find a single sync byte * in 2 packets worth of data, or we've seen * persistent packet errors during the * validation period, reinitialize the mouse * in hopes of returning it to the expected * mode. 
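* Concretely, the test above fires once sc->mode.packetsize * 2 consecutive sync errors accumulate, or once pkterrthresh (default 2) packet errors are seen inside the validation window.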
*/ VLOG(3, (LOG_DEBUG, "psmintr: reset the mouse.\n")); reinitialize(sc, TRUE); } else if (sc->syncerrors == sc->mode.packetsize) { /* * Try a soft reset after searching for a sync * byte through a packet length of bytes. */ VLOG(3, (LOG_DEBUG, "psmintr: re-enable the mouse.\n")); pb->inputbytes = 0; disable_aux_dev(sc->kbdc); enable_aux_dev(sc->kbdc); } else { VLOG(3, (LOG_DEBUG, "psmintr: discard a byte (%d)\n", sc->syncerrors)); pb->inputbytes--; bcopy(&pb->ipacket[1], &pb->ipacket[0], pb->inputbytes); } continue; } /* * We have what appears to be a valid packet. * Reset the error counters. */ sc->syncerrors = 0; /* * Drop even good packets if they occur within a timeout * period of a sync error. This allows the detection of * a change in the mouse's packet mode without exposing * erratic mouse behavior to the user. Some KVMs forget * enhanced mouse modes during switch events. */ if (!timeelapsed(&sc->lastinputerr, psmerrsecs, psmerrusecs, &now)) { pb->inputbytes = 0; continue; } /* * Now that we're out of the validation period, reset * the packet error count. */ sc->pkterrors = 0; sc->cmdcount++; next: if (++sc->pqueue_end >= PSM_PACKETQUEUE) sc->pqueue_end = 0; /* * If we've filled the queue then call the softintr ourselves, * otherwise schedule the interrupt for later. */ if (!timeelapsed(&sc->lastsoftintr, psmsecs, psmusecs, &now) || (sc->pqueue_end == sc->pqueue_start)) { if ((sc->state & PSM_SOFTARMED) != 0) { sc->state &= ~PSM_SOFTARMED; callout_stop(&sc->softcallout); } psmsoftintr(arg); } else if ((sc->state & PSM_SOFTARMED) == 0) { sc->state |= PSM_SOFTARMED; callout_reset(&sc->softcallout, psmhz < 1 ? 1 : (hz/psmhz), psmsoftintr, arg); } } } static void proc_mmanplus(struct psm_softc *sc, packetbuf_t *pb, mousestatus_t *ms, int *x, int *y, int *z) { /* * PS2++ protocol packet * * b7 b6 b5 b4 b3 b2 b1 b0 * byte 1: * 1 p3 p2 1 * * * * byte 2: c1 c2 p1 p0 d1 d0 1 0 * * p3-p0: packet type * c1, c2: c1 & c2 == 1, if p2 == 0 * c1 & c2 == 0, if p2 == 1 * * packet type: 0 (device type) * See comments in enable_mmanplus() below. * * packet type: 1 (wheel data) * * b7 b6 b5 b4 b3 b2 b1 b0 * byte 3: h * B5 B4 s d2 d1 d0 * * h: 1, if horizontal roller data * 0, if vertical roller data * B4, B5: button 4 and 5 * s: sign bit * d2-d0: roller data * * packet type: 2 (reserved) */ if (((pb->ipacket[0] & MOUSE_PS2PLUS_SYNCMASK) == MOUSE_PS2PLUS_SYNC) && (abs(*x) > 191) && MOUSE_PS2PLUS_CHECKBITS(pb->ipacket)) { /* * the extended data packet encodes button * and wheel events */ switch (MOUSE_PS2PLUS_PACKET_TYPE(pb->ipacket)) { case 1: /* wheel data packet */ *x = *y = 0; if (pb->ipacket[2] & 0x80) { /* XXX horizontal roller count - ignore it */ ; } else { /* vertical roller count */ *z = (pb->ipacket[2] & MOUSE_PS2PLUS_ZNEG) ? (pb->ipacket[2] & 0x0f) - 16 : (pb->ipacket[2] & 0x0f); } ms->button |= (pb->ipacket[2] & MOUSE_PS2PLUS_BUTTON4DOWN) ? MOUSE_BUTTON4DOWN : 0; ms->button |= (pb->ipacket[2] & MOUSE_PS2PLUS_BUTTON5DOWN) ? MOUSE_BUTTON5DOWN : 0; break; case 2: /* * this packet type is reserved by * Logitech... */ /* * IBM ScrollPoint Mouse uses this * packet type to encode both vertical * and horizontal scroll movement. */ *x = *y = 0; /* horizontal count */ if (pb->ipacket[2] & 0x0f) *z = (pb->ipacket[2] & MOUSE_SPOINT_WNEG) ? -2 : 2; /* vertical count */ if (pb->ipacket[2] & 0xf0) *z = (pb->ipacket[2] & MOUSE_SPOINT_ZNEG) ? 
-1 : 1; break; case 0: /* device type packet - shouldn't happen */ /* FALLTHROUGH */ default: *x = *y = 0; ms->button = ms->obutton; VLOG(1, (LOG_DEBUG, "psmintr: unknown PS2++ packet " "type %d: 0x%02x 0x%02x 0x%02x\n", MOUSE_PS2PLUS_PACKET_TYPE(pb->ipacket), pb->ipacket[0], pb->ipacket[1], pb->ipacket[2])); break; } } else { /* preserve button states */ ms->button |= ms->obutton & MOUSE_EXTBUTTONS; } } static int proc_synaptics(struct psm_softc *sc, packetbuf_t *pb, mousestatus_t *ms, int *x, int *y, int *z) { static int touchpad_buttons; static int guest_buttons; static finger_t f[PSM_FINGERS]; int w, id, nfingers, ewcode, extended_buttons, clickpad_pressed; extended_buttons = 0; /* TouchPad PS/2 absolute mode message format with capFourButtons: * * Bits: 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------------ * ipacket[0]: 1 0 W3 W2 0 W1 R L * ipacket[1]: Yb Ya Y9 Y8 Xb Xa X9 X8 * ipacket[2]: Z7 Z6 Z5 Z4 Z3 Z2 Z1 Z0 * ipacket[3]: 1 1 Yc Xc 0 W0 D^R U^L * ipacket[4]: X7 X6 X5 X4 X3 X2 X1 X0 * ipacket[5]: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * * Legend: * L: left physical mouse button * R: right physical mouse button * D: down button * U: up button * W: "wrist" value * X: x position * Y: y position * Z: pressure * * Without capFourButtons but with nExtendedButtons and/or capMiddle * * Bits: 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------------------ * ipacket[3]: 1 1 Yc Xc 0 W0 E^R M^L * ipacket[4]: X7 X6 X5 X4 X3|b7 X2|b5 X1|b3 X0|b1 * ipacket[5]: Y7 Y6 Y5 Y4 Y3|b8 Y2|b6 Y1|b4 Y0|b2 * * Legend: * M: Middle physical mouse button * E: Extended mouse buttons reported instead of low bits of X and Y * b1-b8: Extended mouse buttons * Only ((nExtendedButtons + 1) >> 1) bits are used in packets * 4 and 5; for reading the X and Y values they should be zeroed. * * Absolute reportable limits: 0 - 6143. * Typical bezel limits: 1472 - 5472. * Typical edge margins: 1632 - 5312. * * w = 3 Passthrough Packet * * Byte 2,5,6 == Byte 1,2,3 of "Guest" */ if (!synaptics_support) return (0); /* Sanity check for out of sync packets. */ if ((pb->ipacket[0] & 0xc8) != 0x80 || (pb->ipacket[3] & 0xc8) != 0xc0) return (-1); *x = *y = 0; ms->button = ms->obutton; /* * Pressure value. * Interpretation: * z = 0 No finger contact * z = 10 Finger hovering near the pad * z = 30 Very light finger contact * z = 80 Normal finger contact * z = 110 Very heavy finger contact * z = 200 Finger lying flat on pad surface * z = 255 Maximum reportable Z */ *z = pb->ipacket[2]; /* * Finger width value * Interpretation: * w = 0 Two fingers on the pad (capMultiFinger needed) * w = 1 Three or more fingers (capMultiFinger needed) * w = 2 Pen (instead of finger) (capPen needed) * w = 3 Reserved (passthrough?) * w = 4-7 Finger of normal width (capPalmDetect needed) * w = 8-14 Very wide finger or palm (capPalmDetect needed) * w = 15 Maximum reportable width (capPalmDetect needed) */ /* XXX Is checking capExtended enough? */ if (sc->synhw.capExtended) w = ((pb->ipacket[0] & 0x30) >> 2) | ((pb->ipacket[0] & 0x04) >> 1) | ((pb->ipacket[3] & 0x04) >> 2); else { /* Assume a finger of regular width. */ w = 4; } switch (w) { case 3: /* * Handle packets from the guest device. See: * Synaptics PS/2 TouchPad Interfacing Guide, Section 5.1 */ if (sc->synhw.capPassthrough) { *x = ((pb->ipacket[1] & 0x10) ? pb->ipacket[4] - 256 : pb->ipacket[4]); *y = ((pb->ipacket[1] & 0x20) ?
pb->ipacket[5] - 256 : pb->ipacket[5]); *z = 0; guest_buttons = 0; if (pb->ipacket[1] & 0x01) guest_buttons |= MOUSE_BUTTON1DOWN; if (pb->ipacket[1] & 0x04) guest_buttons |= MOUSE_BUTTON2DOWN; if (pb->ipacket[1] & 0x02) guest_buttons |= MOUSE_BUTTON3DOWN; #ifdef EVDEV_SUPPORT if (evdev_rcpt_mask & EVDEV_RCPT_HW_MOUSE) { evdev_push_rel(sc->evdev_r, REL_X, *x); evdev_push_rel(sc->evdev_r, REL_Y, -*y); evdev_push_mouse_btn(sc->evdev_r, guest_buttons); evdev_sync(sc->evdev_r); } #endif ms->button = touchpad_buttons | guest_buttons | sc->extended_buttons; } goto SYNAPTICS_END; case 2: /* Handle Extended W mode packets */ ewcode = (pb->ipacket[5] & 0xf0) >> 4; #if PSM_FINGERS > 1 switch (ewcode) { case 1: /* Secondary finger */ if (sc->synhw.capAdvancedGestures) f[1] = (finger_t) { .x = (((pb->ipacket[4] & 0x0f) << 8) | pb->ipacket[1]) << 1, .y = (((pb->ipacket[4] & 0xf0) << 4) | pb->ipacket[2]) << 1, .p = ((pb->ipacket[3] & 0x30) | (pb->ipacket[5] & 0x0f)) << 1, .w = PSM_FINGER_DEFAULT_W, .flags = PSM_FINGER_FUZZY, }; else if (sc->synhw.capReportsV) f[1] = (finger_t) { .x = (((pb->ipacket[4] & 0x0f) << 8) | (pb->ipacket[1] & 0xfe)) << 1, .y = (((pb->ipacket[4] & 0xf0) << 4) | (pb->ipacket[2] & 0xfe)) << 1, .p = ((pb->ipacket[3] & 0x30) | (pb->ipacket[5] & 0x0e)) << 1, .w = (((pb->ipacket[5] & 0x01) << 2) | ((pb->ipacket[2] & 0x01) << 1) | (pb->ipacket[1] & 0x01)) + 8, .flags = PSM_FINGER_FUZZY, }; default: break; } #endif goto SYNAPTICS_END; case 1: case 0: nfingers = w + 2; break; default: nfingers = 1; } if (sc->syninfo.touchpad_off) goto SYNAPTICS_END; /* Button presses */ touchpad_buttons = 0; if (pb->ipacket[0] & 0x01) touchpad_buttons |= MOUSE_BUTTON1DOWN; if (pb->ipacket[0] & 0x02) touchpad_buttons |= MOUSE_BUTTON3DOWN; if (sc->synhw.capExtended && sc->synhw.capFourButtons) { if ((pb->ipacket[3] ^ pb->ipacket[0]) & 0x01) touchpad_buttons |= MOUSE_BUTTON4DOWN; if ((pb->ipacket[3] ^ pb->ipacket[0]) & 0x02) touchpad_buttons |= MOUSE_BUTTON5DOWN; } else if (sc->synhw.capExtended && sc->synhw.capMiddle && !sc->synhw.capClickPad) { /* Middle Button */ if ((pb->ipacket[0] ^ pb->ipacket[3]) & 0x01) touchpad_buttons |= MOUSE_BUTTON2DOWN; } else if (sc->synhw.capExtended && (sc->synhw.nExtendedButtons > 0)) { /* Extended Buttons */ if ((pb->ipacket[0] ^ pb->ipacket[3]) & 0x02) { if (sc->syninfo.directional_scrolls) { if (pb->ipacket[4] & 0x01) extended_buttons |= MOUSE_BUTTON4DOWN; if (pb->ipacket[5] & 0x01) extended_buttons |= MOUSE_BUTTON5DOWN; if (pb->ipacket[4] & 0x02) extended_buttons |= MOUSE_BUTTON6DOWN; if (pb->ipacket[5] & 0x02) extended_buttons |= MOUSE_BUTTON7DOWN; } else { if (pb->ipacket[4] & 0x01) extended_buttons |= MOUSE_BUTTON1DOWN; if (pb->ipacket[5] & 0x01) extended_buttons |= MOUSE_BUTTON3DOWN; if (pb->ipacket[4] & 0x02) extended_buttons |= MOUSE_BUTTON2DOWN; sc->extended_buttons = extended_buttons; } /* * Zero out bits used by extended buttons to avoid * misinterpretation of the data absolute position. * * The bits represented by * * (nExtendedButtons + 1) >> 1 * * will be masked out in both bytes. 
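* For example, a pad reporting nExtendedButtons = 4 uses the two low bits of ipacket[4] and ipacket[5] for button states, so maskedbits below evaluates to 2 and mask to 0x03.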
* The mask for n bits is computed with the formula * * (1 << n) - 1 */ int maskedbits = 0; int mask = 0; maskedbits = (sc->synhw.nExtendedButtons + 1) >> 1; mask = (1 << maskedbits) - 1; #ifdef EVDEV_SUPPORT int i; if (evdev_rcpt_mask & EVDEV_RCPT_HW_MOUSE) { if (sc->synhw.capPassthrough) { evdev_push_mouse_btn(sc->evdev_r, extended_buttons); evdev_sync(sc->evdev_r); } for (i = 0; i < maskedbits; i++) { evdev_push_key(sc->evdev_a, BTN_0 + i * 2, pb->ipacket[4] & (1 << i)); evdev_push_key(sc->evdev_a, BTN_0 + i * 2 + 1, pb->ipacket[5] & (1 << i)); } } #endif pb->ipacket[4] &= ~(mask); pb->ipacket[5] &= ~(mask); } else if (!sc->syninfo.directional_scrolls && !sc->gesture.in_vscroll) { /* * Keep reporting MOUSE DOWN until we get a new packet * indicating otherwise. */ extended_buttons |= sc->extended_buttons; } } if (sc->synhw.capReportsV && nfingers > 1) f[0] = (finger_t) { .x = ((pb->ipacket[3] & 0x10) << 8) | ((pb->ipacket[1] & 0x0f) << 8) | (pb->ipacket[4] & 0xfd), .y = ((pb->ipacket[3] & 0x20) << 7) | ((pb->ipacket[1] & 0xf0) << 4) | (pb->ipacket[5] & 0xfd), .p = *z & 0xfe, .w = (((pb->ipacket[2] & 0x01) << 2) | (pb->ipacket[5] & 0x02) | ((pb->ipacket[4] & 0x02) >> 1)) + 8, .flags = PSM_FINGER_FUZZY, }; else f[0] = (finger_t) { .x = ((pb->ipacket[3] & 0x10) << 8) | ((pb->ipacket[1] & 0x0f) << 8) | pb->ipacket[4], .y = ((pb->ipacket[3] & 0x20) << 7) | ((pb->ipacket[1] & 0xf0) << 4) | pb->ipacket[5], .p = *z, .w = w, .flags = nfingers > 1 ? PSM_FINGER_FUZZY : 0, }; /* Ignore hovering and unmeasurable touches */ if (f[0].p < sc->syninfo.min_pressure || f[0].x < 2) nfingers = 0; /* Handle ClickPad */ if (sc->synhw.capClickPad) { clickpad_pressed = (pb->ipacket[0] ^ pb->ipacket[3]) & 0x01; if (sc->synhw.forcePad) { /* * Forcepads erroneously report a button click if there * are 2 or more fingers on the touchpad, breaking * multifinger gestures. To work around this, start * reporting a click only after 4 consecutive single * touch packets have been received. * Skip these packets in case more contacts appear. */ switch (nfingers) { case 0: sc->fpcount = 0; break; case 1: if (clickpad_pressed && sc->fpcount < INT_MAX) ++sc->fpcount; /* FALLTHROUGH */ default: if (!clickpad_pressed) sc->fpcount = 0; if (sc->fpcount >= sc->syninfo.window_min) touchpad_buttons |= MOUSE_BUTTON1DOWN; } } else if (clickpad_pressed) touchpad_buttons |= MOUSE_BUTTON1DOWN; } for (id = 0; id < PSM_FINGERS; id++) if (id >= nfingers) PSM_FINGER_RESET(f[id]); #ifdef EVDEV_SUPPORT if (evdev_rcpt_mask & EVDEV_RCPT_HW_MOUSE) { for (id = 0; id < PSM_FINGERS; id++) { if (PSM_FINGER_IS_SET(f[id])) psm_push_mt_finger(sc, id, &f[id]); else psm_release_mt_slot(sc->evdev_a, id); } evdev_push_key(sc->evdev_a, BTN_TOUCH, nfingers > 0); evdev_push_nfingers(sc->evdev_a, nfingers); if (nfingers > 0) psm_push_st_finger(sc, &f[0]); else evdev_push_abs(sc->evdev_a, ABS_PRESSURE, 0); evdev_push_mouse_btn(sc->evdev_a, touchpad_buttons); if (sc->synhw.capExtended && sc->synhw.capFourButtons) { evdev_push_key(sc->evdev_a, BTN_FORWARD, touchpad_buttons & MOUSE_BUTTON4DOWN); evdev_push_key(sc->evdev_a, BTN_BACK, touchpad_buttons & MOUSE_BUTTON5DOWN); } evdev_sync(sc->evdev_a); } #endif ms->button = touchpad_buttons; psmgestures(sc, &f[0], nfingers, ms); for (id = 0; id < PSM_FINGERS; id++) psmsmoother(sc, &f[id], id, ms, x, y); /* Palm detection doesn't terminate the current action.
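* It merely discards this packet's motion and button changes.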
*/ if (psmpalmdetect(sc, &f[0], nfingers)) { *x = *y = *z = 0; ms->button = ms->obutton; return (0); } ms->button |= extended_buttons | guest_buttons; SYNAPTICS_END: /* * Use the extra buttons as a scrollwheel * * XXX X.Org uses the Z axis for vertical wheel only, * whereas moused(8) understands special values to distinguish * vertical and horizontal wheels. * * xf86-input-mouse therefore needs a small patch to * understand these special values. Without it, the * horizontal wheel acts as a vertical wheel in X.Org. * * That's why the horizontal wheel is disabled by * default for now. */ if (ms->button & MOUSE_BUTTON4DOWN) *z = -1; else if (ms->button & MOUSE_BUTTON5DOWN) *z = 1; else if (ms->button & MOUSE_BUTTON6DOWN) *z = -2; else if (ms->button & MOUSE_BUTTON7DOWN) *z = 2; else *z = 0; ms->button &= ~(MOUSE_BUTTON4DOWN | MOUSE_BUTTON5DOWN | MOUSE_BUTTON6DOWN | MOUSE_BUTTON7DOWN); return (0); } static int psmpalmdetect(struct psm_softc *sc, finger_t *f, int nfingers) { if (!( ((sc->synhw.capMultiFinger || sc->synhw.capAdvancedGestures) && !sc->synhw.capReportsV && nfingers > 1) || (sc->synhw.capReportsV && nfingers > 2) || (sc->synhw.capPalmDetect && f->w <= sc->syninfo.max_width) || (!sc->synhw.capPalmDetect && f->p <= sc->syninfo.max_pressure) || (sc->synhw.capPen && f->flags & PSM_FINGER_IS_PEN))) { /* * We consider the packet irrelevant for the current * action when: * - the width isn't within: * [1; max_width] * - the pressure isn't within: * [min_pressure; max_pressure] * - pens aren't supported but PSM_FINGER_IS_PEN is set */ VLOG(2, (LOG_DEBUG, "synaptics: palm detected! (%d)\n", f->w)); return (1); } return (0); } static void psmgestures(struct psm_softc *sc, finger_t *fingers, int nfingers, mousestatus_t *ms) { smoother_t *smoother; gesture_t *gest; finger_t *f; int y_ok, center_button, center_x, right_button, right_x, i; f = &fingers[0]; smoother = &sc->smoother[0]; gest = &sc->gesture; /* Find first active finger. */ if (nfingers > 0) { for (i = 0; i < PSM_FINGERS; i++) { if (PSM_FINGER_IS_SET(fingers[i])) { f = &fingers[i]; smoother = &sc->smoother[i]; break; } } } /* * Check pressure to detect a real wanted action on the * touchpad. */ if (f->p >= sc->syninfo.min_pressure) { int x0, y0; int dxp, dyp; int start_x, start_y; int queue_len; int margin_top, margin_right, margin_bottom, margin_left; int window_min, window_max; int vscroll_hor_area, vscroll_ver_area; int two_finger_scroll; int max_x, max_y; /* Read sysctl. */ /* XXX Verify values? */ margin_top = sc->syninfo.margin_top; margin_right = sc->syninfo.margin_right; margin_bottom = sc->syninfo.margin_bottom; margin_left = sc->syninfo.margin_left; window_min = sc->syninfo.window_min; window_max = sc->syninfo.window_max; vscroll_hor_area = sc->syninfo.vscroll_hor_area; vscroll_ver_area = sc->syninfo.vscroll_ver_area; two_finger_scroll = sc->syninfo.two_finger_scroll; max_x = sc->syninfo.max_x; max_y = sc->syninfo.max_y; /* Read current absolute position. */ x0 = f->x; y0 = f->y; /* * Limit the coordinates to the specified margins because * this area isn't very reliable. */ if (x0 <= margin_left) x0 = margin_left; else if (x0 >= max_x - margin_right) x0 = max_x - margin_right; if (y0 <= margin_bottom) y0 = margin_bottom; else if (y0 >= max_y - margin_top) y0 = max_y - margin_top; VLOG(3, (LOG_DEBUG, "synaptics: ipacket: [%d, %d], %d, %d\n", x0, y0, f->p, f->w)); /* * If the action is just beginning, init the structure and * compute tap timeout.
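* For example, tap_timeout = 125000 (microseconds) yields taptimeout = { 0, 125000 } below, which is then offset by the time of the last interrupt.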
*/ if (!(sc->flags & PSM_FLAGS_FINGERDOWN)) { VLOG(3, (LOG_DEBUG, "synaptics: ----\n")); /* Initialize queue. */ gest->window_min = window_min; /* Reset pressure peak. */ gest->zmax = 0; /* Reset fingers count. */ gest->fingers_nb = 0; /* Reset virtual scrolling state. */ gest->in_vscroll = 0; /* Compute tap timeout. */ gest->taptimeout.tv_sec = tap_timeout / 1000000; gest->taptimeout.tv_usec = tap_timeout % 1000000; timevaladd(&gest->taptimeout, &sc->lastsoftintr); sc->flags |= PSM_FLAGS_FINGERDOWN; /* Smoother has not been reset yet */ queue_len = 1; start_x = x0; start_y = y0; } else { queue_len = smoother->queue_len + 1; start_x = smoother->start_x; start_y = smoother->start_y; } /* Process ClickPad softbuttons */ if (sc->synhw.capClickPad && ms->button & MOUSE_BUTTON1DOWN) { y_ok = sc->syninfo.softbuttons_y >= 0 ? start_y < sc->syninfo.softbuttons_y : start_y > max_y + sc->syninfo.softbuttons_y; center_button = MOUSE_BUTTON2DOWN; center_x = sc->syninfo.softbutton2_x; right_button = MOUSE_BUTTON3DOWN; right_x = sc->syninfo.softbutton3_x; if (center_x > 0 && right_x > 0 && center_x > right_x) { center_button = MOUSE_BUTTON3DOWN; center_x = sc->syninfo.softbutton3_x; right_button = MOUSE_BUTTON2DOWN; right_x = sc->syninfo.softbutton2_x; } if (right_x > 0 && start_x > right_x && y_ok) ms->button = (ms->button & ~MOUSE_BUTTON1DOWN) | right_button; else if (center_x > 0 && start_x > center_x && y_ok) ms->button = (ms->button & ~MOUSE_BUTTON1DOWN) | center_button; } /* If in tap-hold, add the recorded button. */ if (gest->in_taphold) ms->button |= gest->tap_button; /* * For tap, we keep the maximum number of fingers and the * pressure peak. Also with multiple fingers, we increase * the minimum window. */ if (nfingers > 1) gest->window_min = window_max; gest->fingers_nb = imax(nfingers, gest->fingers_nb); gest->zmax = imax(f->p, gest->zmax); /* Do we have enough packets to consider this a gesture? */ if (queue_len < gest->window_min) return; dyp = -1; dxp = -1; /* Is a scrolling action occurring? */ if (!gest->in_taphold && !ms->button && (!gest->in_vscroll || two_finger_scroll)) { /* * A scrolling action must not conflict with a tap * action. Here are the conditions to consider a * scrolling action: * - the action in a configurable area * - one of the following: * . the distance between the last packet and the * first should be above a configurable minimum * . tap timed out */ dxp = abs(x0 - start_x); dyp = abs(y0 - start_y); if (timevalcmp(&sc->lastsoftintr, &gest->taptimeout, >) || dxp >= sc->syninfo.vscroll_min_delta || dyp >= sc->syninfo.vscroll_min_delta) { /* * Handle two finger scrolling. * Note that we don't rely on fingers_nb * as that keeps the maximum number of fingers. */ if (two_finger_scroll) { if (nfingers == 2) { gest->in_vscroll += dyp ? 2 : 0; gest->in_vscroll += dxp ? 1 : 0; } } else { /* Check for horizontal scrolling. */ if ((vscroll_hor_area > 0 && start_y <= vscroll_hor_area) || (vscroll_hor_area < 0 && start_y >= max_y + vscroll_hor_area)) gest->in_vscroll += 2; /* Check for vertical scrolling. */ if ((vscroll_ver_area > 0 && start_x <= vscroll_ver_area) || (vscroll_ver_area < 0 && start_x >= max_x + vscroll_ver_area)) gest->in_vscroll += 1; } /* Avoid conflicts if area overlaps. */ if (gest->in_vscroll >= 3) gest->in_vscroll = (dxp > dyp) ? 2 : 1; } } /* * Reset two finger scrolling when the number of fingers * is different from two or any button is pressed. 
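* Normal pointer motion then resumes with the very next packet, since in_vscroll == 0 selects the plain movement path in psmsmoother().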
*/ if (two_finger_scroll && gest->in_vscroll != 0 && (nfingers != 2 || ms->button)) gest->in_vscroll = 0; VLOG(5, (LOG_DEBUG, "synaptics: virtual scrolling: %s " "(direction=%d, dxp=%d, dyp=%d, fingers=%d)\n", gest->in_vscroll ? "YES" : "NO", gest->in_vscroll, dxp, dyp, gest->fingers_nb)); } else if (sc->flags & PSM_FLAGS_FINGERDOWN) { /* * An action is currently taking place but the pressure * dropped under the minimum, putting an end to it. */ int taphold_timeout, dx, dy, tap_max_delta; dx = abs(smoother->queue[smoother->queue_cursor].x - smoother->start_x); dy = abs(smoother->queue[smoother->queue_cursor].y - smoother->start_y); /* Max delta is disabled for multi-fingers tap. */ if (gest->fingers_nb > 1) tap_max_delta = imax(dx, dy); else tap_max_delta = sc->syninfo.tap_max_delta; sc->flags &= ~PSM_FLAGS_FINGERDOWN; /* Check for tap. */ VLOG(3, (LOG_DEBUG, "synaptics: zmax=%d, dx=%d, dy=%d, " "delta=%d, fingers=%d, queue=%d\n", gest->zmax, dx, dy, tap_max_delta, gest->fingers_nb, smoother->queue_len)); if (!gest->in_vscroll && gest->zmax >= tap_threshold && timevalcmp(&sc->lastsoftintr, &gest->taptimeout, <=) && dx <= tap_max_delta && dy <= tap_max_delta && smoother->queue_len >= sc->syninfo.tap_min_queue) { /* * We have a tap if: * - the maximum pressure went over tap_threshold * - the action ended before tap_timeout * * To handle tap-hold, we must delay any button push to * the next action. */ if (gest->in_taphold) { /* * This is the second and last tap of a * double tap action, not a tap-hold. */ gest->in_taphold = 0; /* * For double-tap to work: * - no button press is emitted (to * simulate a button release) * - PSM_FLAGS_FINGERDOWN is set to * force the next packet to emit a * button press) */ VLOG(2, (LOG_DEBUG, "synaptics: button RELEASE: %d\n", gest->tap_button)); sc->flags |= PSM_FLAGS_FINGERDOWN; /* Schedule button press on next interrupt */ sc->idletimeout.tv_sec = psmhz > 1 ? 0 : 1; sc->idletimeout.tv_usec = psmhz > 1 ? 1000000 / psmhz : 0; } else { /* * This is the first tap: we set the * tap-hold state and notify the button * down event. */ gest->in_taphold = 1; taphold_timeout = sc->syninfo.taphold_timeout; gest->taptimeout.tv_sec = taphold_timeout / 1000000; gest->taptimeout.tv_usec = taphold_timeout % 1000000; sc->idletimeout = gest->taptimeout; timevaladd(&gest->taptimeout, &sc->lastsoftintr); switch (gest->fingers_nb) { case 3: gest->tap_button = MOUSE_BUTTON2DOWN; break; case 2: gest->tap_button = MOUSE_BUTTON3DOWN; break; default: gest->tap_button = MOUSE_BUTTON1DOWN; } VLOG(2, (LOG_DEBUG, "synaptics: button PRESS: %d\n", gest->tap_button)); ms->button |= gest->tap_button; } } else { /* * Not enough pressure or timeout: reset * tap-hold state. */ if (gest->in_taphold) { VLOG(2, (LOG_DEBUG, "synaptics: button RELEASE: %d\n", gest->tap_button)); gest->in_taphold = 0; } else { VLOG(2, (LOG_DEBUG, "synaptics: not a tap-hold\n")); } } } else if (!(sc->flags & PSM_FLAGS_FINGERDOWN) && gest->in_taphold) { /* * For a tap-hold to work, the button must remain down at * least until timeout (where the in_taphold flags will be * cleared) or during the next action. 
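* This is what turns a tap followed immediately by a finger-down into a drag: the tapped button stays pressed while the finger moves.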
*/ if (timevalcmp(&sc->lastsoftintr, &gest->taptimeout, <=)) { ms->button |= gest->tap_button; } else { VLOG(2, (LOG_DEBUG, "synaptics: button RELEASE: %d\n", gest->tap_button)); gest->in_taphold = 0; } } return; } static void psmsmoother(struct psm_softc *sc, finger_t *f, int smoother_id, mousestatus_t *ms, int *x, int *y) { smoother_t *smoother = &sc->smoother[smoother_id]; gesture_t *gest = &(sc->gesture); /* * Check pressure to detect a real wanted action on the * touchpad. */ if (f->p >= sc->syninfo.min_pressure) { int x0, y0; int cursor, peer, window; int dx, dy, dxp, dyp; int max_width, max_pressure; int margin_top, margin_right, margin_bottom, margin_left; int na_top, na_right, na_bottom, na_left; int window_min, window_max; int multiplicator; int weight_current, weight_previous, weight_len_squared; int div_min, div_max, div_len; int vscroll_hor_area, vscroll_ver_area; int two_finger_scroll; int max_x, max_y; int len, weight_prev_x, weight_prev_y; int div_max_x, div_max_y, div_x, div_y; int is_fuzzy; /* Read sysctl. */ /* XXX Verify values? */ max_width = sc->syninfo.max_width; max_pressure = sc->syninfo.max_pressure; margin_top = sc->syninfo.margin_top; margin_right = sc->syninfo.margin_right; margin_bottom = sc->syninfo.margin_bottom; margin_left = sc->syninfo.margin_left; na_top = sc->syninfo.na_top; na_right = sc->syninfo.na_right; na_bottom = sc->syninfo.na_bottom; na_left = sc->syninfo.na_left; window_min = sc->syninfo.window_min; window_max = sc->syninfo.window_max; multiplicator = sc->syninfo.multiplicator; weight_current = sc->syninfo.weight_current; weight_previous = sc->syninfo.weight_previous; weight_len_squared = sc->syninfo.weight_len_squared; div_min = sc->syninfo.div_min; div_max = sc->syninfo.div_max; div_len = sc->syninfo.div_len; vscroll_hor_area = sc->syninfo.vscroll_hor_area; vscroll_ver_area = sc->syninfo.vscroll_ver_area; two_finger_scroll = sc->syninfo.two_finger_scroll; max_x = sc->syninfo.max_x; max_y = sc->syninfo.max_y; is_fuzzy = (f->flags & PSM_FINGER_FUZZY) != 0; /* Read current absolute position. */ x0 = f->x; y0 = f->y; /* * Limit the coordinates to the specified margins because * this area isn't very reliable. */ if (x0 <= margin_left) x0 = margin_left; else if (x0 >= max_x - margin_right) x0 = max_x - margin_right; if (y0 <= margin_bottom) y0 = margin_bottom; else if (y0 >= max_y - margin_top) y0 = max_y - margin_top; /* If the action is just beginning, init the structure. */ if (smoother->active == 0) { VLOG(3, (LOG_DEBUG, "smoother%d: ---\n", smoother_id)); /* Store the first point of this action. */ smoother->start_x = x0; smoother->start_y = y0; dx = dy = 0; /* Initialize queue. */ smoother->queue_cursor = SYNAPTICS_PACKETQUEUE; smoother->queue_len = 0; /* Reset average. */ smoother->avg_dx = 0; smoother->avg_dy = 0; /* Reset squelch. */ smoother->squelch_x = 0; smoother->squelch_y = 0; /* Activate queue */ smoother->active = 1; } else { /* Calculate the current delta. */ cursor = smoother->queue_cursor; dx = x0 - smoother->queue[cursor].x; dy = y0 - smoother->queue[cursor].y; } VLOG(3, (LOG_DEBUG, "smoother%d: ipacket: [%d, %d], %d, %d\n", smoother_id, x0, y0, f->p, f->w)); /* Queue this new packet. 
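* The cursor walks backwards through the circular buffer, so SYNAPTICS_QUEUE_CURSOR(cursor + n) addresses the packet received n samples earlier; the averaging code below relies on this.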
*/ cursor = SYNAPTICS_QUEUE_CURSOR(smoother->queue_cursor - 1); smoother->queue[cursor].x = x0; smoother->queue[cursor].y = y0; smoother->queue_cursor = cursor; if (smoother->queue_len < SYNAPTICS_PACKETQUEUE) smoother->queue_len++; VLOG(5, (LOG_DEBUG, "smoother%d: cursor[%d]: x=%d, y=%d, dx=%d, dy=%d\n", smoother_id, cursor, x0, y0, dx, dy)); /* Do we have enough packets to consider this a movement? */ if (smoother->queue_len < gest->window_min) return; weight_prev_x = weight_prev_y = weight_previous; div_max_x = div_max_y = div_max; if (gest->in_vscroll) { /* Dividers are different with virtual scrolling. */ div_min = sc->syninfo.vscroll_div_min; div_max_x = div_max_y = sc->syninfo.vscroll_div_max; } else { /* * There's a lot of noise in coordinates when * the finger is on the touchpad's borders. When * using this area, we apply a special weight and * div. */ if (x0 <= na_left || x0 >= max_x - na_right) { weight_prev_x = sc->syninfo.weight_previous_na; div_max_x = sc->syninfo.div_max_na; } if (y0 <= na_bottom || y0 >= max_y - na_top) { weight_prev_y = sc->syninfo.weight_previous_na; div_max_y = sc->syninfo.div_max_na; } } /* * Calculate weights for the average operands and * the divisor. Both depend on the distance between * the current packet and a previous one (based on the * window width). */ window = imin(smoother->queue_len, window_max); peer = SYNAPTICS_QUEUE_CURSOR(cursor + window - 1); dxp = abs(x0 - smoother->queue[peer].x) + 1; dyp = abs(y0 - smoother->queue[peer].y) + 1; len = (dxp * dxp) + (dyp * dyp); weight_prev_x = imin(weight_prev_x, weight_len_squared * weight_prev_x / len); weight_prev_y = imin(weight_prev_y, weight_len_squared * weight_prev_y / len); len = (dxp + dyp) / 2; div_x = div_len * div_max_x / len; div_x = imin(div_max_x, div_x); div_x = imax(div_min, div_x); div_y = div_len * div_max_y / len; div_y = imin(div_max_y, div_y); div_y = imax(div_min, div_y); VLOG(3, (LOG_DEBUG, "smoother%d: peer=%d, len=%d, weight=%d/%d, div=%d/%d\n", smoother_id, peer, len, weight_prev_x, weight_prev_y, div_x, div_y)); /* Compute averages. */ smoother->avg_dx = (weight_current * dx * multiplicator + weight_prev_x * smoother->avg_dx) / (weight_current + weight_prev_x); smoother->avg_dy = (weight_current * dy * multiplicator + weight_prev_y * smoother->avg_dy) / (weight_current + weight_prev_y); VLOG(5, (LOG_DEBUG, "smoother%d: avg_dx~=%d, avg_dy~=%d\n", smoother_id, smoother->avg_dx / multiplicator, smoother->avg_dy / multiplicator)); /* Use these averages to calculate x & y. */ smoother->squelch_x += smoother->avg_dx; dxp = smoother->squelch_x / (div_x * multiplicator); smoother->squelch_x = smoother->squelch_x % (div_x * multiplicator); smoother->squelch_y += smoother->avg_dy; dyp = smoother->squelch_y / (div_y * multiplicator); smoother->squelch_y = smoother->squelch_y % (div_y * multiplicator); switch(gest->in_vscroll) { case 0: /* Pointer movement. */ /* On real<->fuzzy finger switch the x/y pos jumps */ if (is_fuzzy == smoother->is_fuzzy) { *x += dxp; *y += dyp; } VLOG(3, (LOG_DEBUG, "smoother%d: [%d, %d] -> [%d, %d]\n", smoother_id, dx, dy, dxp, dyp)); break; case 1: /* Vertical scrolling. */ if (dyp != 0) ms->button |= (dyp > 0) ? MOUSE_BUTTON4DOWN : MOUSE_BUTTON5DOWN; break; case 2: /* Horizontal scrolling. */ if (dxp != 0) ms->button |= (dxp > 0) ? MOUSE_BUTTON7DOWN : MOUSE_BUTTON6DOWN; break; } smoother->is_fuzzy = is_fuzzy; } else { /* * Deactivate queue. Note: We can not just reset queue here * as these values are still used by gesture processor. 
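* (psmgestures() still reads queue[] and start_x/start_y to classify a tap after the finger is released.)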
* So postpone reset till next touch. */ smoother->active = 0; } } static int proc_elantech(struct psm_softc *sc, packetbuf_t *pb, mousestatus_t *ms, int *x, int *y, int *z) { static int touchpad_button, trackpoint_button; finger_t fn, f[ELANTECH_MAX_FINGERS]; int pkt, id, scale, i, nfingers, mask; if (!elantech_support) return (0); /* Determine packet format and do a sanity check for out of sync packets. */ if (ELANTECH_PKT_IS_DEBOUNCE(pb, sc->elanhw.hwversion)) pkt = ELANTECH_PKT_NOP; else if (sc->elanhw.hastrackpoint && ELANTECH_PKT_IS_TRACKPOINT(pb)) pkt = ELANTECH_PKT_TRACKPOINT; else switch (sc->elanhw.hwversion) { case 2: if (!ELANTECH_PKT_IS_V2(pb)) return (-1); pkt = (pb->ipacket[0] & 0xc0) == 0x80 ? ELANTECH_PKT_V2_2FINGER : ELANTECH_PKT_V2_COMMON; break; case 3: if (!ELANTECH_PKT_IS_V3_HEAD(pb, sc->elanhw.hascrc) && !ELANTECH_PKT_IS_V3_TAIL(pb, sc->elanhw.hascrc)) return (-1); pkt = ELANTECH_PKT_V3; break; case 4: if (!ELANTECH_PKT_IS_V4(pb, sc->elanhw.hascrc)) return (-1); switch (pb->ipacket[3] & 0x03) { case 0x00: pkt = ELANTECH_PKT_V4_STATUS; break; case 0x01: pkt = ELANTECH_PKT_V4_HEAD; break; case 0x02: pkt = ELANTECH_PKT_V4_MOTION; break; default: return (-1); } break; default: return (-1); } VLOG(5, (LOG_DEBUG, "elantech: ipacket format: %d\n", pkt)); for (id = 0; id < ELANTECH_MAX_FINGERS; id++) PSM_FINGER_RESET(f[id]); *x = *y = *z = 0; ms->button = ms->obutton; if (sc->syninfo.touchpad_off) return (0); /* Common legend * L: Left mouse button pressed * R: Right mouse button pressed * N: number of fingers on touchpad * X: absolute x value (horizontal) * Y: absolute y value (vertical) * W: width of the finger touch * P: pressure */ switch (pkt) { case ELANTECH_PKT_V2_COMMON: /* HW V2. One/Three finger touch */ /* 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------- * ipacket[0]: N1 N0 W3 W2 . . R L * ipacket[1]: P7 P6 P5 P4 X11 X10 X9 X8 * ipacket[2]: X7 X6 X5 X4 X3 X2 X1 X0 * ipacket[3]: N4 VF W1 W0 . . . B2 * ipacket[4]: P3 P1 P2 P0 Y11 Y10 Y9 Y8 * ipacket[5]: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * ------------------------------------------- * N4: set if more than 3 fingers (only in 3 fingers mode) * VF: a kind of flag? (only on EF123, 0 when finger * is over one of the buttons, 1 otherwise) * B2: (on EF113 only, 0 otherwise), one button pressed * P & W are not reported on EF113 touchpads */ nfingers = (pb->ipacket[0] & 0xc0) >> 6; if (nfingers == 3 && (pb->ipacket[3] & 0x80)) nfingers = 4; if (nfingers == 0) { mask = (1 << nfingers) - 1; /* = 0x00 */ break; } /* Map the 3rd and 4th fingers to the first finger */ mask = (1 << 1) - 1; /* = 0x01 */ f[0] = ELANTECH_FINGER_SET_XYP(pb); if (sc->elanhw.haspressure) { f[0].w = ((pb->ipacket[0] & 0x30) >> 2) | ((pb->ipacket[3] & 0x30) >> 4); } else { f[0].p = PSM_FINGER_DEFAULT_P; f[0].w = PSM_FINGER_DEFAULT_W; } /* * HW v2 doesn't report exact finger positions when 3 or more * fingers are on the touchpad. */ if (nfingers > 2) f[0].flags = PSM_FINGER_FUZZY; break; case ELANTECH_PKT_V2_2FINGER: /* HW V2. Two finger touch */ /* 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------- * ipacket[0]: N1 N0 AY8 AX8 . . R L * ipacket[1]: AX7 AX6 AX5 AX4 AX3 AX2 AX1 AX0 * ipacket[2]: AY7 AY6 AY5 AY4 AY3 AY2 AY1 AY0 * ipacket[3]: . . BY8 BX8 . . . .
* ipacket[4]: BX7 BX6 BX5 BX4 BX3 BX2 BX1 BX0 * ipacket[5]: BY7 BY6 BY5 BY4 BY3 BY2 BY1 BY0 * ------------------------------------------- * AX: lower-left finger absolute x value * AY: lower-left finger absolute y value * BX: upper-right finger absolute x value * BY: upper-right finger absolute y value */ nfingers = 2; mask = (1 << nfingers) - 1; for (id = 0; id < imin(2, ELANTECH_MAX_FINGERS); id++) f[id] = (finger_t) { .x = (((pb->ipacket[id * 3] & 0x10) << 4) | pb->ipacket[id * 3 + 1]) << 2, .y = (((pb->ipacket[id * 3] & 0x20) << 3) | pb->ipacket[id * 3 + 2]) << 2, .p = PSM_FINGER_DEFAULT_P, .w = PSM_FINGER_DEFAULT_W, /* HW ver.2 sends bounding box */ .flags = PSM_FINGER_FUZZY }; break; case ELANTECH_PKT_V3: /* HW Version 3 */ /* 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------- * ipacket[0]: N1 N0 W3 W2 0 1 R L * ipacket[1]: P7 P6 P5 P4 X11 X10 X9 X8 * ipacket[2]: X7 X6 X5 X4 X3 X2 X1 X0 * ipacket[3]: 0 0 W1 W0 0 0 1 0 * ipacket[4]: P3 P1 P2 P0 Y11 Y10 Y9 Y8 * ipacket[5]: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * ------------------------------------------- */ nfingers = (pb->ipacket[0] & 0xc0) >> 6; /* Map the 3rd finger to the first finger */ id = nfingers > 2 ? 0 : nfingers - 1; mask = (1 << (id + 1)) - 1; if (nfingers == 0) break; fn = ELANTECH_FINGER_SET_XYP(pb); fn.w = ((pb->ipacket[0] & 0x30) >> 2) | ((pb->ipacket[3] & 0x30) >> 4); /* * HW v3 doesn't report exact finger positions when 3 or more * fingers are on the touchpad. */ if (nfingers > 1) fn.flags = PSM_FINGER_FUZZY; if (nfingers == 2) { if (ELANTECH_PKT_IS_V3_HEAD(pb, sc->elanhw.hascrc)) { sc->elanaction.fingers[0] = fn; return (0); } else f[0] = sc->elanaction.fingers[0]; } f[id] = fn; break; case ELANTECH_PKT_V4_STATUS: /* HW Version 4. Status packet */ /* 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------- * ipacket[0]: . . . . 0 1 R L * ipacket[1]: . . . F4 F3 F2 F1 F0 * ipacket[2]: . . . . . . . . * ipacket[3]: . . . 1 0 0 0 0 * ipacket[4]: PL . . . . . . . * ipacket[5]: . . . . . . . . * ------------------------------------------- * Fn: finger n is on touchpad * PL: palm * HW ver4 sends a status packet to indicate that the number * or identities of the fingers have changed */ mask = pb->ipacket[1] & 0x1f; nfingers = bitcount(mask); if (sc->elanaction.mask_v4wait != 0) VLOG(3, (LOG_DEBUG, "elantech: HW v4 status packet" " when not all previous head packets received\n")); /* Bitmap of fingers to receive before gesture processing */ sc->elanaction.mask_v4wait = mask & ~sc->elanaction.mask; /* Skip "new finger is on touchpad" packets */ if (sc->elanaction.mask_v4wait) { sc->elanaction.mask = mask; return (0); } break; case ELANTECH_PKT_V4_HEAD: /* HW Version 4. Head packet */ /* 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------- * ipacket[0]: W3 W2 W1 W0 0 1 R L * ipacket[1]: P7 P6 P5 P4 X11 X10 X9 X8 * ipacket[2]: X7 X6 X5 X4 X3 X2 X1 X0 * ipacket[3]: ID2 ID1 ID0 1 0 0 0 1 * ipacket[4]: P3 P1 P2 P0 Y11 Y10 Y9 Y8 * ipacket[5]: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * ------------------------------------------- * ID: finger id * HW ver 4 sends head packets in two cases: * 1. One finger touch and movement. * 2. Right after a status packet, to tell the new finger * positions. */ mask = sc->elanaction.mask; nfingers = bitcount(mask); id = ((pb->ipacket[3] & 0xe0) >> 5) - 1; fn = ELANTECH_FINGER_SET_XYP(pb); fn.w = (pb->ipacket[0] & 0xf0) >> 4; if (id < 0) return (0); /* Packet is finger position update.
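* (mask_v4wait == 0, so no head packets are outstanding from a preceding status packet.)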
Report it */ if (sc->elanaction.mask_v4wait == 0) { if (id < ELANTECH_MAX_FINGERS) f[id] = fn; break; } /* Remove finger from waiting bitmap and store into context */ sc->elanaction.mask_v4wait &= ~(1 << id); if (id < ELANTECH_MAX_FINGERS) sc->elanaction.fingers[id] = fn; /* Wait for other fingers if needed */ if (sc->elanaction.mask_v4wait != 0) return (0); /* All new fingers are received. Report them from context */ for (id = 0; id < ELANTECH_MAX_FINGERS; id++) if (sc->elanaction.mask & (1 << id)) f[id] = sc->elanaction.fingers[id]; break; case ELANTECH_PKT_V4_MOTION: /* HW Version 4. Motion packet */ /* 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------- * ipacket[0]: ID2 ID1 ID0 OF 0 1 R L * ipacket[1]: DX7 DX6 DX5 DX4 DX3 DX2 DX1 DX0 * ipacket[2]: DY7 DY6 DY5 DY4 DY3 DY2 DY1 DY0 * ipacket[3]: ID2 ID1 ID0 1 0 0 1 0 * ipacket[4]: DX7 DX6 DX5 DX4 DX3 DX2 DX1 DX0 * ipacket[5]: DY7 DY6 DY5 DY4 DY3 DY2 DY1 DY0 * ------------------------------------------- * OF: delta overflows (> 127 or < -128), in this case * firmware sends us (delta x / 5) and (delta y / 5) * ID: finger id * DX: delta x (two's complement) * XY: delta y (two's complement) * byte 0 ~ 2 for one finger * byte 3 ~ 5 for another finger */ mask = sc->elanaction.mask; nfingers = bitcount(mask); scale = (pb->ipacket[0] & 0x10) ? 5 : 1; for (i = 0; i <= 3; i += 3) { id = ((pb->ipacket[i] & 0xe0) >> 5) - 1; if (id < 0 || id >= ELANTECH_MAX_FINGERS) continue; if (PSM_FINGER_IS_SET(sc->elanaction.fingers[id])) { f[id] = sc->elanaction.fingers[id]; f[id].x += imax(-f[id].x, (signed char)pb->ipacket[i+1] * scale); f[id].y += imax(-f[id].y, (signed char)pb->ipacket[i+2] * scale); } else { VLOG(3, (LOG_DEBUG, "elantech: " "HW v4 motion packet skipped\n")); } } break; case ELANTECH_PKT_TRACKPOINT: /* 7 6 5 4 3 2 1 0 (LSB) * ------------------------------------------- * ipacket[0]: 0 0 SX SY 0 M R L * ipacket[1]: ~SX 0 0 0 0 0 0 0 * ipacket[2]: ~SY 0 0 0 0 0 0 0 * ipacket[3]: 0 0 ~SY ~SX 0 1 1 0 * ipacket[4]: X7 X6 X5 X4 X3 X2 X1 X0 * ipacket[5]: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * ------------------------------------------- * X and Y are written in two's complement spread * over 9 bits with SX/SY the relative top bit and * X7..X0 and Y7..Y0 the lower bits. */ *x = (pb->ipacket[0] & 0x20) ? pb->ipacket[4] - 256 : pb->ipacket[4]; *y = (pb->ipacket[0] & 0x10) ? pb->ipacket[5] - 256 : pb->ipacket[5]; trackpoint_button = ((pb->ipacket[0] & 0x01) ? MOUSE_BUTTON1DOWN : 0) | ((pb->ipacket[0] & 0x02) ? MOUSE_BUTTON3DOWN : 0) | ((pb->ipacket[0] & 0x04) ? MOUSE_BUTTON2DOWN : 0); #ifdef EVDEV_SUPPORT evdev_push_rel(sc->evdev_r, REL_X, *x); evdev_push_rel(sc->evdev_r, REL_Y, -*y); evdev_push_mouse_btn(sc->evdev_r, trackpoint_button); evdev_sync(sc->evdev_r); #endif ms->button = touchpad_button | trackpoint_button; return (0); case ELANTECH_PKT_NOP: return (0); default: return (-1); } for (id = 0; id < ELANTECH_MAX_FINGERS; id++) if (PSM_FINGER_IS_SET(f[id])) VLOG(2, (LOG_DEBUG, "elantech: " "finger %d: down [%d, %d], %d, %d, %d\n", id + 1, f[id].x, f[id].y, f[id].p, f[id].w, f[id].flags)); /* Touchpad button presses */ if (sc->elanhw.isclickpad) { touchpad_button = ((pb->ipacket[0] & 0x03) ? MOUSE_BUTTON1DOWN : 0); } else { touchpad_button = ((pb->ipacket[0] & 0x01) ? MOUSE_BUTTON1DOWN : 0) | ((pb->ipacket[0] & 0x02) ? 
MOUSE_BUTTON3DOWN : 0); } #ifdef EVDEV_SUPPORT if (evdev_rcpt_mask & EVDEV_RCPT_HW_MOUSE) { for (id = 0; id < ELANTECH_MAX_FINGERS; id++) { if (PSM_FINGER_IS_SET(f[id])) { psm_push_mt_finger(sc, id, &f[id]); /* Convert touch width to surface units */ evdev_push_abs(sc->evdev_a, ABS_MT_TOUCH_MAJOR, f[id].w * sc->elanhw.dptracex); } if (sc->elanaction.mask & (1 << id) && !(mask & (1 << id))) psm_release_mt_slot(sc->evdev_a, id); } evdev_push_key(sc->evdev_a, BTN_TOUCH, nfingers > 0); evdev_push_nfingers(sc->evdev_a, nfingers); if (nfingers > 0) { if (PSM_FINGER_IS_SET(f[0])) psm_push_st_finger(sc, &f[0]); } else evdev_push_abs(sc->evdev_a, ABS_PRESSURE, 0); evdev_push_mouse_btn(sc->evdev_a, touchpad_button); evdev_sync(sc->evdev_a); } #endif ms->button = touchpad_button | trackpoint_button; /* Send finger 1 position to gesture processor */ if (PSM_FINGER_IS_SET(f[0]) || PSM_FINGER_IS_SET(f[1]) || nfingers == 0) psmgestures(sc, &f[0], imin(nfingers, 3), ms); /* Send fingers positions to movement smoothers */ for (id = 0; id < PSM_FINGERS; id++) if (PSM_FINGER_IS_SET(f[id]) || !(mask & (1 << id))) psmsmoother(sc, &f[id], id, ms, x, y); /* Store current finger positions in action context */ for (id = 0; id < ELANTECH_MAX_FINGERS; id++) { if (PSM_FINGER_IS_SET(f[id])) sc->elanaction.fingers[id] = f[id]; if ((sc->elanaction.mask & (1 << id)) && !(mask & (1 << id))) PSM_FINGER_RESET(sc->elanaction.fingers[id]); } sc->elanaction.mask = mask; /* Palm detection doesn't terminate the current action. */ if (psmpalmdetect(sc, &f[0], nfingers)) { *x = *y = *z = 0; ms->button = ms->obutton; return (0); } /* Use the extra buttons as a scrollwheel */ if (ms->button & MOUSE_BUTTON4DOWN) *z = -1; else if (ms->button & MOUSE_BUTTON5DOWN) *z = 1; else if (ms->button & MOUSE_BUTTON6DOWN) *z = -2; else if (ms->button & MOUSE_BUTTON7DOWN) *z = 2; else *z = 0; ms->button &= ~(MOUSE_BUTTON4DOWN | MOUSE_BUTTON5DOWN | MOUSE_BUTTON6DOWN | MOUSE_BUTTON7DOWN); return (0); } static void proc_versapad(struct psm_softc *sc, packetbuf_t *pb, mousestatus_t *ms, int *x, int *y, int *z) { static int butmap_versapad[8] = { 0, MOUSE_BUTTON3DOWN, 0, MOUSE_BUTTON3DOWN, MOUSE_BUTTON1DOWN, MOUSE_BUTTON1DOWN | MOUSE_BUTTON3DOWN, MOUSE_BUTTON1DOWN, MOUSE_BUTTON1DOWN | MOUSE_BUTTON3DOWN }; int c, x0, y0; /* VersaPad PS/2 absolute mode message format * * [packet1] 7 6 5 4 3 2 1 0(LSB) * ipacket[0]: 1 1 0 A 1 L T R * ipacket[1]: H7 H6 H5 H4 H3 H2 H1 H0 * ipacket[2]: V7 V6 V5 V4 V3 V2 V1 V0 * ipacket[3]: 1 1 1 A 1 L T R * ipacket[4]:V11 V10 V9 V8 H11 H10 H9 H8 * ipacket[5]: 0 P6 P5 P4 P3 P2 P1 P0 * * [note] * R: right physical mouse button (1=on) * T: touch pad virtual button (1=tapping) * L: left physical mouse button (1=on) * A: position data is valid (1=valid) * H: horizontal data (12bit signed integer. H11 is sign bit.) * V: vertical data (12bit signed integer. V11 is sign bit.) * P: pressure data * * Tapping is mapped to MOUSE_BUTTON4. */ c = pb->ipacket[0]; *x = *y = 0; ms->button = butmap_versapad[c & MOUSE_PS2VERSA_BUTTONS]; ms->button |= (c & MOUSE_PS2VERSA_TAP) ? 
MOUSE_BUTTON4DOWN : 0; if (c & MOUSE_PS2VERSA_IN_USE) { x0 = pb->ipacket[1] | (((pb->ipacket[4]) & 0x0f) << 8); y0 = pb->ipacket[2] | (((pb->ipacket[4]) & 0xf0) << 4); if (x0 & 0x800) x0 -= 0x1000; if (y0 & 0x800) y0 -= 0x1000; if (sc->flags & PSM_FLAGS_FINGERDOWN) { *x = sc->xold - x0; *y = y0 - sc->yold; if (*x < 0) /* XXX */ ++*x; else if (*x) --*x; if (*y < 0) ++*y; else if (*y) --*y; } else sc->flags |= PSM_FLAGS_FINGERDOWN; sc->xold = x0; sc->yold = y0; } else sc->flags &= ~PSM_FLAGS_FINGERDOWN; } static void psmsoftintridle(void *arg) { struct psm_softc *sc = arg; packetbuf_t *pb; /* Invoke soft handler only when pqueue is empty. Otherwise it will be * invoked from psmintr soon with pqueue filled with real data */ if (sc->pqueue_start == sc->pqueue_end && sc->idlepacket.inputbytes > 0) { /* Grow circular queue backwards to avoid race with psmintr */ if (--sc->pqueue_start < 0) sc->pqueue_start = PSM_PACKETQUEUE - 1; pb = &sc->pqueue[sc->pqueue_start]; memcpy(pb, &sc->idlepacket, sizeof(packetbuf_t)); VLOG(4, (LOG_DEBUG, "psmsoftintridle: %02x %02x %02x %02x %02x %02x\n", pb->ipacket[0], pb->ipacket[1], pb->ipacket[2], pb->ipacket[3], pb->ipacket[4], pb->ipacket[5])); psmsoftintr(arg); } } static void psmsoftintr(void *arg) { /* * the table to turn PS/2 mouse button bits (MOUSE_PS2_BUTTON?DOWN) * into `mousestatus' button bits (MOUSE_BUTTON?DOWN). */ static int butmap[8] = { 0, MOUSE_BUTTON1DOWN, MOUSE_BUTTON3DOWN, MOUSE_BUTTON1DOWN | MOUSE_BUTTON3DOWN, MOUSE_BUTTON2DOWN, MOUSE_BUTTON1DOWN | MOUSE_BUTTON2DOWN, MOUSE_BUTTON2DOWN | MOUSE_BUTTON3DOWN, MOUSE_BUTTON1DOWN | MOUSE_BUTTON2DOWN | MOUSE_BUTTON3DOWN }; struct psm_softc *sc = arg; mousestatus_t ms; packetbuf_t *pb; int x, y, z, c, l, s; getmicrouptime(&sc->lastsoftintr); s = spltty(); do { pb = &sc->pqueue[sc->pqueue_start]; if (sc->mode.level == PSM_LEVEL_NATIVE) goto next_native; c = pb->ipacket[0]; /* * A kludge for Kensington device! * The MSB of the horizontal count appears to be stored in * a strange place. */ if (sc->hw.model == MOUSE_MODEL_THINK) pb->ipacket[1] |= (c & MOUSE_PS2_XOVERFLOW) ? 0x80 : 0; /* ignore the overflow bits... */ x = (c & MOUSE_PS2_XNEG) ? pb->ipacket[1] - 256 : pb->ipacket[1]; y = (c & MOUSE_PS2_YNEG) ? pb->ipacket[2] - 256 : pb->ipacket[2]; z = 0; ms.obutton = sc->button; /* previous button state */ ms.button = butmap[c & MOUSE_PS2_BUTTONS]; /* `tapping' action */ if (sc->config & PSM_CONFIG_FORCETAP) ms.button |= ((c & MOUSE_PS2_TAP)) ? 0 : MOUSE_BUTTON4DOWN; timevalclear(&sc->idletimeout); sc->idlepacket.inputbytes = 0; switch (sc->hw.model) { case MOUSE_MODEL_EXPLORER: /* * b7 b6 b5 b4 b3 b2 b1 b0 * byte 1: oy ox sy sx 1 M R L * byte 2: x x x x x x x x * byte 3: y y y y y y y y * byte 4: * * S2 S1 s d2 d1 d0 * * L, M, R, S1, S2: left, middle, right and side buttons * s: wheel data sign bit * d2-d0: wheel data */ z = (pb->ipacket[3] & MOUSE_EXPLORER_ZNEG) ? (pb->ipacket[3] & 0x0f) - 16 : (pb->ipacket[3] & 0x0f); ms.button |= (pb->ipacket[3] & MOUSE_EXPLORER_BUTTON4DOWN) ? MOUSE_BUTTON4DOWN : 0; ms.button |= (pb->ipacket[3] & MOUSE_EXPLORER_BUTTON5DOWN) ? MOUSE_BUTTON5DOWN : 0; break; case MOUSE_MODEL_INTELLI: case MOUSE_MODEL_NET: /* wheel data is in the fourth byte */ z = (char)pb->ipacket[3]; /* * XXX some mice may send 7 when there is no Z movement? */ if ((z >= 7) || (z <= -7)) z = 0; /* some compatible mice have additional buttons */ ms.button |= (c & MOUSE_PS2INTELLI_BUTTON4DOWN) ? MOUSE_BUTTON4DOWN : 0; ms.button |= (c & MOUSE_PS2INTELLI_BUTTON5DOWN) ? 
MOUSE_BUTTON5DOWN : 0; break; case MOUSE_MODEL_MOUSEMANPLUS: proc_mmanplus(sc, pb, &ms, &x, &y, &z); break; case MOUSE_MODEL_GLIDEPOINT: /* `tapping' action */ ms.button |= ((c & MOUSE_PS2_TAP)) ? 0 : MOUSE_BUTTON4DOWN; break; case MOUSE_MODEL_NETSCROLL: /* * three additional bytes encode buttons and * wheel events */ ms.button |= (pb->ipacket[3] & MOUSE_PS2_BUTTON3DOWN) ? MOUSE_BUTTON4DOWN : 0; ms.button |= (pb->ipacket[3] & MOUSE_PS2_BUTTON1DOWN) ? MOUSE_BUTTON5DOWN : 0; z = (pb->ipacket[3] & MOUSE_PS2_XNEG) ? pb->ipacket[4] - 256 : pb->ipacket[4]; break; case MOUSE_MODEL_THINK: /* the fourth button state in the first byte */ ms.button |= (c & MOUSE_PS2_TAP) ? MOUSE_BUTTON4DOWN : 0; break; case MOUSE_MODEL_VERSAPAD: proc_versapad(sc, pb, &ms, &x, &y, &z); c = ((x < 0) ? MOUSE_PS2_XNEG : 0) | ((y < 0) ? MOUSE_PS2_YNEG : 0); break; case MOUSE_MODEL_4D: /* * b7 b6 b5 b4 b3 b2 b1 b0 * byte 1: s2 d2 s1 d1 1 M R L * byte 2: sx x x x x x x x * byte 3: sy y y y y y y y * * s1: wheel 1 direction * d1: wheel 1 data * s2: wheel 2 direction * d2: wheel 2 data */ x = (pb->ipacket[1] & 0x80) ? pb->ipacket[1] - 256 : pb->ipacket[1]; y = (pb->ipacket[2] & 0x80) ? pb->ipacket[2] - 256 : pb->ipacket[2]; switch (c & MOUSE_4D_WHEELBITS) { case 0x10: z = 1; break; case 0x30: z = -1; break; case 0x40: /* XXX 2nd wheel turning right */ z = 2; break; case 0xc0: /* XXX 2nd wheel turning left */ z = -2; break; } break; case MOUSE_MODEL_4DPLUS: if ((x < 16 - 256) && (y < 16 - 256)) { /* * b7 b6 b5 b4 b3 b2 b1 b0 * byte 1: 0 0 1 1 1 M R L * byte 2: 0 0 0 0 1 0 0 0 * byte 3: 0 0 0 0 S s d1 d0 * * L, M, R, S: left, middle, right, * and side buttons * s: wheel data sign bit * d1-d0: wheel data */ x = y = 0; if (pb->ipacket[2] & MOUSE_4DPLUS_BUTTON4DOWN) ms.button |= MOUSE_BUTTON4DOWN; z = (pb->ipacket[2] & MOUSE_4DPLUS_ZNEG) ? 
((pb->ipacket[2] & 0x07) - 8) : (pb->ipacket[2] & 0x07) ; } else { /* preserve previous button states */ ms.button |= ms.obutton & MOUSE_EXTBUTTONS; } break; case MOUSE_MODEL_SYNAPTICS: if (proc_synaptics(sc, pb, &ms, &x, &y, &z) != 0) { VLOG(3, (LOG_DEBUG, "synaptics: " "packet rejected\n")); goto next; } break; case MOUSE_MODEL_ELANTECH: if (proc_elantech(sc, pb, &ms, &x, &y, &z) != 0) { VLOG(3, (LOG_DEBUG, "elantech: " "packet rejected\n")); goto next; } break; case MOUSE_MODEL_TRACKPOINT: case MOUSE_MODEL_GENERIC: default: break; } #ifdef EVDEV_SUPPORT if (evdev_rcpt_mask & EVDEV_RCPT_HW_MOUSE && sc->hw.model != MOUSE_MODEL_ELANTECH && sc->hw.model != MOUSE_MODEL_SYNAPTICS) { evdev_push_rel(sc->evdev_r, REL_X, x); evdev_push_rel(sc->evdev_r, REL_Y, -y); switch (sc->hw.model) { case MOUSE_MODEL_EXPLORER: case MOUSE_MODEL_INTELLI: case MOUSE_MODEL_NET: case MOUSE_MODEL_NETSCROLL: case MOUSE_MODEL_4DPLUS: evdev_push_rel(sc->evdev_r, REL_WHEEL, -z); break; case MOUSE_MODEL_MOUSEMANPLUS: case MOUSE_MODEL_4D: switch (z) { case 1: case -1: evdev_push_rel(sc->evdev_r, REL_WHEEL, -z); break; case 2: case -2: evdev_push_rel(sc->evdev_r, REL_HWHEEL, z / 2); break; } break; } evdev_push_mouse_btn(sc->evdev_r, ms.button); evdev_sync(sc->evdev_r); } #endif /* scale values */ if (sc->mode.accelfactor >= 1) { if (x != 0) { x = x * x / sc->mode.accelfactor; if (x == 0) x = 1; if (c & MOUSE_PS2_XNEG) x = -x; } if (y != 0) { y = y * y / sc->mode.accelfactor; if (y == 0) y = 1; if (c & MOUSE_PS2_YNEG) y = -y; } } /* Store last packet for reinjection if it has not been set already */ if (timevalisset(&sc->idletimeout) && sc->idlepacket.inputbytes == 0) sc->idlepacket = *pb; ms.dx = x; ms.dy = y; ms.dz = z; ms.flags = ((x || y || z) ? MOUSE_POSCHANGED : 0) | (ms.obutton ^ ms.button); pb->inputbytes = tame_mouse(sc, pb, &ms, pb->ipacket); sc->status.flags |= ms.flags; sc->status.dx += ms.dx; sc->status.dy += ms.dy; sc->status.dz += ms.dz; sc->status.button = ms.button; sc->button = ms.button; next_native: sc->watchdog = FALSE; /* queue data */ if (sc->queue.count + pb->inputbytes < sizeof(sc->queue.buf)) { l = imin(pb->inputbytes, sizeof(sc->queue.buf) - sc->queue.tail); bcopy(&pb->ipacket[0], &sc->queue.buf[sc->queue.tail], l); if (pb->inputbytes > l) bcopy(&pb->ipacket[l], &sc->queue.buf[0], pb->inputbytes - l); sc->queue.tail = (sc->queue.tail + pb->inputbytes) % sizeof(sc->queue.buf); sc->queue.count += pb->inputbytes; } next: pb->inputbytes = 0; if (++sc->pqueue_start >= PSM_PACKETQUEUE) sc->pqueue_start = 0; } while (sc->pqueue_start != sc->pqueue_end); if (sc->state & PSM_ASLP) { sc->state &= ~PSM_ASLP; wakeup(sc); } selwakeuppri(&sc->rsel, PZERO); if (sc->async != NULL) { pgsigio(&sc->async, SIGIO, 0); } sc->state &= ~PSM_SOFTARMED; /* schedule injection of predefined packet after idletimeout * if no data packets have been received from psmintr */ if (timevalisset(&sc->idletimeout)) { sc->state |= PSM_SOFTARMED; callout_reset(&sc->softcallout, tvtohz(&sc->idletimeout), psmsoftintridle, sc); VLOG(2, (LOG_DEBUG, "softintr: callout set: %d ticks\n", tvtohz(&sc->idletimeout))); } splx(s); } static int psmpoll(struct cdev *dev, int events, struct thread *td) { struct psm_softc *sc = dev->si_drv1; int s; int revents = 0; /* Return true if a mouse event available */ s = spltty(); if (events & (POLLIN | POLLRDNORM)) { if (sc->queue.count > 0) revents |= events & (POLLIN | POLLRDNORM); else selrecord(td, &sc->rsel); } splx(s); return (revents); } /* vendor/model specific routines */ static int 
mouse_id_proc1(KBDC kbdc, int res, int scale, int *status) { if (set_mouse_resolution(kbdc, res) != res) return (FALSE); if (set_mouse_scaling(kbdc, scale) && set_mouse_scaling(kbdc, scale) && set_mouse_scaling(kbdc, scale) && (get_mouse_status(kbdc, status, 0, 3) >= 3)) return (TRUE); return (FALSE); } static int mouse_ext_command(KBDC kbdc, int command) { int c; c = (command >> 6) & 0x03; if (set_mouse_resolution(kbdc, c) != c) return (FALSE); c = (command >> 4) & 0x03; if (set_mouse_resolution(kbdc, c) != c) return (FALSE); c = (command >> 2) & 0x03; if (set_mouse_resolution(kbdc, c) != c) return (FALSE); c = (command >> 0) & 0x03; if (set_mouse_resolution(kbdc, c) != c) return (FALSE); return (TRUE); } #ifdef notyet /* Logitech MouseMan Cordless II */ static int enable_lcordless(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; int status[3]; int ch; if (!mouse_id_proc1(kbdc, PSMD_RES_HIGH, 2, status)) return (FALSE); if (status[1] == PSMD_RES_HIGH) return (FALSE); ch = (status[0] & 0x07) - 1; /* channel # */ if ((ch <= 0) || (ch > 4)) return (FALSE); /* * status[1]: always one? * status[2]: battery status? (0-100) */ return (TRUE); } #endif /* notyet */ /* Genius NetScroll Mouse, MouseSystems SmartScroll Mouse */ static int enable_groller(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; int status[3]; /* * The special sequence to enable the fourth button and the * roller. Immediately after this sequence check status bytes. * if the mouse is NetScroll, the second and the third bytes are * '3' and 'D'. */ /* * If the mouse is an ordinary PS/2 mouse, the status bytes should * look like the following. * * byte 1 bit 7 always 0 * bit 6 stream mode (0) * bit 5 disabled (0) * bit 4 1:1 scaling (0) * bit 3 always 0 * bit 0-2 button status * byte 2 resolution (PSMD_RES_HIGH) * byte 3 report rate (?) */ if (!mouse_id_proc1(kbdc, PSMD_RES_HIGH, 1, status)) return (FALSE); if ((status[1] != '3') || (status[2] != 'D')) return (FALSE); /* FIXME: SmartScroll Mouse has 5 buttons! XXX */ if (arg == PROBE) sc->hw.buttons = 4; return (TRUE); } /* Genius NetMouse/NetMouse Pro, ASCII Mie Mouse, NetScroll Optical */ static int enable_gmouse(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; int status[3]; /* * The special sequence to enable the middle, "rubber" button. * Immediately after this sequence check status bytes. * if the mouse is NetMouse, NetMouse Pro, or ASCII MIE Mouse, * the second and the third bytes are '3' and 'U'. * NOTE: NetMouse reports that it has three buttons although it has * two buttons and a rubber button. NetMouse Pro and MIE Mouse * say they have three buttons too and they do have a button on the * side... */ if (!mouse_id_proc1(kbdc, PSMD_RES_HIGH, 1, status)) return (FALSE); if ((status[1] != '3') || (status[2] != 'U')) return (FALSE); return (TRUE); } /* ALPS GlidePoint */ static int enable_aglide(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; int status[3]; /* * The special sequence to obtain ALPS GlidePoint specific * information. Immediately after this sequence, status bytes will * contain something interesting. * NOTE: ALPS produces several models of GlidePoint. Some of those * do not respond to this sequence, thus, cannot be detected this way. 
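* Models that do not respond are left to the remaining probes and are typically driven as generic PS/2 mice.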
*/ if (set_mouse_sampling_rate(kbdc, 100) != 100) return (FALSE); if (!mouse_id_proc1(kbdc, PSMD_RES_LOW, 2, status)) return (FALSE); if ((status[1] == PSMD_RES_LOW) || (status[2] == 100)) return (FALSE); return (TRUE); } /* Kensington ThinkingMouse/Trackball */ static int enable_kmouse(struct psm_softc *sc, enum probearg arg) { static u_char rate[] = { 20, 60, 40, 20, 20, 60, 40, 20, 20 }; KBDC kbdc = sc->kbdc; int status[3]; int id1; int id2; int i; id1 = get_aux_id(kbdc); if (set_mouse_sampling_rate(kbdc, 10) != 10) return (FALSE); /* * The device is now in the native mode? It returns a different * ID value... */ id2 = get_aux_id(kbdc); if ((id1 == id2) || (id2 != 2)) return (FALSE); if (set_mouse_resolution(kbdc, PSMD_RES_LOW) != PSMD_RES_LOW) return (FALSE); #if PSM_DEBUG >= 2 /* at this point, resolution is LOW, sampling rate is 10/sec */ if (get_mouse_status(kbdc, status, 0, 3) < 3) return (FALSE); #endif /* * The special sequence to enable the third and fourth buttons. * Otherwise they behave like the first and second buttons. */ for (i = 0; i < nitems(rate); ++i) if (set_mouse_sampling_rate(kbdc, rate[i]) != rate[i]) return (FALSE); /* * At this point, the device is using default resolution and * sampling rate for the native mode. */ if (get_mouse_status(kbdc, status, 0, 3) < 3) return (FALSE); if ((status[1] == PSMD_RES_LOW) || (status[2] == rate[i - 1])) return (FALSE); /* the device appears to be enabled by this sequence; disable it for now */ disable_aux_dev(kbdc); empty_aux_buffer(kbdc, 5); return (TRUE); } /* Logitech MouseMan+/FirstMouse+, IBM ScrollPoint Mouse */ static int enable_mmanplus(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; int data[3]; /* the special sequence to enable the fourth button and the roller. */ /* * NOTE: for ScrollPoint to respond correctly, the SET_RESOLUTION * must be called exactly three times since the last RESET command * before this sequence. XXX */ if (!set_mouse_scaling(kbdc, 1)) return (FALSE); if (!mouse_ext_command(kbdc, 0x39) || !mouse_ext_command(kbdc, 0xdb)) return (FALSE); if (get_mouse_status(kbdc, data, 1, 3) < 3) return (FALSE); /* * PS2++ protocol, packet type 0 * * b7 b6 b5 b4 b3 b2 b1 b0 * byte 1: * 1 p3 p2 1 * * * * byte 2: 1 1 p1 p0 m1 m0 1 0 * byte 3: m7 m6 m5 m4 m3 m2 m1 m0 * * p3-p0: packet type: 0 * m7-m0: model ID: MouseMan+:0x50, * FirstMouse+:0x51, * ScrollPoint:0x58... */ /* check constant bits */ if ((data[0] & MOUSE_PS2PLUS_SYNCMASK) != MOUSE_PS2PLUS_SYNC) return (FALSE); if ((data[1] & 0xc3) != 0xc2) return (FALSE); /* check d3-d0 in byte 2 */ if (!MOUSE_PS2PLUS_CHECKBITS(data)) return (FALSE); /* check p3-p0 */ if (MOUSE_PS2PLUS_PACKET_TYPE(data) != 0) return (FALSE); if (arg == PROBE) { sc->hw.hwid &= 0x00ff; sc->hw.hwid |= data[2] << 8; /* save model ID */ } /* * MouseMan+ (or FirstMouse+) is now in its native mode, in which * the wheel and the fourth button events are encoded in the * special data packet. The mouse may be put in the IntelliMouse mode * if it is initialized by the IntelliMouse's method. */ return (TRUE); } /* MS IntelliMouse Explorer */ static int enable_msexplorer(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; static u_char rate0[] = { 200, 100, 80, }; static u_char rate1[] = { 200, 200, 80, }; int id; int i; /* * This is needed for at least A4Tech X-7xx mice - they do not go * straight to Explorer mode, but need to be set to Intelli mode * first. */ enable_msintelli(sc, arg); /* the special sequence to enable the extra buttons and the roller.
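* (knock the sampling rate to 200, 200 and then 80; the device then reports the Explorer ID instead of the plain IntelliMouse ID.)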
*/ for (i = 0; i < nitems(rate1); ++i) if (set_mouse_sampling_rate(kbdc, rate1[i]) != rate1[i]) return (FALSE); /* the device will give the genuine ID only after the above sequence */ id = get_aux_id(kbdc); if (id != PSM_EXPLORER_ID) return (FALSE); if (arg == PROBE) { sc->hw.buttons = 5; /* IntelliMouse Explorer XXX */ sc->hw.hwid = id; } /* * XXX: this is a kludge to fool some KVM switch products * which think they are clever enough to know the 4-byte IntelliMouse * protocol, and assume any other protocols use 3-byte packets. * They don't convey 4-byte data packets from the IntelliMouse Explorer * correctly to the host computer because of this! * The following sequence is actually IntelliMouse's "wake up" * sequence; it will make the KVM think the mouse is IntelliMouse * when it is in fact IntelliMouse Explorer. */ for (i = 0; i < nitems(rate0); ++i) if (set_mouse_sampling_rate(kbdc, rate0[i]) != rate0[i]) break; get_aux_id(kbdc); return (TRUE); } /* * MS IntelliMouse * Logitech MouseMan+ and FirstMouse+ will also respond to this * probe routine and act like IntelliMouse. */ static int enable_msintelli(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; static u_char rate[] = { 200, 100, 80, }; int id; int i; /* the special sequence to enable the third button and the roller. */ for (i = 0; i < nitems(rate); ++i) if (set_mouse_sampling_rate(kbdc, rate[i]) != rate[i]) return (FALSE); /* the device will give the genuine ID only after the above sequence */ id = get_aux_id(kbdc); if (id != PSM_INTELLI_ID) return (FALSE); if (arg == PROBE) { sc->hw.buttons = 3; sc->hw.hwid = id; } return (TRUE); } /* * A4 Tech 4D Mouse * Newer wheel mice from A4 Tech may use the 4D+ protocol. */ static int enable_4dmouse(struct psm_softc *sc, enum probearg arg) { static u_char rate[] = { 200, 100, 80, 60, 40, 20 }; KBDC kbdc = sc->kbdc; int id; int i; for (i = 0; i < nitems(rate); ++i) if (set_mouse_sampling_rate(kbdc, rate[i]) != rate[i]) return (FALSE); id = get_aux_id(kbdc); /* * WinEasy 4D, 4 Way Scroll 4D: 6 * Cable-Free 4D: 8 (4DPLUS) * WinBest 4D+, 4 Way Scroll 4D+: 8 (4DPLUS) */ if (id != PSM_4DMOUSE_ID) return (FALSE); if (arg == PROBE) { sc->hw.buttons = 3; /* XXX some 4D mice have 4? */ sc->hw.hwid = id; } return (TRUE); } /* * A4 Tech 4D+ Mouse * Newer wheel mice from A4 Tech seem to use this protocol. * Older models are recognized as either 4D Mouse or IntelliMouse. */ static int enable_4dplus(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; int id; /* * enable_4dmouse() already issued the following ID sequence... static u_char rate[] = { 200, 100, 80, 60, 40, 20 }; int i; for (i = 0; i < sizeof(rate)/sizeof(rate[0]); ++i) if (set_mouse_sampling_rate(kbdc, rate[i]) != rate[i]) return (FALSE); */ id = get_aux_id(kbdc); switch (id) { case PSM_4DPLUS_ID: break; case PSM_4DPLUS_RFSW35_ID: break; default: return (FALSE); } if (arg == PROBE) { sc->hw.buttons = (id == PSM_4DPLUS_ID) ? 4 : 3; sc->hw.hwid = id; } return (TRUE); } /* Synaptics Touchpad */ static int synaptics_sysctl(SYSCTL_HANDLER_ARGS) { struct psm_softc *sc; int error, arg; if (oidp->oid_arg1 == NULL || oidp->oid_arg2 < 0 || oidp->oid_arg2 > SYNAPTICS_SYSCTL_SOFTBUTTON3_X) return (EINVAL); sc = oidp->oid_arg1; /* Read the current value. */ arg = *(int *)((char *)sc + oidp->oid_arg2); error = sysctl_handle_int(oidp, &arg, 0, req); /* Sanity check. */ if (error || !req->newptr) return (error); /* * Check that the new value is in the concerned node's range * of values. 
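* For instance, pressures are raw Z values and must fit in 0..255, while margins are bounded by the pad's maximum coordinates.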
*/ switch (oidp->oid_arg2) { case SYNAPTICS_SYSCTL_MIN_PRESSURE: case SYNAPTICS_SYSCTL_MAX_PRESSURE: if (arg < 0 || arg > 255) return (EINVAL); break; case SYNAPTICS_SYSCTL_MAX_WIDTH: if (arg < 4 || arg > 15) return (EINVAL); break; case SYNAPTICS_SYSCTL_MARGIN_TOP: case SYNAPTICS_SYSCTL_MARGIN_BOTTOM: case SYNAPTICS_SYSCTL_NA_TOP: case SYNAPTICS_SYSCTL_NA_BOTTOM: if (arg < 0 || arg > sc->synhw.maximumYCoord) return (EINVAL); break; case SYNAPTICS_SYSCTL_SOFTBUTTON2_X: case SYNAPTICS_SYSCTL_SOFTBUTTON3_X: /* Softbuttons is clickpad only feature */ if (!sc->synhw.capClickPad && arg != 0) return (EINVAL); /* FALLTHROUGH */ case SYNAPTICS_SYSCTL_MARGIN_RIGHT: case SYNAPTICS_SYSCTL_MARGIN_LEFT: case SYNAPTICS_SYSCTL_NA_RIGHT: case SYNAPTICS_SYSCTL_NA_LEFT: if (arg < 0 || arg > sc->synhw.maximumXCoord) return (EINVAL); break; case SYNAPTICS_SYSCTL_WINDOW_MIN: case SYNAPTICS_SYSCTL_WINDOW_MAX: case SYNAPTICS_SYSCTL_TAP_MIN_QUEUE: if (arg < 1 || arg > SYNAPTICS_PACKETQUEUE) return (EINVAL); break; case SYNAPTICS_SYSCTL_MULTIPLICATOR: case SYNAPTICS_SYSCTL_WEIGHT_CURRENT: case SYNAPTICS_SYSCTL_WEIGHT_PREVIOUS: case SYNAPTICS_SYSCTL_WEIGHT_PREVIOUS_NA: case SYNAPTICS_SYSCTL_WEIGHT_LEN_SQUARED: case SYNAPTICS_SYSCTL_DIV_MIN: case SYNAPTICS_SYSCTL_DIV_MAX: case SYNAPTICS_SYSCTL_DIV_MAX_NA: case SYNAPTICS_SYSCTL_DIV_LEN: case SYNAPTICS_SYSCTL_VSCROLL_DIV_MIN: case SYNAPTICS_SYSCTL_VSCROLL_DIV_MAX: if (arg < 1) return (EINVAL); break; case SYNAPTICS_SYSCTL_TAP_MAX_DELTA: case SYNAPTICS_SYSCTL_TAPHOLD_TIMEOUT: case SYNAPTICS_SYSCTL_VSCROLL_MIN_DELTA: if (arg < 0) return (EINVAL); break; case SYNAPTICS_SYSCTL_VSCROLL_HOR_AREA: if (arg < -sc->synhw.maximumXCoord || arg > sc->synhw.maximumXCoord) return (EINVAL); break; case SYNAPTICS_SYSCTL_SOFTBUTTONS_Y: /* Softbuttons is clickpad only feature */ if (!sc->synhw.capClickPad && arg != 0) return (EINVAL); /* FALLTHROUGH */ case SYNAPTICS_SYSCTL_VSCROLL_VER_AREA: if (arg < -sc->synhw.maximumYCoord || arg > sc->synhw.maximumYCoord) return (EINVAL); break; case SYNAPTICS_SYSCTL_TOUCHPAD_OFF: if (arg < 0 || arg > 1) return (EINVAL); break; default: return (EINVAL); } /* Update. */ *(int *)((char *)sc + oidp->oid_arg2) = arg; return (error); } static void synaptics_sysctl_create_softbuttons_tree(struct psm_softc *sc) { /* * Set predefined sizes for softbuttons. 
* Values are taken to match HP Pavilion dv6 clickpad drawings * with thin middle softbutton placed on separator */ /* hw.psm.synaptics.softbuttons_y */ sc->syninfo.softbuttons_y = 1700; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "softbuttons_y", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_SOFTBUTTONS_Y, synaptics_sysctl, "I", "Vertical size of softbuttons area"); /* hw.psm.synaptics.softbutton2_x */ sc->syninfo.softbutton2_x = 3100; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "softbutton2_x", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_SOFTBUTTON2_X, synaptics_sysctl, "I", "Horizontal position of 2nd softbutton left edge (0-disable)"); /* hw.psm.synaptics.softbutton3_x */ sc->syninfo.softbutton3_x = 3900; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "softbutton3_x", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_SOFTBUTTON3_X, synaptics_sysctl, "I", "Horizontal position of 3rd softbutton left edge (0-disable)"); } static void synaptics_sysctl_create_tree(struct psm_softc *sc, const char *name, const char *descr) { if (sc->syninfo.sysctl_tree != NULL) return; /* Attach extra synaptics sysctl nodes under hw.psm.synaptics */ sysctl_ctx_init(&sc->syninfo.sysctl_ctx); sc->syninfo.sysctl_tree = SYSCTL_ADD_NODE(&sc->syninfo.sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_psm), OID_AUTO, name, CTLFLAG_RD, 0, descr); /* hw.psm.synaptics.directional_scrolls. */ sc->syninfo.directional_scrolls = 0; SYSCTL_ADD_INT(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "directional_scrolls", CTLFLAG_RW|CTLFLAG_ANYBODY, &sc->syninfo.directional_scrolls, 0, "Enable hardware scrolling pad (if non-zero) or register it as " "extended buttons (if 0)"); /* hw.psm.synaptics.max_x. */ sc->syninfo.max_x = 6143; SYSCTL_ADD_INT(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "max_x", CTLFLAG_RD|CTLFLAG_ANYBODY, &sc->syninfo.max_x, 0, "Horizontal reporting range"); /* hw.psm.synaptics.max_y. */ sc->syninfo.max_y = 6143; SYSCTL_ADD_INT(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "max_y", CTLFLAG_RD|CTLFLAG_ANYBODY, &sc->syninfo.max_y, 0, "Vertical reporting range"); /* * Turn off two finger scroll if we have a * physical area reserved for scrolling or when * there's no multi finger support. */ if (sc->synhw.verticalScroll || (sc->synhw.capMultiFinger == 0 && sc->synhw.capAdvancedGestures == 0)) sc->syninfo.two_finger_scroll = 0; else sc->syninfo.two_finger_scroll = 1; /* hw.psm.synaptics.two_finger_scroll. */ SYSCTL_ADD_INT(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "two_finger_scroll", CTLFLAG_RW|CTLFLAG_ANYBODY, &sc->syninfo.two_finger_scroll, 0, "Enable two finger scrolling"); /* hw.psm.synaptics.min_pressure. */ sc->syninfo.min_pressure = 32; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "min_pressure", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_MIN_PRESSURE, synaptics_sysctl, "I", "Minimum pressure required to start an action"); /* hw.psm.synaptics.max_pressure.
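* (Pressure (Z) is reported as 0..255, which is why the sysctl handler above bounds both pressure knobs to that range: readings above max_pressure are treated as a palm, readings below min_pressure do not start an action.)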
*/ sc->syninfo.max_pressure = 220; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "max_pressure", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_MAX_PRESSURE, synaptics_sysctl, "I", "Maximum pressure to detect palm"); /* hw.psm.synaptics.max_width. */ sc->syninfo.max_width = 10; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "max_width", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_MAX_WIDTH, synaptics_sysctl, "I", "Maximum finger width to detect palm"); /* hw.psm.synaptics.top_margin. */ sc->syninfo.margin_top = 200; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "margin_top", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_MARGIN_TOP, synaptics_sysctl, "I", "Top margin"); /* hw.psm.synaptics.right_margin. */ sc->syninfo.margin_right = 200; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "margin_right", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_MARGIN_RIGHT, synaptics_sysctl, "I", "Right margin"); /* hw.psm.synaptics.bottom_margin. */ sc->syninfo.margin_bottom = 200; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "margin_bottom", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_MARGIN_BOTTOM, synaptics_sysctl, "I", "Bottom margin"); /* hw.psm.synaptics.left_margin. */ sc->syninfo.margin_left = 200; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "margin_left", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_MARGIN_LEFT, synaptics_sysctl, "I", "Left margin"); /* hw.psm.synaptics.na_top. */ sc->syninfo.na_top = 1783; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "na_top", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_NA_TOP, synaptics_sysctl, "I", "Top noisy area, where weight_previous_na is used instead " "of weight_previous"); /* hw.psm.synaptics.na_right. */ sc->syninfo.na_right = 563; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "na_right", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_NA_RIGHT, synaptics_sysctl, "I", "Right noisy area, where weight_previous_na is used instead " "of weight_previous"); /* hw.psm.synaptics.na_bottom. */ sc->syninfo.na_bottom = 1408; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "na_bottom", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_NA_BOTTOM, synaptics_sysctl, "I", "Bottom noisy area, where weight_previous_na is used instead " "of weight_previous"); /* hw.psm.synaptics.na_left. */ sc->syninfo.na_left = 1600; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "na_left", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_NA_LEFT, synaptics_sysctl, "I", "Left noisy area, where weight_previous_na is used instead " "of weight_previous"); /* hw.psm.synaptics.window_min. */ sc->syninfo.window_min = 4; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "window_min", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_WINDOW_MIN, synaptics_sysctl, "I", "Minimum window size to start an action"); /* hw.psm.synaptics.window_max. 
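* (window_min and window_max bound the packet window that the movement smoother averages over; like tap_min_queue, both are limited to 1..SYNAPTICS_PACKETQUEUE by the sysctl handler above.)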
*/ sc->syninfo.window_max = 10; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "window_max", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_WINDOW_MAX, synaptics_sysctl, "I", "Maximum window size"); /* hw.psm.synaptics.multiplicator. */ sc->syninfo.multiplicator = 10000; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "multiplicator", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_MULTIPLICATOR, synaptics_sysctl, "I", "Multiplicator to increase precision in averages and divisions"); /* hw.psm.synaptics.weight_current. */ sc->syninfo.weight_current = 3; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "weight_current", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_WEIGHT_CURRENT, synaptics_sysctl, "I", "Weight of the current movement in the new average"); /* hw.psm.synaptics.weight_previous. */ sc->syninfo.weight_previous = 6; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "weight_previous", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_WEIGHT_PREVIOUS, synaptics_sysctl, "I", "Weight of the previous average"); /* hw.psm.synaptics.weight_previous_na. */ sc->syninfo.weight_previous_na = 20; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "weight_previous_na", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_WEIGHT_PREVIOUS_NA, synaptics_sysctl, "I", "Weight of the previous average (inside the noisy area)"); /* hw.psm.synaptics.weight_len_squared. */ sc->syninfo.weight_len_squared = 2000; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "weight_len_squared", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_WEIGHT_LEN_SQUARED, synaptics_sysctl, "I", "Length (squared) of segments where weight_previous " "starts to decrease"); /* hw.psm.synaptics.div_min. */ sc->syninfo.div_min = 9; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "div_min", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_DIV_MIN, synaptics_sysctl, "I", "Divisor for fast movements"); /* hw.psm.synaptics.div_max. */ sc->syninfo.div_max = 17; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "div_max", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_DIV_MAX, synaptics_sysctl, "I", "Divisor for slow movements"); /* hw.psm.synaptics.div_max_na. */ sc->syninfo.div_max_na = 30; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "div_max_na", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_DIV_MAX_NA, synaptics_sysctl, "I", "Divisor with slow movements (inside the noisy area)"); /* hw.psm.synaptics.div_len. */ sc->syninfo.div_len = 100; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "div_len", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_DIV_LEN, synaptics_sysctl, "I", "Length of segments where div_max starts to decrease"); /* hw.psm.synaptics.tap_max_delta. 
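* (A touch counts as a tap only when the finger travelled less than tap_max_delta; tap_min_queue and taphold_timeout below supply the remaining tap criteria.)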
*/ sc->syninfo.tap_max_delta = 80; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "tap_max_delta", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_TAP_MAX_DELTA, synaptics_sysctl, "I", "Length of segments above which a tap is ignored"); /* hw.psm.synaptics.tap_min_queue. */ sc->syninfo.tap_min_queue = 2; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "tap_min_queue", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_TAP_MIN_QUEUE, synaptics_sysctl, "I", "Number of packets required to consider a tap"); /* hw.psm.synaptics.taphold_timeout. */ sc->gesture.in_taphold = 0; sc->syninfo.taphold_timeout = tap_timeout; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "taphold_timeout", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_TAPHOLD_TIMEOUT, synaptics_sysctl, "I", "Maximum elapsed time between two taps to consider a tap-hold " "action"); /* hw.psm.synaptics.vscroll_hor_area. */ sc->syninfo.vscroll_hor_area = 0; /* 1300 */ SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "vscroll_hor_area", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_VSCROLL_HOR_AREA, synaptics_sysctl, "I", "Area reserved for horizontal virtual scrolling"); /* hw.psm.synaptics.vscroll_ver_area. */ sc->syninfo.vscroll_ver_area = -400 - sc->syninfo.margin_right; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "vscroll_ver_area", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_VSCROLL_VER_AREA, synaptics_sysctl, "I", "Area reserved for vertical virtual scrolling"); /* hw.psm.synaptics.vscroll_min_delta. */ sc->syninfo.vscroll_min_delta = 50; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "vscroll_min_delta", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_VSCROLL_MIN_DELTA, synaptics_sysctl, "I", "Minimum movement to consider virtual scrolling"); /* hw.psm.synaptics.vscroll_div_min. */ sc->syninfo.vscroll_div_min = 100; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "vscroll_div_min", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_VSCROLL_DIV_MIN, synaptics_sysctl, "I", "Divisor for fast scrolling"); /* hw.psm.synaptics.vscroll_div_max. */ sc->syninfo.vscroll_div_max = 150; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "vscroll_div_max", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_VSCROLL_DIV_MAX, synaptics_sysctl, "I", "Divisor for slow scrolling"); /* hw.psm.synaptics.touchpad_off. */ sc->syninfo.touchpad_off = 0; SYSCTL_ADD_PROC(&sc->syninfo.sysctl_ctx, SYSCTL_CHILDREN(sc->syninfo.sysctl_tree), OID_AUTO, "touchpad_off", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, SYNAPTICS_SYSCTL_TOUCHPAD_OFF, synaptics_sysctl, "I", "Turn off touchpad"); sc->syninfo.softbuttons_y = 0; sc->syninfo.softbutton2_x = 0; sc->syninfo.softbutton3_x = 0; /* skip softbuttons sysctl on non-clickpads */ if (sc->synhw.capClickPad) synaptics_sysctl_create_softbuttons_tree(sc); } static int synaptics_preferred_mode(struct psm_softc *sc) { int mode_byte; /* Check if we are in relative mode */ if (sc->hw.model != MOUSE_MODEL_SYNAPTICS) { if (tap_enabled == 0) /* * Disable tap & drag gestures. We use a Mode Byte * and set the DisGest bit (see §2.5 of Synaptics * TouchPad Interfacing Guide).
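* (DisGest is bit 2 of the Mode Byte, hence the 0x04 below. Per the same section of the guide, the absolute-mode path builds 0xc4: bit 7 selects absolute packets and bit 6 the higher 80 packet/s rate, while bit 0 is ORed in to request W mode when capExtended is set.)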
*/ return (0x04); else /* * Enable tap & drag gestures. We use a Mode Byte * and clear the DisGest bit (see §2.5 of Synaptics * TouchPad Interfacing Guide). */ return (0x00); } mode_byte = 0xc4; /* request wmode where available */ if (sc->synhw.capExtended) mode_byte |= 1; return mode_byte; } static void synaptics_set_mode(struct psm_softc *sc, int mode_byte) { mouse_ext_command(sc->kbdc, mode_byte); /* "Commit" the Set Mode Byte command sent above. */ set_mouse_sampling_rate(sc->kbdc, 20); /* * Enable advanced gestures mode if supported and we are not entering * passthrough or relative mode. */ if ((sc->synhw.capAdvancedGestures || sc->synhw.capReportsV) && sc->hw.model == MOUSE_MODEL_SYNAPTICS && !(mode_byte & (1 << 5))) { mouse_ext_command(sc->kbdc, 3); set_mouse_sampling_rate(sc->kbdc, 0xc8); } } static int enable_synaptics(struct psm_softc *sc, enum probearg arg) { device_t psmcpnp; struct psmcpnp_softc *psmcpnp_sc; KBDC kbdc = sc->kbdc; synapticshw_t synhw; int status[3]; int buttons, middle_byte; VLOG(3, (LOG_DEBUG, "synaptics: BEGIN init\n")); /* * Just to be on the safe side: this avoids troubles with * following mouse_ext_command() when the previous command * was PSMC_SET_RESOLUTION. Set Scaling has no effect on * Synaptics Touchpad behaviour. */ set_mouse_scaling(kbdc, 1); /* Identify the Touchpad version. */ if (mouse_ext_command(kbdc, 0) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); middle_byte = status[1]; if (middle_byte != 0x46 && middle_byte != 0x47) return (FALSE); bzero(&synhw, sizeof(synhw)); synhw.infoMinor = status[0]; synhw.infoMajor = status[2] & 0x0f; if (verbose >= 2) printf("Synaptics Touchpad v%d.%d\n", synhw.infoMajor, synhw.infoMinor); /* * Most synaptics touchpads return 0x47 in the middle byte in response * to the identify command, as stated in p.4.4 of "Synaptics PS/2 * TouchPad Interfacing Guide", and we only support v4.0 or better. * But some devices return 0x46 here and have a different numbering * scheme. In the case of 0x46, we allow versions as low as v2.0 */ if ((middle_byte == 0x47 && synhw.infoMajor < 4) || (middle_byte == 0x46 && synhw.infoMajor < 2)) { printf(" Unsupported (pre-v4) Touchpad detected\n"); return (FALSE); } /* Get the Touchpad model information. */ if (mouse_ext_command(kbdc, 3) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); if ((status[1] & 0x01) != 0) { printf(" Failed to read model information\n"); return (FALSE); } synhw.infoRot180 = (status[0] & 0x80) != 0; synhw.infoPortrait = (status[0] & 0x40) != 0; synhw.infoSensor = status[0] & 0x3f; synhw.infoHardware = (status[1] & 0xfe) >> 1; synhw.infoNewAbs = (status[2] & 0x80) != 0; synhw.capPen = (status[2] & 0x40) != 0; synhw.infoSimplC = (status[2] & 0x20) != 0; synhw.infoGeometry = status[2] & 0x0f; if (verbose >= 2) { printf(" Model information:\n"); printf(" infoRot180: %d\n", synhw.infoRot180); printf(" infoPortrait: %d\n", synhw.infoPortrait); printf(" infoSensor: %d\n", synhw.infoSensor); printf(" infoHardware: %d\n", synhw.infoHardware); printf(" infoNewAbs: %d\n", synhw.infoNewAbs); printf(" capPen: %d\n", synhw.capPen); printf(" infoSimplC: %d\n", synhw.infoSimplC); printf(" infoGeometry: %d\n", synhw.infoGeometry); } /* Read the extended capability bits.
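* (mouse_ext_command() encodes the 8-bit query number as four two-bit Set Resolution arguments; the Status request issued by get_mouse_status() then returns the three response bytes.)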
*/ if (mouse_ext_command(kbdc, 2) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); if (!SYNAPTICS_VERSION_GE(synhw, 7, 5) && status[1] != middle_byte) { printf(" Failed to read extended capability bits\n"); return (FALSE); } psmcpnp = devclass_get_device(devclass_find(PSMCPNP_DRIVER_NAME), sc->unit); psmcpnp_sc = (psmcpnp != NULL) ? device_get_softc(psmcpnp) : NULL; /* * Set conservative defaults for 0x46 middle byte touchpads * as ExtendedQueries return bogus data. */ if (middle_byte == 0x46) { synhw.capExtended = 1; synhw.capPalmDetect = 1; synhw.capPassthrough = 1; synhw.capMultiFinger = 1; synhw.maximumXCoord = SYNAPTICS_DEFAULT_MAX_X; synhw.maximumYCoord = SYNAPTICS_DEFAULT_MAX_Y; synhw.minimumXCoord = SYNAPTICS_DEFAULT_MIN_X; synhw.minimumYCoord = SYNAPTICS_DEFAULT_MIN_Y; /* Enable multitouch mode for HW v8.1 devices */ if (psmcpnp_sc != NULL && psmcpnp_sc->type == PSMCPNP_HPSYN81) synhw.capReportsV = 1; } else synhw.capExtended = (status[0] & 0x80) != 0; /* Set the different capabilities when they exist. */ buttons = 0; if (synhw.capExtended && middle_byte == 0x47) { synhw.nExtendedQueries = (status[0] & 0x70) >> 4; synhw.capMiddle = (status[0] & 0x04) != 0; synhw.capPassthrough = (status[2] & 0x80) != 0; synhw.capLowPower = (status[2] & 0x40) != 0; synhw.capMultiFingerReport = (status[2] & 0x20) != 0; synhw.capSleep = (status[2] & 0x10) != 0; synhw.capFourButtons = (status[2] & 0x08) != 0; synhw.capBallistics = (status[2] & 0x04) != 0; synhw.capMultiFinger = (status[2] & 0x02) != 0; synhw.capPalmDetect = (status[2] & 0x01) != 0; if (!set_mouse_scaling(kbdc, 1)) return (FALSE); if (mouse_ext_command(kbdc, 0x08) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); if (status[0] != 0 && (status[1] & 0x80) && status[2] != 0) { synhw.infoXupmm = status[0]; synhw.infoYupmm = status[2]; } if (verbose >= 2) { printf(" Extended capabilities:\n"); printf(" capExtended: %d\n", synhw.capExtended); printf(" capMiddle: %d\n", synhw.capMiddle); printf(" nExtendedQueries: %d\n", synhw.nExtendedQueries); printf(" capPassthrough: %d\n", synhw.capPassthrough); printf(" capLowPower: %d\n", synhw.capLowPower); printf(" capMultiFingerReport: %d\n", synhw.capMultiFingerReport); printf(" capSleep: %d\n", synhw.capSleep); printf(" capFourButtons: %d\n", synhw.capFourButtons); printf(" capBallistics: %d\n", synhw.capBallistics); printf(" capMultiFinger: %d\n", synhw.capMultiFinger); printf(" capPalmDetect: %d\n", synhw.capPalmDetect); printf(" infoXupmm: %d\n", synhw.infoXupmm); printf(" infoYupmm: %d\n", synhw.infoYupmm); } /* * If nExtendedQueries is 1 or greater, then the TouchPad * supports this number of extended queries. We can load * more information about buttons using query 0x09. 
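* (The same gating applies further down: the continued capabilities (query 0x0c) and the coordinate bounds (queries 0x0d and 0x0f) are only fetched when nExtendedQueries >= 4.)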
*/ if (synhw.nExtendedQueries >= 1) { if (!set_mouse_scaling(kbdc, 1)) return (FALSE); if (mouse_ext_command(kbdc, 0x09) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); synhw.verticalScroll = (status[0] & 0x01) != 0; synhw.horizontalScroll = (status[0] & 0x02) != 0; synhw.verticalWheel = (status[0] & 0x08) != 0; synhw.nExtendedButtons = (status[1] & 0xf0) >> 4; synhw.capEWmode = (status[0] & 0x04) != 0; if (verbose >= 2) { printf(" Extended model ID:\n"); printf(" verticalScroll: %d\n", synhw.verticalScroll); printf(" horizontalScroll: %d\n", synhw.horizontalScroll); printf(" verticalWheel: %d\n", synhw.verticalWheel); printf(" nExtendedButtons: %d\n", synhw.nExtendedButtons); printf(" capEWmode: %d\n", synhw.capEWmode); } /* * Add the number of extended buttons to the total * button support count, including the middle button * if capMiddle support bit is set. */ buttons = synhw.nExtendedButtons + synhw.capMiddle; } else /* * If the capFourButtons support bit is set, * add a fourth button to the total button count. */ buttons = synhw.capFourButtons ? 1 : 0; /* Read the continued capabilities bits. */ if (synhw.nExtendedQueries >= 4) { if (!set_mouse_scaling(kbdc, 1)) return (FALSE); if (mouse_ext_command(kbdc, 0x0c) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); synhw.capClickPad = (status[1] & 0x01) << 1; synhw.capClickPad |= (status[0] & 0x10) != 0; synhw.capDeluxeLEDs = (status[1] & 0x02) != 0; synhw.noAbsoluteFilter = (status[1] & 0x04) != 0; synhw.capReportsV = (status[1] & 0x08) != 0; synhw.capUniformClickPad = (status[1] & 0x10) != 0; synhw.capReportsMin = (status[1] & 0x20) != 0; synhw.capInterTouch = (status[1] & 0x40) != 0; synhw.capReportsMax = (status[0] & 0x02) != 0; synhw.capClearPad = (status[0] & 0x04) != 0; synhw.capAdvancedGestures = (status[0] & 0x08) != 0; synhw.capCoveredPad = (status[0] & 0x80) != 0; if (synhw.capReportsMax) { if (!set_mouse_scaling(kbdc, 1)) return (FALSE); if (mouse_ext_command(kbdc, 0x0d) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); synhw.maximumXCoord = (status[0] << 5) | ((status[1] & 0x0f) << 1); synhw.maximumYCoord = (status[2] << 5) | ((status[1] & 0xf0) >> 3); } else { synhw.maximumXCoord = SYNAPTICS_DEFAULT_MAX_X; synhw.maximumYCoord = SYNAPTICS_DEFAULT_MAX_Y; } if (synhw.capReportsMin) { if (!set_mouse_scaling(kbdc, 1)) return (FALSE); if (mouse_ext_command(kbdc, 0x0f) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); synhw.minimumXCoord = (status[0] << 5) | ((status[1] & 0x0f) << 1); synhw.minimumYCoord = (status[2] << 5) | ((status[1] & 0xf0) >> 3); } else { synhw.minimumXCoord = SYNAPTICS_DEFAULT_MIN_X; synhw.minimumYCoord = SYNAPTICS_DEFAULT_MIN_Y; } /* * ClickPad properties are not exported through PS/2 * protocol. Detection is based on controller's PnP ID. 
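* (The PnP ID is recorded by the psmcpnp probe near the end of this file; its forcepad_ids and hpsyn81_ids tables list the ACPI _HID values behind these quirks.)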
*/ if (synhw.capClickPad && psmcpnp_sc != NULL) { switch (psmcpnp_sc->type) { case PSMCPNP_FORCEPAD: synhw.forcePad = 1; break; default: break; } } if (verbose >= 2) { printf(" Continued capabilities:\n"); printf(" capClickPad: %d\n", synhw.capClickPad); printf(" capDeluxeLEDs: %d\n", synhw.capDeluxeLEDs); printf(" noAbsoluteFilter: %d\n", synhw.noAbsoluteFilter); printf(" capReportsV: %d\n", synhw.capReportsV); printf(" capUniformClickPad: %d\n", synhw.capUniformClickPad); printf(" capReportsMin: %d\n", synhw.capReportsMin); printf(" capInterTouch: %d\n", synhw.capInterTouch); printf(" capReportsMax: %d\n", synhw.capReportsMax); printf(" capClearPad: %d\n", synhw.capClearPad); printf(" capAdvancedGestures: %d\n", synhw.capAdvancedGestures); printf(" capCoveredPad: %d\n", synhw.capCoveredPad); if (synhw.capReportsMax) { printf(" maximumXCoord: %d\n", synhw.maximumXCoord); printf(" maximumYCoord: %d\n", synhw.maximumYCoord); } if (synhw.capReportsMin) { printf(" minimumXCoord: %d\n", synhw.minimumXCoord); printf(" minimumYCoord: %d\n", synhw.minimumYCoord); } if (synhw.capClickPad) { printf(" forcePad: %d\n", synhw.forcePad); } } buttons += synhw.capClickPad; } } if (verbose >= 2) { if (synhw.capExtended) printf(" Additional Buttons: %d\n", buttons); else printf(" No extended capabilities\n"); } /* * Add the default number of 3 buttons to the total * count of supported buttons reported above. */ buttons += 3; /* * Read the mode byte. * * XXX: Note the Synaptics documentation also defines the first * byte of the response to this query to be a constant 0x3b, this * does not appear to be true for Touchpads with guest devices. */ if (mouse_ext_command(kbdc, 1) == 0) return (FALSE); if (get_mouse_status(kbdc, status, 0, 3) != 3) return (FALSE); if (!SYNAPTICS_VERSION_GE(synhw, 7, 5) && status[1] != middle_byte) { printf(" Failed to read mode byte\n"); return (FALSE); } if (arg == PROBE) sc->synhw = synhw; if (!synaptics_support) return (FALSE); /* Set mouse type just now for synaptics_set_mode() */ sc->hw.model = MOUSE_MODEL_SYNAPTICS; synaptics_set_mode(sc, synaptics_preferred_mode(sc)); if (trackpoint_support && synhw.capPassthrough) { enable_trackpoint(sc, arg); } VLOG(3, (LOG_DEBUG, "synaptics: END init (%d buttons)\n", buttons)); if (arg == PROBE) { /* Create sysctl tree. 
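* (The tree is created at most once per unit; synaptics_sysctl_create_tree() returns early when syninfo.sysctl_tree is already set.)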
*/ synaptics_sysctl_create_tree(sc, "synaptics", "Synaptics TouchPad"); sc->hw.buttons = buttons; } return (TRUE); } static void synaptics_passthrough_on(struct psm_softc *sc) { VLOG(2, (LOG_NOTICE, "psm: setting pass-through mode.\n")); synaptics_set_mode(sc, synaptics_preferred_mode(sc) | (1 << 5)); } static void synaptics_passthrough_off(struct psm_softc *sc) { VLOG(2, (LOG_NOTICE, "psm: turning pass-through mode off.\n")); set_mouse_scaling(sc->kbdc, 2); set_mouse_scaling(sc->kbdc, 1); synaptics_set_mode(sc, synaptics_preferred_mode(sc)); } /* IBM/Lenovo TrackPoint */ static int trackpoint_command(struct psm_softc *sc, int cmd, int loc, int val) { const int seq[] = { 0xe2, cmd, loc, val }; int i; if (sc->synhw.capPassthrough) synaptics_passthrough_on(sc); for (i = 0; i < nitems(seq); i++) { if (sc->synhw.capPassthrough && (seq[i] == 0xff || seq[i] == 0xe7)) if (send_aux_command(sc->kbdc, 0xe7) != PSM_ACK) { synaptics_passthrough_off(sc); return (EIO); } if (send_aux_command(sc->kbdc, seq[i]) != PSM_ACK) { if (sc->synhw.capPassthrough) synaptics_passthrough_off(sc); return (EIO); } } if (sc->synhw.capPassthrough) synaptics_passthrough_off(sc); return (0); } #define PSM_TPINFO(x) offsetof(struct psm_softc, tpinfo.x) #define TPMASK 0 #define TPLOC 1 #define TPINFO 2 static int trackpoint_sysctl(SYSCTL_HANDLER_ARGS) { static const int data[][3] = { { 0x00, 0x4a, PSM_TPINFO(sensitivity) }, { 0x00, 0x4d, PSM_TPINFO(inertia) }, { 0x00, 0x60, PSM_TPINFO(uplateau) }, { 0x00, 0x57, PSM_TPINFO(reach) }, { 0x00, 0x58, PSM_TPINFO(draghys) }, { 0x00, 0x59, PSM_TPINFO(mindrag) }, { 0x00, 0x5a, PSM_TPINFO(upthresh) }, { 0x00, 0x5c, PSM_TPINFO(threshold) }, { 0x00, 0x5d, PSM_TPINFO(jenks) }, { 0x00, 0x5e, PSM_TPINFO(ztime) }, { 0x01, 0x2c, PSM_TPINFO(pts) }, { 0x08, 0x2d, PSM_TPINFO(skipback) } }; struct psm_softc *sc; int error, newval, *oldvalp; const int *tp; if (arg1 == NULL || arg2 < 0 || arg2 >= nitems(data)) return (EINVAL); sc = arg1; tp = data[arg2]; oldvalp = (int *)((intptr_t)sc + tp[TPINFO]); newval = *oldvalp; error = sysctl_handle_int(oidp, &newval, 0, req); if (error != 0) return (error); if (newval == *oldvalp) return (0); if (newval < 0 || newval > (tp[TPMASK] == 0 ? 255 : 1)) return (EINVAL); error = trackpoint_command(sc, tp[TPMASK] == 0 ? 0x81 : 0x47, tp[TPLOC], tp[TPMASK] == 0 ? 
newval : tp[TPMASK]); if (error != 0) return (error); *oldvalp = newval; return (0); } static void trackpoint_sysctl_create_tree(struct psm_softc *sc) { if (sc->tpinfo.sysctl_tree != NULL) return; /* Attach extra trackpoint sysctl nodes under hw.psm.trackpoint */ sysctl_ctx_init(&sc->tpinfo.sysctl_ctx); sc->tpinfo.sysctl_tree = SYSCTL_ADD_NODE(&sc->tpinfo.sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_psm), OID_AUTO, "trackpoint", CTLFLAG_RD, 0, "IBM/Lenovo TrackPoint"); /* hw.psm.trackpoint.sensitivity */ sc->tpinfo.sensitivity = 0x80; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "sensitivity", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_SENSITIVITY, trackpoint_sysctl, "I", "Sensitivity"); /* hw.psm.trackpoint.negative_inertia */ sc->tpinfo.inertia = 0x06; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "negative_inertia", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_NEGATIVE_INERTIA, trackpoint_sysctl, "I", "Negative inertia factor"); /* hw.psm.trackpoint.upper_plateau */ sc->tpinfo.uplateau = 0x61; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "upper_plateau", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_UPPER_PLATEAU, trackpoint_sysctl, "I", "Transfer function upper plateau speed"); /* hw.psm.trackpoint.backup_range */ sc->tpinfo.reach = 0x0a; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "backup_range", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_BACKUP_RANGE, trackpoint_sysctl, "I", "Backup range"); /* hw.psm.trackpoint.drag_hysteresis */ sc->tpinfo.draghys = 0xff; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "drag_hysteresis", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_DRAG_HYSTERESIS, trackpoint_sysctl, "I", "Drag hysteresis"); /* hw.psm.trackpoint.minimum_drag */ sc->tpinfo.mindrag = 0x14; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "minimum_drag", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_MINIMUM_DRAG, trackpoint_sysctl, "I", "Minimum drag"); /* hw.psm.trackpoint.up_threshold */ sc->tpinfo.upthresh = 0xff; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "up_threshold", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_UP_THRESHOLD, trackpoint_sysctl, "I", "Up threshold for release"); /* hw.psm.trackpoint.threshold */ sc->tpinfo.threshold = 0x08; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "threshold", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_THRESHOLD, trackpoint_sysctl, "I", "Threshold"); /* hw.psm.trackpoint.jenks_curvature */ sc->tpinfo.jenks = 0x87; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "jenks_curvature", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_JENKS_CURVATURE, trackpoint_sysctl, "I", "Jenks curvature"); /* hw.psm.trackpoint.z_time */ sc->tpinfo.ztime = 0x26; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "z_time", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_Z_TIME, trackpoint_sysctl, "I", "Z time constant"); /* hw.psm.trackpoint.press_to_select */ sc->tpinfo.pts = 0x00; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), 
OID_AUTO, "press_to_select", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_PRESS_TO_SELECT, trackpoint_sysctl, "I", "Press to Select"); /* hw.psm.trackpoint.skip_backups */ sc->tpinfo.skipback = 0x00; SYSCTL_ADD_PROC(&sc->tpinfo.sysctl_ctx, SYSCTL_CHILDREN(sc->tpinfo.sysctl_tree), OID_AUTO, "skip_backups", CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, sc, TRACKPOINT_SYSCTL_SKIP_BACKUPS, trackpoint_sysctl, "I", "Skip backups from drags"); } static void set_trackpoint_parameters(struct psm_softc *sc) { trackpoint_command(sc, 0x81, 0x4a, sc->tpinfo.sensitivity); trackpoint_command(sc, 0x81, 0x60, sc->tpinfo.uplateau); trackpoint_command(sc, 0x81, 0x4d, sc->tpinfo.inertia); trackpoint_command(sc, 0x81, 0x57, sc->tpinfo.reach); trackpoint_command(sc, 0x81, 0x58, sc->tpinfo.draghys); trackpoint_command(sc, 0x81, 0x59, sc->tpinfo.mindrag); trackpoint_command(sc, 0x81, 0x5a, sc->tpinfo.upthresh); trackpoint_command(sc, 0x81, 0x5c, sc->tpinfo.threshold); trackpoint_command(sc, 0x81, 0x5d, sc->tpinfo.jenks); trackpoint_command(sc, 0x81, 0x5e, sc->tpinfo.ztime); if (sc->tpinfo.pts == 0x01) trackpoint_command(sc, 0x47, 0x2c, 0x01); if (sc->tpinfo.skipback == 0x01) trackpoint_command(sc, 0x47, 0x2d, 0x08); } static int enable_trackpoint(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; int id; /* * If called from enable_synaptics(), make sure that passthrough * mode is enabled so we can reach the trackpoint. * However, passthrough mode must be disabled before setting the * trackpoint parameters, as trackpoint_command() enables and disables * passthrough mode on its own. */ if (sc->synhw.capPassthrough) synaptics_passthrough_on(sc); if (send_aux_command(kbdc, 0xe1) != PSM_ACK || read_aux_data(kbdc) != 0x01) goto no_trackpoint; id = read_aux_data(kbdc); if (id < 0x01) goto no_trackpoint; if (arg == PROBE) sc->tphw = id; if (!trackpoint_support) goto no_trackpoint; if (sc->synhw.capPassthrough) synaptics_passthrough_off(sc); if (arg == PROBE) { trackpoint_sysctl_create_tree(sc); /* * Don't overwrite hwid and buttons when we are * a guest device. */ if (!sc->synhw.capPassthrough) { sc->hw.hwid = id; sc->hw.buttons = 3; } } set_trackpoint_parameters(sc); return (TRUE); no_trackpoint: if (sc->synhw.capPassthrough) synaptics_passthrough_off(sc); return (FALSE); } /* Interlink electronics VersaPad */ static int enable_versapad(struct psm_softc *sc, enum probearg arg) { KBDC kbdc = sc->kbdc; int data[3]; set_mouse_resolution(kbdc, PSMD_RES_MEDIUM_HIGH); /* set res. 2 */ set_mouse_sampling_rate(kbdc, 100); /* set rate 100 */ set_mouse_scaling(kbdc, 1); /* set scale 1:1 */ set_mouse_scaling(kbdc, 1); /* set scale 1:1 */ set_mouse_scaling(kbdc, 1); /* set scale 1:1 */ set_mouse_scaling(kbdc, 1); /* set scale 1:1 */ if (get_mouse_status(kbdc, data, 0, 3) < 3) /* get status */ return (FALSE); if (data[2] != 0xa || data[1] != 0) /* rate == 0xa && res. == 0 */ return (FALSE); set_mouse_scaling(kbdc, 1); /* set scale 1:1 */ return (TRUE); /* PS/2 absolute mode */ } /* Elantech Touchpad */ static int elantech_read_1(KBDC kbdc, int hwversion, int reg, int *val) { int res, readcmd, retidx; int resp[3]; readcmd = hwversion == 2 ? ELANTECH_REG_READ : ELANTECH_REG_RDWR; retidx = hwversion == 4 ?
1 : 0; res = send_aux_command(kbdc, ELANTECH_CUSTOM_CMD) != PSM_ACK; res |= send_aux_command(kbdc, readcmd) != PSM_ACK; res |= send_aux_command(kbdc, ELANTECH_CUSTOM_CMD) != PSM_ACK; res |= send_aux_command(kbdc, reg) != PSM_ACK; res |= get_mouse_status(kbdc, resp, 0, 3) != 3; if (res == 0) *val = resp[retidx]; return (res); } static int elantech_write_1(KBDC kbdc, int hwversion, int reg, int val) { int res, writecmd; writecmd = hwversion == 2 ? ELANTECH_REG_WRITE : ELANTECH_REG_RDWR; res = send_aux_command(kbdc, ELANTECH_CUSTOM_CMD) != PSM_ACK; res |= send_aux_command(kbdc, writecmd) != PSM_ACK; res |= send_aux_command(kbdc, ELANTECH_CUSTOM_CMD) != PSM_ACK; res |= send_aux_command(kbdc, reg) != PSM_ACK; if (hwversion == 4) { res |= send_aux_command(kbdc, ELANTECH_CUSTOM_CMD) != PSM_ACK; res |= send_aux_command(kbdc, writecmd) != PSM_ACK; } res |= send_aux_command(kbdc, ELANTECH_CUSTOM_CMD) != PSM_ACK; res |= send_aux_command(kbdc, val) != PSM_ACK; res |= set_mouse_scaling(kbdc, 1) == 0; return (res); } static int elantech_cmd(KBDC kbdc, int hwversion, int cmd, int *resp) { int res; if (hwversion == 2) { res = set_mouse_scaling(kbdc, 1) == 0; res |= mouse_ext_command(kbdc, cmd) == 0; } else { res = send_aux_command(kbdc, ELANTECH_CUSTOM_CMD) != PSM_ACK; res |= send_aux_command(kbdc, cmd) != PSM_ACK; } res |= get_mouse_status(kbdc, resp, 0, 3) != 3; return (res); } static int elantech_init(KBDC kbdc, elantechhw_t *elanhw) { int i, val, res, hwversion, reg10; /* set absolute mode */ hwversion = elanhw->hwversion; reg10 = -1; switch (hwversion) { case 2: reg10 = elanhw->fwversion == 0x020030 ? 0x54 : 0xc4; res = elantech_write_1(kbdc, hwversion, 0x10, reg10); if (res) break; res = elantech_write_1(kbdc, hwversion, 0x11, 0x8A); break; case 3: reg10 = 0x0b; res = elantech_write_1(kbdc, hwversion, 0x10, reg10); break; case 4: res = elantech_write_1(kbdc, hwversion, 0x07, 0x01); break; default: res = 1; } /* Read back reg 0x10 to ensure hardware is ready. */ if (res == 0 && reg10 >= 0) { for (i = 0; i < 5; i++) { if (elantech_read_1(kbdc, hwversion, 0x10, &val) == 0) break; DELAY(2000); } if (i == 5) res = 1; } if (res) printf("couldn't set absolute mode\n"); return (res); } static void elantech_init_synaptics(struct psm_softc *sc) { /* Set capabilities required by movement smoother */ sc->synhw.infoMajor = sc->elanhw.hwversion; sc->synhw.infoMinor = sc->elanhw.fwversion; sc->synhw.infoXupmm = sc->elanhw.dpmmx; sc->synhw.infoYupmm = sc->elanhw.dpmmy; sc->synhw.verticalScroll = 0; sc->synhw.nExtendedQueries = 4; sc->synhw.capExtended = 1; sc->synhw.capPassthrough = sc->elanhw.hastrackpoint; sc->synhw.capClickPad = sc->elanhw.isclickpad; sc->synhw.capMultiFinger = 1; if (sc->elanhw.issemimt) sc->synhw.capAdvancedGestures = 1; else sc->synhw.capReportsV = 1; sc->synhw.capPalmDetect = 1; sc->synhw.capPen = 0; sc->synhw.capReportsMax = 1; sc->synhw.maximumXCoord = sc->elanhw.sizex; sc->synhw.maximumYCoord = sc->elanhw.sizey; sc->synhw.capReportsMin = 1; sc->synhw.minimumXCoord = 0; sc->synhw.minimumYCoord = 0; if (sc->syninfo.sysctl_tree == NULL) { synaptics_sysctl_create_tree(sc, "elantech", "Elantech Touchpad"); /* * Adjust synaptic smoother tunables * 1. Disable finger detection pressure threshold. Unlike * synaptics we assume the finger is acting when a packet with * its X&Y arrives, not when pressure exceeds some threshold * 2. Disable unrelated features like margins and noisy areas * 3. Disable virtual scroll areas as 2nd finger is preferable * 4.
For clickpads set bottom quarter as 42% - 16% - 42% sized * softbuttons * 5. Scale down divisors and movement lengths by a factor of 3 * where 3 is Synaptics to Elantech (~2200/800) dpi ratio */ /* Set reporting range to be equal touchpad size */ sc->syninfo.max_x = sc->elanhw.sizex; sc->syninfo.max_y = sc->elanhw.sizey; /* Disable finger detection pressure threshold */ sc->syninfo.min_pressure = 1; /* Adjust palm width to nearly match synaptics w=10 */ sc->syninfo.max_width = 7; /* Elans often report double & triple taps as single event */ sc->syninfo.tap_min_queue = 1; /* Use full area of touchpad */ sc->syninfo.margin_top = 0; sc->syninfo.margin_right = 0; sc->syninfo.margin_bottom = 0; sc->syninfo.margin_left = 0; /* Disable noisy area */ sc->syninfo.na_top = 0; sc->syninfo.na_right = 0; sc->syninfo.na_bottom = 0; sc->syninfo.na_left = 0; /* Tune divisors and movement lengths */ sc->syninfo.weight_len_squared = 200; sc->syninfo.div_min = 3; sc->syninfo.div_max = 6; sc->syninfo.div_max_na = 10; sc->syninfo.div_len = 30; sc->syninfo.tap_max_delta = 25; /* Disable virtual scrolling areas and tune its divisors */ sc->syninfo.vscroll_hor_area = 0; sc->syninfo.vscroll_ver_area = 0; sc->syninfo.vscroll_min_delta = 15; sc->syninfo.vscroll_div_min = 30; sc->syninfo.vscroll_div_max = 50; /* Set bottom quarter as 42% - 16% - 42% sized softbuttons */ if (sc->elanhw.isclickpad) { sc->syninfo.softbuttons_y = sc->elanhw.sizey / 4; sc->syninfo.softbutton2_x = sc->elanhw.sizex * 11 / 25; sc->syninfo.softbutton3_x = sc->elanhw.sizex * 14 / 25; } } return; } static int enable_elantech(struct psm_softc *sc, enum probearg arg) { static const int ic2hw[] = /*IC: 0 1 2 3 4 5 6 7 8 9 a b c d e f */ { 0, 0, 2, 0, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0 }; static const int fw_sizes[][3] = { /* FW.vers MaxX MaxY */ { 0x020030, 1152, 768 }, { 0x020800, 1152, 768 }, { 0x020b00, 1152, 768 }, { 0x040215, 900, 500 }, { 0x040216, 819, 405 }, { 0x040219, 900, 500 }, }; elantechhw_t elanhw; int icversion, hwversion, xtr, i, id, resp[3], dpix, dpiy; KBDC kbdc = sc->kbdc; VLOG(3, (LOG_DEBUG, "elantech: BEGIN init\n")); set_mouse_scaling(kbdc, 1); set_mouse_scaling(kbdc, 1); set_mouse_scaling(kbdc, 1); if (get_mouse_status(kbdc, resp, 0, 3) != 3) return (FALSE); if (!ELANTECH_MAGIC(resp)) return (FALSE); /* Identify the Touchpad version. */ if (elantech_cmd(kbdc, 2, ELANTECH_FW_VERSION, resp)) return (FALSE); bzero(&elanhw, sizeof(elanhw)); elanhw.fwversion = (resp[0] << 16) | (resp[1] << 8) | resp[2]; icversion = resp[0] & 0x0f; hwversion = ic2hw[icversion]; if (verbose >= 2) printf("Elantech touchpad hardware v.%d firmware v.0x%06x\n", hwversion, elanhw.fwversion); if (ELANTECH_HW_IS_V1(elanhw.fwversion)) { printf (" Unsupported touchpad hardware (v1)\n"); return (FALSE); } if (hwversion == 0) { printf (" Unknown touchpad hardware (firmware v.0x%06x)\n", elanhw.fwversion); return (FALSE); } /* Get the Touchpad model information. */ elanhw.hwversion = hwversion; elanhw.issemimt = hwversion == 2; elanhw.isclickpad = (resp[1] & 0x10) != 0; elanhw.hascrc = (resp[1] & 0x40) != 0; elanhw.haspressure = elanhw.fwversion >= 0x020800; /* Read the capability bits. 
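* (In the capability response decoded below, resp[1] and resp[2] carry the horizontal and vertical trace counts and bit 7 of resp[0] flags an attached trackpoint.)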
*/ if (elantech_cmd(kbdc, hwversion, ELANTECH_CAPABILITIES, resp) != 0) { printf(" Failed to read capability bits\n"); return (FALSE); } elanhw.ntracesx = imax(resp[1], 3); elanhw.ntracesy = imax(resp[2], 3); elanhw.hastrackpoint = (resp[0] & 0x80) != 0; /* Get the touchpad resolution */ switch (hwversion) { case 4: if (elantech_cmd(kbdc, hwversion, ELANTECH_RESOLUTION, resp) == 0) { dpix = (resp[1] & 0x0f) * 10 + 790; dpiy = ((resp[1] & 0xf0) >> 4) * 10 + 790; elanhw.dpmmx = (dpix * 10 + 5) / 254; elanhw.dpmmy = (dpiy * 10 + 5) / 254; break; } /* FALLTHROUGH */ case 2: case 3: elanhw.dpmmx = elanhw.dpmmy = 32; /* 800 dpi */ break; } if (!elantech_support) return (FALSE); if (elantech_init(kbdc, &elanhw)) { printf("couldn't initialize elantech touchpad\n"); return (FALSE); } /* * Get the touchpad reporting range. * On HW v.3 touchpads it should be done after switching hardware * to real resolution mode (by setting bit 3 of reg10) */ elanhw.dptracex = elanhw.dptracey = 64; for (i = 0; i < nitems(fw_sizes); i++) { if (elanhw.fwversion == fw_sizes[i][0]) { elanhw.sizex = fw_sizes[i][1]; elanhw.sizey = fw_sizes[i][2]; goto found; } } if (elantech_cmd(kbdc, hwversion, ELANTECH_FW_ID, resp) != 0) { printf(" Failed to read touchpad size\n"); elanhw.sizex = 10000; /* Arbitrary high values to */ elanhw.sizey = 10000; /* prevent clipping in smoother */ } else if (hwversion == 2) { if ((elanhw.fwversion >> 16) == 0x14 && (resp[1] & 0x10) && !elantech_cmd(kbdc, hwversion, ELANTECH_SAMPLE, resp)) { elanhw.dptracex = resp[1] / 2; elanhw.dptracey = resp[2] / 2; } xtr = ((elanhw.fwversion >> 8) == 0x0208) ? 1 : 2; elanhw.sizex = (elanhw.ntracesx - xtr) * elanhw.dptracex; elanhw.sizey = (elanhw.ntracesy - xtr) * elanhw.dptracey; } else { elanhw.sizex = (resp[0] & 0x0f) << 8 | resp[1]; elanhw.sizey = (resp[0] & 0xf0) << 4 | resp[2]; xtr = (elanhw.sizex % (elanhw.ntracesx - 2) == 0) ? 2 : 1; elanhw.dptracex = elanhw.sizex / (elanhw.ntracesx - xtr); elanhw.dptracey = elanhw.sizey / (elanhw.ntracesy - xtr); } found: if (verbose >= 2) { printf(" Model information:\n"); printf(" MaxX: %d\n", elanhw.sizex); printf(" MaxY: %d\n", elanhw.sizey); printf(" DpmmX: %d\n", elanhw.dpmmx); printf(" DpmmY: %d\n", elanhw.dpmmy); printf(" TracesX: %d\n", elanhw.ntracesx); printf(" TracesY: %d\n", elanhw.ntracesy); printf(" DptraceX: %d\n", elanhw.dptracex); printf(" DptraceY: %d\n", elanhw.dptracey); printf(" SemiMT: %d\n", elanhw.issemimt); printf(" Clickpad: %d\n", elanhw.isclickpad); printf(" Trackpoint: %d\n", elanhw.hastrackpoint); printf(" CRC: %d\n", elanhw.hascrc); printf(" Pressure: %d\n", elanhw.haspressure); } VLOG(3, (LOG_DEBUG, "elantech: END init\n")); if (arg == PROBE) { sc->elanhw = elanhw; sc->hw.buttons = 3; /* Initialize synaptics movement smoother */ elantech_init_synaptics(sc); for (id = 0; id < ELANTECH_MAX_FINGERS; id++) PSM_FINGER_RESET(sc->elanaction.fingers[id]); } return (TRUE); } /* * Return true if (start + (secs.usecs)) is earlier than 'now', i.e. if * at least secs.usecs have elapsed since 'start'. * Now may be NULL and the function will fetch the current time from * getmicrouptime(), or a cached 'now' can be passed in. * All values should be numbers derived from getmicrouptime(). */ static int timeelapsed(start, secs, usecs, now) const struct timeval *start, *now; int secs, usecs; { struct timeval snow, tv; /* if there is no 'now' passed in, then get it as a convenience.
*/ if (now == NULL) { getmicrouptime(&snow); now = &snow; } tv.tv_sec = secs; tv.tv_usec = usecs; timevaladd(&tv, start); return (timevalcmp(&tv, now, <)); } static int psmresume(device_t dev) { struct psm_softc *sc = device_get_softc(dev); int unit = device_get_unit(dev); int err; VLOG(2, (LOG_NOTICE, "psm%d: system resume hook called.\n", unit)); if ((sc->config & (PSM_CONFIG_HOOKRESUME | PSM_CONFIG_INITAFTERSUSPEND)) == 0) return (0); err = reinitialize(sc, sc->config & PSM_CONFIG_INITAFTERSUSPEND); if ((sc->state & PSM_ASLP) && !(sc->state & PSM_VALID)) { /* * Release the blocked process; it must be notified that * the device cannot be accessed anymore. */ sc->state &= ~PSM_ASLP; wakeup(sc); } VLOG(2, (LOG_DEBUG, "psm%d: system resume hook exiting.\n", unit)); return (err); } DRIVER_MODULE(psm, atkbdc, psm_driver, psm_devclass, 0, 0); #ifdef EVDEV_SUPPORT MODULE_DEPEND(psm, evdev, 1, 1, 1); #endif #ifdef DEV_ISA /* * This sucks up assignments from PNPBIOS and ACPI. */ /* * When the PS/2 mouse device is reported by ACPI or PnP BIOS, it may * appear BEFORE the AT keyboard controller. As the PS/2 mouse device * can be probed and attached only after the AT keyboard controller is * attached, we shall quietly reserve the IRQ resource for later use. * If the PS/2 mouse device is reported to us AFTER the keyboard controller, * copy the IRQ resource to the PS/2 mouse device instance hanging * under the keyboard controller, then probe and attach it. */ static devclass_t psmcpnp_devclass; static device_probe_t psmcpnp_probe; static device_attach_t psmcpnp_attach; static device_method_t psmcpnp_methods[] = { DEVMETHOD(device_probe, psmcpnp_probe), DEVMETHOD(device_attach, psmcpnp_attach), { 0, 0 } }; static driver_t psmcpnp_driver = { PSMCPNP_DRIVER_NAME, psmcpnp_methods, sizeof(struct psmcpnp_softc), }; static struct isa_pnp_id psmcpnp_ids[] = { { 0x030fd041, "PS/2 mouse port" }, /* PNP0F03 */ { 0x0e0fd041, "PS/2 mouse port" }, /* PNP0F0E */ { 0x120fd041, "PS/2 mouse port" }, /* PNP0F12 */ { 0x130fd041, "PS/2 mouse port" }, /* PNP0F13 */ { 0x1303d041, "PS/2 port" }, /* PNP0313, XXX */ { 0x02002e4f, "Dell PS/2 mouse port" }, /* Lat. X200, Dell */ { 0x0002a906, "ALPS Glide Point" }, /* ALPS Glide Point */ { 0x80374d24, "IBM PS/2 mouse port" }, /* IBM3780, ThinkPad */ { 0x81374d24, "IBM PS/2 mouse port" }, /* IBM3781, ThinkPad */ { 0x0190d94d, "SONY VAIO PS/2 mouse port"}, /* SNY9001, Vaio */ { 0x0290d94d, "SONY VAIO PS/2 mouse port"}, /* SNY9002, Vaio */ { 0x0390d94d, "SONY VAIO PS/2 mouse port"}, /* SNY9003, Vaio */ { 0x0490d94d, "SONY VAIO PS/2 mouse port"}, /* SNY9004, Vaio */ { 0 } }; /* _HID list for quirk detection. 
Any device below has _CID from psmcpnp_ids */ static struct isa_pnp_id forcepad_ids[] = { { 0x0d302e4f, "HP PS/2 forcepad port" }, /* SYN300D, EB 1040 */ { 0x14302e4f, "HP PS/2 forcepad port" }, /* SYN3014, EB 1040 */ { 0 } }; /* List of HW v8.1 synaptics touchpads erroneously detected as HW v2.0 */ static struct isa_pnp_id hpsyn81_ids[] = { { 0x9e012e4f, "HP PS/2 trackpad port" }, /* SYN019E, EB 9470 */ { 0 } }; static int create_a_copy(device_t atkbdc, device_t me) { device_t psm; u_long irq; /* find the PS/2 mouse device instance under the keyboard controller */ psm = device_find_child(atkbdc, PSM_DRIVER_NAME, device_get_unit(atkbdc)); if (psm == NULL) return (ENXIO); if (device_get_state(psm) != DS_NOTPRESENT) return (0); /* move our resource to the found device */ irq = bus_get_resource_start(me, SYS_RES_IRQ, 0); bus_delete_resource(me, SYS_RES_IRQ, 0); bus_set_resource(psm, SYS_RES_IRQ, KBDC_RID_AUX, irq, 1); /* ...then probe and attach it */ return (device_probe_and_attach(psm)); } static int psmcpnp_probe(device_t dev) { struct psmcpnp_softc *sc = device_get_softc(dev); struct resource *res; u_long irq; int rid; if (ISA_PNP_PROBE(device_get_parent(dev), dev, forcepad_ids) == 0) sc->type = PSMCPNP_FORCEPAD; else if(ISA_PNP_PROBE(device_get_parent(dev), dev, hpsyn81_ids) == 0) sc->type = PSMCPNP_HPSYN81; else if (ISA_PNP_PROBE(device_get_parent(dev), dev, psmcpnp_ids) == 0) sc->type = PSMCPNP_GENERIC; else return (ENXIO); /* * The PnP BIOS and ACPI are supposed to assign an IRQ (12) * to the PS/2 mouse device node. But, some buggy PnP BIOS * declares the PS/2 mouse device node without an IRQ resource! * If this happens, we shall refer to device hints. * If we still don't find it there, use a hardcoded value... XXX */ rid = 0; irq = bus_get_resource_start(dev, SYS_RES_IRQ, rid); if (irq <= 0) { if (resource_long_value(PSM_DRIVER_NAME, device_get_unit(dev),"irq", &irq) != 0) irq = 12; /* XXX */ device_printf(dev, "irq resource info is missing; " "assuming irq %ld\n", irq); bus_set_resource(dev, SYS_RES_IRQ, rid, irq, 1); } res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, 0); bus_release_resource(dev, SYS_RES_IRQ, rid, res); /* keep quiet */ if (!bootverbose) device_quiet(dev); return ((res == NULL) ? ENXIO : 0); } static int psmcpnp_attach(device_t dev) { device_t atkbdc; /* find the keyboard controller, which may be on acpi* or isa* bus */ atkbdc = devclass_get_device(devclass_find(ATKBDC_DRIVER_NAME), device_get_unit(dev)); if ((atkbdc != NULL) && (device_get_state(atkbdc) == DS_ATTACHED)) create_a_copy(atkbdc, dev); return (0); } DRIVER_MODULE(psmcpnp, isa, psmcpnp_driver, psmcpnp_devclass, 0, 0); DRIVER_MODULE(psmcpnp, acpi, psmcpnp_driver, psmcpnp_devclass, 0, 0); ISA_PNP_INFO(psmcpnp_ids); #endif /* DEV_ISA */ Index: projects/pnfs-planb-server/sys/dev/bxe/bxe.c =================================================================== --- projects/pnfs-planb-server/sys/dev/bxe/bxe.c (revision 334966) +++ projects/pnfs-planb-server/sys/dev/bxe/bxe.c (revision 334967) @@ -1,19247 +1,19247 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007-2014 QLogic Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define BXE_DRIVER_VERSION "1.78.91" #include "bxe.h" #include "ecore_sp.h" #include "ecore_init.h" #include "ecore_init_ops.h" #include "57710_int_offsets.h" #include "57711_int_offsets.h" #include "57712_int_offsets.h" /* * CTLTYPE_U64 and sysctl_handle_64 were added in r217616. Define these * explicitly here for older kernels that don't include this changeset. */ #ifndef CTLTYPE_U64 #define CTLTYPE_U64 CTLTYPE_QUAD #define sysctl_handle_64 sysctl_handle_quad #endif /* * CSUM_TCP_IPV6 and CSUM_UDP_IPV6 were added in r236170. Define these * here as zero(0) for older kernels that don't include this changeset, * thereby masking the functionality. */ #ifndef CSUM_TCP_IPV6 #define CSUM_TCP_IPV6 0 #define CSUM_UDP_IPV6 0 #endif /* * pci_find_cap was added in r219865. Re-define this as pci_find_extcap * for older kernels that don't include this changeset. */ #if __FreeBSD_version < 900035 #define pci_find_cap pci_find_extcap #endif #define BXE_DEF_SB_ATT_IDX 0x0001 #define BXE_DEF_SB_IDX 0x0002 /* * FLR Support - bxe_pf_flr_clnup() is called during nic_load in the per * function HW initialization. */ #define FLR_WAIT_USEC 10000 /* 10 msecs */ #define FLR_WAIT_INTERVAL 50 /* usecs */ #define FLR_POLL_CNT (FLR_WAIT_USEC / FLR_WAIT_INTERVAL) /* 200 */ struct pbf_pN_buf_regs { int pN; uint32_t init_crd; uint32_t crd; uint32_t crd_freed; }; struct pbf_pN_cmd_regs { int pN; uint32_t lines_occup; uint32_t lines_freed; }; /* * PCI Device ID Table used by bxe_probe().
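* (bxe_probe() walks this table matching the PCI vendor and device IDs of the card; the PCI_ANY_ID wildcards make the match independent of the subsystem vendor and device IDs.)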
*/ #define BXE_DEVDESC_MAX 64 static struct bxe_device_type bxe_devs[] = { { BRCM_VENDORID, CHIP_NUM_57710, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57710 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57711, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57711 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57711E, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57711E 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57712, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57712 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57712_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57712 MF 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57800, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57800 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57800_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57800 MF 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57810, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57810 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57810_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57810 MF 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57811, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57811 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57811_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57811 MF 10GbE" }, { BRCM_VENDORID, CHIP_NUM_57840_4_10, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57840 4x10GbE" }, { QLOGIC_VENDORID, CHIP_NUM_57840_4_10, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57840 4x10GbE" }, { BRCM_VENDORID, CHIP_NUM_57840_2_20, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57840 2x20GbE" }, { BRCM_VENDORID, CHIP_NUM_57840_MF, PCI_ANY_ID, PCI_ANY_ID, "QLogic NetXtreme II BCM57840 MF 10GbE" }, { 0, 0, 0, 0, NULL } }; MALLOC_DECLARE(M_BXE_ILT); MALLOC_DEFINE(M_BXE_ILT, "bxe_ilt", "bxe ILT pointer"); /* * FreeBSD device entry points. */ static int bxe_probe(device_t); static int bxe_attach(device_t); static int bxe_detach(device_t); static int bxe_shutdown(device_t); /* * FreeBSD KLD module/device interface event handler method. */ static device_method_t bxe_methods[] = { /* Device interface (device_if.h) */ DEVMETHOD(device_probe, bxe_probe), DEVMETHOD(device_attach, bxe_attach), DEVMETHOD(device_detach, bxe_detach), DEVMETHOD(device_shutdown, bxe_shutdown), /* Bus interface (bus_if.h) */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), KOBJMETHOD_END }; /* * FreeBSD KLD Module data declaration */ static driver_t bxe_driver = { "bxe", /* module name */ bxe_methods, /* event handler */ sizeof(struct bxe_softc) /* extra data */ }; /* * FreeBSD dev class is needed to manage dev instances and * to associate with a bus type */ static devclass_t bxe_devclass; MODULE_DEPEND(bxe, pci, 1, 1, 1); MODULE_DEPEND(bxe, ether, 1, 1, 1); DRIVER_MODULE(bxe, pci, bxe_driver, bxe_devclass, 0, 0); NETDUMP_DEFINE(bxe); /* resources needed for unloading a previously loaded device */ #define BXE_PREV_WAIT_NEEDED 1 struct mtx bxe_prev_mtx; MTX_SYSINIT(bxe_prev_mtx, &bxe_prev_mtx, "bxe_prev_lock", MTX_DEF); struct bxe_prev_list_node { LIST_ENTRY(bxe_prev_list_node) node; uint8_t bus; uint8_t slot; uint8_t path; uint8_t aer; /* XXX automatic error recovery */ uint8_t undi; }; static LIST_HEAD(, bxe_prev_list_node) bxe_prev_list = LIST_HEAD_INITIALIZER(bxe_prev_list); static int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */ /* Tunable device values... 
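* (Each CTLFLAG_RDTUN entry below is read-only at run time but can be seeded from the boot loader, e.g. a hypothetical hw.bxe.queue_count=8 line in loader.conf.)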
*/ SYSCTL_NODE(_hw, OID_AUTO, bxe, CTLFLAG_RD, 0, "bxe driver parameters"); /* Debug */ unsigned long bxe_debug = 0; SYSCTL_ULONG(_hw_bxe, OID_AUTO, debug, CTLFLAG_RDTUN, &bxe_debug, 0, "Debug logging mode"); /* Interrupt Mode: 0 (IRQ), 1 (MSI/IRQ), and 2 (MSI-X/MSI/IRQ) */ static int bxe_interrupt_mode = INTR_MODE_MSIX; SYSCTL_INT(_hw_bxe, OID_AUTO, interrupt_mode, CTLFLAG_RDTUN, &bxe_interrupt_mode, 0, "Interrupt (MSI-X/MSI/INTx) mode"); /* Number of Queues: 0 (Auto) or 1 to 16 (fixed queue number) */ static int bxe_queue_count = 4; SYSCTL_INT(_hw_bxe, OID_AUTO, queue_count, CTLFLAG_RDTUN, &bxe_queue_count, 0, "Multi-Queue queue count"); /* max number of buffers per queue (default RX_BD_USABLE) */ static int bxe_max_rx_bufs = 0; SYSCTL_INT(_hw_bxe, OID_AUTO, max_rx_bufs, CTLFLAG_RDTUN, &bxe_max_rx_bufs, 0, "Maximum Number of Rx Buffers Per Queue"); /* Host interrupt coalescing RX tick timer (usecs) */ static int bxe_hc_rx_ticks = 25; SYSCTL_INT(_hw_bxe, OID_AUTO, hc_rx_ticks, CTLFLAG_RDTUN, &bxe_hc_rx_ticks, 0, "Host Coalescing Rx ticks"); /* Host interrupt coalescing TX tick timer (usecs) */ static int bxe_hc_tx_ticks = 50; SYSCTL_INT(_hw_bxe, OID_AUTO, hc_tx_ticks, CTLFLAG_RDTUN, &bxe_hc_tx_ticks, 0, "Host Coalescing Tx ticks"); /* Maximum number of Rx packets to process at a time */ static int bxe_rx_budget = 0xffffffff; SYSCTL_INT(_hw_bxe, OID_AUTO, rx_budget, CTLFLAG_TUN, &bxe_rx_budget, 0, "Rx processing budget"); /* Maximum LRO aggregation size */ static int bxe_max_aggregation_size = 0; SYSCTL_INT(_hw_bxe, OID_AUTO, max_aggregation_size, CTLFLAG_TUN, &bxe_max_aggregation_size, 0, "max aggregation size"); /* PCI MRRS: -1 (Auto), 0 (128B), 1 (256B), 2 (512B), 3 (1KB) */ static int bxe_mrrs = -1; SYSCTL_INT(_hw_bxe, OID_AUTO, mrrs, CTLFLAG_RDTUN, &bxe_mrrs, 0, "PCIe maximum read request size"); /* AutoGrEEEn: 0 (hardware default), 1 (force on), 2 (force off) */ static int bxe_autogreeen = 0; SYSCTL_INT(_hw_bxe, OID_AUTO, autogreeen, CTLFLAG_RDTUN, &bxe_autogreeen, 0, "AutoGrEEEn support"); /* 4-tuple RSS support for UDP: 0 (disabled), 1 (enabled) */ static int bxe_udp_rss = 0; SYSCTL_INT(_hw_bxe, OID_AUTO, udp_rss, CTLFLAG_RDTUN, &bxe_udp_rss, 0, "UDP RSS support"); #define STAT_NAME_LEN 32 /* no stat names below can be longer than this */ #define STATS_OFFSET32(stat_name) \ (offsetof(struct bxe_eth_stats, stat_name) / 4) #define Q_STATS_OFFSET32(stat_name) \ (offsetof(struct bxe_eth_q_stats, stat_name) / 4) static const struct { uint32_t offset; uint32_t size; uint32_t flags; #define STATS_FLAGS_PORT 1 #define STATS_FLAGS_FUNC 2 /* MF only cares about function stats */ #define STATS_FLAGS_BOTH (STATS_FLAGS_FUNC | STATS_FLAGS_PORT) char string[STAT_NAME_LEN]; } bxe_eth_stats_arr[] = { { STATS_OFFSET32(total_bytes_received_hi), 8, STATS_FLAGS_BOTH, "rx_bytes" }, { STATS_OFFSET32(error_bytes_received_hi), 8, STATS_FLAGS_BOTH, "rx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_received_hi), 8, STATS_FLAGS_BOTH, "rx_bcast_packets" }, { STATS_OFFSET32(rx_stat_dot3statsfcserrors_hi), 8, STATS_FLAGS_PORT, "rx_crc_errors" }, { STATS_OFFSET32(rx_stat_dot3statsalignmenterrors_hi), 8, STATS_FLAGS_PORT, "rx_align_errors" }, { STATS_OFFSET32(rx_stat_etherstatsundersizepkts_hi), 8, STATS_FLAGS_PORT, "rx_undersize_packets" }, { STATS_OFFSET32(etherstatsoverrsizepkts_hi), 8, STATS_FLAGS_PORT, 
"rx_oversize_packets" }, { STATS_OFFSET32(rx_stat_etherstatsfragments_hi), 8, STATS_FLAGS_PORT, "rx_fragments" }, { STATS_OFFSET32(rx_stat_etherstatsjabbers_hi), 8, STATS_FLAGS_PORT, "rx_jabbers" }, { STATS_OFFSET32(no_buff_discard_hi), 8, STATS_FLAGS_BOTH, "rx_discards" }, { STATS_OFFSET32(mac_filter_discard), 4, STATS_FLAGS_PORT, "rx_filtered_packets" }, { STATS_OFFSET32(mf_tag_discard), 4, STATS_FLAGS_PORT, "rx_mf_tag_discard" }, { STATS_OFFSET32(pfc_frames_received_hi), 8, STATS_FLAGS_PORT, "pfc_frames_received" }, { STATS_OFFSET32(pfc_frames_sent_hi), 8, STATS_FLAGS_PORT, "pfc_frames_sent" }, { STATS_OFFSET32(brb_drop_hi), 8, STATS_FLAGS_PORT, "rx_brb_discard" }, { STATS_OFFSET32(brb_truncate_hi), 8, STATS_FLAGS_PORT, "rx_brb_truncate" }, { STATS_OFFSET32(pause_frames_received_hi), 8, STATS_FLAGS_PORT, "rx_pause_frames" }, { STATS_OFFSET32(rx_stat_maccontrolframesreceived_hi), 8, STATS_FLAGS_PORT, "rx_mac_ctrl_frames" }, { STATS_OFFSET32(nig_timer_max), 4, STATS_FLAGS_PORT, "rx_constant_pause_events" }, { STATS_OFFSET32(total_bytes_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bytes" }, { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), 8, STATS_FLAGS_PORT, "tx_error_bytes" }, { STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_ucast_packets" }, { STATS_OFFSET32(total_multicast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_mcast_packets" }, { STATS_OFFSET32(total_broadcast_packets_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bcast_packets" }, { STATS_OFFSET32(tx_stat_dot3statsinternalmactransmiterrors_hi), 8, STATS_FLAGS_PORT, "tx_mac_errors" }, { STATS_OFFSET32(rx_stat_dot3statscarriersenseerrors_hi), 8, STATS_FLAGS_PORT, "tx_carrier_errors" }, { STATS_OFFSET32(tx_stat_dot3statssinglecollisionframes_hi), 8, STATS_FLAGS_PORT, "tx_single_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsmultiplecollisionframes_hi), 8, STATS_FLAGS_PORT, "tx_multi_collisions" }, { STATS_OFFSET32(tx_stat_dot3statsdeferredtransmissions_hi), 8, STATS_FLAGS_PORT, "tx_deferred" }, { STATS_OFFSET32(tx_stat_dot3statsexcessivecollisions_hi), 8, STATS_FLAGS_PORT, "tx_excess_collisions" }, { STATS_OFFSET32(tx_stat_dot3statslatecollisions_hi), 8, STATS_FLAGS_PORT, "tx_late_collisions" }, { STATS_OFFSET32(tx_stat_etherstatscollisions_hi), 8, STATS_FLAGS_PORT, "tx_total_collisions" }, { STATS_OFFSET32(tx_stat_etherstatspkts64octets_hi), 8, STATS_FLAGS_PORT, "tx_64_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts65octetsto127octets_hi), 8, STATS_FLAGS_PORT, "tx_65_to_127_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts128octetsto255octets_hi), 8, STATS_FLAGS_PORT, "tx_128_to_255_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts256octetsto511octets_hi), 8, STATS_FLAGS_PORT, "tx_256_to_511_byte_packets" }, { STATS_OFFSET32(tx_stat_etherstatspkts512octetsto1023octets_hi), 8, STATS_FLAGS_PORT, "tx_512_to_1023_byte_packets" }, { STATS_OFFSET32(etherstatspkts1024octetsto1522octets_hi), 8, STATS_FLAGS_PORT, "tx_1024_to_1522_byte_packets" }, { STATS_OFFSET32(etherstatspktsover1522octets_hi), 8, STATS_FLAGS_PORT, "tx_1523_to_9022_byte_packets" }, { STATS_OFFSET32(pause_frames_sent_hi), 8, STATS_FLAGS_PORT, "tx_pause_frames" }, { STATS_OFFSET32(total_tpa_aggregations_hi), 8, STATS_FLAGS_FUNC, "tpa_aggregations" }, { STATS_OFFSET32(total_tpa_aggregated_frames_hi), 8, STATS_FLAGS_FUNC, "tpa_aggregated_frames"}, { STATS_OFFSET32(total_tpa_bytes_hi), 8, STATS_FLAGS_FUNC, "tpa_bytes"}, { STATS_OFFSET32(eee_tx_lpi), 4, STATS_FLAGS_PORT, "eee_tx_lpi"}, { STATS_OFFSET32(rx_calls), 4, 
STATS_FLAGS_FUNC, "rx_calls"}, { STATS_OFFSET32(rx_pkts), 4, STATS_FLAGS_FUNC, "rx_pkts"}, { STATS_OFFSET32(rx_tpa_pkts), 4, STATS_FLAGS_FUNC, "rx_tpa_pkts"}, { STATS_OFFSET32(rx_erroneous_jumbo_sge_pkts), 4, STATS_FLAGS_FUNC, "rx_erroneous_jumbo_sge_pkts"}, { STATS_OFFSET32(rx_bxe_service_rxsgl), 4, STATS_FLAGS_FUNC, "rx_bxe_service_rxsgl"}, { STATS_OFFSET32(rx_jumbo_sge_pkts), 4, STATS_FLAGS_FUNC, "rx_jumbo_sge_pkts"}, { STATS_OFFSET32(rx_soft_errors), 4, STATS_FLAGS_FUNC, "rx_soft_errors"}, { STATS_OFFSET32(rx_hw_csum_errors), 4, STATS_FLAGS_FUNC, "rx_hw_csum_errors"}, { STATS_OFFSET32(rx_ofld_frames_csum_ip), 4, STATS_FLAGS_FUNC, "rx_ofld_frames_csum_ip"}, { STATS_OFFSET32(rx_ofld_frames_csum_tcp_udp), 4, STATS_FLAGS_FUNC, "rx_ofld_frames_csum_tcp_udp"}, { STATS_OFFSET32(rx_budget_reached), 4, STATS_FLAGS_FUNC, "rx_budget_reached"}, { STATS_OFFSET32(tx_pkts), 4, STATS_FLAGS_FUNC, "tx_pkts"}, { STATS_OFFSET32(tx_soft_errors), 4, STATS_FLAGS_FUNC, "tx_soft_errors"}, { STATS_OFFSET32(tx_ofld_frames_csum_ip), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_ip"}, { STATS_OFFSET32(tx_ofld_frames_csum_tcp), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_tcp"}, { STATS_OFFSET32(tx_ofld_frames_csum_udp), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_csum_udp"}, { STATS_OFFSET32(tx_ofld_frames_lso), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_lso"}, { STATS_OFFSET32(tx_ofld_frames_lso_hdr_splits), 4, STATS_FLAGS_FUNC, "tx_ofld_frames_lso_hdr_splits"}, { STATS_OFFSET32(tx_encap_failures), 4, STATS_FLAGS_FUNC, "tx_encap_failures"}, { STATS_OFFSET32(tx_hw_queue_full), 4, STATS_FLAGS_FUNC, "tx_hw_queue_full"}, { STATS_OFFSET32(tx_hw_max_queue_depth), 4, STATS_FLAGS_FUNC, "tx_hw_max_queue_depth"}, { STATS_OFFSET32(tx_dma_mapping_failure), 4, STATS_FLAGS_FUNC, "tx_dma_mapping_failure"}, { STATS_OFFSET32(tx_max_drbr_queue_depth), 4, STATS_FLAGS_FUNC, "tx_max_drbr_queue_depth"}, { STATS_OFFSET32(tx_window_violation_std), 4, STATS_FLAGS_FUNC, "tx_window_violation_std"}, { STATS_OFFSET32(tx_window_violation_tso), 4, STATS_FLAGS_FUNC, "tx_window_violation_tso"}, { STATS_OFFSET32(tx_chain_lost_mbuf), 4, STATS_FLAGS_FUNC, "tx_chain_lost_mbuf"}, { STATS_OFFSET32(tx_frames_deferred), 4, STATS_FLAGS_FUNC, "tx_frames_deferred"}, { STATS_OFFSET32(tx_queue_xoff), 4, STATS_FLAGS_FUNC, "tx_queue_xoff"}, { STATS_OFFSET32(mbuf_defrag_attempts), 4, STATS_FLAGS_FUNC, "mbuf_defrag_attempts"}, { STATS_OFFSET32(mbuf_defrag_failures), 4, STATS_FLAGS_FUNC, "mbuf_defrag_failures"}, { STATS_OFFSET32(mbuf_rx_bd_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_bd_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_bd_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_bd_mapping_failed"}, { STATS_OFFSET32(mbuf_rx_tpa_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_tpa_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_tpa_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_tpa_mapping_failed"}, { STATS_OFFSET32(mbuf_rx_sge_alloc_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_sge_alloc_failed"}, { STATS_OFFSET32(mbuf_rx_sge_mapping_failed), 4, STATS_FLAGS_FUNC, "mbuf_rx_sge_mapping_failed"}, { STATS_OFFSET32(mbuf_alloc_tx), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tx"}, { STATS_OFFSET32(mbuf_alloc_rx), 4, STATS_FLAGS_FUNC, "mbuf_alloc_rx"}, { STATS_OFFSET32(mbuf_alloc_sge), 4, STATS_FLAGS_FUNC, "mbuf_alloc_sge"}, { STATS_OFFSET32(mbuf_alloc_tpa), 4, STATS_FLAGS_FUNC, "mbuf_alloc_tpa"}, { STATS_OFFSET32(tx_queue_full_return), 4, STATS_FLAGS_FUNC, "tx_queue_full_return"}, { STATS_OFFSET32(bxe_tx_mq_sc_state_failures), 4, STATS_FLAGS_FUNC, "bxe_tx_mq_sc_state_failures"}, { 
STATS_OFFSET32(tx_request_link_down_failures), 4, STATS_FLAGS_FUNC, "tx_request_link_down_failures"}, { STATS_OFFSET32(bd_avail_too_less_failures), 4, STATS_FLAGS_FUNC, "bd_avail_too_less_failures"}, { STATS_OFFSET32(tx_mq_not_empty), 4, STATS_FLAGS_FUNC, "tx_mq_not_empty"}, { STATS_OFFSET32(nsegs_path1_errors), 4, STATS_FLAGS_FUNC, "nsegs_path1_errors"}, { STATS_OFFSET32(nsegs_path2_errors), 4, STATS_FLAGS_FUNC, "nsegs_path2_errors"} }; static const struct { uint32_t offset; uint32_t size; char string[STAT_NAME_LEN]; } bxe_eth_q_stats_arr[] = { { Q_STATS_OFFSET32(total_bytes_received_hi), 8, "rx_bytes" }, { Q_STATS_OFFSET32(total_unicast_packets_received_hi), 8, "rx_ucast_packets" }, { Q_STATS_OFFSET32(total_multicast_packets_received_hi), 8, "rx_mcast_packets" }, { Q_STATS_OFFSET32(total_broadcast_packets_received_hi), 8, "rx_bcast_packets" }, { Q_STATS_OFFSET32(no_buff_discard_hi), 8, "rx_discards" }, { Q_STATS_OFFSET32(total_bytes_transmitted_hi), 8, "tx_bytes" }, { Q_STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, "tx_ucast_packets" }, { Q_STATS_OFFSET32(total_multicast_packets_transmitted_hi), 8, "tx_mcast_packets" }, { Q_STATS_OFFSET32(total_broadcast_packets_transmitted_hi), 8, "tx_bcast_packets" }, { Q_STATS_OFFSET32(total_tpa_aggregations_hi), 8, "tpa_aggregations" }, { Q_STATS_OFFSET32(total_tpa_aggregated_frames_hi), 8, "tpa_aggregated_frames"}, { Q_STATS_OFFSET32(total_tpa_bytes_hi), 8, "tpa_bytes"}, { Q_STATS_OFFSET32(rx_calls), 4, "rx_calls"}, { Q_STATS_OFFSET32(rx_pkts), 4, "rx_pkts"}, { Q_STATS_OFFSET32(rx_tpa_pkts), 4, "rx_tpa_pkts"}, { Q_STATS_OFFSET32(rx_erroneous_jumbo_sge_pkts), 4, "rx_erroneous_jumbo_sge_pkts"}, { Q_STATS_OFFSET32(rx_bxe_service_rxsgl), 4, "rx_bxe_service_rxsgl"}, { Q_STATS_OFFSET32(rx_jumbo_sge_pkts), 4, "rx_jumbo_sge_pkts"}, { Q_STATS_OFFSET32(rx_soft_errors), 4, "rx_soft_errors"}, { Q_STATS_OFFSET32(rx_hw_csum_errors), 4, "rx_hw_csum_errors"}, { Q_STATS_OFFSET32(rx_ofld_frames_csum_ip), 4, "rx_ofld_frames_csum_ip"}, { Q_STATS_OFFSET32(rx_ofld_frames_csum_tcp_udp), 4, "rx_ofld_frames_csum_tcp_udp"}, { Q_STATS_OFFSET32(rx_budget_reached), 4, "rx_budget_reached"}, { Q_STATS_OFFSET32(tx_pkts), 4, "tx_pkts"}, { Q_STATS_OFFSET32(tx_soft_errors), 4, "tx_soft_errors"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_ip), 4, "tx_ofld_frames_csum_ip"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_tcp), 4, "tx_ofld_frames_csum_tcp"}, { Q_STATS_OFFSET32(tx_ofld_frames_csum_udp), 4, "tx_ofld_frames_csum_udp"}, { Q_STATS_OFFSET32(tx_ofld_frames_lso), 4, "tx_ofld_frames_lso"}, { Q_STATS_OFFSET32(tx_ofld_frames_lso_hdr_splits), 4, "tx_ofld_frames_lso_hdr_splits"}, { Q_STATS_OFFSET32(tx_encap_failures), 4, "tx_encap_failures"}, { Q_STATS_OFFSET32(tx_hw_queue_full), 4, "tx_hw_queue_full"}, { Q_STATS_OFFSET32(tx_hw_max_queue_depth), 4, "tx_hw_max_queue_depth"}, { Q_STATS_OFFSET32(tx_dma_mapping_failure), 4, "tx_dma_mapping_failure"}, { Q_STATS_OFFSET32(tx_max_drbr_queue_depth), 4, "tx_max_drbr_queue_depth"}, { Q_STATS_OFFSET32(tx_window_violation_std), 4, "tx_window_violation_std"}, { Q_STATS_OFFSET32(tx_window_violation_tso), 4, "tx_window_violation_tso"}, { Q_STATS_OFFSET32(tx_chain_lost_mbuf), 4, "tx_chain_lost_mbuf"}, { Q_STATS_OFFSET32(tx_frames_deferred), 4, "tx_frames_deferred"}, { Q_STATS_OFFSET32(tx_queue_xoff), 4, "tx_queue_xoff"}, { Q_STATS_OFFSET32(mbuf_defrag_attempts), 4, "mbuf_defrag_attempts"}, { Q_STATS_OFFSET32(mbuf_defrag_failures), 4, "mbuf_defrag_failures"}, { Q_STATS_OFFSET32(mbuf_rx_bd_alloc_failed), 4, "mbuf_rx_bd_alloc_failed"}, { 
Q_STATS_OFFSET32(mbuf_rx_bd_mapping_failed), 4, "mbuf_rx_bd_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_rx_tpa_alloc_failed), 4, "mbuf_rx_tpa_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_tpa_mapping_failed), 4, "mbuf_rx_tpa_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_rx_sge_alloc_failed), 4, "mbuf_rx_sge_alloc_failed"}, { Q_STATS_OFFSET32(mbuf_rx_sge_mapping_failed), 4, "mbuf_rx_sge_mapping_failed"}, { Q_STATS_OFFSET32(mbuf_alloc_tx), 4, "mbuf_alloc_tx"}, { Q_STATS_OFFSET32(mbuf_alloc_rx), 4, "mbuf_alloc_rx"}, { Q_STATS_OFFSET32(mbuf_alloc_sge), 4, "mbuf_alloc_sge"}, { Q_STATS_OFFSET32(mbuf_alloc_tpa), 4, "mbuf_alloc_tpa"}, { Q_STATS_OFFSET32(tx_queue_full_return), 4, "tx_queue_full_return"}, { Q_STATS_OFFSET32(bxe_tx_mq_sc_state_failures), 4, "bxe_tx_mq_sc_state_failures"}, { Q_STATS_OFFSET32(tx_request_link_down_failures), 4, "tx_request_link_down_failures"}, { Q_STATS_OFFSET32(bd_avail_too_less_failures), 4, "bd_avail_too_less_failures"}, { Q_STATS_OFFSET32(tx_mq_not_empty), 4, "tx_mq_not_empty"}, { Q_STATS_OFFSET32(nsegs_path1_errors), 4, "nsegs_path1_errors"}, { Q_STATS_OFFSET32(nsegs_path2_errors), 4, "nsegs_path2_errors"} }; #define BXE_NUM_ETH_STATS ARRAY_SIZE(bxe_eth_stats_arr) #define BXE_NUM_ETH_Q_STATS ARRAY_SIZE(bxe_eth_q_stats_arr) static void bxe_cmng_fns_init(struct bxe_softc *sc, uint8_t read_cfg, uint8_t cmng_type); static int bxe_get_cmng_fns_mode(struct bxe_softc *sc); static void storm_memset_cmng(struct bxe_softc *sc, struct cmng_init *cmng, uint8_t port); static void bxe_set_reset_global(struct bxe_softc *sc); static void bxe_set_reset_in_progress(struct bxe_softc *sc); static uint8_t bxe_reset_is_done(struct bxe_softc *sc, int engine); static uint8_t bxe_clear_pf_load(struct bxe_softc *sc); static uint8_t bxe_chk_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print); static void bxe_int_disable(struct bxe_softc *sc); static int bxe_release_leader_lock(struct bxe_softc *sc); static void bxe_pf_disable(struct bxe_softc *sc); static void bxe_free_fp_buffers(struct bxe_softc *sc); static inline void bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t rx_bd_prod, uint16_t rx_cq_prod, uint16_t rx_sge_prod); static void bxe_link_report_locked(struct bxe_softc *sc); static void bxe_link_report(struct bxe_softc *sc); static void bxe_link_status_update(struct bxe_softc *sc); static void bxe_periodic_callout_func(void *xsc); static void bxe_periodic_start(struct bxe_softc *sc); static void bxe_periodic_stop(struct bxe_softc *sc); static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t prev_index, uint16_t index); static int bxe_alloc_rx_tpa_mbuf(struct bxe_fastpath *fp, int queue); static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index); static uint8_t bxe_txeof(struct bxe_softc *sc, struct bxe_fastpath *fp); static void bxe_task_fp(struct bxe_fastpath *fp); static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m, uint8_t contents); static int bxe_alloc_mem(struct bxe_softc *sc); static void bxe_free_mem(struct bxe_softc *sc); static int bxe_alloc_fw_stats_mem(struct bxe_softc *sc); static void bxe_free_fw_stats_mem(struct bxe_softc *sc); static int bxe_interrupt_attach(struct bxe_softc *sc); static void bxe_interrupt_detach(struct bxe_softc *sc); static void bxe_set_rx_mode(struct bxe_softc *sc); static int bxe_init_locked(struct bxe_softc *sc); static int bxe_stop_locked(struct bxe_softc *sc); static __noinline int bxe_nic_load(struct bxe_softc *sc, int load_mode); static __noinline int 
bxe_nic_unload(struct bxe_softc *sc, uint32_t unload_mode, uint8_t keep_link); static void bxe_handle_sp_tq(void *context, int pending); static void bxe_handle_fp_tq(void *context, int pending); static int bxe_add_cdev(struct bxe_softc *sc); static void bxe_del_cdev(struct bxe_softc *sc); int bxe_grc_dump(struct bxe_softc *sc); static int bxe_alloc_buf_rings(struct bxe_softc *sc); static void bxe_free_buf_rings(struct bxe_softc *sc); /* calculate crc32 on a buffer (NOTE: crc32_length MUST be aligned to 8) */ uint32_t calc_crc32(uint8_t *crc32_packet, uint32_t crc32_length, uint32_t crc32_seed, uint8_t complement) { uint32_t byte = 0; uint32_t bit = 0; uint8_t msb = 0; uint32_t temp = 0; uint32_t shft = 0; uint8_t current_byte = 0; uint32_t crc32_result = crc32_seed; const uint32_t CRC32_POLY = 0x1edc6f41; if ((crc32_packet == NULL) || (crc32_length == 0) || ((crc32_length % 8) != 0)) { return (crc32_result); } for (byte = 0; byte < crc32_length; byte = byte + 1) { current_byte = crc32_packet[byte]; for (bit = 0; bit < 8; bit = bit + 1) { /* msb = crc32_result[31]; */ msb = (uint8_t)(crc32_result >> 31); crc32_result = crc32_result << 1; /* if (msb != current_byte[bit]) */ if (msb != (0x1 & (current_byte >> bit))) { crc32_result = crc32_result ^ CRC32_POLY; /* crc32_result[0] = 1 */ crc32_result |= 1; } } } /* Last step is to: * 1. "mirror" every bit * 2. swap the 4 bytes * 3. complement each bit */ /* Mirror */ temp = crc32_result; shft = sizeof(crc32_result) * 8 - 1; for (crc32_result >>= 1; crc32_result; crc32_result >>= 1) { temp <<= 1; temp |= crc32_result & 1; shft--; } /* temp[31-bit] = crc32_result[bit] */ temp <<= shft; /* Swap */ /* crc32_result = {temp[7:0], temp[15:8], temp[23:16], temp[31:24]} */ { uint32_t t0, t1, t2, t3; t0 = (0x000000ff & (temp >> 24)); t1 = (0x0000ff00 & (temp >> 8)); t2 = (0x00ff0000 & (temp << 8)); t3 = (0xff000000 & (temp << 24)); crc32_result = t0 | t1 | t2 | t3; } /* Complement */ if (complement) { crc32_result = ~crc32_result; } return (crc32_result); } int bxe_test_bit(int nr, volatile unsigned long *addr) { return ((atomic_load_acq_long(addr) & (1 << nr)) != 0); } void bxe_set_bit(unsigned int nr, volatile unsigned long *addr) { atomic_set_acq_long(addr, (1 << nr)); } void bxe_clear_bit(int nr, volatile unsigned long *addr) { atomic_clear_acq_long(addr, (1 << nr)); } int bxe_test_and_set_bit(int nr, volatile unsigned long *addr) { unsigned long x; nr = (1 << nr); do { x = *addr; } while (atomic_cmpset_acq_long(addr, x, x | nr) == 0); // if (x & nr) bit_was_set; else bit_was_not_set; return (x & nr); } int bxe_test_and_clear_bit(int nr, volatile unsigned long *addr) { unsigned long x; nr = (1 << nr); do { x = *addr; } while (atomic_cmpset_acq_long(addr, x, x & ~nr) == 0); // if (x & nr) bit_was_set; else bit_was_not_set; return (x & nr); } int bxe_cmpxchg(volatile int *addr, int old, int new) { int x; do { x = *addr; } while (atomic_cmpset_acq_int(addr, old, new) == 0); return (x); } /* * Get DMA memory from the OS. * * Validates that the OS has provided DMA buffers in response to a * bus_dmamap_load call and saves the physical address of those buffers. * When this callback is used, bus_dmamap_load() itself returns 0, so any * mapping failure is recorded in the bxe_dma structure (paddr and nseg are * zeroed) to pass it back to the caller. * * Returns: * Nothing.
*/ static void bxe_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct bxe_dma *dma = arg; if (error) { dma->paddr = 0; dma->nseg = 0; BLOGE(dma->sc, "Failed DMA alloc '%s' (%d)!\n", dma->msg, error); } else { dma->paddr = segs->ds_addr; dma->nseg = nseg; } } /* * Allocate a block of memory and map it for DMA. No partial completions * allowed and release any resources acquired if we can't acquire all * resources. * * Returns: * 0 = Success, !0 = Failure */ int bxe_dma_alloc(struct bxe_softc *sc, bus_size_t size, struct bxe_dma *dma, const char *msg) { int rc; if (dma->size > 0) { BLOGE(sc, "dma block '%s' already has size %lu\n", msg, (unsigned long)dma->size); return (1); } memset(dma, 0, sizeof(*dma)); /* sanity */ dma->sc = sc; dma->size = size; snprintf(dma->msg, sizeof(dma->msg), "%s", msg); rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ BCM_PAGE_SIZE, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ size, /* max map size */ 1, /* num discontinuous */ size, /* max seg size */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &dma->tag); /* returned dma tag */ if (rc != 0) { BLOGE(sc, "Failed to create dma tag for '%s' (%d)\n", msg, rc); memset(dma, 0, sizeof(*dma)); return (1); } rc = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, (BUS_DMA_NOWAIT | BUS_DMA_ZERO), &dma->map); if (rc != 0) { BLOGE(sc, "Failed to alloc dma mem for '%s' (%d)\n", msg, rc); bus_dma_tag_destroy(dma->tag); memset(dma, 0, sizeof(*dma)); return (1); } rc = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, bxe_dma_map_addr, /* BLOGD in here */ dma, BUS_DMA_NOWAIT); if (rc != 0) { BLOGE(sc, "Failed to load dma map for '%s' (%d)\n", msg, rc); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); bus_dma_tag_destroy(dma->tag); memset(dma, 0, sizeof(*dma)); return (1); } return (0); } void bxe_dma_free(struct bxe_softc *sc, struct bxe_dma *dma) { if (dma->size > 0) { DBASSERT(sc, (dma->tag != NULL), ("dma tag is NULL")); bus_dmamap_sync(dma->tag, dma->map, (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); bus_dma_tag_destroy(dma->tag); } memset(dma, 0, sizeof(*dma)); } /* * These indirect read and write routines are only during init. * The locking is handled by the MCP. 
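* For example, bxe_reg_rd_ind() below tunnels a GRC register read through PCI config space: it writes the target address to PCICFG_GRC_ADDRESS, reads the value back from PCICFG_GRC_DATA, and then clears the address register again.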
*/ void bxe_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val) { pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, addr, 4); pci_write_config(sc->dev, PCICFG_GRC_DATA, val, 4); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); } uint32_t bxe_reg_rd_ind(struct bxe_softc *sc, uint32_t addr) { uint32_t val; pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, addr, 4); val = pci_read_config(sc->dev, PCICFG_GRC_DATA, 4); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); return (val); } static int bxe_acquire_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; int cnt; /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGE(sc, "(resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE)" " resource_bit 0x%x\n", resource, resource_bit); return (-1); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + (func * 8)); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + ((func - 6) * 8)); } /* validate the resource is not already taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { BLOGE(sc, "resource (0x%x) in use (status 0x%x bit 0x%x)\n", resource, lock_status, resource_bit); return (-1); } /* try every 5ms for 5 seconds */ for (cnt = 0; cnt < 1000; cnt++) { REG_WR(sc, (hw_lock_control_reg + 4), resource_bit); lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { return (0); } DELAY(5000); } BLOGE(sc, "Resource 0x%x resource_bit 0x%x lock timeout!\n", resource, resource_bit); return (-1); } static int bxe_release_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGE(sc, "(resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE)" " resource_bit 0x%x\n", resource, resource_bit); return (-1); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + (func * 8)); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + ((func - 6) * 8)); } /* validate the resource is currently taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (!(lock_status & resource_bit)) { BLOGE(sc, "resource (0x%x) not in use (status 0x%x bit 0x%x)\n", resource, lock_status, resource_bit); return (-1); } REG_WR(sc, hw_lock_control_reg, resource_bit); return (0); } static void bxe_acquire_phy_lock(struct bxe_softc *sc) { BXE_PHY_LOCK(sc); bxe_acquire_hw_lock(sc,HW_LOCK_RESOURCE_MDIO); } static void bxe_release_phy_lock(struct bxe_softc *sc) { bxe_release_hw_lock(sc,HW_LOCK_RESOURCE_MDIO); BXE_PHY_UNLOCK(sc); } /* * Per pf misc lock must be acquired before the per port mcp lock. Otherwise, * had we done things the other way around, if two pfs from the same port * would attempt to access nvram at the same time, we could run into a * scenario such as: * pf A takes the port lock. * pf B succeeds in taking the same lock since they are from the same port. * pf A takes the per pf misc lock. Performs eeprom access. * pf A finishes. Unlocks the per pf misc lock. * Pf B takes the lock and proceeds to perform it's own access. * pf A unlocks the per port lock, while pf B is still working (!). 
* mcp takes the per port lock and corrupts pf B's access (and/or has it's own * access corrupted by pf B).* */ static int bxe_acquire_nvram_lock(struct bxe_softc *sc) { int port = SC_PORT(sc); int count, i; uint32_t val = 0; /* acquire HW lock: protect against other PFs in PF Direct Assignment */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_NVRAM); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* request access to nvram interface */ REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_SET1 << port)); for (i = 0; i < count*10; i++) { val = REG_RD(sc, MCP_REG_MCPR_NVM_SW_ARB); if (val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port)) { break; } DELAY(5); } if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { BLOGE(sc, "Cannot get access to nvram interface " "port %d val 0x%x (MCPR_NVM_SW_ARB_ARB_ARB1 << port)\n", port, val); return (-1); } return (0); } static int bxe_release_nvram_lock(struct bxe_softc *sc) { int port = SC_PORT(sc); int count, i; uint32_t val = 0; /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* relinquish nvram interface */ REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << port)); for (i = 0; i < count*10; i++) { val = REG_RD(sc, MCP_REG_MCPR_NVM_SW_ARB); if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { break; } DELAY(5); } if (val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port)) { BLOGE(sc, "Cannot free access to nvram interface " "port %d val 0x%x (MCPR_NVM_SW_ARB_ARB_ARB1 << port)\n", port, val); return (-1); } /* release HW lock: protect against other PFs in PF Direct Assignment */ bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_NVRAM); return (0); } static void bxe_enable_nvram_access(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE); /* enable both bits, even on read */ REG_WR(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE, (val | MCPR_NVM_ACCESS_ENABLE_EN | MCPR_NVM_ACCESS_ENABLE_WR_EN)); } static void bxe_disable_nvram_access(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE); /* disable both bits, even after read */ REG_WR(sc, MCP_REG_MCPR_NVM_ACCESS_ENABLE, (val & ~(MCPR_NVM_ACCESS_ENABLE_EN | MCPR_NVM_ACCESS_ENABLE_WR_EN))); } static int bxe_nvram_read_dword(struct bxe_softc *sc, uint32_t offset, uint32_t *ret_val, uint32_t cmd_flags) { int count, i, rc; uint32_t val; /* build the command word */ cmd_flags |= MCPR_NVM_COMMAND_DOIT; /* need to clear DONE bit separately */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, MCPR_NVM_COMMAND_DONE); /* address of the NVRAM to read from */ REG_WR(sc, MCP_REG_MCPR_NVM_ADDR, (offset & MCPR_NVM_ADDR_NVM_ADDR_VALUE)); /* issue a read command */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, cmd_flags); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* wait for completion */ *ret_val = 0; rc = -1; for (i = 0; i < count; i++) { DELAY(5); val = REG_RD(sc, MCP_REG_MCPR_NVM_COMMAND); if (val & MCPR_NVM_COMMAND_DONE) { val = REG_RD(sc, MCP_REG_MCPR_NVM_READ); /* we read nvram data in cpu order * but ethtool sees it as an array of bytes * converting to big-endian will do the work */ *ret_val = htobe32(val); rc = 0; break; } } if (rc == -1) { BLOGE(sc, "nvram read timeout expired " "(offset 0x%x cmd_flags 0x%x val 0x%x)\n", offset, cmd_flags, val); } return (rc); } static int bxe_nvram_read(struct bxe_softc *sc, uint32_t offset, uint8_t *ret_buf, int buf_size) { uint32_t cmd_flags; uint32_t val; int rc; 
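/* validate the request first: reads must be dword aligned, non-empty, and fall entirely within the flash */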
if ((offset & 0x03) || (buf_size & 0x03) || (buf_size == 0)) { BLOGE(sc, "Invalid parameter, offset 0x%x buf_size 0x%x\n", offset, buf_size); return (-1); } if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); /* read the first word(s) */ cmd_flags = MCPR_NVM_COMMAND_FIRST; while ((buf_size > sizeof(uint32_t)) && (rc == 0)) { rc = bxe_nvram_read_dword(sc, offset, &val, cmd_flags); memcpy(ret_buf, &val, 4); /* advance to the next dword */ offset += sizeof(uint32_t); ret_buf += sizeof(uint32_t); buf_size -= sizeof(uint32_t); cmd_flags = 0; } if (rc == 0) { cmd_flags |= MCPR_NVM_COMMAND_LAST; rc = bxe_nvram_read_dword(sc, offset, &val, cmd_flags); memcpy(ret_buf, &val, 4); } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } static int bxe_nvram_write_dword(struct bxe_softc *sc, uint32_t offset, uint32_t val, uint32_t cmd_flags) { int count, i, rc; /* build the command word */ cmd_flags |= (MCPR_NVM_COMMAND_DOIT | MCPR_NVM_COMMAND_WR); /* need to clear DONE bit separately */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, MCPR_NVM_COMMAND_DONE); /* write the data */ REG_WR(sc, MCP_REG_MCPR_NVM_WRITE, val); /* address of the NVRAM to write to */ REG_WR(sc, MCP_REG_MCPR_NVM_ADDR, (offset & MCPR_NVM_ADDR_NVM_ADDR_VALUE)); /* issue the write command */ REG_WR(sc, MCP_REG_MCPR_NVM_COMMAND, cmd_flags); /* adjust timeout for emulation/FPGA */ count = NVRAM_TIMEOUT_COUNT; if (CHIP_REV_IS_SLOW(sc)) { count *= 100; } /* wait for completion */ rc = -1; for (i = 0; i < count; i++) { DELAY(5); val = REG_RD(sc, MCP_REG_MCPR_NVM_COMMAND); if (val & MCPR_NVM_COMMAND_DONE) { rc = 0; break; } } if (rc == -1) { BLOGE(sc, "nvram write timeout expired " "(offset 0x%x cmd_flags 0x%x val 0x%x)\n", offset, cmd_flags, val); } return (rc); } #define BYTE_OFFSET(offset) (8 * (offset & 0x03)) static int bxe_nvram_write1(struct bxe_softc *sc, uint32_t offset, uint8_t *data_buf, int buf_size) { uint32_t cmd_flags; uint32_t align_offset; uint32_t val; int rc; if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); cmd_flags = (MCPR_NVM_COMMAND_FIRST | MCPR_NVM_COMMAND_LAST); align_offset = (offset & ~0x03); rc = bxe_nvram_read_dword(sc, align_offset, &val, cmd_flags); if (rc == 0) { val &= ~(0xff << BYTE_OFFSET(offset)); val |= (*data_buf << BYTE_OFFSET(offset)); /* nvram data is returned as an array of bytes * convert it back to cpu order */ val = be32toh(val); rc = bxe_nvram_write_dword(sc, align_offset, val, cmd_flags); } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } static int bxe_nvram_write(struct bxe_softc *sc, uint32_t offset, uint8_t *data_buf, int buf_size) { uint32_t cmd_flags; uint32_t val; uint32_t written_so_far; int rc; if (buf_size == 1) { return (bxe_nvram_write1(sc, offset, data_buf, buf_size)); } if ((offset & 0x03) || (buf_size & 0x03) /* || (buf_size == 0) */) { BLOGE(sc, "Invalid 
parameter, offset 0x%x buf_size 0x%x\n", offset, buf_size); return (-1); } if (buf_size == 0) { return (0); /* nothing to do */ } if ((offset + buf_size) > sc->devinfo.flash_size) { BLOGE(sc, "Invalid parameter, " "offset 0x%x + buf_size 0x%x > flash_size 0x%x\n", offset, buf_size, sc->devinfo.flash_size); return (-1); } /* request access to nvram interface */ rc = bxe_acquire_nvram_lock(sc); if (rc) { return (rc); } /* enable access to nvram interface */ bxe_enable_nvram_access(sc); written_so_far = 0; cmd_flags = MCPR_NVM_COMMAND_FIRST; while ((written_so_far < buf_size) && (rc == 0)) { if (written_so_far == (buf_size - sizeof(uint32_t))) { cmd_flags |= MCPR_NVM_COMMAND_LAST; } else if (((offset + 4) % NVRAM_PAGE_SIZE) == 0) { cmd_flags |= MCPR_NVM_COMMAND_LAST; } else if ((offset % NVRAM_PAGE_SIZE) == 0) { cmd_flags |= MCPR_NVM_COMMAND_FIRST; } memcpy(&val, data_buf, 4); rc = bxe_nvram_write_dword(sc, offset, val, cmd_flags); /* advance to the next dword */ offset += sizeof(uint32_t); data_buf += sizeof(uint32_t); written_so_far += sizeof(uint32_t); cmd_flags = 0; } /* disable access to nvram interface */ bxe_disable_nvram_access(sc); bxe_release_nvram_lock(sc); return (rc); } /* copy command into DMAE command memory and set DMAE command Go */ void bxe_post_dmae(struct bxe_softc *sc, struct dmae_cmd *dmae, int idx) { uint32_t cmd_offset; int i; cmd_offset = (DMAE_REG_CMD_MEM + (sizeof(struct dmae_cmd) * idx)); for (i = 0; i < ((sizeof(struct dmae_cmd) / 4)); i++) { REG_WR(sc, (cmd_offset + (i * 4)), *(((uint32_t *)dmae) + i)); } REG_WR(sc, dmae_reg_go_c[idx], 1); } uint32_t bxe_dmae_opcode_add_comp(uint32_t opcode, uint8_t comp_type) { return (opcode | ((comp_type << DMAE_CMD_C_DST_SHIFT) | DMAE_CMD_C_TYPE_ENABLE)); } uint32_t bxe_dmae_opcode_clr_src_reset(uint32_t opcode) { return (opcode & ~DMAE_CMD_SRC_RESET); } uint32_t bxe_dmae_opcode(struct bxe_softc *sc, uint8_t src_type, uint8_t dst_type, uint8_t with_comp, uint8_t comp_type) { uint32_t opcode = 0; opcode |= ((src_type << DMAE_CMD_SRC_SHIFT) | (dst_type << DMAE_CMD_DST_SHIFT)); opcode |= (DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET); opcode |= (SC_PORT(sc) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0); opcode |= ((SC_VN(sc) << DMAE_CMD_E1HVN_SHIFT) | (SC_VN(sc) << DMAE_CMD_DST_VN_SHIFT)); opcode |= (DMAE_COM_SET_ERR << DMAE_CMD_ERR_POLICY_SHIFT); #ifdef __BIG_ENDIAN opcode |= DMAE_CMD_ENDIANITY_B_DW_SWAP; #else opcode |= DMAE_CMD_ENDIANITY_DW_SWAP; #endif if (with_comp) { opcode = bxe_dmae_opcode_add_comp(opcode, comp_type); } return (opcode); } static void bxe_prep_dmae_with_comp(struct bxe_softc *sc, struct dmae_cmd *dmae, uint8_t src_type, uint8_t dst_type) { memset(dmae, 0, sizeof(struct dmae_cmd)); /* set the opcode */ dmae->opcode = bxe_dmae_opcode(sc, src_type, dst_type, TRUE, DMAE_COMP_PCI); /* fill in the completion parameters */ dmae->comp_addr_lo = U64_LO(BXE_SP_MAPPING(sc, wb_comp)); dmae->comp_addr_hi = U64_HI(BXE_SP_MAPPING(sc, wb_comp)); dmae->comp_val = DMAE_COMP_VAL; } /* issue a DMAE command over the init channel and wait for completion */ static int bxe_issue_dmae_with_comp(struct bxe_softc *sc, struct dmae_cmd *dmae) { uint32_t *wb_comp = BXE_SP(sc, wb_comp); int timeout = CHIP_REV_IS_SLOW(sc) ? 
400000 : 4000; BXE_DMAE_LOCK(sc); /* reset completion */ *wb_comp = 0; /* post the command on the channel used for initializations */ bxe_post_dmae(sc, dmae, INIT_DMAE_C(sc)); /* wait for completion */ DELAY(5); while ((*wb_comp & ~DMAE_PCI_ERR_FLAG) != DMAE_COMP_VAL) { if (!timeout || (sc->recovery_state != BXE_RECOVERY_DONE && sc->recovery_state != BXE_RECOVERY_NIC_LOADING)) { BLOGE(sc, "DMAE timeout! *wb_comp 0x%x recovery_state 0x%x\n", *wb_comp, sc->recovery_state); BXE_DMAE_UNLOCK(sc); return (DMAE_TIMEOUT); } timeout--; DELAY(50); } if (*wb_comp & DMAE_PCI_ERR_FLAG) { BLOGE(sc, "DMAE PCI error! *wb_comp 0x%x recovery_state 0x%x\n", *wb_comp, sc->recovery_state); BXE_DMAE_UNLOCK(sc); return (DMAE_PCI_ERROR); } BXE_DMAE_UNLOCK(sc); return (0); } void bxe_read_dmae(struct bxe_softc *sc, uint32_t src_addr, uint32_t len32) { struct dmae_cmd dmae; uint32_t *data; int i, rc; DBASSERT(sc, (len32 <= 4), ("DMAE read length is %d", len32)); if (!sc->dmae_ready) { data = BXE_SP(sc, wb_data[0]); for (i = 0; i < len32; i++) { data[i] = (CHIP_IS_E1(sc)) ? bxe_reg_rd_ind(sc, (src_addr + (i * 4))) : REG_RD(sc, (src_addr + (i * 4))); } return; } /* set opcode and fixed command fields */ bxe_prep_dmae_with_comp(sc, &dmae, DMAE_SRC_GRC, DMAE_DST_PCI); /* fill in addresses and len */ dmae.src_addr_lo = (src_addr >> 2); /* GRC addr has dword resolution */ dmae.src_addr_hi = 0; dmae.dst_addr_lo = U64_LO(BXE_SP_MAPPING(sc, wb_data)); dmae.dst_addr_hi = U64_HI(BXE_SP_MAPPING(sc, wb_data)); dmae.len = len32; /* issue the command and wait for completion */ if ((rc = bxe_issue_dmae_with_comp(sc, &dmae)) != 0) { bxe_panic(sc, ("DMAE failed (%d)\n", rc)); } } void bxe_write_dmae(struct bxe_softc *sc, bus_addr_t dma_addr, uint32_t dst_addr, uint32_t len32) { struct dmae_cmd dmae; int rc; if (!sc->dmae_ready) { DBASSERT(sc, (len32 <= 4), ("DMAE not ready and length is %d", len32)); if (CHIP_IS_E1(sc)) { ecore_init_ind_wr(sc, dst_addr, BXE_SP(sc, wb_data[0]), len32); } else { ecore_init_str_wr(sc, dst_addr, BXE_SP(sc, wb_data[0]), len32); } return; } /* set opcode and fixed command fields */ bxe_prep_dmae_with_comp(sc, &dmae, DMAE_SRC_PCI, DMAE_DST_GRC); /* fill in addresses and len */ dmae.src_addr_lo = U64_LO(dma_addr); dmae.src_addr_hi = U64_HI(dma_addr); dmae.dst_addr_lo = (dst_addr >> 2); /* GRC addr has dword resolution */ dmae.dst_addr_hi = 0; dmae.len = len32; /* issue the command and wait for completion */ if ((rc = bxe_issue_dmae_with_comp(sc, &dmae)) != 0) { bxe_panic(sc, ("DMAE failed (%d)\n", rc)); } } void bxe_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { int dmae_wr_max = DMAE_LEN32_WR_MAX(sc); int offset = 0; while (len > dmae_wr_max) { bxe_write_dmae(sc, (phys_addr + offset), /* src DMA address */ (addr + offset), /* dst GRC address */ dmae_wr_max); offset += (dmae_wr_max * 4); len -= dmae_wr_max; } bxe_write_dmae(sc, (phys_addr + offset), /* src DMA address */ (addr + offset), /* dst GRC address */ len); } void bxe_set_ctx_validation(struct bxe_softc *sc, struct eth_context *cxt, uint32_t cid) { /* ustorm cxt validation */ cxt->ustorm_ag_context.cdu_usage = CDU_RSRVD_VALUE_TYPE_A(HW_CID(sc, cid), CDU_REGION_NUMBER_UCM_AG, ETH_CONNECTION_TYPE); /* xcontext validation */ cxt->xstorm_ag_context.cdu_reserved = CDU_RSRVD_VALUE_TYPE_A(HW_CID(sc, cid), CDU_REGION_NUMBER_XCM_AG, ETH_CONNECTION_TYPE); } static void bxe_storm_memset_hc_timeout(struct bxe_softc *sc, uint8_t port, uint8_t fw_sb_id, uint8_t sb_index, uint8_t ticks) { uint32_t addr = 
(BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index)); REG_WR8(sc, addr, ticks); BLOGD(sc, DBG_LOAD, "port %d fw_sb_id %d sb_index %d ticks %d\n", port, fw_sb_id, sb_index, ticks); } static void bxe_storm_memset_hc_disable(struct bxe_softc *sc, uint8_t port, uint16_t fw_sb_id, uint8_t sb_index, uint8_t disable) { uint32_t enable_flag = (disable) ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT); uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index)); uint8_t flags; /* clear and set */ flags = REG_RD8(sc, addr); flags &= ~HC_INDEX_DATA_HC_ENABLED; flags |= enable_flag; REG_WR8(sc, addr, flags); BLOGD(sc, DBG_LOAD, "port %d fw_sb_id %d sb_index %d disable %d\n", port, fw_sb_id, sb_index, disable); } void bxe_update_coalesce_sb_index(struct bxe_softc *sc, uint8_t fw_sb_id, uint8_t sb_index, uint8_t disable, uint16_t usec) { int port = SC_PORT(sc); uint8_t ticks = (usec / 4); /* XXX ??? */ bxe_storm_memset_hc_timeout(sc, port, fw_sb_id, sb_index, ticks); disable = (disable) ? 1 : ((usec) ? 0 : 1); bxe_storm_memset_hc_disable(sc, port, fw_sb_id, sb_index, disable); } void elink_cb_udelay(struct bxe_softc *sc, uint32_t usecs) { DELAY(usecs); } uint32_t elink_cb_reg_read(struct bxe_softc *sc, uint32_t reg_addr) { return (REG_RD(sc, reg_addr)); } void elink_cb_reg_write(struct bxe_softc *sc, uint32_t reg_addr, uint32_t val) { REG_WR(sc, reg_addr, val); } void elink_cb_reg_wb_write(struct bxe_softc *sc, uint32_t offset, uint32_t *wb_write, uint16_t len) { REG_WR_DMAE(sc, offset, wb_write, len); } void elink_cb_reg_wb_read(struct bxe_softc *sc, uint32_t offset, uint32_t *wb_write, uint16_t len) { REG_RD_DMAE(sc, offset, wb_write, len); } uint8_t elink_cb_path_id(struct bxe_softc *sc) { return (SC_PATH(sc)); } void elink_cb_event_log(struct bxe_softc *sc, const elink_log_id_t elink_log_id, ...) { /* XXX */ BLOGI(sc, "ELINK EVENT LOG (%d)\n", elink_log_id); } static int bxe_set_spio(struct bxe_softc *sc, int spio, uint32_t mode) { uint32_t spio_reg; /* Only 2 SPIOs are configurable */ if ((spio != MISC_SPIO_SPIO4) && (spio != MISC_SPIO_SPIO5)) { BLOGE(sc, "Invalid SPIO 0x%x mode 0x%x\n", spio, mode); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_SPIO); /* read SPIO and mask except the float bits */ spio_reg = (REG_RD(sc, MISC_REG_SPIO) & MISC_SPIO_FLOAT); switch (mode) { case MISC_SPIO_OUTPUT_LOW: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> output low\n", spio); /* clear FLOAT and set CLR */ spio_reg &= ~(spio << MISC_SPIO_FLOAT_POS); spio_reg |= (spio << MISC_SPIO_CLR_POS); break; case MISC_SPIO_OUTPUT_HIGH: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> output high\n", spio); /* clear FLOAT and set SET */ spio_reg &= ~(spio << MISC_SPIO_FLOAT_POS); spio_reg |= (spio << MISC_SPIO_SET_POS); break; case MISC_SPIO_INPUT_HI_Z: BLOGD(sc, DBG_LOAD, "Set SPIO 0x%x -> input\n", spio); /* set FLOAT */ spio_reg |= (spio << MISC_SPIO_FLOAT_POS); break; default: break; } REG_WR(sc, MISC_REG_SPIO, spio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_SPIO); return (0); } static int bxe_gpio_read(struct bxe_softc *sc, int gpio_num, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? 
MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d port 0x%x gpio_port %d gpio_shift %d" " gpio_mask 0x%x\n", gpio_num, port, gpio_port, gpio_shift, gpio_mask); return (-1); } /* read GPIO value */ gpio_reg = REG_RD(sc, MISC_REG_GPIO); /* get the requested pin value */ return ((gpio_reg & gpio_mask) == gpio_mask) ? 1 : 0; } static int bxe_gpio_write(struct bxe_softc *sc, int gpio_num, uint32_t mode, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d mode 0x%x port 0x%x gpio_port %d" " gpio_shift %d gpio_mask 0x%x\n", gpio_num, mode, port, gpio_port, gpio_shift, gpio_mask); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO and mask except the float bits */ gpio_reg = (REG_RD(sc, MISC_REG_GPIO) & MISC_REGISTERS_GPIO_FLOAT); switch (mode) { case MISC_REGISTERS_GPIO_OUTPUT_LOW: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> output low\n", gpio_num, gpio_shift); /* clear FLOAT and set CLR */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_CLR_POS); break; case MISC_REGISTERS_GPIO_OUTPUT_HIGH: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> output high\n", gpio_num, gpio_shift); /* clear FLOAT and set SET */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_SET_POS); break; case MISC_REGISTERS_GPIO_INPUT_HI_Z: BLOGD(sc, DBG_PHY, "Set GPIO %d (shift %d) -> input\n", gpio_num, gpio_shift); /* set FLOAT */ gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_FLOAT_POS); break; default: break; } REG_WR(sc, MISC_REG_GPIO, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } static int bxe_gpio_mult_write(struct bxe_softc *sc, uint8_t pins, uint32_t mode) { uint32_t gpio_reg; /* any port swapping should be handled by caller */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO and mask except the float bits */ gpio_reg = REG_RD(sc, MISC_REG_GPIO); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_FLOAT_POS); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_CLR_POS); gpio_reg &= ~(pins << MISC_REGISTERS_GPIO_SET_POS); switch (mode) { case MISC_REGISTERS_GPIO_OUTPUT_LOW: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> output low\n", pins); /* set CLR */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_CLR_POS); break; case MISC_REGISTERS_GPIO_OUTPUT_HIGH: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> output high\n", pins); /* set SET */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_SET_POS); break; case MISC_REGISTERS_GPIO_INPUT_HI_Z: BLOGD(sc, DBG_PHY, "Set GPIO 0x%x -> input\n", pins); /* set FLOAT */ gpio_reg |= (pins << MISC_REGISTERS_GPIO_FLOAT_POS); break; default: BLOGE(sc, "Invalid GPIO mode assignment pins 0x%x mode 0x%x" " gpio_reg 0x%x\n", pins, mode, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (-1); } REG_WR(sc, MISC_REG_GPIO, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } static int bxe_gpio_int_write(struct bxe_softc *sc, int gpio_num, uint32_t mode, uint8_t port) { /* The GPIO should be swapped if swap register is set and active */ int gpio_port = ((REG_RD(sc, NIG_REG_PORT_SWAP) && REG_RD(sc, 
NIG_REG_STRAP_OVERRIDE)) ^ port); int gpio_shift = (gpio_num + (gpio_port ? MISC_REGISTERS_GPIO_PORT_SHIFT : 0)); uint32_t gpio_mask = (1 << gpio_shift); uint32_t gpio_reg; if (gpio_num > MISC_REGISTERS_GPIO_3) { BLOGE(sc, "Invalid GPIO %d mode 0x%x port 0x%x gpio_port %d" " gpio_shift %d gpio_mask 0x%x\n", gpio_num, mode, port, gpio_port, gpio_shift, gpio_mask); return (-1); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); /* read GPIO int */ gpio_reg = REG_RD(sc, MISC_REG_GPIO_INT); switch (mode) { case MISC_REGISTERS_GPIO_INT_OUTPUT_CLR: BLOGD(sc, DBG_PHY, "Clear GPIO INT %d (shift %d) -> output low\n", gpio_num, gpio_shift); /* clear SET and set CLR */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_INT_SET_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_INT_CLR_POS); break; case MISC_REGISTERS_GPIO_INT_OUTPUT_SET: BLOGD(sc, DBG_PHY, "Set GPIO INT %d (shift %d) -> output high\n", gpio_num, gpio_shift); /* clear CLR and set SET */ gpio_reg &= ~(gpio_mask << MISC_REGISTERS_GPIO_INT_CLR_POS); gpio_reg |= (gpio_mask << MISC_REGISTERS_GPIO_INT_SET_POS); break; default: break; } REG_WR(sc, MISC_REG_GPIO_INT, gpio_reg); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO); return (0); } uint32_t elink_cb_gpio_read(struct bxe_softc *sc, uint16_t gpio_num, uint8_t port) { return (bxe_gpio_read(sc, gpio_num, port)); } uint8_t elink_cb_gpio_write(struct bxe_softc *sc, uint16_t gpio_num, uint8_t mode, /* 0=low 1=high */ uint8_t port) { return (bxe_gpio_write(sc, gpio_num, mode, port)); } uint8_t elink_cb_gpio_mult_write(struct bxe_softc *sc, uint8_t pins, uint8_t mode) /* 0=low 1=high */ { return (bxe_gpio_mult_write(sc, pins, mode)); } uint8_t elink_cb_gpio_int_write(struct bxe_softc *sc, uint16_t gpio_num, uint8_t mode, /* 0=low 1=high */ uint8_t port) { return (bxe_gpio_int_write(sc, gpio_num, mode, port)); } void elink_cb_notify_link_changed(struct bxe_softc *sc) { REG_WR(sc, (MISC_REG_AEU_GENERAL_ATTN_12 + (SC_FUNC(sc) * sizeof(uint32_t))), 1); } /* send the MCP a request, block until there is a reply */ uint32_t elink_cb_fw_command(struct bxe_softc *sc, uint32_t command, uint32_t param) { int mb_idx = SC_FW_MB_IDX(sc); uint32_t seq; uint32_t rc = 0; uint32_t cnt = 1; uint8_t delay = CHIP_REV_IS_SLOW(sc) ? 100 : 10; BXE_FWMB_LOCK(sc); seq = ++sc->fw_seq; SHMEM_WR(sc, func_mb[mb_idx].drv_mb_param, param); SHMEM_WR(sc, func_mb[mb_idx].drv_mb_header, (command | seq)); BLOGD(sc, DBG_PHY, "wrote command 0x%08x to FW MB param 0x%08x\n", (command | seq), param); /* Let the FW do its magic. Give it up to 5 seconds... */ do { DELAY(delay * 1000); rc = SHMEM_RD(sc, func_mb[mb_idx].fw_mb_header); } while ((seq != (rc & FW_MSG_SEQ_NUMBER_MASK)) && (cnt++ < 500)); BLOGD(sc, DBG_PHY, "[after %d ms] read 0x%x seq 0x%x from FW MB\n", cnt*delay, rc, seq); /* is this a reply to our command? */ if (seq == (rc & FW_MSG_SEQ_NUMBER_MASK)) { rc &= FW_MSG_CODE_MASK; } else { /* Ruh-roh!
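* The sequence number echoed in fw_mb_header never matched ours, so the MCP never answered the request; fall through and return 0 so the caller sees no response code.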
*/ BLOGE(sc, "FW failed to respond!\n"); // XXX bxe_fw_dump(sc); rc = 0; } BXE_FWMB_UNLOCK(sc); return (rc); } static uint32_t bxe_fw_command(struct bxe_softc *sc, uint32_t command, uint32_t param) { return (elink_cb_fw_command(sc, command, param)); } static void __storm_memset_dma_mapping(struct bxe_softc *sc, uint32_t addr, bus_addr_t mapping) { REG_WR(sc, addr, U64_LO(mapping)); REG_WR(sc, (addr + 4), U64_HI(mapping)); } static void storm_memset_spq_addr(struct bxe_softc *sc, bus_addr_t mapping, uint16_t abs_fid) { uint32_t addr = (XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PAGE_BASE_OFFSET(abs_fid)); __storm_memset_dma_mapping(sc, addr, mapping); } static void storm_memset_vf_to_pf(struct bxe_softc *sc, uint16_t abs_fid, uint16_t pf_id) { REG_WR8(sc, (BAR_XSTRORM_INTMEM + XSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); REG_WR8(sc, (BAR_USTRORM_INTMEM + USTORM_VF_TO_PF_OFFSET(abs_fid)), pf_id); } static void storm_memset_func_en(struct bxe_softc *sc, uint16_t abs_fid, uint8_t enable) { REG_WR8(sc, (BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(abs_fid)), enable); REG_WR8(sc, (BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(abs_fid)), enable); } static void storm_memset_eq_data(struct bxe_softc *sc, struct event_ring_data *eq_data, uint16_t pfid) { uint32_t addr; size_t size; addr = (BAR_CSTRORM_INTMEM + CSTORM_EVENT_RING_DATA_OFFSET(pfid)); size = sizeof(struct event_ring_data); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)eq_data); } static void storm_memset_eq_prod(struct bxe_softc *sc, uint16_t eq_prod, uint16_t pfid) { uint32_t addr = (BAR_CSTRORM_INTMEM + CSTORM_EVENT_RING_PROD_OFFSET(pfid)); REG_WR16(sc, addr, eq_prod); } /* * Post a slowpath command. * * A slowpath command is used to propagate a configuration change through * the controller in a controlled manner, allowing each STORM processor and * other H/W blocks to phase in the change. The commands sent on the * slowpath are referred to as ramrods. Depending on the ramrod used the * completion of the ramrod will occur in different ways. Here's a * breakdown of ramrods and how they complete: * * RAMROD_CMD_ID_ETH_PORT_SETUP * Used to setup the leading connection on a port. Completes on the * Receive Completion Queue (RCQ) of that port (typically fp[0]). * * RAMROD_CMD_ID_ETH_CLIENT_SETUP * Used to setup an additional connection on a port. Completes on the * RCQ of the multi-queue/RSS connection being initialized. * * RAMROD_CMD_ID_ETH_STAT_QUERY * Used to force the storm processors to update the statistics database * in host memory. This ramrod is sent on the leading connection CID and * completes as an index increment of the CSTORM on the default status * block. * * RAMROD_CMD_ID_ETH_UPDATE * Used to update the state of the leading connection, usually to update * the RSS indirection table. Completes on the RCQ of the leading * connection. (Not currently used under FreeBSD until OS support becomes * available.) * * RAMROD_CMD_ID_ETH_HALT * Used when tearing down a connection prior to driver unload. Completes * on the RCQ of the multi-queue/RSS connection being torn down. Don't * use this on the leading connection. * * RAMROD_CMD_ID_ETH_SET_MAC * Sets the Unicast/Broadcast/Multicast used by the port.
Completes on * the RCQ of the leading connection. * * RAMROD_CMD_ID_ETH_CFC_DEL * Used when tearing down a connection prior to driver unload. Completes * on the RCQ of the leading connection (since the current connection * has been completely removed from controller memory). * * RAMROD_CMD_ID_ETH_PORT_DEL * Used to tear down the leading connection prior to driver unload, * typically fp[0]. Completes as an index increment of the CSTORM on the * default status block. * * RAMROD_CMD_ID_ETH_FORWARD_SETUP * Used for connection offload. Completes on the RCQ of the multi-queue * RSS connection that is being offloaded. (Not currently used under * FreeBSD.) * * There can only be one command pending per function. * * Returns: * 0 = Success, !0 = Failure. */ /* must be called under the spq lock */ static inline struct eth_spe *bxe_sp_get_next(struct bxe_softc *sc) { struct eth_spe *next_spe = sc->spq_prod_bd; if (sc->spq_prod_bd == sc->spq_last_bd) { /* wrap back to the first eth_spq */ sc->spq_prod_bd = sc->spq; sc->spq_prod_idx = 0; } else { sc->spq_prod_bd++; sc->spq_prod_idx++; } return (next_spe); } /* must be called under the spq lock */ static inline void bxe_sp_prod_update(struct bxe_softc *sc) { int func = SC_FUNC(sc); /* * Make sure that BD data is updated before writing the producer. * BD data is written to the memory, the producer is read from the * memory, thus we need a full memory barrier to ensure the ordering. */ mb(); REG_WR16(sc, (BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func)), sc->spq_prod_idx); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); } /** * bxe_is_contextless_ramrod - check if the current command ends on EQ * * @cmd: command to check * @cmd_type: command type */ static inline int bxe_is_contextless_ramrod(int cmd, int cmd_type) { if ((cmd_type == NONE_CONNECTION_TYPE) || (cmd == RAMROD_CMD_ID_ETH_FORWARD_SETUP) || (cmd == RAMROD_CMD_ID_ETH_CLASSIFICATION_RULES) || (cmd == RAMROD_CMD_ID_ETH_FILTER_RULES) || (cmd == RAMROD_CMD_ID_ETH_MULTICAST_RULES) || (cmd == RAMROD_CMD_ID_ETH_SET_MAC) || (cmd == RAMROD_CMD_ID_ETH_RSS_UPDATE)) { return (TRUE); } else { return (FALSE); } } /** * bxe_sp_post - place a single command on an SP ring * * @sc: driver handle * @command: command to place (e.g. SETUP, FILTER_RULES, etc.) * @cid: SW CID the command is related to * @data_hi: command private data address (high 32 bits) * @data_lo: command private data address (low 32 bits) * @cmd_type: command type (e.g. NONE, ETH) * * SP data is handled as if it's always an address pair, thus data fields are * not swapped to little endian in upper functions. Instead this function swaps * data as if it's two uint32 fields.
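* * Illustrative use only (a hypothetical caller; 'cid' and 'mapping' stand for a valid SW CID and the bus address of the command's DMA-mapped parameters): * * rc = bxe_sp_post(sc, RAMROD_CMD_ID_ETH_CLASSIFICATION_RULES, cid, U64_HI(mapping), U64_LO(mapping), ETH_CONNECTION_TYPE);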
*/ int bxe_sp_post(struct bxe_softc *sc, int command, int cid, uint32_t data_hi, uint32_t data_lo, int cmd_type) { struct eth_spe *spe; uint16_t type; int common; common = bxe_is_contextless_ramrod(command, cmd_type); BXE_SP_LOCK(sc); if (common) { if (!atomic_load_acq_long(&sc->eq_spq_left)) { BLOGE(sc, "EQ ring is full!\n"); BXE_SP_UNLOCK(sc); return (-1); } } else { if (!atomic_load_acq_long(&sc->cq_spq_left)) { BLOGE(sc, "SPQ ring is full!\n"); BXE_SP_UNLOCK(sc); return (-1); } } spe = bxe_sp_get_next(sc); /* CID needs port number to be encoded in it */ spe->hdr.conn_and_cmd_data = htole32((command << SPE_HDR_T_CMD_ID_SHIFT) | HW_CID(sc, cid)); type = (cmd_type << SPE_HDR_T_CONN_TYPE_SHIFT) & SPE_HDR_T_CONN_TYPE; /* TBD: Check if it works for VFs */ type |= ((SC_FUNC(sc) << SPE_HDR_T_FUNCTION_ID_SHIFT) & SPE_HDR_T_FUNCTION_ID); spe->hdr.type = htole16(type); spe->data.update_data_addr.hi = htole32(data_hi); spe->data.update_data_addr.lo = htole32(data_lo); /* * It's ok if the actual decrement is issued towards the memory * somewhere between the lock and unlock. Thus no more explicit * memory barrier is needed. */ if (common) { atomic_subtract_acq_long(&sc->eq_spq_left, 1); } else { atomic_subtract_acq_long(&sc->cq_spq_left, 1); } BLOGD(sc, DBG_SP, "SPQE -> %#jx\n", (uintmax_t)sc->spq_dma.paddr); BLOGD(sc, DBG_SP, "FUNC_RDATA -> %p / %#jx\n", BXE_SP(sc, func_rdata), (uintmax_t)BXE_SP_MAPPING(sc, func_rdata)); BLOGD(sc, DBG_SP, "SPQE[%x] (%x:%x) (cmd, common?) (%d,%d) hw_cid %x data (%x:%x) type(0x%x) left (CQ, EQ) (%lx,%lx)\n", sc->spq_prod_idx, (uint32_t)U64_HI(sc->spq_dma.paddr), (uint32_t)(U64_LO(sc->spq_dma.paddr) + (uint8_t *)sc->spq_prod_bd - (uint8_t *)sc->spq), command, common, HW_CID(sc, cid), data_hi, data_lo, type, atomic_load_acq_long(&sc->cq_spq_left), atomic_load_acq_long(&sc->eq_spq_left)); bxe_sp_prod_update(sc); BXE_SP_UNLOCK(sc); return (0); } /** * bxe_debug_print_ind_table - prints the indirection table configuration. * * @sc: driver handle * @p: pointer to rss configuration */ /* * FreeBSD Device probe function. * * Compares the device found to the driver's list of supported devices and * reports back to the BSD loader whether this is the right driver for the device. * This is the driver entry function called from the "kldload" command. * * Returns: * BUS_PROBE_DEFAULT on success, positive value on failure. */ static int bxe_probe(device_t dev) { struct bxe_device_type *t; char *descbuf; uint16_t did, sdid, svid, vid; /* Find our device structure */ t = bxe_devs; /* Get the data for the device to be probed. */ vid = pci_get_vendor(dev); did = pci_get_device(dev); svid = pci_get_subvendor(dev); sdid = pci_get_subdevice(dev); /* Look through the list of known devices for a match. */ while (t->bxe_name != NULL) { if ((vid == t->bxe_vid) && (did == t->bxe_did) && ((svid == t->bxe_svid) || (t->bxe_svid == PCI_ANY_ID)) && ((sdid == t->bxe_sdid) || (t->bxe_sdid == PCI_ANY_ID))) { descbuf = malloc(BXE_DEVDESC_MAX, M_TEMP, M_NOWAIT); if (descbuf == NULL) return (ENOMEM); /* Print out the device identity.
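* The PCI revision ID is decoded into a human-readable board rev below: the high nibble is printed as a letter ('A' + nibble) and the low nibble as a number.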
*/ snprintf(descbuf, BXE_DEVDESC_MAX, "%s (%c%d) BXE v:%s\n", t->bxe_name, (((pci_read_config(dev, PCIR_REVID, 4) & 0xf0) >> 4) + 'A'), (pci_read_config(dev, PCIR_REVID, 4) & 0xf), BXE_DRIVER_VERSION); device_set_desc_copy(dev, descbuf); free(descbuf, M_TEMP); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static void bxe_init_mutexes(struct bxe_softc *sc) { #ifdef BXE_CORE_LOCK_SX snprintf(sc->core_sx_name, sizeof(sc->core_sx_name), "bxe%d_core_lock", sc->unit); sx_init(&sc->core_sx, sc->core_sx_name); #else snprintf(sc->core_mtx_name, sizeof(sc->core_mtx_name), "bxe%d_core_lock", sc->unit); mtx_init(&sc->core_mtx, sc->core_mtx_name, NULL, MTX_DEF); #endif snprintf(sc->sp_mtx_name, sizeof(sc->sp_mtx_name), "bxe%d_sp_lock", sc->unit); mtx_init(&sc->sp_mtx, sc->sp_mtx_name, NULL, MTX_DEF); snprintf(sc->dmae_mtx_name, sizeof(sc->dmae_mtx_name), "bxe%d_dmae_lock", sc->unit); mtx_init(&sc->dmae_mtx, sc->dmae_mtx_name, NULL, MTX_DEF); snprintf(sc->port.phy_mtx_name, sizeof(sc->port.phy_mtx_name), "bxe%d_phy_lock", sc->unit); mtx_init(&sc->port.phy_mtx, sc->port.phy_mtx_name, NULL, MTX_DEF); snprintf(sc->fwmb_mtx_name, sizeof(sc->fwmb_mtx_name), "bxe%d_fwmb_lock", sc->unit); mtx_init(&sc->fwmb_mtx, sc->fwmb_mtx_name, NULL, MTX_DEF); snprintf(sc->print_mtx_name, sizeof(sc->print_mtx_name), "bxe%d_print_lock", sc->unit); mtx_init(&(sc->print_mtx), sc->print_mtx_name, NULL, MTX_DEF); snprintf(sc->stats_mtx_name, sizeof(sc->stats_mtx_name), "bxe%d_stats_lock", sc->unit); mtx_init(&(sc->stats_mtx), sc->stats_mtx_name, NULL, MTX_DEF); snprintf(sc->mcast_mtx_name, sizeof(sc->mcast_mtx_name), "bxe%d_mcast_lock", sc->unit); mtx_init(&(sc->mcast_mtx), sc->mcast_mtx_name, NULL, MTX_DEF); } static void bxe_release_mutexes(struct bxe_softc *sc) { #ifdef BXE_CORE_LOCK_SX sx_destroy(&sc->core_sx); #else if (mtx_initialized(&sc->core_mtx)) { mtx_destroy(&sc->core_mtx); } #endif if (mtx_initialized(&sc->sp_mtx)) { mtx_destroy(&sc->sp_mtx); } if (mtx_initialized(&sc->dmae_mtx)) { mtx_destroy(&sc->dmae_mtx); } if (mtx_initialized(&sc->port.phy_mtx)) { mtx_destroy(&sc->port.phy_mtx); } if (mtx_initialized(&sc->fwmb_mtx)) { mtx_destroy(&sc->fwmb_mtx); } if (mtx_initialized(&sc->print_mtx)) { mtx_destroy(&sc->print_mtx); } if (mtx_initialized(&sc->stats_mtx)) { mtx_destroy(&sc->stats_mtx); } if (mtx_initialized(&sc->mcast_mtx)) { mtx_destroy(&sc->mcast_mtx); } } static void bxe_tx_disable(struct bxe_softc* sc) { if_t ifp = sc->ifp; /* tell the stack the driver is stopped and TX queue is full */ if (ifp != NULL) { if_setdrvflags(ifp, 0); } } static void bxe_drv_pulse(struct bxe_softc *sc) { SHMEM_WR(sc, func_mb[SC_FW_MB_IDX(sc)].drv_pulse_mb, sc->fw_drv_pulse_wr_seq); } static inline uint16_t bxe_tx_avail(struct bxe_softc *sc, struct bxe_fastpath *fp) { int16_t used; uint16_t prod; uint16_t cons; prod = fp->tx_bd_prod; cons = fp->tx_bd_cons; used = SUB_S16(prod, cons); return (int16_t)(sc->tx_ring_size) - used; } static inline int bxe_tx_queue_has_work(struct bxe_fastpath *fp) { uint16_t hw_cons; mb(); /* status block fields can change */ hw_cons = le16toh(*fp->tx_cons_sb); return (hw_cons != fp->tx_pkt_cons); } static inline uint8_t bxe_has_tx_work(struct bxe_fastpath *fp) { /* expand this for multi-cos if ever supported */ return (bxe_tx_queue_has_work(fp)) ? 
TRUE : FALSE;
}

static inline int
bxe_has_rx_work(struct bxe_fastpath *fp)
{
    uint16_t rx_cq_cons_sb;

    mb(); /* status block fields can change */
    rx_cq_cons_sb = le16toh(*fp->rx_cq_cons_sb);
    if ((rx_cq_cons_sb & RCQ_MAX) == RCQ_MAX)
        rx_cq_cons_sb++;
    return (fp->rx_cq_cons != rx_cq_cons_sb);
}

static void
bxe_sp_event(struct bxe_softc    *sc,
             struct bxe_fastpath *fp,
             union eth_rx_cqe    *rr_cqe)
{
    int cid = SW_CID(rr_cqe->ramrod_cqe.conn_and_cmd_data);
    int command = CQE_CMD(rr_cqe->ramrod_cqe.conn_and_cmd_data);
    enum ecore_queue_cmd drv_cmd = ECORE_Q_CMD_MAX;
    struct ecore_queue_sp_obj *q_obj = &BXE_SP_OBJ(sc, fp).q_obj;

    BLOGD(sc, DBG_SP, "fp=%d cid=%d got ramrod #%d state is %x type is %d\n",
          fp->index, cid, command, sc->state, rr_cqe->ramrod_cqe.ramrod_type);

    switch (command) {
    case (RAMROD_CMD_ID_ETH_CLIENT_UPDATE):
        BLOGD(sc, DBG_SP, "got UPDATE ramrod. CID %d\n", cid);
        drv_cmd = ECORE_Q_CMD_UPDATE;
        break;

    case (RAMROD_CMD_ID_ETH_CLIENT_SETUP):
        BLOGD(sc, DBG_SP, "got MULTI[%d] setup ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_SETUP;
        break;

    case (RAMROD_CMD_ID_ETH_TX_QUEUE_SETUP):
        BLOGD(sc, DBG_SP, "got MULTI[%d] tx-only setup ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_SETUP_TX_ONLY;
        break;

    case (RAMROD_CMD_ID_ETH_HALT):
        BLOGD(sc, DBG_SP, "got MULTI[%d] halt ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_HALT;
        break;

    case (RAMROD_CMD_ID_ETH_TERMINATE):
        BLOGD(sc, DBG_SP, "got MULTI[%d] terminate ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_TERMINATE;
        break;

    case (RAMROD_CMD_ID_ETH_EMPTY):
        BLOGD(sc, DBG_SP, "got MULTI[%d] empty ramrod\n", cid);
        drv_cmd = ECORE_Q_CMD_EMPTY;
        break;

    default:
        BLOGD(sc, DBG_SP, "ERROR: unexpected MC reply (%d) on fp[%d]\n",
              command, fp->index);
        return;
    }

    if ((drv_cmd != ECORE_Q_CMD_MAX) &&
        q_obj->complete_cmd(sc, q_obj, drv_cmd)) {
        /*
         * q_obj->complete_cmd() failure means that this was
         * an unexpected completion.
         *
         * In this case we don't want to increase the sc->spq_left
         * because apparently we haven't sent this command in the
         * first place.
         */
        // bxe_panic(sc, ("Unexpected SP completion\n"));
        return;
    }

    atomic_add_acq_long(&sc->cq_spq_left, 1);

    BLOGD(sc, DBG_SP, "sc->cq_spq_left 0x%lx\n",
          atomic_load_acq_long(&sc->cq_spq_left));
}

/*
 * The current mbuf is part of an aggregation. Move the mbuf into the TPA
 * aggregation queue, put an empty mbuf back onto the receive chain, and mark
 * the current aggregation queue as in-progress.
 */
static void
bxe_tpa_start(struct bxe_softc            *sc,
              struct bxe_fastpath         *fp,
              uint16_t                    queue,
              uint16_t                    cons,
              uint16_t                    prod,
              struct eth_fast_path_rx_cqe *cqe)
{
    struct bxe_sw_rx_bd tmp_bd;
    struct bxe_sw_rx_bd *rx_buf;
    struct eth_rx_bd *rx_bd;
    int max_agg_queues;
    struct bxe_sw_tpa_info *tpa_info = &fp->rx_tpa_info[queue];
    uint16_t index;

    BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA START "
                       "cons=%d prod=%d\n",
          fp->index, queue, cons, prod);

    max_agg_queues = MAX_AGG_QS(sc);

    KASSERT((queue < max_agg_queues),
            ("fp[%02d] invalid aggr queue (%d >= %d)!",
             fp->index, queue, max_agg_queues));

    KASSERT((tpa_info->state == BXE_TPA_STATE_STOP),
            ("fp[%02d].tpa[%02d] starting aggr on queue not stopped!",
             fp->index, queue));

    /* copy the existing mbuf and mapping from the TPA pool */
    tmp_bd = tpa_info->bd;

    if (tmp_bd.m == NULL) {
        uint32_t *tmp;

        tmp = (uint32_t *)cqe;

        BLOGE(sc, "fp[%02d].tpa[%02d] cons[%d] prod[%d] mbuf not allocated!\n",
              fp->index, queue, cons, prod);
        BLOGE(sc, "cqe [0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x]\n",
              *tmp, *(tmp+1), *(tmp+2), *(tmp+3), *(tmp+4), *(tmp+5), *(tmp+6), *(tmp+7));

        /* XXX Error handling?
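         * As written we simply bail out: the CQE is consumed but no
         * aggregation is started, and the TPA queue stays in the STOP
         * state with an empty pool slot.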
*/ return; } /* change the TPA queue to the start state */ tpa_info->state = BXE_TPA_STATE_START; tpa_info->placement_offset = cqe->placement_offset; tpa_info->parsing_flags = le16toh(cqe->pars_flags.flags); tpa_info->vlan_tag = le16toh(cqe->vlan_tag); tpa_info->len_on_bd = le16toh(cqe->len_on_bd); fp->rx_tpa_queue_used |= (1 << queue); /* * If all the buffer descriptors are filled with mbufs then fill in * the current consumer index with a new BD. Else if a maximum Rx * buffer limit is imposed then fill in the next producer index. */ index = (sc->max_rx_bufs != RX_BD_USABLE) ? prod : cons; /* move the received mbuf and mapping to TPA pool */ tpa_info->bd = fp->rx_mbuf_chain[cons]; /* release any existing RX BD mbuf mappings */ if (cons != index) { rx_buf = &fp->rx_mbuf_chain[cons]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* * We get here when the maximum number of rx buffers is less than * RX_BD_USABLE. The mbuf is already saved above so it's OK to NULL * it out here without concern of a memory leak. */ fp->rx_mbuf_chain[cons].m = NULL; } /* update the Rx SW BD with the mbuf info from the TPA pool */ fp->rx_mbuf_chain[index] = tmp_bd; /* update the Rx BD with the empty mbuf phys address from the TPA pool */ rx_bd = &fp->rx_chain[index]; rx_bd->addr_hi = htole32(U64_HI(tpa_info->seg.ds_addr)); rx_bd->addr_lo = htole32(U64_LO(tpa_info->seg.ds_addr)); } /* * When a TPA aggregation is completed, loop through the individual mbufs * of the aggregation, combining them into a single mbuf which will be sent * up the stack. Refill all freed SGEs with mbufs as we go along. */ static int bxe_fill_frag_mbuf(struct bxe_softc *sc, struct bxe_fastpath *fp, struct bxe_sw_tpa_info *tpa_info, uint16_t queue, uint16_t pages, struct mbuf *m, struct eth_end_agg_rx_cqe *cqe, uint16_t cqe_idx) { struct mbuf *m_frag; uint32_t frag_len, frag_size, i; uint16_t sge_idx; int rc = 0; int j; frag_size = le16toh(cqe->pkt_len) - tpa_info->len_on_bd; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill len_on_bd=%d frag_size=%d pages=%d\n", fp->index, queue, tpa_info->len_on_bd, frag_size, pages); /* make sure the aggregated frame is not too big to handle */ if (pages > 8 * PAGES_PER_SGE) { uint32_t *tmp = (uint32_t *)cqe; BLOGE(sc, "fp[%02d].sge[0x%04x] has too many pages (%d)! " "pkt_len=%d len_on_bd=%d frag_size=%d\n", fp->index, cqe_idx, pages, le16toh(cqe->pkt_len), tpa_info->len_on_bd, frag_size); BLOGE(sc, "cqe [0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x]\n", *tmp, *(tmp+1), *(tmp+2), *(tmp+3), *(tmp+4), *(tmp+5), *(tmp+6), *(tmp+7)); bxe_panic(sc, ("sge page count error\n")); return (EINVAL); } /* * Scan through the scatter gather list pulling individual mbufs into a * single mbuf for the host stack. */ for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) { sge_idx = RX_SGE(le16toh(cqe->sgl_or_raw_data.sgl[j])); /* * Firmware gives the indices of the SGE as if the ring is an array * (meaning that the "next" element will consume 2 indices). */ frag_len = min(frag_size, (uint32_t)(SGE_PAGES)); BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA fill i=%d j=%d " "sge_idx=%d frag_size=%d frag_len=%d\n", fp->index, queue, i, j, sge_idx, frag_size, frag_len); m_frag = fp->rx_sge_mbuf_chain[sge_idx].m; /* allocate a new mbuf for the SGE */ rc = bxe_alloc_rx_sge_mbuf(fp, sge_idx); if (rc) { /* Leave all remaining SGEs in the ring! 
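         * The mbufs already posted on those SGEs remain valid, so the SGE
         * ring itself stays intact; the caller just drops this aggregation.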
*/
            return (rc);
        }

        /* update the fragment length */
        m_frag->m_len = frag_len;

        /* concatenate the fragment to the head mbuf */
        m_cat(m, m_frag);
        fp->eth_q_stats.mbuf_alloc_sge--;

        /* update the TPA mbuf size and remaining fragment size */
        m->m_pkthdr.len += frag_len;
        frag_size -= frag_len;
    }

    BLOGD(sc, DBG_LRO,
          "fp[%02d].tpa[%02d] TPA fill done frag_size=%d\n",
          fp->index, queue, frag_size);

    return (rc);
}

static inline void
bxe_clear_sge_mask_next_elems(struct bxe_fastpath *fp)
{
    int i, j;

    for (i = 1; i <= RX_SGE_NUM_PAGES; i++) {
        int idx = RX_SGE_TOTAL_PER_PAGE * i - 1;
        for (j = 0; j < 2; j++) {
            BIT_VEC64_CLEAR_BIT(fp->sge_mask, idx);
            idx--;
        }
    }
}

static inline void
bxe_init_sge_ring_bit_mask(struct bxe_fastpath *fp)
{
    /* set the mask to all 1's, it's faster to compare to 0 than to 0xf's */
    memset(fp->sge_mask, 0xff, sizeof(fp->sge_mask));

    /*
     * Clear the two last indices in each page. These are the indices that
     * correspond to the "next" element, hence will never be indicated and
     * should be removed from the calculations.
     */
    bxe_clear_sge_mask_next_elems(fp);
}

static inline void
bxe_update_last_max_sge(struct bxe_fastpath *fp,
                        uint16_t            idx)
{
    uint16_t last_max = fp->last_max_sge;

    if (SUB_S16(idx, last_max) > 0) {
        fp->last_max_sge = idx;
    }
}

static inline void
bxe_update_sge_prod(struct bxe_softc          *sc,
                    struct bxe_fastpath       *fp,
                    uint16_t                  sge_len,
                    union eth_sgl_or_raw_data *cqe)
{
    uint16_t last_max, last_elem, first_elem;
    uint16_t delta = 0;
    uint16_t i;

    if (!sge_len) {
        return;
    }

    /* first mark all used pages */
    for (i = 0; i < sge_len; i++) {
        BIT_VEC64_CLEAR_BIT(fp->sge_mask, RX_SGE(le16toh(cqe->sgl[i])));
    }

    BLOGD(sc, DBG_LRO,
          "fp[%02d] fp_cqe->sgl[%d] = %d\n",
          fp->index, sge_len - 1, le16toh(cqe->sgl[sge_len - 1]));

    /* assume that the last SGE index is the biggest */
    bxe_update_last_max_sge(fp, le16toh(cqe->sgl[sge_len - 1]));

    last_max = RX_SGE(fp->last_max_sge);
    last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
    first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;

    /* if ring is not full */
    if (last_elem + 1 != first_elem) {
        last_elem++;
    }

    /* now update the prod */
    for (i = first_elem; i != last_elem; i = RX_SGE_NEXT_MASK_ELEM(i)) {
        if (__predict_true(fp->sge_mask[i])) {
            break;
        }

        fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
        delta += BIT_VEC64_ELEM_SZ;
    }

    if (delta > 0) {
        fp->rx_sge_prod += delta;
        /* clear page-end entries */
        bxe_clear_sge_mask_next_elems(fp);
    }

    BLOGD(sc, DBG_LRO,
          "fp[%02d] fp->last_max_sge=%d fp->rx_sge_prod=%d\n",
          fp->index, fp->last_max_sge, fp->rx_sge_prod);
}

/*
 * The aggregation on the current TPA queue has completed. Pull the individual
 * mbuf fragments together into a single mbuf, perform all necessary checksum
 * calculations, and send the resulting mbuf to the stack.
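 * Note that a replacement mbuf for the TPA pool is allocated before the
 * completed aggregation is touched; if that allocation fails, the whole
 * aggregation is dropped rather than leaving the pool slot empty.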
*/
static void
bxe_tpa_stop(struct bxe_softc          *sc,
             struct bxe_fastpath       *fp,
             struct bxe_sw_tpa_info    *tpa_info,
             uint16_t                  queue,
             uint16_t                  pages,
             struct eth_end_agg_rx_cqe *cqe,
             uint16_t                  cqe_idx)
{
    if_t ifp = sc->ifp;
    struct mbuf *m;
    int rc = 0;

    BLOGD(sc, DBG_LRO,
          "fp[%02d].tpa[%02d] pad=%d pkt_len=%d pages=%d vlan=%d\n",
          fp->index, queue, tpa_info->placement_offset,
          le16toh(cqe->pkt_len), pages, tpa_info->vlan_tag);

    m = tpa_info->bd.m;

    /* allocate a replacement before modifying existing mbuf */
    rc = bxe_alloc_rx_tpa_mbuf(fp, queue);
    if (rc) {
        /* drop the frame and log an error */
        fp->eth_q_stats.rx_soft_errors++;
        goto bxe_tpa_stop_exit;
    }

    /* we have a replacement, fixup the current mbuf */
    m_adj(m, tpa_info->placement_offset);
    m->m_pkthdr.len = m->m_len = tpa_info->len_on_bd;

    /* mark the checksums valid (taken care of by the firmware) */
    fp->eth_q_stats.rx_ofld_frames_csum_ip++;
    fp->eth_q_stats.rx_ofld_frames_csum_tcp_udp++;
    m->m_pkthdr.csum_data  = 0xffff;
    m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
                               CSUM_IP_VALID   |
                               CSUM_DATA_VALID |
                               CSUM_PSEUDO_HDR);

    /* aggregate all of the SGEs into a single mbuf */
    rc = bxe_fill_frag_mbuf(sc, fp, tpa_info, queue, pages, m, cqe, cqe_idx);
    if (rc) {
        /* drop the packet and log an error */
        fp->eth_q_stats.rx_soft_errors++;
        m_freem(m);
    } else {
        if (tpa_info->parsing_flags & PARSING_FLAGS_INNER_VLAN_EXIST) {
            m->m_pkthdr.ether_vtag = tpa_info->vlan_tag;
            m->m_flags |= M_VLANTAG;
        }

        /* assign the packet to this interface */
        if_setrcvif(m, ifp);

#if __FreeBSD_version >= 800000
        /* specify what RSS queue was used for this flow */
        m->m_pkthdr.flowid = fp->index;
        BXE_SET_FLOWID(m);
#endif

        if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
        fp->eth_q_stats.rx_tpa_pkts++;

        /* pass the frame to the stack */
        if_input(ifp, m);
    }

    /* we passed an mbuf up the stack or dropped the frame */
    fp->eth_q_stats.mbuf_alloc_tpa--;

bxe_tpa_stop_exit:
    fp->rx_tpa_info[queue].state = BXE_TPA_STATE_STOP;
    fp->rx_tpa_queue_used &= ~(1 << queue);
}

static uint8_t
bxe_service_rxsgl(struct bxe_fastpath *fp,
                  uint16_t len,
                  uint16_t lenonbd,
                  struct mbuf *m,
                  struct eth_fast_path_rx_cqe *cqe_fp)
{
    struct mbuf *m_frag;
    uint16_t frags, frag_len;
    uint16_t sge_idx = 0;
    uint16_t j;
    uint8_t i, rc = 0;
    uint32_t frag_size;

    /* adjust the mbuf */
    m->m_len = lenonbd;

    frag_size = len - lenonbd;
    frags = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT;

    for (i = 0, j = 0; i < frags; i += PAGES_PER_SGE, j++) {
        sge_idx = RX_SGE(le16toh(cqe_fp->sgl_or_raw_data.sgl[j]));

        m_frag = fp->rx_sge_mbuf_chain[sge_idx].m;
        frag_len = min(frag_size, (uint32_t)(SGE_PAGE_SIZE));
        m_frag->m_len = frag_len;

        /* allocate a new mbuf for the SGE */
        rc = bxe_alloc_rx_sge_mbuf(fp, sge_idx);
        if (rc) {
            /* Leave all remaining SGEs in the ring!
*/ return (rc); } fp->eth_q_stats.mbuf_alloc_sge--; /* concatenate the fragment to the head mbuf */ m_cat(m, m_frag); frag_size -= frag_len; } bxe_update_sge_prod(fp->sc, fp, frags, &cqe_fp->sgl_or_raw_data); return rc; } static uint8_t bxe_rxeof(struct bxe_softc *sc, struct bxe_fastpath *fp) { if_t ifp = sc->ifp; uint16_t bd_cons, bd_prod, bd_prod_fw, comp_ring_cons; uint16_t hw_cq_cons, sw_cq_cons, sw_cq_prod; int rx_pkts = 0; int rc = 0; BXE_FP_RX_LOCK(fp); /* CQ "next element" is of the size of the regular element */ hw_cq_cons = le16toh(*fp->rx_cq_cons_sb); if ((hw_cq_cons & RCQ_USABLE_PER_PAGE) == RCQ_USABLE_PER_PAGE) { hw_cq_cons++; } bd_cons = fp->rx_bd_cons; bd_prod = fp->rx_bd_prod; bd_prod_fw = bd_prod; sw_cq_cons = fp->rx_cq_cons; sw_cq_prod = fp->rx_cq_prod; /* * Memory barrier necessary as speculative reads of the rx * buffer can be ahead of the index in the status block */ rmb(); BLOGD(sc, DBG_RX, "fp[%02d] Rx START hw_cq_cons=%u sw_cq_cons=%u\n", fp->index, hw_cq_cons, sw_cq_cons); while (sw_cq_cons != hw_cq_cons) { struct bxe_sw_rx_bd *rx_buf = NULL; union eth_rx_cqe *cqe; struct eth_fast_path_rx_cqe *cqe_fp; uint8_t cqe_fp_flags; enum eth_rx_cqe_type cqe_fp_type; uint16_t len, lenonbd, pad; struct mbuf *m = NULL; comp_ring_cons = RCQ(sw_cq_cons); bd_prod = RX_BD(bd_prod); bd_cons = RX_BD(bd_cons); cqe = &fp->rcq_chain[comp_ring_cons]; cqe_fp = &cqe->fast_path_cqe; cqe_fp_flags = cqe_fp->type_error_flags; cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE; BLOGD(sc, DBG_RX, "fp[%02d] Rx hw_cq_cons=%d hw_sw_cons=%d " "BD prod=%d cons=%d CQE type=0x%x err=0x%x " "status=0x%x rss_hash=0x%x vlan=0x%x len=%u lenonbd=%u\n", fp->index, hw_cq_cons, sw_cq_cons, bd_prod, bd_cons, CQE_TYPE(cqe_fp_flags), cqe_fp_flags, cqe_fp->status_flags, le32toh(cqe_fp->rss_hash_result), le16toh(cqe_fp->vlan_tag), le16toh(cqe_fp->pkt_len_or_gro_seg_len), le16toh(cqe_fp->len_on_bd)); /* is this a slowpath msg? */ if (__predict_false(CQE_TYPE_SLOW(cqe_fp_type))) { bxe_sp_event(sc, fp, cqe); goto next_cqe; } rx_buf = &fp->rx_mbuf_chain[bd_cons]; if (!CQE_TYPE_FAST(cqe_fp_type)) { struct bxe_sw_tpa_info *tpa_info; uint16_t frag_size, pages; uint8_t queue; if (CQE_TYPE_START(cqe_fp_type)) { bxe_tpa_start(sc, fp, cqe_fp->queue_index, bd_cons, bd_prod, cqe_fp); m = NULL; /* packet not ready yet */ goto next_rx; } KASSERT(CQE_TYPE_STOP(cqe_fp_type), ("CQE type is not STOP! (0x%x)\n", cqe_fp_type)); queue = cqe->end_agg_cqe.queue_index; tpa_info = &fp->rx_tpa_info[queue]; BLOGD(sc, DBG_LRO, "fp[%02d].tpa[%02d] TPA STOP\n", fp->index, queue); frag_size = (le16toh(cqe->end_agg_cqe.pkt_len) - tpa_info->len_on_bd); pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT; bxe_tpa_stop(sc, fp, tpa_info, queue, pages, &cqe->end_agg_cqe, comp_ring_cons); bxe_update_sge_prod(sc, fp, pages, &cqe->end_agg_cqe.sgl_or_raw_data); goto next_cqe; } /* non TPA */ /* is this an error packet? */ if (__predict_false(cqe_fp_flags & ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG)) { BLOGE(sc, "flags 0x%x rx packet %u\n", cqe_fp_flags, sw_cq_cons); fp->eth_q_stats.rx_soft_errors++; goto next_rx; } len = le16toh(cqe_fp->pkt_len_or_gro_seg_len); lenonbd = le16toh(cqe_fp->len_on_bd); pad = cqe_fp->placement_offset; m = rx_buf->m; if (__predict_false(m == NULL)) { BLOGE(sc, "No mbuf in rx chain descriptor %d for fp[%02d]\n", bd_cons, fp->index); goto next_rx; } /* XXX double copy if packet length under a threshold */ /* * If all the buffer descriptors are filled with mbufs then fill in * the current consumer index with a new BD. 
Else if a maximum Rx
             * buffer limit is imposed then fill in the next producer index.
             */
            rc = bxe_alloc_rx_bd_mbuf(fp, bd_cons,
                                      (sc->max_rx_bufs != RX_BD_USABLE) ?
                                          bd_prod : bd_cons);
            if (rc != 0) {

                /* we simply reuse the received mbuf and don't post it to the stack */

                m = NULL;

                BLOGE(sc, "mbuf alloc fail for fp[%02d] rx chain (%d)\n",
                      fp->index, rc);
                fp->eth_q_stats.rx_soft_errors++;

                if (sc->max_rx_bufs != RX_BD_USABLE) {
                    /* copy this consumer index to the producer index */
                    memcpy(&fp->rx_mbuf_chain[bd_prod], rx_buf,
                           sizeof(struct bxe_sw_rx_bd));
                    memset(rx_buf, 0, sizeof(struct bxe_sw_rx_bd));
                }

                goto next_rx;
            }

            /* current mbuf was detached from the bd */
            fp->eth_q_stats.mbuf_alloc_rx--;

            /* we allocated a replacement mbuf, fixup the current one */
            m_adj(m, pad);
            m->m_pkthdr.len = m->m_len = len;

            if ((len > 60) && (len > lenonbd)) {
                fp->eth_q_stats.rx_bxe_service_rxsgl++;
                rc = bxe_service_rxsgl(fp, len, lenonbd, m, cqe_fp);
                if (rc)
                    break;
                fp->eth_q_stats.rx_jumbo_sge_pkts++;
            } else if (lenonbd < len) {
                fp->eth_q_stats.rx_erroneous_jumbo_sge_pkts++;
            }

            /* assign the packet to this interface */
            if_setrcvif(m, ifp);

            /* assume no hardware checksum has completed */
            m->m_pkthdr.csum_flags = 0;

            /* validate checksum if offload enabled */
            if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
                /* check for a valid IP frame */
                if (!(cqe->fast_path_cqe.status_flags &
                      ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG)) {
                    m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
                    if (__predict_false(cqe_fp_flags &
                                        ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG)) {
                        fp->eth_q_stats.rx_hw_csum_errors++;
                    } else {
                        fp->eth_q_stats.rx_ofld_frames_csum_ip++;
                        m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
                    }
                }

                /* check for a valid TCP/UDP frame */
                if (!(cqe->fast_path_cqe.status_flags &
                      ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) {
                    if (__predict_false(cqe_fp_flags &
                                        ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) {
                        fp->eth_q_stats.rx_hw_csum_errors++;
                    } else {
                        fp->eth_q_stats.rx_ofld_frames_csum_tcp_udp++;
                        m->m_pkthdr.csum_data = 0xFFFF;
                        m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID |
                                                   CSUM_PSEUDO_HDR);
                    }
                }
            }

            /* if there is a VLAN tag then flag that info */
            if (cqe->fast_path_cqe.pars_flags.flags &
                PARSING_FLAGS_INNER_VLAN_EXIST) {
                m->m_pkthdr.ether_vtag = cqe->fast_path_cqe.vlan_tag;
                m->m_flags |= M_VLANTAG;
            }

#if __FreeBSD_version >= 800000
            /* specify what RSS queue was used for this flow */
            m->m_pkthdr.flowid = fp->index;
            BXE_SET_FLOWID(m);
#endif

next_rx:

            bd_cons    = RX_BD_NEXT(bd_cons);
            bd_prod    = RX_BD_NEXT(bd_prod);
            bd_prod_fw = RX_BD_NEXT(bd_prod_fw);

            /* pass the frame to the stack */
            if (__predict_true(m != NULL)) {
                if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
                rx_pkts++;
                if_input(ifp, m);
            }

next_cqe:

            sw_cq_prod = RCQ_NEXT(sw_cq_prod);
            sw_cq_cons = RCQ_NEXT(sw_cq_cons);

            /* limit spinning on the queue */
            if (rc != 0)
                break;

            if (rx_pkts == sc->rx_budget) {
                fp->eth_q_stats.rx_budget_reached++;
                break;
            }
    } /* while work to do */

    fp->rx_bd_cons = bd_cons;
    fp->rx_bd_prod = bd_prod_fw;
    fp->rx_cq_cons = sw_cq_cons;
    fp->rx_cq_prod = sw_cq_prod;

    /* Update producers */
    bxe_update_rx_prod(sc, fp, bd_prod_fw, sw_cq_prod, fp->rx_sge_prod);

    fp->eth_q_stats.rx_pkts += rx_pkts;
    fp->eth_q_stats.rx_calls++;

    BXE_FP_RX_UNLOCK(fp);

    return (sw_cq_cons != hw_cq_cons);
}

static uint16_t
bxe_free_tx_pkt(struct bxe_softc    *sc,
                struct bxe_fastpath *fp,
                uint16_t            idx)
{
    struct bxe_sw_tx_bd *tx_buf = &fp->tx_mbuf_chain[idx];
    struct eth_tx_start_bd *tx_start_bd;
    uint16_t bd_idx = TX_BD(tx_buf->first_bd);
    uint16_t new_cons;
    int nbd;

    /* unmap the mbuf from non-paged memory */
bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); tx_start_bd = &fp->tx_chain[bd_idx].start_bd; nbd = le16toh(tx_start_bd->nbd) - 1; new_cons = (tx_buf->first_bd + nbd); /* free the mbuf */ if (__predict_true(tx_buf->m != NULL)) { m_freem(tx_buf->m); fp->eth_q_stats.mbuf_alloc_tx--; } else { fp->eth_q_stats.tx_chain_lost_mbuf++; } tx_buf->m = NULL; tx_buf->first_bd = 0; return (new_cons); } /* transmit timeout watchdog */ static int bxe_watchdog(struct bxe_softc *sc, struct bxe_fastpath *fp) { BXE_FP_TX_LOCK(fp); if ((fp->watchdog_timer == 0) || (--fp->watchdog_timer)) { BXE_FP_TX_UNLOCK(fp); return (0); } BLOGE(sc, "TX watchdog timeout on fp[%02d], resetting!\n", fp->index); if(sc->trigger_grcdump) { /* taking grcdump */ bxe_grc_dump(sc); } BXE_FP_TX_UNLOCK(fp); atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_REINIT); taskqueue_enqueue(sc->chip_tq, &sc->chip_tq_task); return (-1); } /* processes transmit completions */ static uint8_t bxe_txeof(struct bxe_softc *sc, struct bxe_fastpath *fp) { if_t ifp = sc->ifp; uint16_t bd_cons, hw_cons, sw_cons, pkt_cons; uint16_t tx_bd_avail; BXE_FP_TX_LOCK_ASSERT(fp); bd_cons = fp->tx_bd_cons; hw_cons = le16toh(*fp->tx_cons_sb); sw_cons = fp->tx_pkt_cons; while (sw_cons != hw_cons) { pkt_cons = TX_BD(sw_cons); BLOGD(sc, DBG_TX, "TX: fp[%d]: hw_cons=%u sw_cons=%u pkt_cons=%u\n", fp->index, hw_cons, sw_cons, pkt_cons); bd_cons = bxe_free_tx_pkt(sc, fp, pkt_cons); sw_cons++; } fp->tx_pkt_cons = sw_cons; fp->tx_bd_cons = bd_cons; BLOGD(sc, DBG_TX, "TX done: fp[%d]: hw_cons=%u sw_cons=%u sw_prod=%u\n", fp->index, hw_cons, fp->tx_pkt_cons, fp->tx_pkt_prod); mb(); tx_bd_avail = bxe_tx_avail(sc, fp); if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); } else { if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); } if (fp->tx_pkt_prod != fp->tx_pkt_cons) { /* reset the watchdog timer if there are pending transmits */ fp->watchdog_timer = BXE_TX_TIMEOUT; return (TRUE); } else { /* clear watchdog when there are no pending transmits */ fp->watchdog_timer = 0; return (FALSE); } } static void bxe_drain_tx_queues(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i, count; /* wait until all TX fastpath tasks have completed */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; count = 1000; while (bxe_has_tx_work(fp)) { BXE_FP_TX_LOCK(fp); bxe_txeof(sc, fp); BXE_FP_TX_UNLOCK(fp); if (count == 0) { BLOGE(sc, "Timeout waiting for fp[%d] " "transmits to complete!\n", i); bxe_panic(sc, ("tx drain failure\n")); return; } count--; DELAY(1000); rmb(); } } return; } static int bxe_del_all_macs(struct bxe_softc *sc, struct ecore_vlan_mac_obj *mac_obj, int mac_type, uint8_t wait_for_comp) { unsigned long ramrod_flags = 0, vlan_mac_flags = 0; int rc; /* wait for completion of requested */ if (wait_for_comp) { bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); } /* Set the mac type of addresses we want to clear */ bxe_set_bit(mac_type, &vlan_mac_flags); rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc < 0) { BLOGE(sc, "Failed to delete MACs (%d) mac_type %d wait_for_comp 0x%x\n", rc, mac_type, wait_for_comp); } return (rc); } static int bxe_fill_accept_flags(struct bxe_softc *sc, uint32_t rx_mode, unsigned long *rx_accept_flags, unsigned long *tx_accept_flags) { /* Clear the flags first */ *rx_accept_flags = 0; *tx_accept_flags = 0; switch (rx_mode) { case BXE_RX_MODE_NONE: /* * 'drop all' supersedes any accept flags that may have been * passed to the function. 
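         * (This is the mode bxe_nic_unload() installs, via
         * sc->rx_mode = BXE_RX_MODE_NONE, to quiesce the Rx path.)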
*/
        break;

    case BXE_RX_MODE_NORMAL:
        bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_MULTICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);

        /* internal switching mode */
        bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_MULTICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);

        break;

    case BXE_RX_MODE_ALLMULTI:
        bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);

        /* internal switching mode */
        bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);

        break;

    case BXE_RX_MODE_PROMISC:
        /*
         * According to the definition of SI mode, an interface in promisc
         * mode should receive matched and unmatched (in resolution of the
         * port) unicast packets.
         */
        bxe_set_bit(ECORE_ACCEPT_UNMATCHED, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_UNICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, rx_accept_flags);

        /* internal switching mode */
        bxe_set_bit(ECORE_ACCEPT_ALL_MULTICAST, tx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_BROADCAST, tx_accept_flags);

        if (IS_MF_SI(sc)) {
            bxe_set_bit(ECORE_ACCEPT_ALL_UNICAST, tx_accept_flags);
        } else {
            bxe_set_bit(ECORE_ACCEPT_UNICAST, tx_accept_flags);
        }

        break;

    default:
        BLOGE(sc, "Unknown rx_mode (0x%x)\n", rx_mode);
        return (-1);
    }

    /* Set ACCEPT_ANY_VLAN as we do not enable filtering by VLAN */
    if (rx_mode != BXE_RX_MODE_NONE) {
        bxe_set_bit(ECORE_ACCEPT_ANY_VLAN, rx_accept_flags);
        bxe_set_bit(ECORE_ACCEPT_ANY_VLAN, tx_accept_flags);
    }

    return (0);
}

static int
bxe_set_q_rx_mode(struct bxe_softc *sc,
                  uint8_t          cl_id,
                  unsigned long    rx_mode_flags,
                  unsigned long    rx_accept_flags,
                  unsigned long    tx_accept_flags,
                  unsigned long    ramrod_flags)
{
    struct ecore_rx_mode_ramrod_params ramrod_param;
    int rc;

    memset(&ramrod_param, 0, sizeof(ramrod_param));

    /* Prepare ramrod parameters */
    ramrod_param.cid = 0;
    ramrod_param.cl_id = cl_id;
    ramrod_param.rx_mode_obj = &sc->rx_mode_obj;
    ramrod_param.func_id = SC_FUNC(sc);

    ramrod_param.pstate = &sc->sp_state;
    ramrod_param.state = ECORE_FILTER_RX_MODE_PENDING;

    ramrod_param.rdata = BXE_SP(sc, rx_mode_rdata);
    ramrod_param.rdata_mapping = BXE_SP_MAPPING(sc, rx_mode_rdata);

    bxe_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);

    ramrod_param.ramrod_flags = ramrod_flags;
    ramrod_param.rx_mode_flags = rx_mode_flags;

    ramrod_param.rx_accept_flags = rx_accept_flags;
    ramrod_param.tx_accept_flags = tx_accept_flags;

    rc = ecore_config_rx_mode(sc, &ramrod_param);
    if (rc < 0) {
        BLOGE(sc, "Set rx_mode %d cli_id 0x%x rx_mode_flags 0x%x "
              "rx_accept_flags 0x%x tx_accept_flags 0x%x "
              "ramrod_flags 0x%x rc %d failed\n",
              sc->rx_mode, cl_id, (uint32_t)rx_mode_flags,
              (uint32_t)rx_accept_flags, (uint32_t)tx_accept_flags,
              (uint32_t)ramrod_flags, rc);
        return (rc);
    }

    return (0);
}

static int
bxe_set_storm_rx_mode(struct bxe_softc *sc)
{
    unsigned long rx_mode_flags = 0, ramrod_flags = 0;
    unsigned long rx_accept_flags = 0, tx_accept_flags = 0;
    int rc;

    rc = bxe_fill_accept_flags(sc, sc->rx_mode, &rx_accept_flags,
                               &tx_accept_flags);
    if (rc) {
        return (rc);
    }

    bxe_set_bit(RAMROD_RX, &ramrod_flags);
    bxe_set_bit(RAMROD_TX, &ramrod_flags);

    /* XXX ensure all fastpath have same cl_id and/or move it to bxe_softc */
    return (bxe_set_q_rx_mode(sc, sc->fp[0].cl_id, rx_mode_flags,
                              rx_accept_flags, tx_accept_flags,
                              ramrod_flags));
}
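
/*
 * Note on the Rx filtering path above: bxe_set_storm_rx_mode() translates
 * the current sc->rx_mode into ECORE_ACCEPT_* bit sets via
 * bxe_fill_accept_flags() and posts a single rx_mode ramrod through
 * bxe_set_q_rx_mode()/ecore_config_rx_mode(). As a sketch, forcing
 * promiscuous filtering reduces to:
 *
 *     sc->rx_mode = BXE_RX_MODE_PROMISC;
 *     bxe_set_storm_rx_mode(sc);
 *
 * though in practice the mode is derived from the interface flags and
 * applied through bxe_set_rx_mode() (see the SIOCSIFFLAGS ioctl handling
 * below).
 */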
/* returns the "mcp load_code" according to global load_count array */ static int bxe_nic_load_no_mcp(struct bxe_softc *sc) { int path = SC_PATH(sc); int port = SC_PORT(sc); BLOGI(sc, "NO MCP - load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); load_count[path][0]++; load_count[path][1 + port]++; BLOGI(sc, "NO MCP - new load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); if (load_count[path][0] == 1) { return (FW_MSG_CODE_DRV_LOAD_COMMON); } else if (load_count[path][1 + port] == 1) { return (FW_MSG_CODE_DRV_LOAD_PORT); } else { return (FW_MSG_CODE_DRV_LOAD_FUNCTION); } } /* returns the "mcp load_code" according to global load_count array */ static int bxe_nic_unload_no_mcp(struct bxe_softc *sc) { int port = SC_PORT(sc); int path = SC_PATH(sc); BLOGI(sc, "NO MCP - load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); load_count[path][0]--; load_count[path][1 + port]--; BLOGI(sc, "NO MCP - new load counts[%d] %d, %d, %d\n", path, load_count[path][0], load_count[path][1], load_count[path][2]); if (load_count[path][0] == 0) { return (FW_MSG_CODE_DRV_UNLOAD_COMMON); } else if (load_count[path][1 + port] == 0) { return (FW_MSG_CODE_DRV_UNLOAD_PORT); } else { return (FW_MSG_CODE_DRV_UNLOAD_FUNCTION); } } /* request unload mode from the MCP: COMMON, PORT or FUNCTION */ static uint32_t bxe_send_unload_req(struct bxe_softc *sc, int unload_mode) { uint32_t reset_code = 0; /* Select the UNLOAD request mode */ if (unload_mode == UNLOAD_NORMAL) { reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; } else { reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; } /* Send the request to the MCP */ if (!BXE_NOMCP(sc)) { reset_code = bxe_fw_command(sc, reset_code, 0); } else { reset_code = bxe_nic_unload_no_mcp(sc); } return (reset_code); } /* send UNLOAD_DONE command to the MCP */ static void bxe_send_unload_done(struct bxe_softc *sc, uint8_t keep_link) { uint32_t reset_param = keep_link ? DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET : 0; /* Report UNLOAD_DONE to MCP */ if (!BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, reset_param); } } static int bxe_func_wait_started(struct bxe_softc *sc) { int tout = 50; if (!sc->port.pmf) { return (0); } /* * (assumption: No Attention from MCP at this stage) * PMF probably in the middle of TX disable/enable transaction * 1. Sync IRS for default SB * 2. Sync SP queue - this guarantees us that attention handling started * 3. Wait, that TX disable/enable transaction completes * * 1+2 guarantee that if DCBX attention was scheduled it already changed * pending bit of transaction from STARTED-->TX_STOPPED, if we already * received completion for the transaction the state is TX_STOPPED. * State will return to STARTED after completion of TX_STOPPED-->STARTED * transaction. */ /* XXX make sure default SB ISR is done */ /* need a way to synchronize an irq (intr_mtx?) */ /* XXX flush any work queues */ while (ecore_func_get_state(sc, &sc->func_obj) != ECORE_F_STATE_STARTED && tout--) { DELAY(20000); } if (ecore_func_get_state(sc, &sc->func_obj) != ECORE_F_STATE_STARTED) { /* * Failed to complete the transaction in a "good way" * Force both transactions with CLR bit. */ struct ecore_func_state_params func_params = { NULL }; BLOGE(sc, "Unexpected function state! 
" "Forcing STARTED-->TX_STOPPED-->STARTED\n"); func_params.f_obj = &sc->func_obj; bxe_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags); /* STARTED-->TX_STOPPED */ func_params.cmd = ECORE_F_CMD_TX_STOP; ecore_func_state_change(sc, &func_params); /* TX_STOPPED-->STARTED */ func_params.cmd = ECORE_F_CMD_TX_START; return (ecore_func_state_change(sc, &func_params)); } return (0); } static int bxe_stop_queue(struct bxe_softc *sc, int index) { struct bxe_fastpath *fp = &sc->fp[index]; struct ecore_queue_state_params q_params = { NULL }; int rc; BLOGD(sc, DBG_LOAD, "stopping queue %d cid %d\n", index, fp->index); q_params.q_obj = &sc->sp_objs[fp->index].q_obj; /* We want to wait for completion in this context */ bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); /* Stop the primary connection: */ /* ...halt the connection */ q_params.cmd = ECORE_Q_CMD_HALT; rc = ecore_queue_state_change(sc, &q_params); if (rc) { return (rc); } /* ...terminate the connection */ q_params.cmd = ECORE_Q_CMD_TERMINATE; memset(&q_params.params.terminate, 0, sizeof(q_params.params.terminate)); q_params.params.terminate.cid_index = FIRST_TX_COS_INDEX; rc = ecore_queue_state_change(sc, &q_params); if (rc) { return (rc); } /* ...delete cfc entry */ q_params.cmd = ECORE_Q_CMD_CFC_DEL; memset(&q_params.params.cfc_del, 0, sizeof(q_params.params.cfc_del)); q_params.params.cfc_del.cid_index = FIRST_TX_COS_INDEX; return (ecore_queue_state_change(sc, &q_params)); } /* wait for the outstanding SP commands */ static inline uint8_t bxe_wait_sp_comp(struct bxe_softc *sc, unsigned long mask) { unsigned long tmp; int tout = 5000; /* wait for 5 secs tops */ while (tout--) { mb(); if (!(atomic_load_acq_long(&sc->sp_state) & mask)) { return (TRUE); } DELAY(1000); } mb(); tmp = atomic_load_acq_long(&sc->sp_state); if (tmp & mask) { BLOGE(sc, "Filtering completion timed out: " "sp_state 0x%lx, mask 0x%lx\n", tmp, mask); return (FALSE); } return (FALSE); } static int bxe_func_stop(struct bxe_softc *sc) { struct ecore_func_state_params func_params = { NULL }; int rc; /* prepare parameters for function state transitions */ bxe_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_STOP; /* * Try to stop the function the 'good way'. If it fails (in case * of a parity error during bxe_chip_cleanup()) and we are * not in a debug mode, perform a state transaction in order to * enable further HW_RESET transaction. */ rc = ecore_func_state_change(sc, &func_params); if (rc) { BLOGE(sc, "FUNC_STOP ramrod failed. " "Running a dry transaction (%d)\n", rc); bxe_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags); return (ecore_func_state_change(sc, &func_params)); } return (0); } static int bxe_reset_hw(struct bxe_softc *sc, uint32_t load_code) { struct ecore_func_state_params func_params = { NULL }; /* Prepare parameters for function state transitions */ bxe_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_HW_RESET; func_params.params.hw_init.load_phase = load_code; return (ecore_func_state_change(sc, &func_params)); } static void bxe_int_disable_sync(struct bxe_softc *sc, int disable_hw) { if (disable_hw) { /* prevent the HW from sending interrupts */ bxe_int_disable(sc); } /* XXX need a way to synchronize ALL irqs (intr_mtx?) 
*/
    /* make sure all ISRs are done */

    /* XXX make sure sp_task is not running */
    /* cancel and flush work queues */
}

static void
bxe_chip_cleanup(struct bxe_softc *sc,
                 uint32_t         unload_mode,
                 uint8_t          keep_link)
{
    int port = SC_PORT(sc);
    struct ecore_mcast_ramrod_params rparam = { NULL };
    uint32_t reset_code;
    int i, rc = 0;

    bxe_drain_tx_queues(sc);

    /* give HW time to discard old tx messages */
    DELAY(1000);

    /* Clean all ETH MACs */
    rc = bxe_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_ETH_MAC, FALSE);
    if (rc < 0) {
        BLOGE(sc, "Failed to delete all ETH MACs (%d)\n", rc);
    }

    /* Clean up UC list */
    rc = bxe_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_UC_LIST_MAC,
                          TRUE);
    if (rc < 0) {
        BLOGE(sc, "Failed to delete UC MACs list (%d)\n", rc);
    }

    /* Disable LLH */
    if (!CHIP_IS_E1(sc)) {
        REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 0);
    }

    /* Set "drop all" to stop Rx */

    /*
     * We need to take the BXE_MCAST_LOCK() here in order to prevent
     * a race between the completion code and this code.
     */
    BXE_MCAST_LOCK(sc);

    if (bxe_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) {
        bxe_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state);
    } else {
        bxe_set_storm_rx_mode(sc);
    }

    /* Clean up multicast configuration */
    rparam.mcast_obj = &sc->mcast_obj;
    rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
    if (rc < 0) {
        BLOGE(sc, "Failed to send DEL MCAST command (%d)\n", rc);
    }

    BXE_MCAST_UNLOCK(sc);

    // XXX bxe_iov_chip_cleanup(sc);

    /*
     * Send the UNLOAD_REQUEST to the MCP. This will return if
     * this function should perform FUNCTION, PORT, or COMMON HW
     * reset.
     */
    reset_code = bxe_send_unload_req(sc, unload_mode);

    /*
     * (assumption: No Attention from MCP at this stage)
     * PMF probably in the middle of TX disable/enable transaction
     */
    rc = bxe_func_wait_started(sc);
    if (rc) {
        BLOGE(sc, "bxe_func_wait_started failed (%d)\n", rc);
    }

    /*
     * Close multi and leading connections.
     * Completions for ramrods are collected in a synchronous way.
     */
    for (i = 0; i < sc->num_queues; i++) {
        if (bxe_stop_queue(sc, i)) {
            goto unload_error;
        }
    }

    /*
     * If the SP settings didn't get completed so far, something
     * has gone very wrong.
     */
    if (!bxe_wait_sp_comp(sc, ~0x0UL)) {
        BLOGE(sc, "Common slow path ramrods got stuck! (%d)\n", rc);
    }

unload_error:

    rc = bxe_func_stop(sc);
    if (rc) {
        BLOGE(sc, "Function stop failed! (%d)\n", rc);
    }

    /* disable HW interrupts */
    bxe_int_disable_sync(sc, TRUE);

    /* detach interrupts */
    bxe_interrupt_detach(sc);

    /* Reset the chip */
    rc = bxe_reset_hw(sc, reset_code);
    if (rc) {
        BLOGE(sc, "Hardware reset failed (%d)\n", rc);
    }

    /* Report UNLOAD_DONE to MCP */
    bxe_send_unload_done(sc, keep_link);
}

static void
bxe_disable_close_the_gate(struct bxe_softc *sc)
{
    uint32_t val;
    int port = SC_PORT(sc);

    BLOGD(sc, DBG_LOAD,
          "Disabling 'close the gates'\n");

    if (CHIP_IS_E1(sc)) {
        uint32_t addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 :
                               MISC_REG_AEU_MASK_ATTN_FUNC_0;
        val = REG_RD(sc, addr);
        val &= ~(0x300);
        REG_WR(sc, addr, val);
    } else {
        val = REG_RD(sc, MISC_REG_AEU_GENERAL_MASK);
        val &= ~(MISC_AEU_GENERAL_MASK_REG_AEU_PXP_CLOSE_MASK |
                 MISC_AEU_GENERAL_MASK_REG_AEU_NIG_CLOSE_MASK);
        REG_WR(sc, MISC_REG_AEU_GENERAL_MASK, val);
    }
}

/*
 * Cleans the objects that have internal lists without sending
 * ramrods. Should be run when interrupts are disabled.
 */
static void
bxe_squeeze_objects(struct bxe_softc *sc)
{
    unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
    struct ecore_mcast_ramrod_params rparam = { NULL };
    struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj;
    int rc;

    /* Cleanup MACs' object first...
*/

    /* Wait for completion of the requested commands */
    bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);

    /* Perform a dry cleanup */
    bxe_set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);

    /* Clean ETH primary MAC */
    bxe_set_bit(ECORE_ETH_MAC, &vlan_mac_flags);
    rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
                             &ramrod_flags);
    if (rc != 0) {
        BLOGE(sc, "Failed to clean ETH MACs (%d)\n", rc);
    }

    /* Cleanup UC list */
    vlan_mac_flags = 0;
    bxe_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
    rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags,
                             &ramrod_flags);
    if (rc != 0) {
        BLOGE(sc, "Failed to clean UC list MACs (%d)\n", rc);
    }

    /* Now clean mcast object... */

    rparam.mcast_obj = &sc->mcast_obj;
    bxe_set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);

    /* Add a DEL command... */
    rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
    if (rc < 0) {
        BLOGE(sc, "Failed to send DEL MCAST command (%d)\n", rc);
    }

    /* now wait until all pending commands are cleared */

    rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT);
    while (rc != 0) {
        if (rc < 0) {
            BLOGE(sc, "Failed to clean MCAST object (%d)\n", rc);
            return;
        }

        rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT);
    }
}

/* stop the controller */
static __noinline int
bxe_nic_unload(struct bxe_softc *sc,
               uint32_t         unload_mode,
               uint8_t          keep_link)
{
    uint8_t global = FALSE;
    uint32_t val;
    int i;

    BXE_CORE_LOCK_ASSERT(sc);

    if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING);

    for (i = 0; i < sc->num_queues; i++) {
        struct bxe_fastpath *fp;

        fp = &sc->fp[i];
        BXE_FP_TX_LOCK(fp);
        BXE_FP_TX_UNLOCK(fp);
    }

    BLOGD(sc, DBG_LOAD, "Starting NIC unload...\n");

    /* mark driver as unloaded in shmem2 */
    if (IS_PF(sc) && SHMEM2_HAS(sc, drv_capabilities_flag)) {
        val = SHMEM2_RD(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)]);
        SHMEM2_WR(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)],
                  val & ~DRV_FLAGS_CAPABILITIES_LOADED_L2);
    }

    if (IS_PF(sc) && sc->recovery_state != BXE_RECOVERY_DONE &&
        (sc->state == BXE_STATE_CLOSED || sc->state == BXE_STATE_ERROR)) {
        /*
         * We can get here if the driver has been unloaded
         * during parity error recovery and is either waiting for a
         * leader to complete or for other functions to unload and
         * then ifconfig down has been issued. In this case we want to
         * unload and let other functions to complete a recovery
         * process.
         */
        sc->recovery_state = BXE_RECOVERY_DONE;
        sc->is_leader = 0;
        bxe_release_leader_lock(sc);
        mb();

        BLOGD(sc, DBG_LOAD, "Releasing a leadership...\n");
        BLOGE(sc, "Can't unload in closed or error state recover_state 0x%x"
              " state = 0x%x\n", sc->recovery_state, sc->state);
        return (-1);
    }

    /*
     * Nothing to do during unload if the previous bxe_nic_load()
     * did not complete successfully - all resources are released.
     */
    if ((sc->state == BXE_STATE_CLOSED) || (sc->state == BXE_STATE_ERROR)) {
        return (0);
    }

    sc->state = BXE_STATE_CLOSING_WAITING_HALT;
    mb();

    /* stop tx */
    bxe_tx_disable(sc);

    sc->rx_mode = BXE_RX_MODE_NONE;
    /* XXX set rx mode ???
*/

    if (IS_PF(sc) && !sc->grcdump_done) {
        /* set ALWAYS_ALIVE bit in shmem */
        sc->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
        bxe_drv_pulse(sc);

        bxe_stats_handle(sc, STATS_EVENT_STOP);
        bxe_save_statistics(sc);
    }

    /* wait till consumers catch up with producers in all queues */
    bxe_drain_tx_queues(sc);

    /*
     * If VF, indicate to the PF that this function is going down
     * (the PF will delete the sp elements and clear the initializations).
     */
    if (IS_VF(sc)) {
        ; /* bxe_vfpf_close_vf(sc); */
    } else if (unload_mode != UNLOAD_RECOVERY) {
        /* if this is a normal/close unload need to clean up chip */
        if (!sc->grcdump_done)
            bxe_chip_cleanup(sc, unload_mode, keep_link);
    } else {
        /* Send the UNLOAD_REQUEST to the MCP */
        bxe_send_unload_req(sc, unload_mode);

        /*
         * Prevent transactions to host from the functions on the
         * engine that doesn't reset global blocks in case of global
         * attention once global blocks are reset and gates are opened
         * (the engine whose leader will perform the recovery last).
         */
        if (!CHIP_IS_E1x(sc)) {
            bxe_pf_disable(sc);
        }

        /* disable HW interrupts */
        bxe_int_disable_sync(sc, TRUE);

        /* detach interrupts */
        bxe_interrupt_detach(sc);

        /* Report UNLOAD_DONE to MCP */
        bxe_send_unload_done(sc, FALSE);
    }

    /*
     * At this stage no more interrupts will arrive, so we may safely clean
     * the queueable objects here in case they failed to get cleaned so far.
     */
    if (IS_PF(sc)) {
        bxe_squeeze_objects(sc);
    }

    /* There should be no more pending SP commands at this stage */
    sc->sp_state = 0;

    sc->port.pmf = 0;

    bxe_free_fp_buffers(sc);

    if (IS_PF(sc)) {
        bxe_free_mem(sc);
    }

    bxe_free_fw_stats_mem(sc);

    sc->state = BXE_STATE_CLOSED;

    /*
     * Check if there are pending parity attentions. If there are - set
     * RECOVERY_IN_PROGRESS.
     */
    if (IS_PF(sc) && bxe_chk_parity_attn(sc, &global, FALSE)) {
        bxe_set_reset_in_progress(sc);

        /* Set RESET_IS_GLOBAL if needed */
        if (global) {
            bxe_set_reset_global(sc);
        }
    }

    /*
     * The last driver must disable a "close the gate" if there is no
     * parity attention or "process kill" pending.
     */
    if (IS_PF(sc) && !bxe_clear_pf_load(sc) &&
        bxe_reset_is_done(sc, SC_PATH(sc))) {
        bxe_disable_close_the_gate(sc);
    }

    BLOGD(sc, DBG_LOAD, "Ended NIC unload\n");

    return (0);
}

/*
 * Called by the OS to set various media options (i.e. link, speed, etc.) when
 * the user runs "ifconfig bxe media ..." or "ifconfig bxe mediaopt ...".
 */
static int
bxe_ifmedia_update(struct ifnet *ifp)
{
    struct bxe_softc *sc = (struct bxe_softc *)if_getsoftc(ifp);
    struct ifmedia *ifm;

    ifm = &sc->ifmedia;

    /* We only support Ethernet media type. */
    if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) {
        return (EINVAL);
    }

    switch (IFM_SUBTYPE(ifm->ifm_media)) {
    case IFM_AUTO:
        break;
    case IFM_10G_CX4:
    case IFM_10G_SR:
    case IFM_10G_T:
    case IFM_10G_TWINAX:
    default:
        /* We don't support changing the media type. */
        BLOGD(sc, DBG_LOAD, "Invalid media type (%d)\n",
              IFM_SUBTYPE(ifm->ifm_media));
        return (EINVAL);
    }

    return (0);
}

/*
 * Called by the OS to get the current media status (i.e. link, speed, etc.).
 */
static void
bxe_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
    struct bxe_softc *sc = if_getsoftc(ifp);

    /* Report link down if the driver isn't running. */
    if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
        ifmr->ifm_active |= IFM_NONE;
        return;
    }

    /* Setup the default interface info.
*/
    ifmr->ifm_status = IFM_AVALID;
    ifmr->ifm_active = IFM_ETHER;

    if (sc->link_vars.link_up) {
        ifmr->ifm_status |= IFM_ACTIVE;
    } else {
        ifmr->ifm_active |= IFM_NONE;
        return;
    }

    ifmr->ifm_active |= sc->media;

    if (sc->link_vars.duplex == DUPLEX_FULL) {
        ifmr->ifm_active |= IFM_FDX;
    } else {
        ifmr->ifm_active |= IFM_HDX;
    }
}

static void
bxe_handle_chip_tq(void *context,
                   int  pending)
{
    struct bxe_softc *sc = (struct bxe_softc *)context;
    long work = atomic_load_acq_long(&sc->chip_tq_flags);

    switch (work) {

    case CHIP_TQ_REINIT:
        if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) {
            /* restart the interface */
            BLOGD(sc, DBG_LOAD, "Restarting the interface...\n");
            bxe_periodic_stop(sc);
            BXE_CORE_LOCK(sc);
            bxe_stop_locked(sc);
            bxe_init_locked(sc);
            BXE_CORE_UNLOCK(sc);
        }
        break;

    default:
        break;
    }
}

/*
 * Handles any IOCTL calls from the operating system.
 *
 * Returns:
 *   0 = Success, >0 Failure
 */
static int
bxe_ioctl(if_t ifp,
          u_long command,
          caddr_t data)
{
    struct bxe_softc *sc = if_getsoftc(ifp);
    struct ifreq *ifr = (struct ifreq *)data;
    int mask = 0;
    int reinit = 0;
    int error = 0;

    int mtu_min = (ETH_MIN_PACKET_SIZE - ETH_HLEN);
    int mtu_max = (MJUM9BYTES - ETH_OVERHEAD - IP_HEADER_ALIGNMENT_PADDING);

    switch (command) {

    case SIOCSIFMTU:
        BLOGD(sc, DBG_IOCTL, "Received SIOCSIFMTU ioctl (mtu=%d)\n",
              ifr->ifr_mtu);

        if (sc->mtu == ifr->ifr_mtu) {
            /* nothing to change */
            break;
        }

        if ((ifr->ifr_mtu < mtu_min) || (ifr->ifr_mtu > mtu_max)) {
            BLOGE(sc, "Unsupported MTU size %d (range is %d-%d)\n",
                  ifr->ifr_mtu, mtu_min, mtu_max);
            error = EINVAL;
            break;
        }

        atomic_store_rel_int((volatile unsigned int *)&sc->mtu,
                             (unsigned long)ifr->ifr_mtu);
        /*
        atomic_store_rel_long((volatile unsigned long *)&if_getmtu(ifp),
                              (unsigned long)ifr->ifr_mtu);
        XXX - Not sure why it needs to be atomic
        */
        if_setmtu(ifp, ifr->ifr_mtu);
        reinit = 1;
        break;

    case SIOCSIFFLAGS:
        /* toggle the interface state up or down */
        BLOGD(sc, DBG_IOCTL, "Received SIOCSIFFLAGS ioctl\n");

        BXE_CORE_LOCK(sc);
        /* check if the interface is up */
        if (if_getflags(ifp) & IFF_UP) {
            if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                /* set the receive mode flags */
                bxe_set_rx_mode(sc);
            } else if (sc->state != BXE_STATE_DISABLED) {
                bxe_init_locked(sc);
            }
        } else {
            if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
                bxe_periodic_stop(sc);
                bxe_stop_locked(sc);
            }
        }
        BXE_CORE_UNLOCK(sc);

        break;

    case SIOCADDMULTI:
    case SIOCDELMULTI:
        /* add/delete multicast addresses */
        BLOGD(sc, DBG_IOCTL, "Received SIOCADDMULTI/SIOCDELMULTI ioctl\n");

        /* check if the interface is up */
        if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
            /* set the receive mode flags */
            BXE_CORE_LOCK(sc);
            bxe_set_rx_mode(sc);
            BXE_CORE_UNLOCK(sc);
        }

        break;

    case SIOCSIFCAP:
        /* find out which capabilities have changed */
        mask = (ifr->ifr_reqcap ^ if_getcapenable(ifp));

        BLOGD(sc, DBG_IOCTL, "Received SIOCSIFCAP ioctl (mask=0x%08x)\n",
              mask);

        /* toggle the LRO capabilities enable flag */
        if (mask & IFCAP_LRO) {
            if_togglecapenable(ifp, IFCAP_LRO);
            BLOGD(sc, DBG_IOCTL, "Turning LRO %s\n",
                  (if_getcapenable(ifp) & IFCAP_LRO) ? "ON" : "OFF");
            reinit = 1;
        }

        /* toggle the TXCSUM checksum capabilities enable flag */
        if (mask & IFCAP_TXCSUM) {
            if_togglecapenable(ifp, IFCAP_TXCSUM);
            BLOGD(sc, DBG_IOCTL, "Turning TXCSUM %s\n",
                  (if_getcapenable(ifp) & IFCAP_TXCSUM) ?
"ON" : "OFF"); if (if_getcapenable(ifp) & IFCAP_TXCSUM) { if_sethwassistbits(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6), 0); } else { if_clearhwassist(ifp); /* XXX */ } } /* toggle the RXCSUM checksum capabilities enable flag */ if (mask & IFCAP_RXCSUM) { if_togglecapenable(ifp, IFCAP_RXCSUM); BLOGD(sc, DBG_IOCTL, "Turning RXCSUM %s\n", (if_getcapenable(ifp) & IFCAP_RXCSUM) ? "ON" : "OFF"); if (if_getcapenable(ifp) & IFCAP_RXCSUM) { if_sethwassistbits(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6), 0); } else { if_clearhwassist(ifp); /* XXX */ } } /* toggle TSO4 capabilities enabled flag */ if (mask & IFCAP_TSO4) { if_togglecapenable(ifp, IFCAP_TSO4); BLOGD(sc, DBG_IOCTL, "Turning TSO4 %s\n", (if_getcapenable(ifp) & IFCAP_TSO4) ? "ON" : "OFF"); } /* toggle TSO6 capabilities enabled flag */ if (mask & IFCAP_TSO6) { if_togglecapenable(ifp, IFCAP_TSO6); BLOGD(sc, DBG_IOCTL, "Turning TSO6 %s\n", (if_getcapenable(ifp) & IFCAP_TSO6) ? "ON" : "OFF"); } /* toggle VLAN_HWTSO capabilities enabled flag */ if (mask & IFCAP_VLAN_HWTSO) { if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); BLOGD(sc, DBG_IOCTL, "Turning VLAN_HWTSO %s\n", (if_getcapenable(ifp) & IFCAP_VLAN_HWTSO) ? "ON" : "OFF"); } /* toggle VLAN_HWCSUM capabilities enabled flag */ if (mask & IFCAP_VLAN_HWCSUM) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWCSUM is not supported!\n"); error = EINVAL; } /* toggle VLAN_MTU capabilities enable flag */ if (mask & IFCAP_VLAN_MTU) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_MTU is not supported!\n"); error = EINVAL; } /* toggle VLAN_HWTAGGING capabilities enabled flag */ if (mask & IFCAP_VLAN_HWTAGGING) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWTAGGING is not supported!\n"); error = EINVAL; } /* toggle VLAN_HWFILTER capabilities enabled flag */ if (mask & IFCAP_VLAN_HWFILTER) { /* XXX investigate this... */ BLOGE(sc, "Changing VLAN_HWFILTER is not supported!\n"); error = EINVAL; } /* XXX not yet... 
* IFCAP_WOL_MAGIC */ break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: /* set/get interface media */ BLOGD(sc, DBG_IOCTL, "Received SIOCSIFMEDIA/SIOCGIFMEDIA ioctl (cmd=%lu)\n", (command & 0xff)); error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; default: BLOGD(sc, DBG_IOCTL, "Received Unknown Ioctl (cmd=%lu)\n", (command & 0xff)); error = ether_ioctl(ifp, command, data); break; } if (reinit && (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING)) { BLOGD(sc, DBG_LOAD | DBG_IOCTL, "Re-initializing hardware from IOCTL change\n"); bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_stop_locked(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } return (error); } static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m, uint8_t contents) { char * type; int i = 0; if (!(sc->debug & DBG_MBUF)) { return; } if (m == NULL) { BLOGD(sc, DBG_MBUF, "mbuf: null pointer\n"); return; } while (m) { #if __FreeBSD_version >= 1000000 BLOGD(sc, DBG_MBUF, "%02d: mbuf=%p m_len=%d m_flags=0x%b m_data=%p\n", i, m, m->m_len, m->m_flags, M_FLAG_BITS, m->m_data); if (m->m_flags & M_PKTHDR) { BLOGD(sc, DBG_MBUF, "%02d: - m_pkthdr: tot_len=%d flags=0x%b csum_flags=%b\n", i, m->m_pkthdr.len, m->m_flags, M_FLAG_BITS, (int)m->m_pkthdr.csum_flags, CSUM_BITS); } #else BLOGD(sc, DBG_MBUF, "%02d: mbuf=%p m_len=%d m_flags=0x%b m_data=%p\n", i, m, m->m_len, m->m_flags, "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY", m->m_data); if (m->m_flags & M_PKTHDR) { BLOGD(sc, DBG_MBUF, "%02d: - m_pkthdr: tot_len=%d flags=0x%b csum_flags=%b\n", i, m->m_pkthdr.len, m->m_flags, "\20\12M_BCAST\13M_MCAST\14M_FRAG" "\15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG" "\22M_PROMISC\23M_NOFREE", (int)m->m_pkthdr.csum_flags, "\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP\4CSUM_IP_FRAGS" "\5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED" "\12CSUM_IP_VALID\13CSUM_DATA_VALID" "\14CSUM_PSEUDO_HDR"); } #endif /* #if __FreeBSD_version >= 1000000 */ if (m->m_flags & M_EXT) { switch (m->m_ext.ext_type) { case EXT_CLUSTER: type = "EXT_CLUSTER"; break; case EXT_SFBUF: type = "EXT_SFBUF"; break; case EXT_JUMBOP: type = "EXT_JUMBOP"; break; case EXT_JUMBO9: type = "EXT_JUMBO9"; break; case EXT_JUMBO16: type = "EXT_JUMBO16"; break; case EXT_PACKET: type = "EXT_PACKET"; break; case EXT_MBUF: type = "EXT_MBUF"; break; case EXT_NET_DRV: type = "EXT_NET_DRV"; break; case EXT_MOD_TYPE: type = "EXT_MOD_TYPE"; break; case EXT_DISPOSABLE: type = "EXT_DISPOSABLE"; break; case EXT_EXTREF: type = "EXT_EXTREF"; break; default: type = "UNKNOWN"; break; } BLOGD(sc, DBG_MBUF, "%02d: - m_ext: %p ext_size=%d type=%s\n", i, m->m_ext.ext_buf, m->m_ext.ext_size, type); } if (contents) { bxe_dump_mbuf_data(sc, "mbuf data", m, TRUE); } m = m->m_next; i++; } } /* * Checks to ensure the 13 bd sliding window is >= MSS for TSO. * Check that (13 total bds - 3 bds) = 10 bd window >= MSS. * The window: 3 bds are = 1 for headers BD + 2 for parse BD and last BD * The headers comes in a separate bd in FreeBSD so 13-3=10. * Returns: 0 if OK to send, 1 if packet needs further defragmentation */ static int bxe_chktso_window(struct bxe_softc *sc, int nsegs, bus_dma_segment_t *segs, struct mbuf *m) { uint32_t num_wnds, wnd_size, wnd_sum; int32_t frag_idx, wnd_idx; unsigned short lso_mss; int defrag; defrag = 0; wnd_sum = 0; wnd_size = 10; num_wnds = nsegs - wnd_size; lso_mss = htole16(m->m_pkthdr.tso_segsz); /* * Total header lengths Eth+IP+TCP in first FreeBSD mbuf so calculate the * first window sum of data while skipping the first assuming it is the * header in FreeBSD. 
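     * In other words, the initial window covers segs[1] through
     * segs[wnd_size]; each later iteration slides the 10-segment window
     * forward by one, and every window of payload must sum to >= the MSS.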
*/ for (frag_idx = 1; (frag_idx <= wnd_size); frag_idx++) { wnd_sum += htole16(segs[frag_idx].ds_len); } /* check the first 10 bd window size */ if (wnd_sum < lso_mss) { return (1); } /* run through the windows */ for (wnd_idx = 0; wnd_idx < num_wnds; wnd_idx++, frag_idx++) { /* subtract the first mbuf->m_len of the last wndw(-header) */ wnd_sum -= htole16(segs[wnd_idx+1].ds_len); /* add the next mbuf len to the len of our new window */ wnd_sum += htole16(segs[frag_idx].ds_len); if (wnd_sum < lso_mss) { return (1); } } return (0); } static uint8_t bxe_set_pbd_csum_e2(struct bxe_fastpath *fp, struct mbuf *m, uint32_t *parsing_data) { struct ether_vlan_header *eh = NULL; struct ip *ip4 = NULL; struct ip6_hdr *ip6 = NULL; caddr_t ip = NULL; struct tcphdr *th = NULL; int e_hlen, ip_hlen, l4_off; uint16_t proto; if (m->m_pkthdr.csum_flags == CSUM_IP) { /* no L4 checksum offload needed */ return (0); } /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { e_hlen = (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); proto = ntohs(eh->evl_proto); } else { e_hlen = ETHER_HDR_LEN; proto = ntohs(eh->evl_encap_proto); } switch (proto) { case ETHERTYPE_IP: /* get the IP header, if mbuf len < 20 then header in next mbuf */ ip4 = (m->m_len < sizeof(struct ip)) ? (struct ip *)m->m_next->m_data : (struct ip *)(m->m_data + e_hlen); /* ip_hl is number of 32-bit words */ ip_hlen = (ip4->ip_hl << 2); ip = (caddr_t)ip4; break; case ETHERTYPE_IPV6: /* get the IPv6 header, if mbuf len < 40 then header in next mbuf */ ip6 = (m->m_len < sizeof(struct ip6_hdr)) ? (struct ip6_hdr *)m->m_next->m_data : (struct ip6_hdr *)(m->m_data + e_hlen); /* XXX cannot support offload with IPv6 extensions */ ip_hlen = sizeof(struct ip6_hdr); ip = (caddr_t)ip6; break; default: /* We can't offload in this case... */ /* XXX error stat ??? */ return (0); } /* XXX assuming L4 header is contiguous to IPv4/IPv6 in the same mbuf */ l4_off = (e_hlen + ip_hlen); *parsing_data |= (((l4_off >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_tcp++; th = (struct tcphdr *)(ip + ip_hlen); /* th_off is number of 32-bit words */ *parsing_data |= ((th->th_off << ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) & ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW); return (l4_off + (th->th_off << 2)); /* entire header length */ } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_udp++; return (l4_off + sizeof(struct udphdr)); /* entire header length */ } else { /* XXX error stat ??? 
*/ return (0); } } static uint8_t bxe_set_pbd_csum(struct bxe_fastpath *fp, struct mbuf *m, struct eth_tx_parse_bd_e1x *pbd) { struct ether_vlan_header *eh = NULL; struct ip *ip4 = NULL; struct ip6_hdr *ip6 = NULL; caddr_t ip = NULL; struct tcphdr *th = NULL; struct udphdr *uh = NULL; int e_hlen, ip_hlen; uint16_t proto; uint8_t hlen; uint16_t tmp_csum; uint32_t *tmp_uh; /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { e_hlen = (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); proto = ntohs(eh->evl_proto); } else { e_hlen = ETHER_HDR_LEN; proto = ntohs(eh->evl_encap_proto); } switch (proto) { case ETHERTYPE_IP: /* get the IP header, if mbuf len < 20 then header in next mbuf */ ip4 = (m->m_len < sizeof(struct ip)) ? (struct ip *)m->m_next->m_data : (struct ip *)(m->m_data + e_hlen); /* ip_hl is number of 32-bit words */ ip_hlen = (ip4->ip_hl << 1); ip = (caddr_t)ip4; break; case ETHERTYPE_IPV6: /* get the IPv6 header, if mbuf len < 40 then header in next mbuf */ ip6 = (m->m_len < sizeof(struct ip6_hdr)) ? (struct ip6_hdr *)m->m_next->m_data : (struct ip6_hdr *)(m->m_data + e_hlen); /* XXX cannot support offload with IPv6 extensions */ ip_hlen = (sizeof(struct ip6_hdr) >> 1); ip = (caddr_t)ip6; break; default: /* We can't offload in this case... */ /* XXX error stat ??? */ return (0); } hlen = (e_hlen >> 1); /* note that rest of global_data is indirectly zeroed here */ if (m->m_flags & M_VLANTAG) { pbd->global_data = htole16(hlen | (1 << ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT)); } else { pbd->global_data = htole16(hlen); } pbd->ip_hlen_w = ip_hlen; hlen += pbd->ip_hlen_w; /* XXX assuming L4 header is contiguous to IPv4/IPv6 in the same mbuf */ if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { th = (struct tcphdr *)(ip + (ip_hlen << 1)); /* th_off is number of 32-bit words */ hlen += (uint16_t)(th->th_off << 1); } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { uh = (struct udphdr *)(ip + (ip_hlen << 1)); hlen += (sizeof(struct udphdr) / 2); } else { /* valid case as only CSUM_IP was set */ return (0); } pbd->total_hlen_w = htole16(hlen); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TSO | CSUM_TCP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_tcp++; pbd->tcp_pseudo_csum = ntohs(th->th_sum); } else if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)) { fp->eth_q_stats.tx_ofld_frames_csum_udp++; /* * Everest1 (i.e. 57710, 57711, 57711E) does not natively support UDP * checksums and does not know anything about the UDP header and where * the checksum field is located. It only knows about TCP. Therefore * we "lie" to the hardware for outgoing UDP packets w/ checksum * offload. Since the checksum field offset for TCP is 16 bytes and * for UDP it is 6 bytes we pass a pointer to the hardware that is 10 * bytes less than the start of the UDP header. This allows the * hardware to write the checksum in the correct spot. But the * hardware will compute a checksum which includes the last 10 bytes * of the IP header. To correct this we tweak the stack computed * pseudo checksum by folding in the calculation of the inverse * checksum for those final 10 bytes of the IP header. This allows * the correct checksum to be computed by the hardware. 
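     * Put differently, the value programmed into tcp_pseudo_csum below is
     * uh_sum + ~csum(10 bytes preceding the UDP header); when the hardware
     * later folds those same 10 bytes into its sum they cancel, leaving the
     * correct UDP checksum on the wire.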
*/ /* set pointer 10 bytes before UDP header */ tmp_uh = (uint32_t *)((uint8_t *)uh - 10); /* calculate a pseudo header checksum over the first 10 bytes */ tmp_csum = in_pseudo(*tmp_uh, *(tmp_uh + 1), *(uint16_t *)(tmp_uh + 2)); pbd->tcp_pseudo_csum = ntohs(in_addword(uh->uh_sum, ~tmp_csum)); } return (hlen * 2); /* entire header length, number of bytes */ } static void bxe_set_pbd_lso_e2(struct mbuf *m, uint32_t *parsing_data) { *parsing_data |= ((m->m_pkthdr.tso_segsz << ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) & ETH_TX_PARSE_BD_E2_LSO_MSS); /* XXX test for IPv6 with extension header... */ } static void bxe_set_pbd_lso(struct mbuf *m, struct eth_tx_parse_bd_e1x *pbd) { struct ether_vlan_header *eh = NULL; struct ip *ip = NULL; struct tcphdr *th = NULL; int e_hlen; /* get the Ethernet header */ eh = mtod(m, struct ether_vlan_header *); /* handle VLAN encapsulation if present */ e_hlen = (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ? (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) : ETHER_HDR_LEN; /* get the IP and TCP headers; with LSO the entire header is in the first mbuf */ /* XXX assuming IPv4 */ ip = (struct ip *)(m->m_data + e_hlen); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); pbd->lso_mss = htole16(m->m_pkthdr.tso_segsz); pbd->tcp_send_seq = ntohl(th->th_seq); pbd->tcp_flags = ((ntohl(((uint32_t *)th)[3]) >> 16) & 0xff); #if 1 /* XXX IPv4 */ pbd->ip_id = ntohs(ip->ip_id); pbd->tcp_pseudo_csum = ntohs(in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP))); #else /* XXX IPv6 */ pbd->tcp_pseudo_csum = ntohs(in_pseudo(&ip6->ip6_src, &ip6->ip6_dst, htons(IPPROTO_TCP))); #endif pbd->global_data |= htole16(ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN); } /* * Encapsulates an mbuf cluster into the tx bd chain and makes the memory * visible to the controller. * * If an mbuf is submitted to this routine and cannot be given to the * controller (e.g. it has too many fragments) then the function may free * the mbuf and return to the caller. * * Returns: * 0 = Success, !0 = Failure * Note the side effect that an mbuf may be freed if it causes a problem.
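* * In outline (an editor-added sketch of the BD chain built below): one start BD covering the first DMA segment, one parsing BD (e1x or e2 format depending on the chip), an optional extra data BD when a TSO header is split off the first segment, and one regular data BD for each remaining segment.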
*/ static int bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) { bus_dma_segment_t segs[32]; struct mbuf *m0; struct bxe_sw_tx_bd *tx_buf; struct eth_tx_parse_bd_e1x *pbd_e1x = NULL; struct eth_tx_parse_bd_e2 *pbd_e2 = NULL; /* struct eth_tx_parse_2nd_bd *pbd2 = NULL; */ struct eth_tx_bd *tx_data_bd; struct eth_tx_bd *tx_total_pkt_size_bd; struct eth_tx_start_bd *tx_start_bd; uint16_t bd_prod, pkt_prod, total_pkt_size; uint8_t mac_type; int defragged, error, nsegs, rc, nbds, vlan_off, ovlan; struct bxe_softc *sc; uint16_t tx_bd_avail; struct ether_vlan_header *eh; uint32_t pbd_e2_parsing_data = 0; uint8_t hlen = 0; int tmp_bd; int i; sc = fp->sc; #if __FreeBSD_version >= 800000 M_ASSERTPKTHDR(*m_head); #endif /* #if __FreeBSD_version >= 800000 */ m0 = *m_head; rc = defragged = nbds = ovlan = vlan_off = total_pkt_size = 0; tx_start_bd = NULL; tx_data_bd = NULL; tx_total_pkt_size_bd = NULL; /* get the H/W pointer for packets and BDs */ pkt_prod = fp->tx_pkt_prod; bd_prod = fp->tx_bd_prod; mac_type = UNICAST_ADDRESS; /* map the mbuf into the next open DMAable memory */ tx_buf = &fp->tx_mbuf_chain[TX_BD(pkt_prod)]; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); /* mapping errors */ if (__predict_false(error != 0)) { fp->eth_q_stats.tx_dma_mapping_failure++; if (error == ENOMEM) { /* resource issue, try again later */ rc = ENOMEM; } else if (error == EFBIG) { /* possibly recoverable with defragmentation */ fp->eth_q_stats.mbuf_defrag_attempts++; m0 = m_defrag(*m_head, M_NOWAIT); if (m0 == NULL) { fp->eth_q_stats.mbuf_defrag_failures++; rc = ENOBUFS; } else { /* defrag successful, try mapping again */ *m_head = m0; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { fp->eth_q_stats.tx_dma_mapping_failure++; rc = error; } } } else { /* unknown, unrecoverable mapping error */ BLOGE(sc, "Unknown TX mapping error rc=%d\n", error); bxe_dump_mbuf(sc, m0, FALSE); rc = error; } goto bxe_tx_encap_continue; } tx_bd_avail = bxe_tx_avail(sc, fp); /* make sure there is enough room in the send queue */ if (__predict_false(tx_bd_avail < (nsegs + 2))) { /* Recoverable, try again later. */ fp->eth_q_stats.tx_hw_queue_full++; bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); rc = ENOMEM; goto bxe_tx_encap_continue; } /* capture the current H/W TX chain high watermark */ if (__predict_false(fp->eth_q_stats.tx_hw_max_queue_depth < (TX_BD_USABLE - tx_bd_avail))) { fp->eth_q_stats.tx_hw_max_queue_depth = (TX_BD_USABLE - tx_bd_avail); } /* make sure it fits in the packet window */ if (__predict_false(nsegs > BXE_MAX_SEGMENTS)) { /* * The mbuf may be too big for the controller to handle. If the frame * is a TSO frame we'll need to do an additional check. */ if (m0->m_pkthdr.csum_flags & CSUM_TSO) { if (bxe_chktso_window(sc, nsegs, segs, m0) == 0) { goto bxe_tx_encap_continue; /* OK to send */ } else { fp->eth_q_stats.tx_window_violation_tso++; } } else { fp->eth_q_stats.tx_window_violation_std++; } /* let's try to defragment this mbuf and remap it */ fp->eth_q_stats.mbuf_defrag_attempts++; bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); m0 = m_defrag(*m_head, M_NOWAIT); if (m0 == NULL) { fp->eth_q_stats.mbuf_defrag_failures++; /* Ugh, just drop the frame...
:( */ rc = ENOBUFS; } else { /* defrag successful, try mapping again */ *m_head = m0; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, tx_buf->m_map, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { fp->eth_q_stats.tx_dma_mapping_failure++; /* No sense in trying to defrag/copy chain, drop it. :( */ rc = error; } else { /* if the chain is still too long then drop it */ if (m0->m_pkthdr.csum_flags & CSUM_TSO) { /* * when TSO is enabled, nsegs must be checked against * BXE_TSO_MAX_SEGMENTS */ if (__predict_false(nsegs > BXE_TSO_MAX_SEGMENTS)) { bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); fp->eth_q_stats.nsegs_path1_errors++; rc = ENODEV; } } else { if (__predict_false(nsegs > BXE_MAX_SEGMENTS)) { bus_dmamap_unload(fp->tx_mbuf_tag, tx_buf->m_map); fp->eth_q_stats.nsegs_path2_errors++; rc = ENODEV; } } } } } bxe_tx_encap_continue: /* Check for errors */ if (rc) { if (rc == ENOMEM) { /* recoverable, try again later */ } else { fp->eth_q_stats.tx_soft_errors++; fp->eth_q_stats.mbuf_alloc_tx--; m_freem(*m_head); *m_head = NULL; } return (rc); } /* set flag according to packet type (UNICAST_ADDRESS is default) */ if (m0->m_flags & M_BCAST) { mac_type = BROADCAST_ADDRESS; } else if (m0->m_flags & M_MCAST) { mac_type = MULTICAST_ADDRESS; } /* store the mbuf into the mbuf ring */ tx_buf->m = m0; tx_buf->first_bd = fp->tx_bd_prod; tx_buf->flags = 0; /* prepare the first transmit (start) BD for the mbuf */ tx_start_bd = &fp->tx_chain[TX_BD(bd_prod)].start_bd; BLOGD(sc, DBG_TX, "sending pkt_prod=%u tx_buf=%p next_idx=%u bd=%u tx_start_bd=%p\n", pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, tx_start_bd); tx_start_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); tx_start_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); tx_start_bd->nbytes = htole16(segs[0].ds_len); total_pkt_size += tx_start_bd->nbytes; tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; tx_start_bd->general_data = (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); /* all frames have at least Start BD + Parsing BD */ nbds = nsegs + 1; tx_start_bd->nbd = htole16(nbds); if (m0->m_flags & M_VLANTAG) { tx_start_bd->vlan_or_ethertype = htole16(m0->m_pkthdr.ether_vtag); tx_start_bd->bd_flags.as_bitfield |= (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); } else { /* vf tx, start bd must hold the ethertype for fw to enforce it */ if (IS_VF(sc)) { /* map ethernet header to find type and header length */ eh = mtod(m0, struct ether_vlan_header *); tx_start_bd->vlan_or_ethertype = eh->evl_encap_proto; } else { /* used by FW for packet accounting */ tx_start_bd->vlan_or_ethertype = htole16(fp->tx_pkt_prod); } } /* * add a parsing BD from the chain.
The parsing BD is always added * though it is only used for TSO and chksum */ bd_prod = TX_BD_NEXT(bd_prod); if (m0->m_pkthdr.csum_flags) { if (m0->m_pkthdr.csum_flags & CSUM_IP) { fp->eth_q_stats.tx_ofld_frames_csum_ip++; tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IP_CSUM; } if (m0->m_pkthdr.csum_flags & CSUM_TCP_IPV6) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_IPV6 | ETH_TX_BD_FLAGS_L4_CSUM); } else if (m0->m_pkthdr.csum_flags & CSUM_UDP_IPV6) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_IPV6 | ETH_TX_BD_FLAGS_IS_UDP | ETH_TX_BD_FLAGS_L4_CSUM); } else if ((m0->m_pkthdr.csum_flags & CSUM_TCP) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM; } else if (m0->m_pkthdr.csum_flags & CSUM_UDP) { tx_start_bd->bd_flags.as_bitfield |= (ETH_TX_BD_FLAGS_L4_CSUM | ETH_TX_BD_FLAGS_IS_UDP); } } if (!CHIP_IS_E1x(sc)) { pbd_e2 = &fp->tx_chain[TX_BD(bd_prod)].parse_bd_e2; memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2)); if (m0->m_pkthdr.csum_flags) { hlen = bxe_set_pbd_csum_e2(fp, m0, &pbd_e2_parsing_data); } SET_FLAG(pbd_e2_parsing_data, ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, mac_type); } else { uint16_t global_data = 0; pbd_e1x = &fp->tx_chain[TX_BD(bd_prod)].parse_bd_e1x; memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x)); if (m0->m_pkthdr.csum_flags) { hlen = bxe_set_pbd_csum(fp, m0, pbd_e1x); } SET_FLAG(global_data, ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE, mac_type); pbd_e1x->global_data |= htole16(global_data); } /* setup the parsing BD with TSO specific info */ if (m0->m_pkthdr.csum_flags & CSUM_TSO) { fp->eth_q_stats.tx_ofld_frames_lso++; tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO; if (__predict_false(tx_start_bd->nbytes > hlen)) { fp->eth_q_stats.tx_ofld_frames_lso_hdr_splits++; /* split the first BD into header/data making the fw job easy */ nbds++; tx_start_bd->nbd = htole16(nbds); tx_start_bd->nbytes = htole16(hlen); bd_prod = TX_BD_NEXT(bd_prod); /* new transmit BD after the tx_parse_bd */ tx_data_bd = &fp->tx_chain[TX_BD(bd_prod)].reg_bd; tx_data_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr + hlen)); tx_data_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr + hlen)); tx_data_bd->nbytes = htole16(segs[0].ds_len - hlen); if (tx_total_pkt_size_bd == NULL) { tx_total_pkt_size_bd = tx_data_bd; } BLOGD(sc, DBG_TX, "TSO split header size is %d (%x:%x) nbds %d\n", le16toh(tx_start_bd->nbytes), le32toh(tx_start_bd->addr_hi), le32toh(tx_start_bd->addr_lo), nbds); } if (!CHIP_IS_E1x(sc)) { bxe_set_pbd_lso_e2(m0, &pbd_e2_parsing_data); } else { bxe_set_pbd_lso(m0, pbd_e1x); } } if (pbd_e2_parsing_data) { pbd_e2->parsing_data = htole32(pbd_e2_parsing_data); } /* prepare remaining BDs, start tx bd contains first seg/frag */ for (i = 1; i < nsegs ; i++) { bd_prod = TX_BD_NEXT(bd_prod); tx_data_bd = &fp->tx_chain[TX_BD(bd_prod)].reg_bd; tx_data_bd->addr_lo = htole32(U64_LO(segs[i].ds_addr)); tx_data_bd->addr_hi = htole32(U64_HI(segs[i].ds_addr)); tx_data_bd->nbytes = htole16(segs[i].ds_len); if (tx_total_pkt_size_bd == NULL) { tx_total_pkt_size_bd = tx_data_bd; } total_pkt_size += tx_data_bd->nbytes; } BLOGD(sc, DBG_TX, "last bd %p\n", tx_data_bd); if (tx_total_pkt_size_bd != NULL) { tx_total_pkt_size_bd->total_pkt_bytes = total_pkt_size; } if (__predict_false(sc->debug & DBG_TX)) { tmp_bd = tx_buf->first_bd; for (i = 0; i < nbds; i++) { if (i == 0) { BLOGD(sc, DBG_TX, "TX Strt: %p bd=%d nbd=%d vlan=0x%x " "bd_flags=0x%x hdr_nbds=%d\n", tx_start_bd, tmp_bd, le16toh(tx_start_bd->nbd), 
le16toh(tx_start_bd->vlan_or_ethertype), tx_start_bd->bd_flags.as_bitfield, (tx_start_bd->general_data & ETH_TX_START_BD_HDR_NBDS)); } else if (i == 1) { if (pbd_e1x) { BLOGD(sc, DBG_TX, "-> Prse: %p bd=%d global=0x%x ip_hlen_w=%u " "ip_id=%u lso_mss=%u tcp_flags=0x%x csum=0x%x " "tcp_seq=%u total_hlen_w=%u\n", pbd_e1x, tmp_bd, pbd_e1x->global_data, pbd_e1x->ip_hlen_w, pbd_e1x->ip_id, pbd_e1x->lso_mss, pbd_e1x->tcp_flags, pbd_e1x->tcp_pseudo_csum, pbd_e1x->tcp_send_seq, le16toh(pbd_e1x->total_hlen_w)); } else { /* if (pbd_e2) */ BLOGD(sc, DBG_TX, "-> Parse: %p bd=%d dst=%02x:%02x:%02x " "src=%02x:%02x:%02x parsing_data=0x%x\n", pbd_e2, tmp_bd, pbd_e2->data.mac_addr.dst_hi, pbd_e2->data.mac_addr.dst_mid, pbd_e2->data.mac_addr.dst_lo, pbd_e2->data.mac_addr.src_hi, pbd_e2->data.mac_addr.src_mid, pbd_e2->data.mac_addr.src_lo, pbd_e2->parsing_data); } } if (i != 1) { /* skip the parse bd as it doesn't hold data */ tx_data_bd = &fp->tx_chain[TX_BD(tmp_bd)].reg_bd; BLOGD(sc, DBG_TX, "-> Frag: %p bd=%d nbytes=%d hi=0x%x lo=0x%x\n", tx_data_bd, tmp_bd, le16toh(tx_data_bd->nbytes), le32toh(tx_data_bd->addr_hi), le32toh(tx_data_bd->addr_lo)); } tmp_bd = TX_BD_NEXT(tmp_bd); } } BLOGD(sc, DBG_TX, "doorbell: nbds=%d bd=%u\n", nbds, bd_prod); /* update TX BD producer index value for next TX */ bd_prod = TX_BD_NEXT(bd_prod); /* * If the chain of tx_bd's describing this frame is adjacent to or spans * an eth_tx_next_bd element then we need to increment the nbds value. */ if (TX_BD_IDX(bd_prod) < nbds) { nbds++; } /* don't allow reordering of writes for nbd and packets */ mb(); fp->tx_db.data.prod += nbds; /* producer points to the next free tx_bd at this point */ fp->tx_pkt_prod++; fp->tx_bd_prod = bd_prod; DOORBELL(sc, fp->index, fp->tx_db.raw); fp->eth_q_stats.tx_pkts++; /* Prevent speculative reads from getting ahead of the status block. */ bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_READ); /* Prevent speculative reads from getting ahead of the doorbell. */ bus_space_barrier(sc->bar[BAR2].tag, sc->bar[BAR2].handle, 0, 0, BUS_SPACE_BARRIER_READ); return (0); } static void bxe_tx_start_locked(struct bxe_softc *sc, if_t ifp, struct bxe_fastpath *fp) { struct mbuf *m = NULL; int tx_count = 0; uint16_t tx_bd_avail; BXE_FP_TX_LOCK_ASSERT(fp); /* keep adding entries while there are frames to send */ while (!if_sendq_empty(ifp)) { /* * check for any frames to send * dequeue can still be NULL even if queue is not empty */ m = if_dequeue(ifp); if (__predict_false(m == NULL)) { break; } /* the mbuf now belongs to us */ fp->eth_q_stats.mbuf_alloc_tx++; /* * Put the frame into the transmit ring. If we don't have room, * place the mbuf back at the head of the TX queue, set the * OACTIVE flag, and wait for the NIC to drain the chain. */ if (__predict_false(bxe_tx_encap(fp, &m))) { fp->eth_q_stats.tx_encap_failures++; if (m != NULL) { /* mark the TX queue as full and return the frame */ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); if_sendq_prepend(ifp, m); fp->eth_q_stats.mbuf_alloc_tx--; fp->eth_q_stats.tx_queue_xoff++; } /* stop looking for more work */ break; } /* the frame was enqueued successfully */ tx_count++; /* send a copy of the frame to any BPF listeners.
*/ if_etherbpfmtap(ifp, m); tx_bd_avail = bxe_tx_avail(sc, fp); /* handle any completions if we're running low */ if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */ bxe_txeof(sc, fp); if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) { break; } } } /* all TX packets were dequeued and/or the tx ring is full */ if (tx_count > 0) { /* reset the TX watchdog timeout timer */ fp->watchdog_timer = BXE_TX_TIMEOUT; } } /* Legacy (non-RSS) dispatch routine */ static void bxe_tx_start(if_t ifp) { struct bxe_softc *sc; struct bxe_fastpath *fp; sc = if_getsoftc(ifp); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { BLOGW(sc, "Interface not running, ignoring transmit request\n"); return; } if (!sc->link_vars.link_up) { BLOGW(sc, "Interface link is down, ignoring transmit request\n"); return; } fp = &sc->fp[0]; if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) { fp->eth_q_stats.tx_queue_full_return++; return; } BXE_FP_TX_LOCK(fp); bxe_tx_start_locked(sc, ifp, fp); BXE_FP_TX_UNLOCK(fp); } #if __FreeBSD_version >= 901504 static int bxe_tx_mq_start_locked(struct bxe_softc *sc, if_t ifp, struct bxe_fastpath *fp, struct mbuf *m) { struct buf_ring *tx_br = fp->tx_br; struct mbuf *next; int depth, rc, tx_count; uint16_t tx_bd_avail; rc = tx_count = 0; BXE_FP_TX_LOCK_ASSERT(fp); if (sc->state != BXE_STATE_OPEN) { fp->eth_q_stats.bxe_tx_mq_sc_state_failures++; return ENETDOWN; } if (!tx_br) { BLOGE(sc, "Multiqueue TX and no buf_ring!\n"); return (EINVAL); } if (m != NULL) { rc = drbr_enqueue(ifp, tx_br, m); if (rc != 0) { fp->eth_q_stats.tx_soft_errors++; goto bxe_tx_mq_start_locked_exit; } } if (!sc->link_vars.link_up || !(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { fp->eth_q_stats.tx_request_link_down_failures++; goto bxe_tx_mq_start_locked_exit; } /* fetch the depth of the driver queue */ depth = drbr_inuse_drv(ifp, tx_br); if (depth > fp->eth_q_stats.tx_max_drbr_queue_depth) { fp->eth_q_stats.tx_max_drbr_queue_depth = depth; } /* keep adding entries while there are frames to send */ while ((next = drbr_peek(ifp, tx_br)) != NULL) { /* handle any completions if we're running low */ tx_bd_avail = bxe_tx_avail(sc, fp); if (tx_bd_avail < BXE_TX_CLEANUP_THRESHOLD) { /* bxe_txeof will set IFF_DRV_OACTIVE appropriately */ bxe_txeof(sc, fp); tx_bd_avail = bxe_tx_avail(sc, fp); if (tx_bd_avail < (BXE_TSO_MAX_SEGMENTS + 1)) { fp->eth_q_stats.bd_avail_too_less_failures++; m_freem(next); drbr_advance(ifp, tx_br); rc = ENOBUFS; break; } } /* the mbuf now belongs to us */ fp->eth_q_stats.mbuf_alloc_tx++; /* * Put the frame into the transmit ring. If we don't have room, * place the mbuf back at the head of the TX queue, set the * OACTIVE flag, and wait for the NIC to drain the chain. 
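* * The drbr ring protocol used here: drbr_peek() returns the head mbuf without dequeueing it, drbr_advance() commits the dequeue once bxe_tx_encap() succeeds, and drbr_putback() re-installs the (possibly defragmented) mbuf at the head on failure, so no frame is lost while the ring is full.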
*/ rc = bxe_tx_encap(fp, &next); if (__predict_false(rc != 0)) { fp->eth_q_stats.tx_encap_failures++; if (next != NULL) { /* mark the TX queue as full and save the frame */ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); drbr_putback(ifp, tx_br, next); fp->eth_q_stats.mbuf_alloc_tx--; fp->eth_q_stats.tx_frames_deferred++; } else drbr_advance(ifp, tx_br); /* stop looking for more work */ break; } /* the transmit frame was enqueued successfully */ tx_count++; /* send a copy of the frame to any BPF listeners */ if_etherbpfmtap(ifp, next); drbr_advance(ifp, tx_br); } /* all TX packets were dequeued and/or the tx ring is full */ if (tx_count > 0) { /* reset the TX watchdog timeout timer */ fp->watchdog_timer = BXE_TX_TIMEOUT; } bxe_tx_mq_start_locked_exit: /* If we didn't drain the drbr, enqueue a task in the future to do it. */ if (!drbr_empty(ifp, tx_br)) { fp->eth_q_stats.tx_mq_not_empty++; taskqueue_enqueue_timeout(fp->tq, &fp->tx_timeout_task, 1); } return (rc); } static void bxe_tx_mq_start_deferred(void *arg, int pending) { struct bxe_fastpath *fp = (struct bxe_fastpath *)arg; struct bxe_softc *sc = fp->sc; if_t ifp = sc->ifp; BXE_FP_TX_LOCK(fp); bxe_tx_mq_start_locked(sc, ifp, fp, NULL); BXE_FP_TX_UNLOCK(fp); } /* Multiqueue (TSS) dispatch routine. */ static int bxe_tx_mq_start(struct ifnet *ifp, struct mbuf *m) { struct bxe_softc *sc = if_getsoftc(ifp); struct bxe_fastpath *fp; int fp_index, rc; fp_index = 0; /* default is the first queue */ /* check if flowid is set */ if (BXE_VALID_FLOWID(m)) fp_index = (m->m_pkthdr.flowid % sc->num_queues); fp = &sc->fp[fp_index]; if (sc->state != BXE_STATE_OPEN) { fp->eth_q_stats.bxe_tx_mq_sc_state_failures++; return ENETDOWN; } if (BXE_FP_TX_TRYLOCK(fp)) { rc = bxe_tx_mq_start_locked(sc, ifp, fp, m); BXE_FP_TX_UNLOCK(fp); } else { rc = drbr_enqueue(ifp, fp->tx_br, m); taskqueue_enqueue(fp->tq, &fp->tx_task); } return (rc); } static void bxe_mq_flush(struct ifnet *ifp) { struct bxe_softc *sc = if_getsoftc(ifp); struct bxe_fastpath *fp; struct mbuf *m; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->state != BXE_FP_STATE_IRQ) { BLOGD(sc, DBG_LOAD, "Not clearing fp[%02d] buf_ring (state=%d)\n", fp->index, fp->state); continue; } if (fp->tx_br != NULL) { BLOGD(sc, DBG_LOAD, "Clearing fp[%02d] buf_ring\n", fp->index); BXE_FP_TX_LOCK(fp); while ((m = buf_ring_dequeue_sc(fp->tx_br)) != NULL) { m_freem(m); } BXE_FP_TX_UNLOCK(fp); } } if_qflush(ifp); } #endif /* FreeBSD_version >= 901504 */ static uint16_t bxe_cid_ilt_lines(struct bxe_softc *sc) { if (IS_SRIOV(sc)) { return ((BXE_FIRST_VF_CID + BXE_VF_CIDS) / ILT_PAGE_CIDS); } return (L2_ILT_LINES(sc)); } static void bxe_ilt_set_info(struct bxe_softc *sc) { struct ilt_client_info *ilt_client; struct ecore_ilt *ilt = sc->ilt; uint16_t line = 0; ilt->start_line = FUNC_ILT_BASE(SC_FUNC(sc)); BLOGD(sc, DBG_LOAD, "ilt starts at line %d\n", ilt->start_line); /* CDU */ ilt_client = &ilt->clients[ILT_CLIENT_CDU]; ilt_client->client_num = ILT_CLIENT_CDU; ilt_client->page_size = CDU_ILT_PAGE_SZ; ilt_client->flags = ILT_CLIENT_SKIP_MEM; ilt_client->start = line; line += bxe_cid_ilt_lines(sc); if (CNIC_SUPPORT(sc)) { line += CNIC_ILT_LINES; } ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[CDU]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); /* QM */ if (QM_INIT(sc->qm_cid_count)) { ilt_client = &ilt->clients[ILT_CLIENT_QM]; ilt_client->client_num = 
ILT_CLIENT_QM; ilt_client->page_size = QM_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; /* 4 bytes for each cid */ line += DIV_ROUND_UP(sc->qm_cid_count * QM_QUEUES_PER_FUNC * 4, QM_ILT_PAGE_SZ); ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[QM]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); } if (CNIC_SUPPORT(sc)) { /* SRC */ ilt_client = &ilt->clients[ILT_CLIENT_SRC]; ilt_client->client_num = ILT_CLIENT_SRC; ilt_client->page_size = SRC_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; line += SRC_ILT_LINES; ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[SRC]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); /* TM */ ilt_client = &ilt->clients[ILT_CLIENT_TM]; ilt_client->client_num = ILT_CLIENT_TM; ilt_client->page_size = TM_ILT_PAGE_SZ; ilt_client->flags = 0; ilt_client->start = line; line += TM_ILT_LINES; ilt_client->end = (line - 1); BLOGD(sc, DBG_LOAD, "ilt client[TM]: start %d, end %d, " "psz 0x%x, flags 0x%x, hw psz %d\n", ilt_client->start, ilt_client->end, ilt_client->page_size, ilt_client->flags, ilog2(ilt_client->page_size >> 12)); } KASSERT((line <= ILT_MAX_LINES), ("Invalid number of ILT lines!")); } static void bxe_set_fp_rx_buf_size(struct bxe_softc *sc) { int i; uint32_t rx_buf_size; rx_buf_size = (IP_HEADER_ALIGNMENT_PADDING + ETH_OVERHEAD + sc->mtu); for (i = 0; i < sc->num_queues; i++) { if(rx_buf_size <= MCLBYTES){ sc->fp[i].rx_buf_size = rx_buf_size; sc->fp[i].mbuf_alloc_size = MCLBYTES; }else if (rx_buf_size <= MJUMPAGESIZE){ sc->fp[i].rx_buf_size = rx_buf_size; sc->fp[i].mbuf_alloc_size = MJUMPAGESIZE; }else if (rx_buf_size <= (MJUMPAGESIZE + MCLBYTES)){ sc->fp[i].rx_buf_size = MCLBYTES; sc->fp[i].mbuf_alloc_size = MCLBYTES; }else if (rx_buf_size <= (2 * MJUMPAGESIZE)){ sc->fp[i].rx_buf_size = MJUMPAGESIZE; sc->fp[i].mbuf_alloc_size = MJUMPAGESIZE; }else { sc->fp[i].rx_buf_size = MCLBYTES; sc->fp[i].mbuf_alloc_size = MCLBYTES; } } } static int bxe_alloc_ilt_mem(struct bxe_softc *sc) { int rc = 0; if ((sc->ilt = (struct ecore_ilt *)malloc(sizeof(struct ecore_ilt), M_BXE_ILT, (M_NOWAIT | M_ZERO))) == NULL) { rc = 1; } return (rc); } static int bxe_alloc_ilt_lines_mem(struct bxe_softc *sc) { int rc = 0; if ((sc->ilt->lines = (struct ilt_line *)malloc((sizeof(struct ilt_line) * ILT_MAX_LINES), M_BXE_ILT, (M_NOWAIT | M_ZERO))) == NULL) { rc = 1; } return (rc); } static void bxe_free_ilt_mem(struct bxe_softc *sc) { if (sc->ilt != NULL) { free(sc->ilt, M_BXE_ILT); sc->ilt = NULL; } } static void bxe_free_ilt_lines_mem(struct bxe_softc *sc) { if (sc->ilt->lines != NULL) { free(sc->ilt->lines, M_BXE_ILT); sc->ilt->lines = NULL; } } static void bxe_free_mem(struct bxe_softc *sc) { int i; for (i = 0; i < L2_ILT_LINES(sc); i++) { bxe_dma_free(sc, &sc->context[i].vcxt_dma); sc->context[i].vcxt = NULL; sc->context[i].size = 0; } ecore_ilt_mem_op(sc, ILT_MEMOP_FREE); bxe_free_ilt_lines_mem(sc); } static int bxe_alloc_mem(struct bxe_softc *sc) { int context_size; int allocated; int i; /* * Allocate memory for CDU context: * This memory is allocated separately and not in the generic ILT * functions because CDU differs in few aspects: * 1. There can be multiple entities allocating memory for context - * regular L2, CNIC, and SRIOV drivers. 
Each separately controls * its own ILT lines. * 2. Since CDU page-size is not a single 4KB page (which is the case * for the other ILT clients), to be efficient we want to support * allocation of sub-page-size in the last entry. * 3. Context pointers are used by the driver to pass to FW / update * the context (for the other ILT clients the pointers are used just to * free the memory during unload). */ context_size = (sizeof(union cdu_context) * BXE_L2_CID_COUNT(sc)); for (i = 0, allocated = 0; allocated < context_size; i++) { sc->context[i].size = min(CDU_ILT_PAGE_SZ, (context_size - allocated)); if (bxe_dma_alloc(sc, sc->context[i].size, &sc->context[i].vcxt_dma, "cdu context") != 0) { bxe_free_mem(sc); return (-1); } sc->context[i].vcxt = (union cdu_context *)sc->context[i].vcxt_dma.vaddr; allocated += sc->context[i].size; } bxe_alloc_ilt_lines_mem(sc); BLOGD(sc, DBG_LOAD, "ilt=%p start_line=%u lines=%p\n", sc->ilt, sc->ilt->start_line, sc->ilt->lines); { for (i = 0; i < 4; i++) { BLOGD(sc, DBG_LOAD, "c%d page_size=%u start=%u end=%u num=%u flags=0x%x\n", i, sc->ilt->clients[i].page_size, sc->ilt->clients[i].start, sc->ilt->clients[i].end, sc->ilt->clients[i].client_num, sc->ilt->clients[i].flags); } } if (ecore_ilt_mem_op(sc, ILT_MEMOP_ALLOC)) { BLOGE(sc, "ecore_ilt_mem_op ILT_MEMOP_ALLOC failed\n"); bxe_free_mem(sc); return (-1); } return (0); } static void bxe_free_rx_bd_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i; sc = fp->sc; if (fp->rx_mbuf_tag == NULL) { return; } /* free all mbufs and unload all maps */ for (i = 0; i < RX_BD_TOTAL; i++) { if (fp->rx_mbuf_chain[i].m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_mbuf_chain[i].m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_chain[i].m_map); } if (fp->rx_mbuf_chain[i].m != NULL) { m_freem(fp->rx_mbuf_chain[i].m); fp->rx_mbuf_chain[i].m = NULL; fp->eth_q_stats.mbuf_alloc_rx--; } } } static void bxe_free_tpa_pool(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i, max_agg_queues; sc = fp->sc; if (fp->rx_mbuf_tag == NULL) { return; } max_agg_queues = MAX_AGG_QS(sc); /* release all mbufs and unload all DMA maps in the TPA pool */ for (i = 0; i < max_agg_queues; i++) { if (fp->rx_tpa_info[i].bd.m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_tpa_info[i].bd.m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info[i].bd.m_map); } if (fp->rx_tpa_info[i].bd.m != NULL) { m_freem(fp->rx_tpa_info[i].bd.m); fp->rx_tpa_info[i].bd.m = NULL; fp->eth_q_stats.mbuf_alloc_tpa--; } } } static void bxe_free_sge_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; int i; sc = fp->sc; if (fp->rx_sge_mbuf_tag == NULL) { return; } /* free all mbufs and unload all maps */ for (i = 0; i < RX_SGE_TOTAL; i++) { if (fp->rx_sge_mbuf_chain[i].m_map != NULL) { bus_dmamap_sync(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[i].m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[i].m_map); } if (fp->rx_sge_mbuf_chain[i].m != NULL) { m_freem(fp->rx_sge_mbuf_chain[i].m); fp->rx_sge_mbuf_chain[i].m = NULL; fp->eth_q_stats.mbuf_alloc_sge--; } } } static void bxe_free_fp_buffers(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; #if __FreeBSD_version >= 901504 if (fp->tx_br != NULL) { /* just in case bxe_mq_flush() wasn't called */ if (mtx_initialized(&fp->tx_mtx)) { struct mbuf *m; BXE_FP_TX_LOCK(fp); while ((m = buf_ring_dequeue_sc(fp->tx_br)) != NULL) m_freem(m); BXE_FP_TX_UNLOCK(fp);
} } #endif /* free all RX buffers */ bxe_free_rx_bd_chain(fp); bxe_free_tpa_pool(fp); bxe_free_sge_chain(fp); if (fp->eth_q_stats.mbuf_alloc_rx != 0) { BLOGE(sc, "failed to claim all rx mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_rx); } if (fp->eth_q_stats.mbuf_alloc_sge != 0) { BLOGE(sc, "failed to claim all sge mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_sge); } if (fp->eth_q_stats.mbuf_alloc_tpa != 0) { BLOGE(sc, "failed to claim all tpa mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_tpa); } if (fp->eth_q_stats.mbuf_alloc_tx != 0) { BLOGE(sc, "failed to release tx mbufs (%d left)\n", fp->eth_q_stats.mbuf_alloc_tx); } /* XXX verify all mbufs were reclaimed */ } } static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t prev_index, uint16_t index) { struct bxe_sw_rx_bd *rx_buf; struct eth_rx_bd *rx_bd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs, rc; rc = 0; /* allocate the new RX BD mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, fp->mbuf_alloc_size); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_bd_alloc_failed++; return (ENOBUFS); } fp->eth_q_stats.mbuf_alloc_rx++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = fp->rx_buf_size; /* map the mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_bd_mapping_failed++; m_freem(m); fp->eth_q_stats.mbuf_alloc_rx--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); /* release any existing RX BD mbuf mappings */ if (prev_index != index) { rx_buf = &fp->rx_mbuf_chain[prev_index]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* * We only get here from bxe_rxeof() when the maximum number * of rx buffers is less than RX_BD_USABLE. bxe_rxeof() already * holds the mbuf in the prev_index so it's OK to NULL it out * here without concern of a memory leak. */ fp->rx_mbuf_chain[prev_index].m = NULL; } rx_buf = &fp->rx_mbuf_chain[index]; if (rx_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, rx_buf->m_map); } /* save the mbuf and mapping info for a future packet */ map = (prev_index != index) ?
fp->rx_mbuf_chain[prev_index].m_map : rx_buf->m_map; rx_buf->m_map = fp->rx_mbuf_spare_map; fp->rx_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_mbuf_tag, rx_buf->m_map, BUS_DMASYNC_PREREAD); rx_buf->m = m; rx_bd = &fp->rx_chain[index]; rx_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); rx_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); return (rc); } static int bxe_alloc_rx_tpa_mbuf(struct bxe_fastpath *fp, int queue) { struct bxe_sw_tpa_info *tpa_info = &fp->rx_tpa_info[queue]; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs; int rc = 0; /* allocate the new TPA mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, fp->mbuf_alloc_size); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_tpa_alloc_failed++; return (ENOBUFS); } fp->eth_q_stats.mbuf_alloc_tpa++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = fp->rx_buf_size; /* map the mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_tpa_mapping_failed++; m_free(m); fp->eth_q_stats.mbuf_alloc_tpa--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); /* release any existing TPA mbuf mapping */ if (tpa_info->bd.m_map != NULL) { bus_dmamap_sync(fp->rx_mbuf_tag, tpa_info->bd.m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_mbuf_tag, tpa_info->bd.m_map); } /* save the mbuf and mapping info for the TPA mbuf */ map = tpa_info->bd.m_map; tpa_info->bd.m_map = fp->rx_tpa_info_mbuf_spare_map; fp->rx_tpa_info_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_mbuf_tag, tpa_info->bd.m_map, BUS_DMASYNC_PREREAD); tpa_info->bd.m = m; tpa_info->seg = segs[0]; return (rc); } /* * Allocate an mbuf and assign it to the receive scatter gather chain. The * caller must take care to save a copy of the existing mbuf in the SG mbuf * chain. 
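* * As with the RX BD and TPA mbufs above, the code below loads the new mbuf through a dedicated spare map and only swaps that map into the ring slot after bus_dmamap_load_mbuf_sg() succeeds, so a failed allocation never leaves the slot unmapped.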
*/ static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index) { struct bxe_sw_rx_bd *sge_buf; struct eth_rx_sge *sge; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *m; int nsegs; int rc = 0; /* allocate a new SGE mbuf */ m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, SGE_PAGE_SIZE); if (__predict_false(m == NULL)) { fp->eth_q_stats.mbuf_rx_sge_alloc_failed++; return (ENOMEM); } fp->eth_q_stats.mbuf_alloc_sge++; /* initialize the mbuf buffer length */ m->m_pkthdr.len = m->m_len = SGE_PAGE_SIZE; /* map the SGE mbuf into non-paged pool */ rc = bus_dmamap_load_mbuf_sg(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(rc != 0)) { fp->eth_q_stats.mbuf_rx_sge_mapping_failed++; m_freem(m); fp->eth_q_stats.mbuf_alloc_sge--; return (rc); } /* all mbufs must map to a single segment */ KASSERT((nsegs == 1), ("Too many segments, %d returned!", nsegs)); sge_buf = &fp->rx_sge_mbuf_chain[index]; /* release any existing SGE mbuf mapping */ if (sge_buf->m_map != NULL) { bus_dmamap_sync(fp->rx_sge_mbuf_tag, sge_buf->m_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(fp->rx_sge_mbuf_tag, sge_buf->m_map); } /* save the mbuf and mapping info for a future packet */ map = sge_buf->m_map; sge_buf->m_map = fp->rx_sge_mbuf_spare_map; fp->rx_sge_mbuf_spare_map = map; bus_dmamap_sync(fp->rx_sge_mbuf_tag, sge_buf->m_map, BUS_DMASYNC_PREREAD); sge_buf->m = m; sge = &fp->rx_sge_chain[index]; sge->addr_hi = htole32(U64_HI(segs[0].ds_addr)); sge->addr_lo = htole32(U64_LO(segs[0].ds_addr)); return (rc); } static __noinline int bxe_alloc_fp_buffers(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i, j, rc = 0; int ring_prod, cqe_ring_prod; int max_agg_queues; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; ring_prod = cqe_ring_prod = 0; fp->rx_bd_cons = 0; fp->rx_cq_cons = 0; /* allocate buffers for the RX BDs in RX BD chain */ for (j = 0; j < sc->max_rx_bufs; j++) { rc = bxe_alloc_rx_bd_mbuf(fp, ring_prod, ring_prod); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] rx chain (%d)\n", i, rc); goto bxe_alloc_fp_buffers_error; } ring_prod = RX_BD_NEXT(ring_prod); cqe_ring_prod = RCQ_NEXT(cqe_ring_prod); } fp->rx_bd_prod = ring_prod; fp->rx_cq_prod = cqe_ring_prod; fp->eth_q_stats.rx_calls = fp->eth_q_stats.rx_pkts = 0; max_agg_queues = MAX_AGG_QS(sc); fp->tpa_enable = TRUE; /* fill the TPA pool */ for (j = 0; j < max_agg_queues; j++) { rc = bxe_alloc_rx_tpa_mbuf(fp, j); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] TPA queue %d\n", i, j); fp->tpa_enable = FALSE; goto bxe_alloc_fp_buffers_error; } fp->rx_tpa_info[j].state = BXE_TPA_STATE_STOP; } if (fp->tpa_enable) { /* fill the RX SGE chain */ ring_prod = 0; for (j = 0; j < RX_SGE_USABLE; j++) { rc = bxe_alloc_rx_sge_mbuf(fp, ring_prod); if (rc != 0) { BLOGE(sc, "mbuf alloc fail for fp[%02d] SGE %d\n", i, ring_prod); fp->tpa_enable = FALSE; ring_prod = 0; goto bxe_alloc_fp_buffers_error; } ring_prod = RX_SGE_NEXT(ring_prod); } fp->rx_sge_prod = ring_prod; } } return (0); bxe_alloc_fp_buffers_error: /* unwind what was already allocated */ bxe_free_rx_bd_chain(fp); bxe_free_tpa_pool(fp); bxe_free_sge_chain(fp); return (ENOBUFS); } static void bxe_free_fw_stats_mem(struct bxe_softc *sc) { bxe_dma_free(sc, &sc->fw_stats_dma); sc->fw_stats_num = 0; sc->fw_stats_req_size = 0; sc->fw_stats_req = NULL; sc->fw_stats_req_mapping = 0; sc->fw_stats_data_size = 0; sc->fw_stats_data = NULL; sc->fw_stats_data_mapping = 0; } static int bxe_alloc_fw_stats_mem(struct bxe_softc *sc) { uint8_t 
num_queue_stats; int num_groups; /* number of queues for statistics is number of eth queues */ num_queue_stats = BXE_NUM_ETH_QUEUES(sc); /* * Total number of FW statistics requests = * 1 for port stats + 1 for PF stats + num of queues */ sc->fw_stats_num = (2 + num_queue_stats); /* * Request is built from stats_query_header and an array of * stats_query_cmd_group each of which contains STATS_QUERY_CMD_COUNT * rules. The real number of requests is configured in the * stats_query_header. */ num_groups = ((sc->fw_stats_num / STATS_QUERY_CMD_COUNT) + ((sc->fw_stats_num % STATS_QUERY_CMD_COUNT) ? 1 : 0)); BLOGD(sc, DBG_LOAD, "stats fw_stats_num %d num_groups %d\n", sc->fw_stats_num, num_groups); sc->fw_stats_req_size = (sizeof(struct stats_query_header) + (num_groups * sizeof(struct stats_query_cmd_group))); /* * Data for statistics requests + stats_counter. * stats_counter holds per-STORM counters that are incremented when * STORM has finished with the current request. Memory for FCoE * offloaded statistics is counted anyway, even if it will not be sent. * VF stats are not accounted for here as the data of VF stats is stored * in memory allocated by the VF, not here. */ sc->fw_stats_data_size = (sizeof(struct stats_counter) + sizeof(struct per_port_stats) + sizeof(struct per_pf_stats) + /* sizeof(struct fcoe_statistics_params) + */ (sizeof(struct per_queue_stats) * num_queue_stats)); if (bxe_dma_alloc(sc, (sc->fw_stats_req_size + sc->fw_stats_data_size), &sc->fw_stats_dma, "fw stats") != 0) { bxe_free_fw_stats_mem(sc); return (-1); } /* set up the shortcuts */ sc->fw_stats_req = (struct bxe_fw_stats_req *)sc->fw_stats_dma.vaddr; sc->fw_stats_req_mapping = sc->fw_stats_dma.paddr; sc->fw_stats_data = (struct bxe_fw_stats_data *)((uint8_t *)sc->fw_stats_dma.vaddr + sc->fw_stats_req_size); sc->fw_stats_data_mapping = (sc->fw_stats_dma.paddr + sc->fw_stats_req_size); BLOGD(sc, DBG_LOAD, "statistics request base address set to %#jx\n", (uintmax_t)sc->fw_stats_req_mapping); BLOGD(sc, DBG_LOAD, "statistics data base address set to %#jx\n", (uintmax_t)sc->fw_stats_data_mapping); return (0); } /* * Bits map: * 0-7 - Engine0 load counter. * 8-15 - Engine1 load counter. * 16 - Engine0 RESET_IN_PROGRESS bit. * 17 - Engine1 RESET_IN_PROGRESS bit. * 18 - Engine0 ONE_IS_LOADED. Set when there is at least one active * function on the engine. * 19 - Engine1 ONE_IS_LOADED. * 20 - Chip reset flow bit. When set, non-leader functions must wait for * the leaders of both engines to complete (check for both * RESET_IN_PROGRESS bits and not for just the one belonging to its * engine).
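* * Example encoding (an illustrative value, not from the original source): reading 0x00050003 from the register would mean an engine0 load counter of 3 (bits 0-7), an engine1 load counter of 0 (bits 8-15), and the engine0 RESET_IN_PROGRESS (bit 16) and ONE_IS_LOADED (bit 18) bits set.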
*/ #define BXE_RECOVERY_GLOB_REG MISC_REG_GENERIC_POR_1 #define BXE_PATH0_LOAD_CNT_MASK 0x000000ff #define BXE_PATH0_LOAD_CNT_SHIFT 0 #define BXE_PATH1_LOAD_CNT_MASK 0x0000ff00 #define BXE_PATH1_LOAD_CNT_SHIFT 8 #define BXE_PATH0_RST_IN_PROG_BIT 0x00010000 #define BXE_PATH1_RST_IN_PROG_BIT 0x00020000 #define BXE_GLOBAL_RESET_BIT 0x00040000 /* set the GLOBAL_RESET bit, should be run under rtnl lock */ static void bxe_set_reset_global(struct bxe_softc *sc) { uint32_t val; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val | BXE_GLOBAL_RESET_BIT); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* clear the GLOBAL_RESET bit, should be run under rtnl lock */ static void bxe_clear_reset_global(struct bxe_softc *sc) { uint32_t val; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val & (~BXE_GLOBAL_RESET_BIT)); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* checks the GLOBAL_RESET bit, should be run under rtnl lock */ static uint8_t bxe_reset_is_global(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "GLOB_REG=0x%08x\n", val); return (val & BXE_GLOBAL_RESET_BIT) ? TRUE : FALSE; } /* clear RESET_IN_PROGRESS bit for the engine, should be run under rtnl lock */ static void bxe_set_reset_done(struct bxe_softc *sc) { uint32_t val; uint32_t bit = SC_PATH(sc) ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); /* Clear the bit */ val &= ~bit; REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* set RESET_IN_PROGRESS for the engine, should be run under rtnl lock */ static void bxe_set_reset_in_progress(struct bxe_softc *sc) { uint32_t val; uint32_t bit = SC_PATH(sc) ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); /* Set the bit */ val |= bit; REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* check RESET_IN_PROGRESS bit for an engine, should be run under rtnl lock */ static uint8_t bxe_reset_is_done(struct bxe_softc *sc, int engine) { uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); uint32_t bit = engine ? BXE_PATH1_RST_IN_PROG_BIT : BXE_PATH0_RST_IN_PROG_BIT; /* return false if bit is set */ return (val & bit) ? FALSE : TRUE; } /* get the load status for an engine, should be run under rtnl lock */ static uint8_t bxe_get_load_status(struct bxe_softc *sc, int engine) { uint32_t mask = engine ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = engine ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; uint32_t val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old value for GLOB_REG=0x%08x\n", val); val = ((val & mask) >> shift); BLOGD(sc, DBG_LOAD, "Load mask engine %d = 0x%08x\n", engine, val); return (val != 0); } /* set pf load mark */ /* XXX needs to be under rtnl lock */ static void bxe_set_pf_load(struct bxe_softc *sc) { uint32_t val; uint32_t val1; uint32_t mask = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = SC_PATH(sc) ? 
BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old value for GLOB_REG=0x%08x\n", val); /* get the current counter value */ val1 = ((val & mask) >> shift); /* set bit of this PF */ val1 |= (1 << SC_ABS_FUNC(sc)); /* clear the old value */ val &= ~mask; /* set the new one */ val |= ((val1 << shift) & mask); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); } /* clear pf load mark */ /* XXX needs to be under rtnl lock */ static uint8_t bxe_clear_pf_load(struct bxe_softc *sc) { uint32_t val1, val; uint32_t mask = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_MASK : BXE_PATH0_LOAD_CNT_MASK; uint32_t shift = SC_PATH(sc) ? BXE_PATH1_LOAD_CNT_SHIFT : BXE_PATH0_LOAD_CNT_SHIFT; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); val = REG_RD(sc, BXE_RECOVERY_GLOB_REG); BLOGD(sc, DBG_LOAD, "Old GEN_REG_VAL=0x%08x\n", val); /* get the current counter value */ val1 = (val & mask) >> shift; /* clear bit of that PF */ val1 &= ~(1 << SC_ABS_FUNC(sc)); /* clear the old value */ val &= ~mask; /* set the new one */ val |= ((val1 << shift) & mask); REG_WR(sc, BXE_RECOVERY_GLOB_REG, val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG); return (val1 != 0); } /* send a load request to the mcp and analyze the response */ static int bxe_nic_load_request(struct bxe_softc *sc, uint32_t *load_code) { /* init fw_seq */ sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); BLOGD(sc, DBG_LOAD, "initial fw_seq 0x%04x\n", sc->fw_seq); /* get the current FW pulse sequence */ sc->fw_drv_pulse_wr_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_pulse_mb) & DRV_PULSE_SEQ_MASK); BLOGD(sc, DBG_LOAD, "initial drv_pulse 0x%04x\n", sc->fw_drv_pulse_wr_seq); /* load request */ (*load_code) = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_REQ, DRV_MSG_CODE_LOAD_REQ_WITH_LFA); /* if the MCP fails to respond we must abort */ if (!(*load_code)) { BLOGE(sc, "MCP response failure!\n"); return (-1); } /* if MCP refused then must abort */ if ((*load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED) { BLOGE(sc, "MCP refused load request\n"); return (-1); } return (0); } /* * Check whether another PF has already loaded the FW to the chip. In * virtualized environments a pf from another VM may have already initialized * the device, including loading the FW. */ static int bxe_nic_load_analyze_req(struct bxe_softc *sc, uint32_t load_code) { uint32_t my_fw, loaded_fw; /* is another pf loaded on this engine?
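* * (The version dword built below packs major/minor/revision/engineering into successive bytes; a hypothetical FW 7.13.1.0 would read back as 0x00010D07 = 7 | (13 << 8) | (1 << 16) | (0 << 24).)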
*/ if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) && (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) { /* build my FW version dword */ my_fw = (BCM_5710_FW_MAJOR_VERSION + (BCM_5710_FW_MINOR_VERSION << 8) + (BCM_5710_FW_REVISION_VERSION << 16) + (BCM_5710_FW_ENGINEERING_VERSION << 24)); /* read loaded FW from chip */ loaded_fw = REG_RD(sc, XSEM_REG_PRAM); BLOGD(sc, DBG_LOAD, "loaded FW 0x%08x / my FW 0x%08x\n", loaded_fw, my_fw); /* abort nic load if version mismatch */ if (my_fw != loaded_fw) { BLOGE(sc, "FW 0x%08x already loaded (mine is 0x%08x)", loaded_fw, my_fw); return (-1); } } return (0); } /* mark PMF if applicable */ static void bxe_nic_load_pmf(struct bxe_softc *sc, uint32_t load_code) { uint32_t ncsi_oem_data_addr; if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) || (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) || (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) { /* * Barrier here for ordering between the write to sc->port.pmf here * and the read of it from the periodic task. */ sc->port.pmf = 1; mb(); } else { sc->port.pmf = 0; } BLOGD(sc, DBG_LOAD, "pmf %d\n", sc->port.pmf); /* XXX needed? */ if (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) { if (SHMEM2_HAS(sc, ncsi_oem_data_addr)) { ncsi_oem_data_addr = SHMEM2_RD(sc, ncsi_oem_data_addr); if (ncsi_oem_data_addr) { REG_WR(sc, (ncsi_oem_data_addr + offsetof(struct glob_ncsi_oem_data, driver_version)), 0); } } } } static void bxe_read_mf_cfg(struct bxe_softc *sc) { int n = (CHIP_IS_MODE_4_PORT(sc) ? 2 : 1); int abs_func; int vn; if (BXE_NOMCP(sc)) { return; /* what should be the default value in this case? */ } /* * The formula for computing the absolute function number is... * For 2 port configuration (4 functions per port): * abs_func = 2 * vn + SC_PORT + SC_PATH * For 4 port configuration (2 functions per port): * abs_func = 4 * vn + 2 * SC_PORT + SC_PATH */ for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { abs_func = (n * (2 * vn + SC_PORT(sc)) + SC_PATH(sc)); if (abs_func >= E1H_FUNC_MAX) { break; } sc->devinfo.mf_info.mf_config[vn] = MFCFG_RD(sc, func_mf_config[abs_func].config); } if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) { BLOGD(sc, DBG_LOAD, "mf_cfg function disabled\n"); sc->flags |= BXE_MF_FUNC_DIS; } else { BLOGD(sc, DBG_LOAD, "mf_cfg function enabled\n"); sc->flags &= ~BXE_MF_FUNC_DIS; } } /* acquire split MCP access lock register */ static int bxe_acquire_alr(struct bxe_softc *sc) { uint32_t j, val; for (j = 0; j < 1000; j++) { val = (1UL << 31); REG_WR(sc, GRCBASE_MCP + 0x9c, val); val = REG_RD(sc, GRCBASE_MCP + 0x9c); if (val & (1L << 31)) break; DELAY(5000); } if (!(val & (1L << 31))) { BLOGE(sc, "Cannot acquire MCP access lock register\n"); return (-1); } return (0); } /* release split MCP access lock register */ static void bxe_release_alr(struct bxe_softc *sc) { REG_WR(sc, GRCBASE_MCP + 0x9c, 0); } static void bxe_fan_failure(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t ext_phy_config; /* mark the failure */ ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); ext_phy_config &= ~PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK; ext_phy_config |= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE; SHMEM_WR(sc, dev_info.port_hw_config[port].external_phy_config, ext_phy_config); /* log the failure */ BLOGW(sc, "Fan Failure has caused the driver to shut down " "the card to prevent permanent damage.
" "Please contact OEM Support for assistance\n"); /* XXX */ #if 1 bxe_panic(sc, ("Schedule task to handle fan failure\n")); #else /* * Schedule device reset (unload) * This is due to some boards consuming sufficient power when driver is * up to overheat if fan fails. */ bxe_set_bit(BXE_SP_RTNL_FAN_FAILURE, &sc->sp_rtnl_state); schedule_delayed_work(&sc->sp_rtnl_task, 0); #endif } /* this function is called upon a link interrupt */ static void bxe_link_attn(struct bxe_softc *sc) { uint32_t pause_enabled = 0; struct host_port_stats *pstats; int cmng_fns; struct bxe_fastpath *fp; int i; /* Make sure that we are synced with the current statistics */ bxe_stats_handle(sc, STATS_EVENT_STOP); BLOGI(sc, "link_vars phy_flags : %x\n", sc->link_vars.phy_flags); elink_link_update(&sc->link_params, &sc->link_vars); if (sc->link_vars.link_up) { /* dropless flow control */ if (!CHIP_IS_E1(sc) && sc->dropless_fc) { pause_enabled = 0; if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) { pause_enabled = 1; } REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_ETH_PAUSE_ENABLED_OFFSET(SC_PORT(sc))), pause_enabled); } if (sc->link_vars.mac_type != ELINK_MAC_TYPE_EMAC) { pstats = BXE_SP(sc, port_stats); /* reset old mac stats */ memset(&(pstats->mac_stx[0]), 0, sizeof(struct mac_stx)); } if (sc->state == BXE_STATE_OPEN) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } /* Restart tx when the link comes back. */ FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; taskqueue_enqueue(fp->tq, &fp->tx_task); } } if (sc->link_vars.link_up && sc->link_vars.line_speed) { cmng_fns = bxe_get_cmng_fns_mode(sc); if (cmng_fns != CMNG_FNS_NONE) { bxe_cmng_fns_init(sc, FALSE, cmng_fns); storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } else { /* rate shaping and fairness are disabled */ BLOGD(sc, DBG_LOAD, "single function mode without fairness\n"); } } bxe_link_report_locked(sc); if (IS_MF(sc)) { ; // XXX bxe_link_sync_notify(sc); } } static void bxe_attn_int_asserted(struct bxe_softc *sc, uint32_t asserted) { int port = SC_PORT(sc); uint32_t aeu_addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; uint32_t nig_int_mask_addr = port ? NIG_REG_MASK_INTERRUPT_PORT1 : NIG_REG_MASK_INTERRUPT_PORT0; uint32_t aeu_mask; uint32_t nig_mask = 0; uint32_t reg_addr; uint32_t igu_acked; uint32_t cnt; if (sc->attn_state & asserted) { BLOGE(sc, "IGU ERROR attn=0x%08x\n", asserted); } bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); aeu_mask = REG_RD(sc, aeu_addr); BLOGD(sc, DBG_INTR, "aeu_mask 0x%08x newly asserted 0x%08x\n", aeu_mask, asserted); aeu_mask &= ~(asserted & 0x3ff); BLOGD(sc, DBG_INTR, "new mask 0x%08x\n", aeu_mask); REG_WR(sc, aeu_addr, aeu_mask); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); BLOGD(sc, DBG_INTR, "attn_state 0x%08x\n", sc->attn_state); sc->attn_state |= asserted; BLOGD(sc, DBG_INTR, "new state 0x%08x\n", sc->attn_state); if (asserted & ATTN_HARD_WIRED_MASK) { if (asserted & ATTN_NIG_FOR_FUNC) { bxe_acquire_phy_lock(sc); /* save nig interrupt mask */ nig_mask = REG_RD(sc, nig_int_mask_addr); /* If nig_mask is not set, no need to call the update function */ if (nig_mask) { REG_WR(sc, nig_int_mask_addr, 0); bxe_link_attn(sc); } /* handle unicore attn? 
*/ } if (asserted & ATTN_SW_TIMER_4_FUNC) { BLOGD(sc, DBG_INTR, "ATTN_SW_TIMER_4_FUNC!\n"); } if (asserted & GPIO_2_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_2_FUNC!\n"); } if (asserted & GPIO_3_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_3_FUNC!\n"); } if (asserted & GPIO_4_FUNC) { BLOGD(sc, DBG_INTR, "GPIO_4_FUNC!\n"); } if (port == 0) { if (asserted & ATTN_GENERAL_ATTN_1) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_1!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_1, 0x0); } if (asserted & ATTN_GENERAL_ATTN_2) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_2!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_2, 0x0); } if (asserted & ATTN_GENERAL_ATTN_3) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_3!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_3, 0x0); } } else { if (asserted & ATTN_GENERAL_ATTN_4) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_4!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_4, 0x0); } if (asserted & ATTN_GENERAL_ATTN_5) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_5!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_5, 0x0); } if (asserted & ATTN_GENERAL_ATTN_6) { BLOGD(sc, DBG_INTR, "ATTN_GENERAL_ATTN_6!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_6, 0x0); } } } /* hardwired */ if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_addr = (HC_REG_COMMAND_REG + port*32 + COMMAND_REG_ATTN_BITS_SET); } else { reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_SET_UPPER*8); } BLOGD(sc, DBG_INTR, "about to mask 0x%08x at %s addr 0x%08x\n", asserted, (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU", reg_addr); REG_WR(sc, reg_addr, asserted); /* now set back the mask */ if (asserted & ATTN_NIG_FOR_FUNC) { /* * Verify that IGU ack through BAR was written before restoring * NIG mask. This loop should exit after 2-3 iterations max. */ if (sc->devinfo.int_block != INT_BLOCK_HC) { cnt = 0; do { igu_acked = REG_RD(sc, IGU_REG_ATTENTION_ACK_BITS); } while (((igu_acked & ATTN_NIG_FOR_FUNC) == 0) && (++cnt < MAX_IGU_ATTN_ACK_TO)); if (!igu_acked) { BLOGE(sc, "Failed to verify IGU ack on time\n"); } mb(); } REG_WR(sc, nig_int_mask_addr, nig_mask); bxe_release_phy_lock(sc); } } static void bxe_print_next_block(struct bxe_softc *sc, int idx, const char *blk) { BLOGI(sc, "%s%s", idx ? 
", " : "", blk); } static int bxe_check_blocks_with_parity0(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "BRB"); break; case AEU_INPUTS_ATTN_BITS_PARSER_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PARSER"); break; case AEU_INPUTS_ATTN_BITS_TSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TSDM"); break; case AEU_INPUTS_ATTN_BITS_SEARCHER_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "SEARCHER"); break; case AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TCM"); break; case AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TSEMI"); break; case AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XPB"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity1(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t *global, uint8_t print) { int i = 0; uint32_t cur_bit = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_PBF_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PBF"); break; case AEU_INPUTS_ATTN_BITS_QM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "QM"); break; case AEU_INPUTS_ATTN_BITS_TIMERS_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "TM"); break; case AEU_INPUTS_ATTN_BITS_XSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XSDM"); break; case AEU_INPUTS_ATTN_BITS_XCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XCM"); break; case AEU_INPUTS_ATTN_BITS_XSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "XSEMI"); break; case AEU_INPUTS_ATTN_BITS_DOORBELLQ_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DOORBELLQ"); break; case AEU_INPUTS_ATTN_BITS_NIG_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "NIG"); break; case AEU_INPUTS_ATTN_BITS_VAUX_PCI_CORE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "VAUX PCI CORE"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_DEBUG_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DEBUG"); break; case AEU_INPUTS_ATTN_BITS_USDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "USDM"); break; case AEU_INPUTS_ATTN_BITS_UCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "UCM"); break; case AEU_INPUTS_ATTN_BITS_USEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "USEMI"); break; case AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "UPB"); break; case AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CSDM"); break; case AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CCM"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity2(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_CSEMI_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CSEMI"); break; case AEU_INPUTS_ATTN_BITS_PXP_PARITY_ERROR: 
if (print) bxe_print_next_block(sc, par_num++, "PXP"); break; case AEU_IN_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PXPPCICLOCKCLIENT"); break; case AEU_INPUTS_ATTN_BITS_CFC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CFC"); break; case AEU_INPUTS_ATTN_BITS_CDU_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "CDU"); break; case AEU_INPUTS_ATTN_BITS_DMAE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "DMAE"); break; case AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "IGU"); break; case AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "MISC"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity3(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t *global, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP ROM"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP UMP RX"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP UMP TX"); *global = TRUE; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY: if (print) bxe_print_next_block(sc, par_num++, "MCP SCPAD"); *global = TRUE; break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static int bxe_check_blocks_with_parity4(struct bxe_softc *sc, uint32_t sig, int par_num, uint8_t print) { uint32_t cur_bit = 0; int i = 0; for (i = 0; sig; i++) { cur_bit = ((uint32_t)0x1 << i); if (sig & cur_bit) { switch (cur_bit) { case AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "PGLUE_B"); break; case AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR: if (print) bxe_print_next_block(sc, par_num++, "ATC"); break; } /* Clear the bit */ sig &= ~cur_bit; } } return (par_num); } static uint8_t bxe_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print, uint32_t *sig) { int par_num = 0; if ((sig[0] & HW_PRTY_ASSERT_SET_0) || (sig[1] & HW_PRTY_ASSERT_SET_1) || (sig[2] & HW_PRTY_ASSERT_SET_2) || (sig[3] & HW_PRTY_ASSERT_SET_3) || (sig[4] & HW_PRTY_ASSERT_SET_4)) { BLOGE(sc, "Parity error: HW block parity attention:\n" "[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x\n", (uint32_t)(sig[0] & HW_PRTY_ASSERT_SET_0), (uint32_t)(sig[1] & HW_PRTY_ASSERT_SET_1), (uint32_t)(sig[2] & HW_PRTY_ASSERT_SET_2), (uint32_t)(sig[3] & HW_PRTY_ASSERT_SET_3), (uint32_t)(sig[4] & HW_PRTY_ASSERT_SET_4)); if (print) BLOGI(sc, "Parity errors detected in blocks: "); par_num = bxe_check_blocks_with_parity0(sc, sig[0] & HW_PRTY_ASSERT_SET_0, par_num, print); par_num = bxe_check_blocks_with_parity1(sc, sig[1] & HW_PRTY_ASSERT_SET_1, par_num, global, print); par_num = bxe_check_blocks_with_parity2(sc, sig[2] & HW_PRTY_ASSERT_SET_2, par_num, print); par_num = bxe_check_blocks_with_parity3(sc, sig[3] & HW_PRTY_ASSERT_SET_3, par_num, global, print); par_num = bxe_check_blocks_with_parity4(sc, sig[4] & HW_PRTY_ASSERT_SET_4, par_num, print); if (print) BLOGI(sc, "\n"); return (TRUE); } return (FALSE); } static uint8_t bxe_chk_parity_attn(struct bxe_softc *sc, uint8_t *global, uint8_t print) { struct attn_route attn = { {0} }; int port = SC_PORT(sc); attn.sig[0] 
= REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + port*4);
    attn.sig[1] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_2_FUNC_0 + port*4);
    attn.sig[2] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_3_FUNC_0 + port*4);
    attn.sig[3] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4);

    /*
     * Since MCP attentions can't be disabled inside the block, we need to
     * read the AEU registers to see whether they're currently disabled.
     */
    attn.sig[3] &= ((REG_RD(sc,
                            (!port ? MISC_REG_AEU_ENABLE4_FUNC_0_OUT_0 :
                                     MISC_REG_AEU_ENABLE4_FUNC_1_OUT_0)) &
                     MISC_AEU_ENABLE_MCP_PRTY_BITS) |
                    ~MISC_AEU_ENABLE_MCP_PRTY_BITS);

    if (!CHIP_IS_E1x(sc))
        attn.sig[4] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_5_FUNC_0 + port*4);

    return (bxe_parity_attn(sc, global, print, attn.sig));
}

static void
bxe_attn_int_deasserted4(struct bxe_softc *sc,
                         uint32_t         attn)
{
    uint32_t val;

    if (attn & AEU_INPUTS_ATTN_BITS_PGLUE_HW_INTERRUPT) {
        val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS_CLR);
        BLOGE(sc, "PGLUE hw attention 0x%08x\n", val);
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR\n");
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR\n");
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN\n");
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN\n");
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN\n");
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN\n");
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN\n");
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN\n");
        if (val & PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW)
            BLOGE(sc, "PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW\n");
    }

    if (attn & AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT) {
        val = REG_RD(sc, ATC_REG_ATC_INT_STS_CLR);
        BLOGE(sc, "ATC hw attention 0x%08x\n", val);
        if (val & ATC_ATC_INT_STS_REG_ADDRESS_ERROR)
            BLOGE(sc, "ATC_ATC_INT_STS_REG_ADDRESS_ERROR\n");
        if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND)
            BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND\n");
        if (val & ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS)
            BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS\n");
        if (val & ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT)
            BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT\n");
        if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR)
            BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR\n");
        if (val & ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU)
            BLOGE(sc, "ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU\n");
    }

    if (attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR |
                AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)) {
        BLOGE(sc, "FATAL parity attention set4 0x%08x\n",
              (uint32_t)(attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR |
                                 AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)));
    }
}

static void
bxe_e1h_disable(struct bxe_softc *sc)
{
    int port = SC_PORT(sc);

    bxe_tx_disable(sc);

    REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 0);
}

static void
bxe_e1h_enable(struct bxe_softc *sc)
{
    int port = SC_PORT(sc);

    REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 1);

    // XXX bxe_tx_enable(sc);
}

/*
 * Called due to an MCP event (on pmf):
 *   reread the new bandwidth configuration
 *   configure the FW
 *   notify other functions
about the change */ static void bxe_config_mf_bw(struct bxe_softc *sc) { if (sc->link_vars.link_up) { bxe_cmng_fns_init(sc, TRUE, CMNG_FNS_MINMAX); // XXX bxe_link_sync_notify(sc); } storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } static void bxe_set_mf_bw(struct bxe_softc *sc) { bxe_config_mf_bw(sc); bxe_fw_command(sc, DRV_MSG_CODE_SET_MF_BW_ACK, 0); } static void bxe_handle_eee_event(struct bxe_softc *sc) { BLOGD(sc, DBG_INTR, "EEE - LLDP event\n"); bxe_fw_command(sc, DRV_MSG_CODE_EEE_RESULTS_ACK, 0); } #define DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED 3 static void bxe_drv_info_ether_stat(struct bxe_softc *sc) { struct eth_stats_info *ether_stat = &sc->sp->drv_info_to_mcp.ether_stat; strlcpy(ether_stat->version, BXE_DRIVER_VERSION, ETH_STAT_INFO_VERSION_LEN); /* XXX (+ MAC_PAD) taken from other driver... verify this is right */ sc->sp_objs[0].mac_obj.get_n_elements(sc, &sc->sp_objs[0].mac_obj, DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED, ether_stat->mac_local + MAC_PAD, MAC_PAD, ETH_ALEN); ether_stat->mtu_size = sc->mtu; ether_stat->feature_flags |= FEATURE_ETH_CHKSUM_OFFLOAD_MASK; if (if_getcapenable(sc->ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) { ether_stat->feature_flags |= FEATURE_ETH_LSO_MASK; } // XXX ether_stat->feature_flags |= ???; ether_stat->promiscuous_mode = 0; // (flags & PROMISC) ? 1 : 0; ether_stat->txq_size = sc->tx_ring_size; ether_stat->rxq_size = sc->rx_ring_size; } static void bxe_handle_drv_info_req(struct bxe_softc *sc) { enum drv_info_opcode op_code; uint32_t drv_info_ctl = SHMEM2_RD(sc, drv_info_control); /* if drv_info version supported by MFW doesn't match - send NACK */ if ((drv_info_ctl & DRV_INFO_CONTROL_VER_MASK) != DRV_INFO_CUR_VER) { bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_NACK, 0); return; } op_code = ((drv_info_ctl & DRV_INFO_CONTROL_OP_CODE_MASK) >> DRV_INFO_CONTROL_OP_CODE_SHIFT); memset(&sc->sp->drv_info_to_mcp, 0, sizeof(union drv_info_to_mcp)); switch (op_code) { case ETH_STATS_OPCODE: bxe_drv_info_ether_stat(sc); break; case FCOE_STATS_OPCODE: case ISCSI_STATS_OPCODE: default: /* if op code isn't supported - send NACK */ bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_NACK, 0); return; } /* * If we got drv_info attn from MFW then these fields are defined in * shmem2 for sure */ SHMEM2_WR(sc, drv_info_host_addr_lo, U64_LO(BXE_SP_MAPPING(sc, drv_info_to_mcp))); SHMEM2_WR(sc, drv_info_host_addr_hi, U64_HI(BXE_SP_MAPPING(sc, drv_info_to_mcp))); bxe_fw_command(sc, DRV_MSG_CODE_DRV_INFO_ACK, 0); } static void bxe_dcc_event(struct bxe_softc *sc, uint32_t dcc_event) { BLOGD(sc, DBG_INTR, "dcc_event 0x%08x\n", dcc_event); if (dcc_event & DRV_STATUS_DCC_DISABLE_ENABLE_PF) { /* * This is the only place besides the function initialization * where the sc->flags can change so it is done without any * locks */ if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) { BLOGD(sc, DBG_INTR, "mf_cfg function disabled\n"); sc->flags |= BXE_MF_FUNC_DIS; bxe_e1h_disable(sc); } else { BLOGD(sc, DBG_INTR, "mf_cfg function enabled\n"); sc->flags &= ~BXE_MF_FUNC_DIS; bxe_e1h_enable(sc); } dcc_event &= ~DRV_STATUS_DCC_DISABLE_ENABLE_PF; } if (dcc_event & DRV_STATUS_DCC_BANDWIDTH_ALLOCATION) { bxe_config_mf_bw(sc); dcc_event &= ~DRV_STATUS_DCC_BANDWIDTH_ALLOCATION; } /* Report results to MCP */ if (dcc_event) bxe_fw_command(sc, DRV_MSG_CODE_DCC_FAILURE, 0); else bxe_fw_command(sc, DRV_MSG_CODE_DCC_OK, 0); } static void bxe_pmf_update(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t val; sc->port.pmf = 1; BLOGD(sc, DBG_INTR, "pmf %d\n", sc->port.pmf); /* * We need the mb() to 
ensure the ordering between the writing to * sc->port.pmf here and reading it from the bxe_periodic_task(). */ mb(); /* queue a periodic task */ // XXX schedule task... // XXX bxe_dcbx_pmf_update(sc); /* enable nig attention */ val = (0xff0f | (1 << (SC_VN(sc) + 4))); if (sc->devinfo.int_block == INT_BLOCK_HC) { REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, val); REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, val); } else if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, val); } bxe_stats_handle(sc, STATS_EVENT_PMF); } static int bxe_mc_assert(struct bxe_softc *sc) { char last_idx; int i, rc = 0; uint32_t row0, row1, row2, row3; /* XSTORM */ last_idx = REG_RD8(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) BLOGE(sc, "XSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "XSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* TSTORM */ last_idx = REG_RD8(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "TSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "TSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* CSTORM */ last_idx = REG_RD8(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "CSTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "CSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } /* USTORM */ last_idx = REG_RD8(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_INDEX_OFFSET); if (last_idx) { BLOGE(sc, "USTORM_ASSERT_LIST_INDEX 0x%x\n", last_idx); } /* print the asserts */ for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) { row0 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i)); row1 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 4); row2 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 8); row3 = REG_RD(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_OFFSET(i) + 12); if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) { BLOGE(sc, "USTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x\n", i, row3, row2, row1, row0); rc++; } else { break; } } return (rc); } static void bxe_attn_int_deasserted3(struct 
bxe_softc *sc, uint32_t attn) { int func = SC_FUNC(sc); uint32_t val; if (attn & EVEREST_GEN_ATTN_IN_USE_MASK) { if (attn & BXE_PMF_LINK_ASSERT(sc)) { REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); bxe_read_mf_cfg(sc); sc->devinfo.mf_info.mf_config[SC_VN(sc)] = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); val = SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_status); if (val & DRV_STATUS_DCC_EVENT_MASK) bxe_dcc_event(sc, (val & DRV_STATUS_DCC_EVENT_MASK)); if (val & DRV_STATUS_SET_MF_BW) bxe_set_mf_bw(sc); if (val & DRV_STATUS_DRV_INFO_REQ) bxe_handle_drv_info_req(sc); if ((sc->port.pmf == 0) && (val & DRV_STATUS_PMF)) bxe_pmf_update(sc); if (val & DRV_STATUS_EEE_NEGOTIATION_RESULTS) bxe_handle_eee_event(sc); if (sc->link_vars.periodic_flags & ELINK_PERIODIC_FLAGS_LINK_EVENT) { /* sync with link */ bxe_acquire_phy_lock(sc); sc->link_vars.periodic_flags &= ~ELINK_PERIODIC_FLAGS_LINK_EVENT; bxe_release_phy_lock(sc); if (IS_MF(sc)) ; // XXX bxe_link_sync_notify(sc); bxe_link_report(sc); } /* * Always call it here: bxe_link_report() will * prevent the link indication duplication. */ bxe_link_status_update(sc); } else if (attn & BXE_MC_ASSERT_BITS) { BLOGE(sc, "MC assert!\n"); bxe_mc_assert(sc); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_10, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_9, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_8, 0); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_7, 0); bxe_panic(sc, ("MC assert!\n")); } else if (attn & BXE_MCP_ASSERT) { BLOGE(sc, "MCP assert!\n"); REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_11, 0); // XXX bxe_fw_dump(sc); } else { BLOGE(sc, "Unknown HW assert! (attn 0x%08x)\n", attn); } } if (attn & EVEREST_LATCHED_ATTN_IN_USE_MASK) { BLOGE(sc, "LATCHED attention 0x%08x (masked)\n", attn); if (attn & BXE_GRC_TIMEOUT) { val = CHIP_IS_E1(sc) ? 0 : REG_RD(sc, MISC_REG_GRC_TIMEOUT_ATTN); BLOGE(sc, "GRC time-out 0x%08x\n", val); } if (attn & BXE_GRC_RSV) { val = CHIP_IS_E1(sc) ? 
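/* the GRC reserved attention register is only read on non-E1 chips */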
0 : REG_RD(sc, MISC_REG_GRC_RSV_ATTN); BLOGE(sc, "GRC reserved 0x%08x\n", val); } REG_WR(sc, MISC_REG_AEU_CLR_LATCH_SIGNAL, 0x7ff); } } static void bxe_attn_int_deasserted2(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val0, mask0, val1, mask1; uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_CFC_HW_INTERRUPT) { val = REG_RD(sc, CFC_REG_CFC_INT_STS_CLR); BLOGE(sc, "CFC hw attention 0x%08x\n", val); /* CFC error attention */ if (val & 0x2) { BLOGE(sc, "FATAL error from CFC\n"); } } if (attn & AEU_INPUTS_ATTN_BITS_PXP_HW_INTERRUPT) { val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_0); BLOGE(sc, "PXP hw attention-0 0x%08x\n", val); /* RQ_USDMDP_FIFO_OVERFLOW */ if (val & 0x18000) { BLOGE(sc, "FATAL error from PXP\n"); } if (!CHIP_IS_E1x(sc)) { val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_1); BLOGE(sc, "PXP hw attention-1 0x%08x\n", val); } } #define PXP2_EOP_ERROR_BIT PXP2_PXP2_INT_STS_CLR_0_REG_WR_PGLUE_EOP_ERROR #define AEU_PXP2_HW_INT_BIT AEU_INPUTS_ATTN_BITS_PXPPCICLOCKCLIENT_HW_INTERRUPT if (attn & AEU_PXP2_HW_INT_BIT) { /* CQ47854 workaround do not panic on * PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR */ if (!CHIP_IS_E1x(sc)) { mask0 = REG_RD(sc, PXP2_REG_PXP2_INT_MASK_0); val1 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_1); mask1 = REG_RD(sc, PXP2_REG_PXP2_INT_MASK_1); val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_0); /* * If the only PXP2_EOP_ERROR_BIT is set in * STS0 and STS1 - clear it * * probably we lose additional attentions between * STS0 and STS_CLR0, in this case user will not * be notified about them */ if (val0 & mask0 & PXP2_EOP_ERROR_BIT && !(val1 & mask1)) val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_CLR_0); /* print the register, since no one can restore it */ BLOGE(sc, "PXP2_REG_PXP2_INT_STS_CLR_0 0x%08x\n", val0); /* * if PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR * then notify */ if (val0 & PXP2_EOP_ERROR_BIT) { BLOGE(sc, "PXP2_WR_PGLUE_EOP_ERROR\n"); /* * if only PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR is * set then clear attention from PXP2 block without panic */ if (((val0 & mask0) == PXP2_EOP_ERROR_BIT) && ((val1 & mask1) == 0)) attn &= ~AEU_PXP2_HW_INT_BIT; } } } if (attn & HW_INTERRUT_ASSERT_SET_2) { reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_2 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2); val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_2); REG_WR(sc, reg_offset, val); BLOGE(sc, "FATAL HW block attention set2 0x%x\n", (uint32_t)(attn & HW_INTERRUT_ASSERT_SET_2)); bxe_panic(sc, ("HW block attention set2\n")); } } static void bxe_attn_int_deasserted1(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val; if (attn & AEU_INPUTS_ATTN_BITS_DOORBELLQ_HW_INTERRUPT) { val = REG_RD(sc, DORQ_REG_DORQ_INT_STS_CLR); BLOGE(sc, "DB hw attention 0x%08x\n", val); /* DORQ discard attention */ if (val & 0x2) { BLOGE(sc, "FATAL error from DORQ\n"); } } if (attn & HW_INTERRUT_ASSERT_SET_1) { reg_offset = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_1 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1); val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_1); REG_WR(sc, reg_offset, val); BLOGE(sc, "FATAL HW block attention set1 0x%08x\n", (uint32_t)(attn & HW_INTERRUT_ASSERT_SET_1)); bxe_panic(sc, ("HW block attention set1\n")); } } static void bxe_attn_int_deasserted0(struct bxe_softc *sc, uint32_t attn) { int port = SC_PORT(sc); int reg_offset; uint32_t val; reg_offset = (port) ? 
MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0; if (attn & AEU_INPUTS_ATTN_BITS_SPIO5) { val = REG_RD(sc, reg_offset); val &= ~AEU_INPUTS_ATTN_BITS_SPIO5; REG_WR(sc, reg_offset, val); BLOGW(sc, "SPIO5 hw attention\n"); /* Fan failure attention */ elink_hw_reset_phy(&sc->link_params); bxe_fan_failure(sc); } if ((attn & sc->link_vars.aeu_int_mask) && sc->port.pmf) { bxe_acquire_phy_lock(sc); elink_handle_module_detect_int(&sc->link_params); bxe_release_phy_lock(sc); } if (attn & HW_INTERRUT_ASSERT_SET_0) { val = REG_RD(sc, reg_offset); val &= ~(attn & HW_INTERRUT_ASSERT_SET_0); REG_WR(sc, reg_offset, val); bxe_panic(sc, ("FATAL HW block attention set0 0x%lx\n", (attn & HW_INTERRUT_ASSERT_SET_0))); } } static void bxe_attn_int_deasserted(struct bxe_softc *sc, uint32_t deasserted) { struct attn_route attn; struct attn_route *group_mask; int port = SC_PORT(sc); int index; uint32_t reg_addr; uint32_t val; uint32_t aeu_mask; uint8_t global = FALSE; /* * Need to take HW lock because MCP or other port might also * try to handle this event. */ bxe_acquire_alr(sc); if (bxe_chk_parity_attn(sc, &global, TRUE)) { /* XXX * In case of parity errors don't handle attentions so that * other function would "see" parity errors. */ sc->recovery_state = BXE_RECOVERY_INIT; // XXX schedule a recovery task... /* disable HW interrupts */ bxe_int_disable(sc); bxe_release_alr(sc); return; } attn.sig[0] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + port*4); attn.sig[1] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_2_FUNC_0 + port*4); attn.sig[2] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_3_FUNC_0 + port*4); attn.sig[3] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 + port*4); if (!CHIP_IS_E1x(sc)) { attn.sig[4] = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_5_FUNC_0 + port*4); } else { attn.sig[4] = 0; } BLOGD(sc, DBG_INTR, "attn: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", attn.sig[0], attn.sig[1], attn.sig[2], attn.sig[3], attn.sig[4]); for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { if (deasserted & (1 << index)) { group_mask = &sc->attn_group[index]; BLOGD(sc, DBG_INTR, "group[%d]: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", index, group_mask->sig[0], group_mask->sig[1], group_mask->sig[2], group_mask->sig[3], group_mask->sig[4]); bxe_attn_int_deasserted4(sc, attn.sig[4] & group_mask->sig[4]); bxe_attn_int_deasserted3(sc, attn.sig[3] & group_mask->sig[3]); bxe_attn_int_deasserted1(sc, attn.sig[1] & group_mask->sig[1]); bxe_attn_int_deasserted2(sc, attn.sig[2] & group_mask->sig[2]); bxe_attn_int_deasserted0(sc, attn.sig[0] & group_mask->sig[0]); } } bxe_release_alr(sc); if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_addr = (HC_REG_COMMAND_REG + port*32 + COMMAND_REG_ATTN_BITS_CLR); } else { reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_CLR_UPPER*8); } val = ~deasserted; BLOGD(sc, DBG_INTR, "about to mask 0x%08x at %s addr 0x%08x\n", val, (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU", reg_addr); REG_WR(sc, reg_addr, val); if (~sc->attn_state & deasserted) { BLOGE(sc, "IGU error\n"); } reg_addr = port ? 
MISC_REG_AEU_MASK_ATTN_FUNC_1 : MISC_REG_AEU_MASK_ATTN_FUNC_0; bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); aeu_mask = REG_RD(sc, reg_addr); BLOGD(sc, DBG_INTR, "aeu_mask 0x%08x newly deasserted 0x%08x\n", aeu_mask, deasserted); aeu_mask |= (deasserted & 0x3ff); BLOGD(sc, DBG_INTR, "new mask 0x%08x\n", aeu_mask); REG_WR(sc, reg_addr, aeu_mask); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port); BLOGD(sc, DBG_INTR, "attn_state 0x%08x\n", sc->attn_state); sc->attn_state &= ~deasserted; BLOGD(sc, DBG_INTR, "new state 0x%08x\n", sc->attn_state); } static void bxe_attn_int(struct bxe_softc *sc) { /* read local copy of bits */ uint32_t attn_bits = le32toh(sc->def_sb->atten_status_block.attn_bits); uint32_t attn_ack = le32toh(sc->def_sb->atten_status_block.attn_bits_ack); uint32_t attn_state = sc->attn_state; /* look for changed bits */ uint32_t asserted = attn_bits & ~attn_ack & ~attn_state; uint32_t deasserted = ~attn_bits & attn_ack & attn_state; BLOGD(sc, DBG_INTR, "attn_bits 0x%08x attn_ack 0x%08x asserted 0x%08x deasserted 0x%08x\n", attn_bits, attn_ack, asserted, deasserted); if (~(attn_bits ^ attn_ack) & (attn_bits ^ attn_state)) { BLOGE(sc, "BAD attention state\n"); } /* handle bits that were raised */ if (asserted) { bxe_attn_int_asserted(sc, asserted); } if (deasserted) { bxe_attn_int_deasserted(sc, deasserted); } } static uint16_t bxe_update_dsb_idx(struct bxe_softc *sc) { struct host_sp_status_block *def_sb = sc->def_sb; uint16_t rc = 0; mb(); /* status block is written to by the chip */ if (sc->def_att_idx != def_sb->atten_status_block.attn_bits_index) { sc->def_att_idx = def_sb->atten_status_block.attn_bits_index; rc |= BXE_DEF_SB_ATT_IDX; } if (sc->def_idx != def_sb->sp_sb.running_index) { sc->def_idx = def_sb->sp_sb.running_index; rc |= BXE_DEF_SB_IDX; } mb(); return (rc); } static inline struct ecore_queue_sp_obj * bxe_cid_to_q_obj(struct bxe_softc *sc, uint32_t cid) { BLOGD(sc, DBG_SP, "retrieving fp from cid %d\n", cid); return (&sc->sp_objs[CID_TO_FP(cid, sc)].q_obj); } static void bxe_handle_mcast_eqe(struct bxe_softc *sc) { struct ecore_mcast_ramrod_params rparam; int rc; memset(&rparam, 0, sizeof(rparam)); rparam.mcast_obj = &sc->mcast_obj; BXE_MCAST_LOCK(sc); /* clear pending state for the last command */ sc->mcast_obj.raw.clear_pending(&sc->mcast_obj.raw); /* if there are pending mcast commands - send them */ if (sc->mcast_obj.check_pending(&sc->mcast_obj)) { rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT); if (rc < 0) { BLOGD(sc, DBG_SP, "ERROR: Failed to send pending mcast commands (%d)\n", rc); } } BXE_MCAST_UNLOCK(sc); } static void bxe_handle_classification_eqe(struct bxe_softc *sc, union event_ring_elem *elem) { unsigned long ramrod_flags = 0; int rc = 0; uint32_t cid = elem->message.data.eth_event.echo & BXE_SWCID_MASK; struct ecore_vlan_mac_obj *vlan_mac_obj; /* always push next commands out, don't wait here */ bit_set(&ramrod_flags, RAMROD_CONT); switch (le32toh(elem->message.data.eth_event.echo) >> BXE_SWCID_SHIFT) { case ECORE_FILTER_MAC_PENDING: BLOGD(sc, DBG_SP, "Got SETUP_MAC completions\n"); vlan_mac_obj = &sc->sp_objs[cid].mac_obj; break; case ECORE_FILTER_MCAST_PENDING: BLOGD(sc, DBG_SP, "Got SETUP_MCAST completions\n"); /* * This is only relevant for 57710 where multicast MACs are * configured as unicast MACs using the same ramrod. 
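     * The completion is therefore routed through the mcast object via
     * bxe_handle_mcast_eqe() below rather than through a per-queue
     * vlan_mac object.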
*/ bxe_handle_mcast_eqe(sc); return; default: BLOGE(sc, "Unsupported classification command: %d\n", elem->message.data.eth_event.echo); return; } rc = vlan_mac_obj->complete(sc, vlan_mac_obj, elem, &ramrod_flags); if (rc < 0) { BLOGE(sc, "Failed to schedule new commands (%d)\n", rc); } else if (rc > 0) { BLOGD(sc, DBG_SP, "Scheduled next pending commands...\n"); } } static void bxe_handle_rx_mode_eqe(struct bxe_softc *sc, union event_ring_elem *elem) { bxe_clear_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state); /* send rx_mode command again if was requested */ if (bxe_test_and_clear_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state)) { bxe_set_storm_rx_mode(sc); } } static void bxe_update_eq_prod(struct bxe_softc *sc, uint16_t prod) { storm_memset_eq_prod(sc, prod, SC_FUNC(sc)); wmb(); /* keep prod updates ordered */ } static void bxe_eq_int(struct bxe_softc *sc) { uint16_t hw_cons, sw_cons, sw_prod; union event_ring_elem *elem; uint8_t echo; uint32_t cid; uint8_t opcode; int spqe_cnt = 0; struct ecore_queue_sp_obj *q_obj; struct ecore_func_sp_obj *f_obj = &sc->func_obj; struct ecore_raw_obj *rss_raw = &sc->rss_conf_obj.raw; hw_cons = le16toh(*sc->eq_cons_sb); /* * The hw_cons range is 1-255, 257 - the sw_cons range is 0-254, 256. * when we get to the next-page we need to adjust so the loop * condition below will be met. The next element is the size of a * regular element and hence incrementing by 1 */ if ((hw_cons & EQ_DESC_MAX_PAGE) == EQ_DESC_MAX_PAGE) { hw_cons++; } /* * This function may never run in parallel with itself for a * specific sc and no need for a read memory barrier here. */ sw_cons = sc->eq_cons; sw_prod = sc->eq_prod; BLOGD(sc, DBG_SP,"EQ: hw_cons=%u sw_cons=%u eq_spq_left=0x%lx\n", hw_cons, sw_cons, atomic_load_acq_long(&sc->eq_spq_left)); for (; sw_cons != hw_cons; sw_prod = NEXT_EQ_IDX(sw_prod), sw_cons = NEXT_EQ_IDX(sw_cons)) { elem = &sc->eq[EQ_DESC(sw_cons)]; /* elem CID originates from FW, actually LE */ cid = SW_CID(elem->message.data.cfc_del_event.cid); opcode = elem->message.opcode; /* handle eq element */ switch (opcode) { case EVENT_RING_OPCODE_STAT_QUERY: BLOGD(sc, DBG_SP, "got statistics completion event %d\n", sc->stats_comp++); /* nothing to do with stats comp */ goto next_spqe; case EVENT_RING_OPCODE_CFC_DEL: /* handle according to cid range */ /* we may want to verify here that the sc state is HALTING */ BLOGD(sc, DBG_SP, "got delete ramrod for MULTI[%d]\n", cid); q_obj = bxe_cid_to_q_obj(sc, cid); if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_CFC_DEL)) { break; } goto next_spqe; case EVENT_RING_OPCODE_STOP_TRAFFIC: BLOGD(sc, DBG_SP, "got STOP TRAFFIC\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_STOP)) { break; } // XXX bxe_dcbx_set_params(sc, BXE_DCBX_STATE_TX_PAUSED); goto next_spqe; case EVENT_RING_OPCODE_START_TRAFFIC: BLOGD(sc, DBG_SP, "got START TRAFFIC\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_START)) { break; } // XXX bxe_dcbx_set_params(sc, BXE_DCBX_STATE_TX_RELEASED); goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_UPDATE: echo = elem->message.data.function_update_event.echo; if (echo == SWITCH_UPDATE) { BLOGD(sc, DBG_SP, "got FUNC_SWITCH_UPDATE ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_SWITCH_UPDATE)) { break; } } else { BLOGD(sc, DBG_SP, "AFEX: ramrod completed FUNCTION_UPDATE\n"); } goto next_spqe; case EVENT_RING_OPCODE_FORWARD_SETUP: q_obj = &bxe_fwd_sp_obj(sc, q_obj); if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_SETUP_TX_ONLY)) { break; } goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_START: 
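            /* the FUNCTION_START ramrod has completed; notify the function state object */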
BLOGD(sc, DBG_SP, "got FUNC_START ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_START)) { break; } goto next_spqe; case EVENT_RING_OPCODE_FUNCTION_STOP: BLOGD(sc, DBG_SP, "got FUNC_STOP ramrod\n"); if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_STOP)) { break; } goto next_spqe; } switch (opcode | sc->state) { case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BXE_STATE_OPENING_WAITING_PORT): cid = elem->message.data.eth_event.echo & BXE_SWCID_MASK; BLOGD(sc, DBG_SP, "got RSS_UPDATE ramrod. CID %d\n", cid); rss_raw->clear_pending(rss_raw); break; case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_SET_MAC | BXE_STATE_CLOSING_WAITING_HALT): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got (un)set mac ramrod\n"); bxe_handle_classification_eqe(sc, elem); break; case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_MULTICAST_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got mcast ramrod\n"); bxe_handle_mcast_eqe(sc); break; case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_OPEN): case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_DIAG): case (EVENT_RING_OPCODE_FILTERS_RULES | BXE_STATE_CLOSING_WAITING_HALT): BLOGD(sc, DBG_SP, "got rx_mode ramrod\n"); bxe_handle_rx_mode_eqe(sc, elem); break; default: /* unknown event log error and continue */ BLOGE(sc, "Unknown EQ event %d, sc->state 0x%x\n", elem->message.opcode, sc->state); } next_spqe: spqe_cnt++; } /* for */ mb(); atomic_add_acq_long(&sc->eq_spq_left, spqe_cnt); sc->eq_cons = sw_cons; sc->eq_prod = sw_prod; /* make sure that above mem writes were issued towards the memory */ wmb(); /* update producer */ bxe_update_eq_prod(sc, sc->eq_prod); } static void bxe_handle_sp_tq(void *context, int pending) { struct bxe_softc *sc = (struct bxe_softc *)context; uint16_t status; BLOGD(sc, DBG_SP, "---> SP TASK <---\n"); /* what work needs to be performed? */ status = bxe_update_dsb_idx(sc); BLOGD(sc, DBG_SP, "dsb status 0x%04x\n", status); /* HW attentions */ if (status & BXE_DEF_SB_ATT_IDX) { BLOGD(sc, DBG_SP, "---> ATTN INTR <---\n"); bxe_attn_int(sc); status &= ~BXE_DEF_SB_ATT_IDX; } /* SP events: STAT_QUERY and others */ if (status & BXE_DEF_SB_IDX) { /* handle EQ completions */ BLOGD(sc, DBG_SP, "---> EQ INTR <---\n"); bxe_eq_int(sc); bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, le16toh(sc->def_idx), IGU_INT_NOP, 1); status &= ~BXE_DEF_SB_IDX; } /* if status is non zero then something went wrong */ if (__predict_false(status)) { BLOGE(sc, "Got an unknown SP interrupt! (0x%04x)\n", status); } /* ack status block only if something was actually handled */ bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, le16toh(sc->def_att_idx), IGU_INT_ENABLE, 1); /* * Must be called after the EQ processing (since eq leads to sriov * ramrod completion flows). * This flow may have been scheduled by the arrival of a ramrod * completion, or by the sriov code rescheduling itself. 
 */
    // XXX bxe_iov_sp_task(sc);
}

static void
bxe_handle_fp_tq(void *context,
                 int  pending)
{
    struct bxe_fastpath *fp = (struct bxe_fastpath *)context;
    struct bxe_softc *sc = fp->sc;
    uint8_t more_tx = FALSE;
    uint8_t more_rx = FALSE;

    BLOGD(sc, DBG_INTR, "---> FP TASK QUEUE (%d) <---\n", fp->index);

    /* XXX
     * IFF_DRV_RUNNING state can't be checked here since we process
     * slowpath events on a client queue during setup. Instead
     * we need to add a "process/continue" flag here that the driver
     * can use to tell the task here not to do anything.
     */
#if 0
    if (!(if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING)) {
        return;
    }
#endif

    /* update the fastpath index */
    bxe_update_fp_sb_idx(fp);

    /* XXX add loop here if ever support multiple tx CoS */
    /* fp->txdata[cos] */
    if (bxe_has_tx_work(fp)) {
        BXE_FP_TX_LOCK(fp);
        more_tx = bxe_txeof(sc, fp);
        BXE_FP_TX_UNLOCK(fp);
    }

    if (bxe_has_rx_work(fp)) {
        more_rx = bxe_rxeof(sc, fp);
    }

    if (more_rx /*|| more_tx*/) {
        /* still more work to do */
        taskqueue_enqueue(fp->tq, &fp->tq_task);
        return;
    }

    bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID,
               le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1);
}

static void
bxe_task_fp(struct bxe_fastpath *fp)
{
    struct bxe_softc *sc = fp->sc;
    uint8_t more_tx = FALSE;
    uint8_t more_rx = FALSE;

    BLOGD(sc, DBG_INTR, "---> FP TASK ISR (%d) <---\n", fp->index);

    /* update the fastpath index */
    bxe_update_fp_sb_idx(fp);

    /* XXX add loop here if ever support multiple tx CoS */
    /* fp->txdata[cos] */
    if (bxe_has_tx_work(fp)) {
        BXE_FP_TX_LOCK(fp);
        more_tx = bxe_txeof(sc, fp);
        BXE_FP_TX_UNLOCK(fp);
    }

    if (bxe_has_rx_work(fp)) {
        more_rx = bxe_rxeof(sc, fp);
    }

    if (more_rx /*|| more_tx*/) {
        /* still more work to do, bail out of this ISR and process later */
        taskqueue_enqueue(fp->tq, &fp->tq_task);
        return;
    }

    /*
     * Here we write the fastpath index taken before doing any tx or rx work.
     * It is quite possible that other hw events occurred up to this point
     * and were already processed accordingly above. Since we are about to
     * write an older fastpath index, an interrupt may arrive in which we
     * end up doing no work.
     */
    bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID,
               le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1);
}

/*
 * Legacy interrupt entry point.
 *
 * Verifies that the controller generated the interrupt and
 * then calls a separate routine to handle the various
 * interrupt causes: link, RX, and TX.
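 *
 * For example, assuming CNIC_SUPPORT(sc) is 0, a status of 0x5 from
 * bxe_ack_int() would indicate slowpath/attention work (bit 0) plus
 * fastpath work on queue index 1 (0x2 << 1); the loop below services
 * and clears each bit in turn.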
 */
static void
bxe_intr_legacy(void *xsc)
{
    struct bxe_softc *sc = (struct bxe_softc *)xsc;
    struct bxe_fastpath *fp;
    uint16_t status, mask;
    int i;

    BLOGD(sc, DBG_INTR, "---> BXE INTx <---\n");

    /*
     * 0 for ustorm, 1 for cstorm
     * the bits returned from ack_int() are 0-15
     * bit 0 = attention status block
     * bit 1 = fast path status block
     * a mask of 0x2 or more = tx/rx event
     * a mask of 1 = slow path event
     */

    status = bxe_ack_int(sc);

    /* the interrupt is not for us */
    if (__predict_false(status == 0)) {
        BLOGD(sc, DBG_INTR, "Not our interrupt!\n");
        return;
    }

    BLOGD(sc, DBG_INTR, "Interrupt status 0x%04x\n", status);

    FOR_EACH_ETH_QUEUE(sc, i) {
        fp = &sc->fp[i];
        mask = (0x2 << (fp->index + CNIC_SUPPORT(sc)));
        if (status & mask) {
            /* acknowledge and disable further fastpath interrupts */
            bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);
            bxe_task_fp(fp);
            status &= ~mask;
        }
    }

    if (__predict_false(status & 0x1)) {
        /* acknowledge and disable further slowpath interrupts */
        bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);

        /* schedule slowpath handler */
        taskqueue_enqueue(sc->sp_tq, &sc->sp_tq_task);

        status &= ~0x1;
    }

    if (__predict_false(status)) {
        BLOGW(sc, "Unexpected fastpath status (0x%08x)!\n", status);
    }
}

/* slowpath interrupt entry point */
static void
bxe_intr_sp(void *xsc)
{
    struct bxe_softc *sc = (struct bxe_softc *)xsc;

    BLOGD(sc, (DBG_INTR | DBG_SP), "---> SP INTR <---\n");

    /* acknowledge and disable further slowpath interrupts */
    bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);

    /* schedule slowpath handler */
    taskqueue_enqueue(sc->sp_tq, &sc->sp_tq_task);
}

/* fastpath interrupt entry point */
static void
bxe_intr_fp(void *xfp)
{
    struct bxe_fastpath *fp = (struct bxe_fastpath *)xfp;
    struct bxe_softc *sc = fp->sc;

    BLOGD(sc, DBG_INTR, "---> FP INTR %d <---\n", fp->index);

    BLOGD(sc, DBG_INTR,
          "(cpu=%d) MSI-X fp=%d fw_sb=%d igu_sb=%d\n",
          curcpu, fp->index, fp->fw_sb_id, fp->igu_sb_id);

    /* acknowledge and disable further fastpath interrupts */
    bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);

    bxe_task_fp(fp);
}

/* Release all interrupts allocated by the driver. */
static void
bxe_interrupt_free(struct bxe_softc *sc)
{
    int i;

    switch (sc->interrupt_mode) {
    case INTR_MODE_INTX:
        BLOGD(sc, DBG_LOAD, "Releasing legacy INTx vector\n");
        if (sc->intr[0].resource != NULL) {
            bus_release_resource(sc->dev, SYS_RES_IRQ,
                                 sc->intr[0].rid, sc->intr[0].resource);
        }
        break;
    case INTR_MODE_MSI:
        for (i = 0; i < sc->intr_count; i++) {
            BLOGD(sc, DBG_LOAD, "Releasing MSI vector %d\n", i);
            if (sc->intr[i].resource && sc->intr[i].rid) {
                bus_release_resource(sc->dev, SYS_RES_IRQ,
                                     sc->intr[i].rid, sc->intr[i].resource);
            }
        }
        pci_release_msi(sc->dev);
        break;
    case INTR_MODE_MSIX:
        for (i = 0; i < sc->intr_count; i++) {
            BLOGD(sc, DBG_LOAD, "Releasing MSI-X vector %d\n", i);
            if (sc->intr[i].resource && sc->intr[i].rid) {
                bus_release_resource(sc->dev, SYS_RES_IRQ,
                                     sc->intr[i].rid, sc->intr[i].resource);
            }
        }
        pci_release_msi(sc->dev);
        break;
    default:
        /* nothing to do as initial allocation failed */
        break;
    }
}

/*
 * This function determines and allocates the appropriate
 * interrupt based on system capabilities and user request.
 *
 * The user may force a particular interrupt mode, specify
 * the number of receive queues, specify the method for
 * distributing received frames to receive queues, or use
 * the default settings which will automatically select the
 * best supported combination. In addition, the OS may or
 * may not support certain combinations of these settings.
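 * The fallback order implemented below is MSI-X first, then MSI,
 * then legacy INTx: each do/while block hands control to the next
 * mode whenever capabilities or allocation fall short.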
 * This routine attempts to reconcile the settings requested
 * by the user with the capabilities available from the system
 * to select the optimal combination of features.
 *
 * Returns:
 *   0 = Success, !0 = Failure.
 */
static int
bxe_interrupt_alloc(struct bxe_softc *sc)
{
    int msix_count = 0;
    int msi_count = 0;
    int num_requested = 0;
    int num_allocated = 0;
    int rid, i, j;
    int rc;

    /* get the number of available MSI/MSI-X interrupts from the OS */
    if (sc->interrupt_mode > 0) {
        if (sc->devinfo.pcie_cap_flags & BXE_MSIX_CAPABLE_FLAG) {
            msix_count = pci_msix_count(sc->dev);
        }

        if (sc->devinfo.pcie_cap_flags & BXE_MSI_CAPABLE_FLAG) {
            msi_count = pci_msi_count(sc->dev);
        }

        BLOGD(sc, DBG_LOAD, "%d MSI and %d MSI-X vectors available\n",
              msi_count, msix_count);
    }

    do { /* try allocating MSI-X interrupt resources (at least 2) */
        if (sc->interrupt_mode != INTR_MODE_MSIX) {
            break;
        }

        if (((sc->devinfo.pcie_cap_flags & BXE_MSIX_CAPABLE_FLAG) == 0) ||
            (msix_count < 2)) {
            sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */
            break;
        }

        /* ask for the necessary number of MSI-X vectors */
        num_requested = min((sc->num_queues + 1), msix_count);

        BLOGD(sc, DBG_LOAD, "Requesting %d MSI-X vectors\n", num_requested);

        num_allocated = num_requested;
        if ((rc = pci_alloc_msix(sc->dev, &num_allocated)) != 0) {
            BLOGE(sc, "MSI-X alloc failed! (%d)\n", rc);
            sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */
            break;
        }

        if (num_allocated < 2) { /* possible? */
            BLOGE(sc, "MSI-X allocation less than 2!\n");
            sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */
            pci_release_msi(sc->dev);
            break;
        }

        BLOGI(sc, "MSI-X vectors Requested %d and Allocated %d\n",
              num_requested, num_allocated);

        /* best effort so use the number of vectors allocated to us */
        sc->intr_count = num_allocated;
        sc->num_queues = num_allocated - 1;

        rid = 1; /* initial resource identifier */

        /* allocate the MSI-X vectors */
        for (i = 0; i < num_allocated; i++) {
            sc->intr[i].rid = (rid + i);

            if ((sc->intr[i].resource =
                 bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
                                        &sc->intr[i].rid, RF_ACTIVE)) == NULL) {
                BLOGE(sc, "Failed to map MSI-X[%d] (rid=%d)!\n", i, (rid + i));

                for (j = (i - 1); j >= 0; j--) {
                    bus_release_resource(sc->dev, SYS_RES_IRQ,
                                         sc->intr[j].rid, sc->intr[j].resource);
                }

                sc->intr_count = 0;
                sc->num_queues = 0;
                sc->interrupt_mode = INTR_MODE_MSI; /* try MSI next */
                pci_release_msi(sc->dev);
                break;
            }

            BLOGD(sc, DBG_LOAD, "Mapped MSI-X[%d] (rid=%d)\n", i, (rid + i));
        }
    } while (0);

    do { /* try allocating MSI vector resources (at least 1) */
        if (sc->interrupt_mode != INTR_MODE_MSI) {
            break;
        }

        if (((sc->devinfo.pcie_cap_flags & BXE_MSI_CAPABLE_FLAG) == 0) ||
            (msi_count < 1)) {
            sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */
            break;
        }

        /* ask for a single MSI vector */
        num_requested = 1;

        BLOGD(sc, DBG_LOAD, "Requesting %d MSI vectors\n", num_requested);

        num_allocated = num_requested;
        if ((rc = pci_alloc_msi(sc->dev, &num_allocated)) != 0) {
            BLOGE(sc, "MSI alloc failed (%d)!\n", rc);
            sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */
            break;
        }

        if (num_allocated != 1) { /* possible?
*/ BLOGE(sc, "MSI allocation is not 1!\n"); sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ pci_release_msi(sc->dev); break; } BLOGI(sc, "MSI vectors Requested %d and Allocated %d\n", num_requested, num_allocated); /* best effort so use the number of vectors allocated to us */ sc->intr_count = num_allocated; sc->num_queues = num_allocated; rid = 1; /* initial resource identifier */ sc->intr[0].rid = rid; if ((sc->intr[0].resource = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->intr[0].rid, RF_ACTIVE)) == NULL) { BLOGE(sc, "Failed to map MSI[0] (rid=%d)!\n", rid); sc->intr_count = 0; sc->num_queues = 0; sc->interrupt_mode = INTR_MODE_INTX; /* try INTx next */ pci_release_msi(sc->dev); break; } BLOGD(sc, DBG_LOAD, "Mapped MSI[0] (rid=%d)\n", rid); } while (0); do { /* try allocating INTx vector resources */ if (sc->interrupt_mode != INTR_MODE_INTX) { break; } BLOGD(sc, DBG_LOAD, "Requesting legacy INTx interrupt\n"); /* only one vector for INTx */ sc->intr_count = 1; sc->num_queues = 1; rid = 0; /* initial resource identifier */ sc->intr[0].rid = rid; if ((sc->intr[0].resource = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->intr[0].rid, (RF_ACTIVE | RF_SHAREABLE))) == NULL) { BLOGE(sc, "Failed to map INTx (rid=%d)!\n", rid); sc->intr_count = 0; sc->num_queues = 0; sc->interrupt_mode = -1; /* Failed! */ break; } BLOGD(sc, DBG_LOAD, "Mapped INTx (rid=%d)\n", rid); } while (0); if (sc->interrupt_mode == -1) { BLOGE(sc, "Interrupt Allocation: FAILED!!!\n"); rc = 1; } else { BLOGD(sc, DBG_LOAD, "Interrupt Allocation: interrupt_mode=%d, num_queues=%d\n", sc->interrupt_mode, sc->num_queues); rc = 0; } return (rc); } static void bxe_interrupt_detach(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; /* release interrupt resources */ for (i = 0; i < sc->intr_count; i++) { if (sc->intr[i].resource && sc->intr[i].tag) { BLOGD(sc, DBG_LOAD, "Disabling interrupt vector %d\n", i); bus_teardown_intr(sc->dev, sc->intr[i].resource, sc->intr[i].tag); } } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->tq) { taskqueue_drain(fp->tq, &fp->tq_task); taskqueue_drain(fp->tq, &fp->tx_task); while (taskqueue_cancel_timeout(fp->tq, &fp->tx_timeout_task, NULL)) taskqueue_drain_timeout(fp->tq, &fp->tx_timeout_task); taskqueue_free(fp->tq); fp->tq = NULL; } } if (sc->sp_tq) { taskqueue_drain(sc->sp_tq, &sc->sp_tq_task); taskqueue_free(sc->sp_tq); sc->sp_tq = NULL; } } /* * Enables interrupts and attach to the ISR. * * When using multiple MSI/MSI-X vectors the first vector * is used for slowpath operations while all remaining * vectors are used for fastpath operations. If only a * single MSI/MSI-X vector is used (SINGLE_ISR) then the * ISR must look for both slowpath and fastpath completions. 
*/ static int bxe_interrupt_attach(struct bxe_softc *sc) { struct bxe_fastpath *fp; int rc = 0; int i; snprintf(sc->sp_tq_name, sizeof(sc->sp_tq_name), "bxe%d_sp_tq", sc->unit); TASK_INIT(&sc->sp_tq_task, 0, bxe_handle_sp_tq, sc); sc->sp_tq = taskqueue_create(sc->sp_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->sp_tq); taskqueue_start_threads(&sc->sp_tq, 1, PWAIT, /* lower priority */ "%s", sc->sp_tq_name); for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; snprintf(fp->tq_name, sizeof(fp->tq_name), "bxe%d_fp%d_tq", sc->unit, i); TASK_INIT(&fp->tq_task, 0, bxe_handle_fp_tq, fp); TASK_INIT(&fp->tx_task, 0, bxe_tx_mq_start_deferred, fp); fp->tq = taskqueue_create(fp->tq_name, M_NOWAIT, taskqueue_thread_enqueue, &fp->tq); TIMEOUT_TASK_INIT(fp->tq, &fp->tx_timeout_task, 0, bxe_tx_mq_start_deferred, fp); taskqueue_start_threads(&fp->tq, 1, PI_NET, /* higher priority */ "%s", fp->tq_name); } /* setup interrupt handlers */ if (sc->interrupt_mode == INTR_MODE_MSIX) { BLOGD(sc, DBG_LOAD, "Enabling slowpath MSI-X[0] vector\n"); /* * Setup the interrupt handler. Note that we pass the driver instance * to the interrupt handler for the slowpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_sp, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI-X[0] vector (%d)\n", rc); goto bxe_interrupt_attach_exit; } bus_describe_intr(sc->dev, sc->intr[0].resource, sc->intr[0].tag, "sp"); /* bus_bind_intr(sc->dev, sc->intr[0].resource, 0); */ /* initialize the fastpath vectors (note the first was used for sp) */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; BLOGD(sc, DBG_LOAD, "Enabling MSI-X[%d] vector\n", (i + 1)); /* * Setup the interrupt handler. Note that we pass the * fastpath context to the interrupt handler in this * case. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[i + 1].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_fp, fp, &sc->intr[i + 1].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI-X[%d] vector (%d)\n", (i + 1), rc); goto bxe_interrupt_attach_exit; } bus_describe_intr(sc->dev, sc->intr[i + 1].resource, sc->intr[i + 1].tag, "fp%02d", i); /* bind the fastpath instance to a cpu */ if (sc->num_queues > 1) { bus_bind_intr(sc->dev, sc->intr[i + 1].resource, i); } fp->state = BXE_FP_STATE_IRQ; } } else if (sc->interrupt_mode == INTR_MODE_MSI) { BLOGD(sc, DBG_LOAD, "Enabling MSI[0] vector\n"); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. */ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_legacy, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate MSI[0] vector (%d)\n", rc); goto bxe_interrupt_attach_exit; } } else { /* (sc->interrupt_mode == INTR_MODE_INTX) */ BLOGD(sc, DBG_LOAD, "Enabling INTx interrupts\n"); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. 
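         * This is the same bxe_intr_legacy() handler registered for
         * the MSI case above; only the vector allocation differs.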
*/ if ((rc = bus_setup_intr(sc->dev, sc->intr[0].resource, (INTR_TYPE_NET | INTR_MPSAFE), NULL, bxe_intr_legacy, sc, &sc->intr[0].tag)) != 0) { BLOGE(sc, "Failed to allocate INTx interrupt (%d)\n", rc); goto bxe_interrupt_attach_exit; } } bxe_interrupt_attach_exit: return (rc); } static int bxe_init_hw_common_chip(struct bxe_softc *sc); static int bxe_init_hw_common(struct bxe_softc *sc); static int bxe_init_hw_port(struct bxe_softc *sc); static int bxe_init_hw_func(struct bxe_softc *sc); static void bxe_reset_common(struct bxe_softc *sc); static void bxe_reset_port(struct bxe_softc *sc); static void bxe_reset_func(struct bxe_softc *sc); static int bxe_gunzip_init(struct bxe_softc *sc); static void bxe_gunzip_end(struct bxe_softc *sc); static int bxe_init_firmware(struct bxe_softc *sc); static void bxe_release_firmware(struct bxe_softc *sc); static struct ecore_func_sp_drv_ops bxe_func_sp_drv = { .init_hw_cmn_chip = bxe_init_hw_common_chip, .init_hw_cmn = bxe_init_hw_common, .init_hw_port = bxe_init_hw_port, .init_hw_func = bxe_init_hw_func, .reset_hw_cmn = bxe_reset_common, .reset_hw_port = bxe_reset_port, .reset_hw_func = bxe_reset_func, .gunzip_init = bxe_gunzip_init, .gunzip_end = bxe_gunzip_end, .init_fw = bxe_init_firmware, .release_fw = bxe_release_firmware, }; static void bxe_init_func_obj(struct bxe_softc *sc) { sc->dmae_ready = 0; ecore_init_func_obj(sc, &sc->func_obj, BXE_SP(sc, func_rdata), BXE_SP_MAPPING(sc, func_rdata), BXE_SP(sc, func_afex_rdata), BXE_SP_MAPPING(sc, func_afex_rdata), &bxe_func_sp_drv); } static int bxe_init_hw(struct bxe_softc *sc, uint32_t load_code) { struct ecore_func_state_params func_params = { NULL }; int rc; /* prepare the parameters for function state transitions */ bit_set(&func_params.ramrod_flags, RAMROD_COMP_WAIT); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_HW_INIT; func_params.params.hw_init.load_phase = load_code; /* * Via a plethora of function pointers, we will eventually reach * bxe_init_hw_common(), bxe_init_hw_port(), or bxe_init_hw_func(). 
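     * Concretely: ecore_func_state_change() dispatches through the
     * bxe_func_sp_drv ops table registered by bxe_init_func_obj()
     * above, and the load_phase value set in func_params selects
     * which of the .init_hw_* callbacks is invoked.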
*/ rc = ecore_func_state_change(sc, &func_params); return (rc); } static void bxe_fill(struct bxe_softc *sc, uint32_t addr, int fill, uint32_t len) { uint32_t i; if (!(len % 4) && !(addr % 4)) { for (i = 0; i < len; i += 4) { REG_WR(sc, (addr + i), fill); } } else { for (i = 0; i < len; i++) { REG_WR8(sc, (addr + i), fill); } } } /* writes FP SP data to FW - data_size in dwords */ static void bxe_wr_fp_sb_data(struct bxe_softc *sc, int fw_sb_id, uint32_t *sb_data_p, uint32_t data_size) { int index; for (index = 0; index < data_size; index++) { REG_WR(sc, (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_OFFSET(fw_sb_id) + (sizeof(uint32_t) * index)), *(sb_data_p + index)); } } static void bxe_zero_fp_sb(struct bxe_softc *sc, int fw_sb_id) { struct hc_status_block_data_e2 sb_data_e2; struct hc_status_block_data_e1x sb_data_e1x; uint32_t *sb_data_p; uint32_t data_size = 0; if (!CHIP_IS_E1x(sc)) { memset(&sb_data_e2, 0, sizeof(struct hc_status_block_data_e2)); sb_data_e2.common.state = SB_DISABLED; sb_data_e2.common.p_func.vf_valid = FALSE; sb_data_p = (uint32_t *)&sb_data_e2; data_size = (sizeof(struct hc_status_block_data_e2) / sizeof(uint32_t)); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); sb_data_e1x.common.state = SB_DISABLED; sb_data_e1x.common.p_func.vf_valid = FALSE; sb_data_p = (uint32_t *)&sb_data_e1x; data_size = (sizeof(struct hc_status_block_data_e1x) / sizeof(uint32_t)); } bxe_wr_fp_sb_data(sc, fw_sb_id, sb_data_p, data_size); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_OFFSET(fw_sb_id)), 0, CSTORM_STATUS_BLOCK_SIZE); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SYNC_BLOCK_OFFSET(fw_sb_id)), 0, CSTORM_SYNC_BLOCK_SIZE); } static void bxe_wr_sp_sb_data(struct bxe_softc *sc, struct hc_sp_status_block_data *sp_sb_data) { int i; for (i = 0; i < (sizeof(struct hc_sp_status_block_data) / sizeof(uint32_t)); i++) { REG_WR(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_DATA_OFFSET(SC_FUNC(sc)) + (i * sizeof(uint32_t))), *((uint32_t *)sp_sb_data + i)); } } static void bxe_zero_sp_sb(struct bxe_softc *sc) { struct hc_sp_status_block_data sp_sb_data; memset(&sp_sb_data, 0, sizeof(struct hc_sp_status_block_data)); sp_sb_data.state = SB_DISABLED; sp_sb_data.p_func.vf_valid = FALSE; bxe_wr_sp_sb_data(sc, &sp_sb_data); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_OFFSET(SC_FUNC(sc))), 0, CSTORM_SP_STATUS_BLOCK_SIZE); bxe_fill(sc, (BAR_CSTRORM_INTMEM + CSTORM_SP_SYNC_BLOCK_OFFSET(SC_FUNC(sc))), 0, CSTORM_SP_SYNC_BLOCK_SIZE); } static void bxe_setup_ndsb_state_machine(struct hc_status_block_sm *hc_sm, int igu_sb_id, int igu_seg_id) { hc_sm->igu_sb_id = igu_sb_id; hc_sm->igu_seg_id = igu_seg_id; hc_sm->timer_value = 0xFF; hc_sm->time_to_expire = 0xFFFFFFFF; } static void bxe_map_sb_state_machines(struct hc_index_data *index_data) { /* zero out state machine indices */ /* rx indices */ index_data[HC_INDEX_ETH_RX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; /* tx indices */ index_data[HC_INDEX_OOO_TX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags &= ~HC_INDEX_DATA_SM_ID; index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags &= ~HC_INDEX_DATA_SM_ID; /* map indices */ /* rx indices */ index_data[HC_INDEX_ETH_RX_CQ_CONS].flags |= (SM_RX_ID << HC_INDEX_DATA_SM_ID_SHIFT); /* tx indices */ index_data[HC_INDEX_OOO_TX_CQ_CONS].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags |= (SM_TX_ID << 
HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags |= (SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT); } static void bxe_init_sb(struct bxe_softc *sc, bus_addr_t busaddr, int vfid, uint8_t vf_valid, int fw_sb_id, int igu_sb_id) { struct hc_status_block_data_e2 sb_data_e2; struct hc_status_block_data_e1x sb_data_e1x; struct hc_status_block_sm *hc_sm_p; uint32_t *sb_data_p; int igu_seg_id; int data_size; if (CHIP_INT_MODE_IS_BC(sc)) { igu_seg_id = HC_SEG_ACCESS_NORM; } else { igu_seg_id = IGU_SEG_ACCESS_NORM; } bxe_zero_fp_sb(sc, fw_sb_id); if (!CHIP_IS_E1x(sc)) { memset(&sb_data_e2, 0, sizeof(struct hc_status_block_data_e2)); sb_data_e2.common.state = SB_ENABLED; sb_data_e2.common.p_func.pf_id = SC_FUNC(sc); sb_data_e2.common.p_func.vf_id = vfid; sb_data_e2.common.p_func.vf_valid = vf_valid; sb_data_e2.common.p_func.vnic_id = SC_VN(sc); sb_data_e2.common.same_igu_sb_1b = TRUE; sb_data_e2.common.host_sb_addr.hi = U64_HI(busaddr); sb_data_e2.common.host_sb_addr.lo = U64_LO(busaddr); hc_sm_p = sb_data_e2.common.state_machine; sb_data_p = (uint32_t *)&sb_data_e2; data_size = (sizeof(struct hc_status_block_data_e2) / sizeof(uint32_t)); bxe_map_sb_state_machines(sb_data_e2.index_data); } else { memset(&sb_data_e1x, 0, sizeof(struct hc_status_block_data_e1x)); sb_data_e1x.common.state = SB_ENABLED; sb_data_e1x.common.p_func.pf_id = SC_FUNC(sc); sb_data_e1x.common.p_func.vf_id = 0xff; sb_data_e1x.common.p_func.vf_valid = FALSE; sb_data_e1x.common.p_func.vnic_id = SC_VN(sc); sb_data_e1x.common.same_igu_sb_1b = TRUE; sb_data_e1x.common.host_sb_addr.hi = U64_HI(busaddr); sb_data_e1x.common.host_sb_addr.lo = U64_LO(busaddr); hc_sm_p = sb_data_e1x.common.state_machine; sb_data_p = (uint32_t *)&sb_data_e1x; data_size = (sizeof(struct hc_status_block_data_e1x) / sizeof(uint32_t)); bxe_map_sb_state_machines(sb_data_e1x.index_data); } bxe_setup_ndsb_state_machine(&hc_sm_p[SM_RX_ID], igu_sb_id, igu_seg_id); bxe_setup_ndsb_state_machine(&hc_sm_p[SM_TX_ID], igu_sb_id, igu_seg_id); BLOGD(sc, DBG_LOAD, "Init FW SB %d\n", fw_sb_id); /* write indices to HW - PCI guarantees endianity of regpairs */ bxe_wr_fp_sb_data(sc, fw_sb_id, sb_data_p, data_size); } static inline uint8_t bxe_fp_qzone_id(struct bxe_fastpath *fp) { if (CHIP_IS_E1x(fp->sc)) { return (fp->cl_id + SC_PORT(fp->sc) * ETH_MAX_RX_CLIENTS_E1H); } else { return (fp->cl_id); } } static inline uint32_t bxe_rx_ustorm_prods_offset(struct bxe_softc *sc, struct bxe_fastpath *fp) { uint32_t offset = BAR_USTRORM_INTMEM; if (!CHIP_IS_E1x(sc)) { offset += USTORM_RX_PRODS_E2_OFFSET(fp->cl_qzone_id); } else { offset += USTORM_RX_PRODS_E1X_OFFSET(SC_PORT(sc), fp->cl_id); } return (offset); } static void bxe_init_eth_fp(struct bxe_softc *sc, int idx) { struct bxe_fastpath *fp = &sc->fp[idx]; uint32_t cids[ECORE_MULTI_TX_COS] = { 0 }; unsigned long q_type = 0; int cos; fp->sc = sc; fp->index = idx; fp->igu_sb_id = (sc->igu_base_sb + idx + CNIC_SUPPORT(sc)); fp->fw_sb_id = (sc->base_fw_ndsb + idx + CNIC_SUPPORT(sc)); fp->cl_id = (CHIP_IS_E1x(sc)) ? 
(SC_L_ID(sc) + idx) : /* want client ID same as IGU SB ID for non-E1 */ fp->igu_sb_id; fp->cl_qzone_id = bxe_fp_qzone_id(fp); /* setup sb indices */ if (!CHIP_IS_E1x(sc)) { fp->sb_index_values = fp->status_block.e2_sb->sb.index_values; fp->sb_running_index = fp->status_block.e2_sb->sb.running_index; } else { fp->sb_index_values = fp->status_block.e1x_sb->sb.index_values; fp->sb_running_index = fp->status_block.e1x_sb->sb.running_index; } /* init shortcut */ fp->ustorm_rx_prods_offset = bxe_rx_ustorm_prods_offset(sc, fp); fp->rx_cq_cons_sb = &fp->sb_index_values[HC_INDEX_ETH_RX_CQ_CONS]; /* * XXX If multiple CoS is ever supported then each fastpath structure * will need to maintain tx producer/consumer/dma/etc values *per* CoS. */ for (cos = 0; cos < sc->max_cos; cos++) { cids[cos] = idx; } fp->tx_cons_sb = &fp->sb_index_values[HC_INDEX_ETH_TX_CQ_CONS_COS0]; /* nothing more for a VF to do */ if (IS_VF(sc)) { return; } bxe_init_sb(sc, fp->sb_dma.paddr, BXE_VF_ID_INVALID, FALSE, fp->fw_sb_id, fp->igu_sb_id); bxe_update_fp_sb_idx(fp); /* Configure Queue State object */ bit_set(&q_type, ECORE_Q_TYPE_HAS_RX); bit_set(&q_type, ECORE_Q_TYPE_HAS_TX); ecore_init_queue_obj(sc, &sc->sp_objs[idx].q_obj, fp->cl_id, cids, sc->max_cos, SC_FUNC(sc), BXE_SP(sc, q_rdata), BXE_SP_MAPPING(sc, q_rdata), q_type); /* configure classification DBs */ ecore_init_mac_obj(sc, &sc->sp_objs[idx].mac_obj, fp->cl_id, idx, SC_FUNC(sc), BXE_SP(sc, mac_rdata), BXE_SP_MAPPING(sc, mac_rdata), ECORE_FILTER_MAC_PENDING, &sc->sp_state, ECORE_OBJ_TYPE_RX_TX, &sc->macs_pool); BLOGD(sc, DBG_LOAD, "fp[%d]: sb=%p cl_id=%d fw_sb=%d igu_sb=%d\n", idx, fp->status_block.e2_sb, fp->cl_id, fp->fw_sb_id, fp->igu_sb_id); } static inline void bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t rx_bd_prod, uint16_t rx_cq_prod, uint16_t rx_sge_prod) { struct ustorm_eth_rx_producers rx_prods = { 0 }; uint32_t i; /* update producers */ rx_prods.bd_prod = rx_bd_prod; rx_prods.cqe_prod = rx_cq_prod; rx_prods.sge_prod = rx_sge_prod; /* * Make sure that the BD and SGE data is updated before updating the * producers since FW might read the BD/SGE right after the producer * is updated. * This is only applicable for weak-ordered memory model archs such * as IA-64. The following barrier is also mandatory since FW will * assumes BDs must have buffers. */ wmb(); for (i = 0; i < (sizeof(rx_prods) / 4); i++) { REG_WR(sc, (fp->ustorm_rx_prods_offset + (i * 4)), ((uint32_t *)&rx_prods)[i]); } wmb(); /* keep prod updates ordered */ BLOGD(sc, DBG_RX, "RX fp[%d]: wrote prods bd_prod=%u cqe_prod=%u sge_prod=%u\n", fp->index, rx_bd_prod, rx_cq_prod, rx_sge_prod); } static void bxe_init_rx_rings(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->rx_bd_cons = 0; /* * Activate the BD ring... 
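         * (i.e., publish the initial rx bd, cqe and sge producers to
         * the ustorm shortcut offset computed in bxe_init_eth_fp())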
* Warning, this will generate an interrupt (to the TSTORM) * so this can only be done after the chip is initialized */ bxe_update_rx_prod(sc, fp, fp->rx_bd_prod, fp->rx_cq_prod, fp->rx_sge_prod); if (i != 0) { continue; } if (CHIP_IS_E1(sc)) { REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(SC_FUNC(sc))), U64_LO(fp->rcq_dma.paddr)); REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(SC_FUNC(sc)) + 4), U64_HI(fp->rcq_dma.paddr)); } } } static void bxe_init_tx_ring_one(struct bxe_fastpath *fp) { SET_FLAG(fp->tx_db.data.header.data, DOORBELL_HDR_T_DB_TYPE, 1); fp->tx_db.data.zero_fill1 = 0; fp->tx_db.data.prod = 0; fp->tx_pkt_prod = 0; fp->tx_pkt_cons = 0; fp->tx_bd_prod = 0; fp->tx_bd_cons = 0; fp->eth_q_stats.tx_pkts = 0; } static inline void bxe_init_tx_rings(struct bxe_softc *sc) { int i; for (i = 0; i < sc->num_queues; i++) { bxe_init_tx_ring_one(&sc->fp[i]); } } static void bxe_init_def_sb(struct bxe_softc *sc) { struct host_sp_status_block *def_sb = sc->def_sb; bus_addr_t mapping = sc->def_sb_dma.paddr; int igu_sp_sb_index; int igu_seg_id; int port = SC_PORT(sc); int func = SC_FUNC(sc); int reg_offset, reg_offset_en5; uint64_t section; int index, sindex; struct hc_sp_status_block_data sp_sb_data; memset(&sp_sb_data, 0, sizeof(struct hc_sp_status_block_data)); if (CHIP_INT_MODE_IS_BC(sc)) { igu_sp_sb_index = DEF_SB_IGU_ID; igu_seg_id = HC_SEG_ACCESS_DEF; } else { igu_sp_sb_index = sc->igu_dsb_id; igu_seg_id = IGU_SEG_ACCESS_DEF; } /* attentions */ section = ((uint64_t)mapping + offsetof(struct host_sp_status_block, atten_status_block)); def_sb->atten_status_block.status_block_id = igu_sp_sb_index; sc->attn_state = 0; reg_offset = (port) ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0; reg_offset_en5 = (port) ? MISC_REG_AEU_ENABLE5_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE5_FUNC_0_OUT_0; for (index = 0; index < MAX_DYNAMIC_ATTN_GRPS; index++) { /* take care of sig[0]..sig[4] */ for (sindex = 0; sindex < 4; sindex++) { sc->attn_group[index].sig[sindex] = REG_RD(sc, (reg_offset + (sindex * 0x4) + (0x10 * index))); } if (!CHIP_IS_E1x(sc)) { /* * enable5 is separate from the rest of the registers, * and the address skip is 4 and not 16 between the * different groups */ sc->attn_group[index].sig[4] = REG_RD(sc, (reg_offset_en5 + (0x4 * index))); } else { sc->attn_group[index].sig[4] = 0; } } if (sc->devinfo.int_block == INT_BLOCK_HC) { reg_offset = (port) ? 
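/*
 * A sketch of the AEU enable-register layout walked in the loop above,
 * assuming (as the code implies) four 32-bit signal words per attention
 * group spaced 0x10 bytes apart; example_base stands in for the
 * MISC_REG_AEU_ENABLE1_FUNC_x_OUT_0 base and is illustrative only:
 */
#if 0
static uint32_t
example_aeu_sig_addr(uint32_t example_base, int group, int sig)
{
        /* sig selects the word within the group, groups stride 0x10 */
        return (example_base + (sig * 0x4) + (0x10 * group));
}
#endif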
HC_REG_ATTN_MSG1_ADDR_L : HC_REG_ATTN_MSG0_ADDR_L; REG_WR(sc, reg_offset, U64_LO(section)); REG_WR(sc, (reg_offset + 4), U64_HI(section)); } else if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_ATTN_MSG_ADDR_L, U64_LO(section)); REG_WR(sc, IGU_REG_ATTN_MSG_ADDR_H, U64_HI(section)); } section = ((uint64_t)mapping + offsetof(struct host_sp_status_block, sp_sb)); bxe_zero_sp_sb(sc); /* PCI guarantees endianity of regpair */ sp_sb_data.state = SB_ENABLED; sp_sb_data.host_sb_addr.lo = U64_LO(section); sp_sb_data.host_sb_addr.hi = U64_HI(section); sp_sb_data.igu_sb_id = igu_sp_sb_index; sp_sb_data.igu_seg_id = igu_seg_id; sp_sb_data.p_func.pf_id = func; sp_sb_data.p_func.vnic_id = SC_VN(sc); sp_sb_data.p_func.vf_id = 0xff; bxe_wr_sp_sb_data(sc, &sp_sb_data); bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); } static void bxe_init_sp_ring(struct bxe_softc *sc) { atomic_store_rel_long(&sc->cq_spq_left, MAX_SPQ_PENDING); sc->spq_prod_idx = 0; sc->dsb_sp_prod = &sc->def_sb->sp_sb.index_values[HC_SP_INDEX_ETH_DEF_CONS]; sc->spq_prod_bd = sc->spq; sc->spq_last_bd = (sc->spq_prod_bd + MAX_SP_DESC_CNT); } static void bxe_init_eq_ring(struct bxe_softc *sc) { union event_ring_elem *elem; int i; for (i = 1; i <= NUM_EQ_PAGES; i++) { elem = &sc->eq[EQ_DESC_CNT_PAGE * i - 1]; elem->next_page.addr.hi = htole32(U64_HI(sc->eq_dma.paddr + BCM_PAGE_SIZE * (i % NUM_EQ_PAGES))); elem->next_page.addr.lo = htole32(U64_LO(sc->eq_dma.paddr + BCM_PAGE_SIZE * (i % NUM_EQ_PAGES))); } sc->eq_cons = 0; sc->eq_prod = NUM_EQ_DESC; sc->eq_cons_sb = &sc->def_sb->sp_sb.index_values[HC_SP_INDEX_EQ_CONS]; atomic_store_rel_long(&sc->eq_spq_left, (min((MAX_SP_DESC_CNT - MAX_SPQ_PENDING), NUM_EQ_DESC) - 1)); } static void bxe_init_internal_common(struct bxe_softc *sc) { int i; /* * Zero this manually as its initialization is currently missing * in the initTool. */ for (i = 0; i < (USTORM_AGG_DATA_SIZE >> 2); i++) { REG_WR(sc, (BAR_USTRORM_INTMEM + USTORM_AGG_DATA_OFFSET + (i * 4)), 0); } if (!CHIP_IS_E1x(sc)) { REG_WR8(sc, (BAR_CSTRORM_INTMEM + CSTORM_IGU_MODE_OFFSET), CHIP_INT_MODE_IS_BC(sc) ? HC_IGU_BC_MODE : HC_IGU_NBC_MODE); } } static void bxe_init_internal(struct bxe_softc *sc, uint32_t load_code) { switch (load_code) { case FW_MSG_CODE_DRV_LOAD_COMMON: case FW_MSG_CODE_DRV_LOAD_COMMON_CHIP: bxe_init_internal_common(sc); /* no break */ case FW_MSG_CODE_DRV_LOAD_PORT: /* nothing to do */ /* no break */ case FW_MSG_CODE_DRV_LOAD_FUNCTION: /* internal memory per function is initialized inside bxe_pf_init */ break; default: BLOGE(sc, "Unknown load_code (0x%x) from MCP\n", load_code); break; } } static void storm_memset_func_cfg(struct bxe_softc *sc, struct tstorm_eth_function_common_config *tcfg, uint16_t abs_fid) { uint32_t addr; size_t size; addr = (BAR_TSTRORM_INTMEM + TSTORM_FUNCTION_COMMON_CONFIG_OFFSET(abs_fid)); size = sizeof(struct tstorm_eth_function_common_config); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)tcfg); } static void bxe_func_init(struct bxe_softc *sc, struct bxe_func_init_params *p) { struct tstorm_eth_function_common_config tcfg = { 0 }; if (CHIP_IS_E1x(sc)) { storm_memset_func_cfg(sc, &tcfg, p->func_id); } /* Enable the function in the FW */ storm_memset_vf_to_pf(sc, p->func_id, p->pf_id); storm_memset_func_en(sc, p->func_id, 1); /* spq */ if (p->func_flgs & FUNC_FLG_SPQ) { storm_memset_spq_addr(sc, p->spq_map, p->func_id); REG_WR(sc, (XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PROD_OFFSET(p->func_id)), p->spq_prod); } } /* * Calculates the sum of vn_min_rates. 
* It's needed for further normalizing of the min_rates. * Returns: * sum of vn_min_rates. * or * 0 - if all the min_rates are 0. * In the latter case the fairness algorithm should be deactivated. * If all min rates are not zero then those that are zeroes will be set to 1. */ static void bxe_calc_vn_min(struct bxe_softc *sc, struct cmng_init_input *input) { uint32_t vn_cfg; uint32_t vn_min_rate; int all_zero = 1; int vn; for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { vn_cfg = sc->devinfo.mf_info.mf_config[vn]; vn_min_rate = (((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT) * 100); if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE) { /* skip hidden VNs */ vn_min_rate = 0; } else if (!vn_min_rate) { /* If min rate is zero - set it to 100 */ vn_min_rate = DEF_MIN_RATE; } else { all_zero = 0; } input->vnic_min_rate[vn] = vn_min_rate; } /* if ETS or all min rates are zeros - disable fairness */ if (BXE_IS_ETS_ENABLED(sc)) { input->flags.cmng_enables &= ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN; BLOGD(sc, DBG_LOAD, "Fairness disabled (ETS)\n"); } else if (all_zero) { input->flags.cmng_enables &= ~CMNG_FLAGS_PER_PORT_FAIRNESS_VN; BLOGD(sc, DBG_LOAD, "Fairness disabled (all MIN values are zeroes)\n"); } else { input->flags.cmng_enables |= CMNG_FLAGS_PER_PORT_FAIRNESS_VN; } } static inline uint16_t bxe_extract_max_cfg(struct bxe_softc *sc, uint32_t mf_cfg) { uint16_t max_cfg = ((mf_cfg & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT); if (!max_cfg) { BLOGD(sc, DBG_LOAD, "Max BW configured to 0 - using 100 instead\n"); max_cfg = 100; } return (max_cfg); } static void bxe_calc_vn_max(struct bxe_softc *sc, int vn, struct cmng_init_input *input) { uint16_t vn_max_rate; uint32_t vn_cfg = sc->devinfo.mf_info.mf_config[vn]; uint32_t max_cfg; if (vn_cfg & FUNC_MF_CFG_FUNC_HIDE) { vn_max_rate = 0; } else { max_cfg = bxe_extract_max_cfg(sc, vn_cfg); if (IS_MF_SI(sc)) { /* max_cfg in percent of link speed */ vn_max_rate = ((sc->link_vars.line_speed * max_cfg) / 100); } else { /* SD modes */ /* max_cfg is absolute in 100Mb units */ vn_max_rate = (max_cfg * 100); } } BLOGD(sc, DBG_LOAD, "vn %d: vn_max_rate %d\n", vn, vn_max_rate); input->vnic_max_rate[vn] = vn_max_rate; } static void bxe_cmng_fns_init(struct bxe_softc *sc, uint8_t read_cfg, uint8_t cmng_type) { struct cmng_init_input input; int vn; memset(&input, 0, sizeof(struct cmng_init_input)); input.port_rate = sc->link_vars.line_speed; if (cmng_type == CMNG_FNS_MINMAX) { /* read mf conf from shmem */ if (read_cfg) { bxe_read_mf_cfg(sc); } /* get VN min rate and enable fairness if not 0 */ bxe_calc_vn_min(sc, &input); /* get VN max rate */ if (sc->port.pmf) { for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { bxe_calc_vn_max(sc, vn, &input); } } /* always enable rate shaping and fairness */ input.flags.cmng_enables |= CMNG_FLAGS_PER_PORT_RATE_SHAPING_VN; ecore_init_cmng(&input, &sc->cmng); return; } /* rate shaping and fairness are disabled */ BLOGD(sc, DBG_LOAD, "rate shaping and fairness have been disabled\n"); } static int bxe_get_cmng_fns_mode(struct bxe_softc *sc) { if (CHIP_REV_IS_SLOW(sc)) { return (CMNG_FNS_NONE); } if (IS_MF(sc)) { return (CMNG_FNS_MINMAX); } return (CMNG_FNS_NONE); } static void storm_memset_cmng(struct bxe_softc *sc, struct cmng_init *cmng, uint8_t port) { int vn; int func; uint32_t addr; size_t size; addr = (BAR_XSTRORM_INTMEM + XSTORM_CMNG_PER_PORT_VARS_OFFSET(port)); size = sizeof(struct cmng_struct_per_port); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->port); for (vn = VN_0; vn < SC_MAX_VN_NUM(sc); vn++) { func =
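/*
 * Worked example of the min/max bandwidth extraction above: the MIN
 * field yields hundredths of a percent of line rate (a 20% minimum
 * becomes 2000), while in SD mode the MAX field is absolute in 100 Mb
 * units (25 becomes 2500 Mbps). A hedged sketch using the same
 * mask/shift pairs (example_ name is illustrative only):
 */
#if 0
static void
example_vn_rates(uint32_t vn_cfg, uint32_t line_speed, int si_mode)
{
        uint32_t vn_min_rate = (((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
                                 FUNC_MF_CFG_MIN_BW_SHIFT) * 100);
        uint32_t max_cfg = ((vn_cfg & FUNC_MF_CFG_MAX_BW_MASK) >>
                            FUNC_MF_CFG_MAX_BW_SHIFT);
        uint32_t vn_max_rate = si_mode ?
            ((line_speed * max_cfg) / 100) : /* percent of link speed */
            (max_cfg * 100);                 /* absolute, 100 Mb units */

        (void)vn_min_rate;
        (void)vn_max_rate;
}
#endif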
func_by_vn(sc, vn); addr = (BAR_XSTRORM_INTMEM + XSTORM_RATE_SHAPING_PER_VN_VARS_OFFSET(func)); size = sizeof(struct rate_shaping_vars_per_vn); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->vnic.vnic_max_rate[vn]); addr = (BAR_XSTRORM_INTMEM + XSTORM_FAIRNESS_PER_VN_VARS_OFFSET(func)); size = sizeof(struct fairness_vars_per_vn); ecore_storm_memset_struct(sc, addr, size, (uint32_t *)&cmng->vnic.vnic_min_rate[vn]); } } static void bxe_pf_init(struct bxe_softc *sc) { struct bxe_func_init_params func_init = { 0 }; struct event_ring_data eq_data = { { 0 } }; uint16_t flags; if (!CHIP_IS_E1x(sc)) { /* reset IGU PF statistics: MSIX + ATTN */ /* PF */ REG_WR(sc, (IGU_REG_STATISTIC_NUM_MESSAGE_SENT + (BXE_IGU_STAS_MSG_VF_CNT * 4) + ((CHIP_IS_MODE_4_PORT(sc) ? SC_FUNC(sc) : SC_VN(sc)) * 4)), 0); /* ATTN */ REG_WR(sc, (IGU_REG_STATISTIC_NUM_MESSAGE_SENT + (BXE_IGU_STAS_MSG_VF_CNT * 4) + (BXE_IGU_STAS_MSG_PF_CNT * 4) + ((CHIP_IS_MODE_4_PORT(sc) ? SC_FUNC(sc) : SC_VN(sc)) * 4)), 0); } /* function setup flags */ flags = (FUNC_FLG_STATS | FUNC_FLG_LEADING | FUNC_FLG_SPQ); /* * This flag is relevant for E1x only. * E2 doesn't have a TPA configuration in a function level. */ flags |= (if_getcapenable(sc->ifp) & IFCAP_LRO) ? FUNC_FLG_TPA : 0; func_init.func_flgs = flags; func_init.pf_id = SC_FUNC(sc); func_init.func_id = SC_FUNC(sc); func_init.spq_map = sc->spq_dma.paddr; func_init.spq_prod = sc->spq_prod_idx; bxe_func_init(sc, &func_init); memset(&sc->cmng, 0, sizeof(struct cmng_struct_per_port)); /* * Congestion management values depend on the link rate. * There is no active link so initial link rate is set to 10Gbps. * When the link comes up the congestion management values are * re-calculated according to the actual link rate. */ sc->link_vars.line_speed = SPEED_10000; bxe_cmng_fns_init(sc, TRUE, bxe_get_cmng_fns_mode(sc)); /* Only the PMF sets the HW */ if (sc->port.pmf) { storm_memset_cmng(sc, &sc->cmng, SC_PORT(sc)); } /* init Event Queue - PCI bus guarantees correct endianity */ eq_data.base_addr.hi = U64_HI(sc->eq_dma.paddr); eq_data.base_addr.lo = U64_LO(sc->eq_dma.paddr); eq_data.producer = sc->eq_prod; eq_data.index_id = HC_SP_INDEX_EQ_CONS; eq_data.sb_id = DEF_SB_ID; storm_memset_eq_data(sc, &eq_data, SC_FUNC(sc)); } static void bxe_hc_int_enable(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t addr = (port) ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0; uint32_t val = REG_RD(sc, addr); uint8_t msix = (sc->interrupt_mode == INTR_MODE_MSIX) ? TRUE : FALSE; uint8_t single_msix = ((sc->interrupt_mode == INTR_MODE_MSIX) && (sc->intr_count == 1)) ? TRUE : FALSE; uint8_t msi = (sc->interrupt_mode == INTR_MODE_MSI) ? TRUE : FALSE; if (msix) { val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0); val |= (HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); if (single_msix) { val |= HC_CONFIG_0_REG_SINGLE_ISR_EN_0; } } else if (msi) { val &= ~HC_CONFIG_0_REG_INT_LINE_EN_0; val |= (HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } else { val |= (HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); if (!CHIP_IS_E1(sc)) { BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x)\n", val, port, addr); REG_WR(sc, addr, val); val &= ~HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0; } } if (CHIP_IS_E1(sc)) { REG_WR(sc, (HC_REG_INT_MASK + port*4), 0x1FFFF); } BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x) mode %s\n", val, port, addr, ((msix) ?
"MSI-X" : ((msi) ? "MSI" : "INTx"))); REG_WR(sc, addr, val); /* ensure that HC_CONFIG is written before leading/trailing edge config */ mb(); if (!CHIP_IS_E1(sc)) { /* init leading/trailing edge */ if (IS_MF(sc)) { val = (0xee0f | (1 << (SC_VN(sc) + 4))); if (sc->port.pmf) { /* enable nig and gpio3 attention */ val |= 0x1100; } } else { val = 0xffff; } REG_WR(sc, (HC_REG_TRAILING_EDGE_0 + port*8), val); REG_WR(sc, (HC_REG_LEADING_EDGE_0 + port*8), val); } /* make sure that interrupts are indeed enabled from here on */ mb(); } static void bxe_igu_int_enable(struct bxe_softc *sc) { uint32_t val; uint8_t msix = (sc->interrupt_mode == INTR_MODE_MSIX) ? TRUE : FALSE; uint8_t single_msix = ((sc->interrupt_mode == INTR_MODE_MSIX) && (sc->intr_count == 1)) ? TRUE : FALSE; uint8_t msi = (sc->interrupt_mode == INTR_MODE_MSI) ? TRUE : FALSE; val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); if (msix) { val &= ~(IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_SINGLE_ISR_EN); val |= (IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_ATTN_BIT_EN); if (single_msix) { val |= IGU_PF_CONF_SINGLE_ISR_EN; } } else if (msi) { val &= ~IGU_PF_CONF_INT_LINE_EN; val |= (IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_ATTN_BIT_EN | IGU_PF_CONF_SINGLE_ISR_EN); } else { val &= ~IGU_PF_CONF_MSI_MSIX_EN; val |= (IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN | IGU_PF_CONF_SINGLE_ISR_EN); } /* clean previous status - need to configure igu prior to ack*/ if ((!msix) || single_msix) { REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); bxe_ack_int(sc); } val |= IGU_PF_CONF_FUNC_EN; BLOGD(sc, DBG_INTR, "write 0x%x to IGU mode %s\n", val, ((msix) ? "MSI-X" : ((msi) ? "MSI" : "INTx"))); REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); mb(); /* init leading/trailing edge */ if (IS_MF(sc)) { val = (0xee0f | (1 << (SC_VN(sc) + 4))); if (sc->port.pmf) { /* enable nig and gpio3 attention */ val |= 0x1100; } } else { val = 0xffff; } REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, val); /* make sure that interrupts are indeed enabled from here on */ mb(); } static void bxe_int_enable(struct bxe_softc *sc) { if (sc->devinfo.int_block == INT_BLOCK_HC) { bxe_hc_int_enable(sc); } else { bxe_igu_int_enable(sc); } } static void bxe_hc_int_disable(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t addr = (port) ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0; uint32_t val = REG_RD(sc, addr); /* * In E1 we must use only PCI configuration space to disable MSI/MSIX * capablility. 
It's forbidden to disable IGU_PF_CONF_MSI_MSIX_EN in HC * block */ if (CHIP_IS_E1(sc)) { /* * Since IGU_PF_CONF_MSI_MSIX_EN is still always on, use the mask register * to prevent the HC from sending interrupts after we exit the function */ REG_WR(sc, (HC_REG_INT_MASK + port*4), 0); val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } else { val &= ~(HC_CONFIG_0_REG_SINGLE_ISR_EN_0 | HC_CONFIG_0_REG_MSI_MSIX_INT_EN_0 | HC_CONFIG_0_REG_INT_LINE_EN_0 | HC_CONFIG_0_REG_ATTN_BIT_EN_0); } BLOGD(sc, DBG_INTR, "write %x to HC %d (addr 0x%x)\n", val, port, addr); /* flush all outstanding writes */ mb(); REG_WR(sc, addr, val); if (REG_RD(sc, addr) != val) { BLOGE(sc, "proper val not read from HC IGU!\n"); } } static void bxe_igu_int_disable(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); val &= ~(IGU_PF_CONF_MSI_MSIX_EN | IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN); BLOGD(sc, DBG_INTR, "write %x to IGU\n", val); /* flush all outstanding writes */ mb(); REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); if (REG_RD(sc, IGU_REG_PF_CONFIGURATION) != val) { BLOGE(sc, "proper val not read from IGU!\n"); } } static void bxe_int_disable(struct bxe_softc *sc) { if (sc->devinfo.int_block == INT_BLOCK_HC) { bxe_hc_int_disable(sc); } else { bxe_igu_int_disable(sc); } } static void bxe_nic_init(struct bxe_softc *sc, int load_code) { int i; for (i = 0; i < sc->num_queues; i++) { bxe_init_eth_fp(sc, i); } rmb(); /* ensure status block indices were read */ bxe_init_rx_rings(sc); bxe_init_tx_rings(sc); if (IS_VF(sc)) { return; } /* initialize MOD_ABS interrupts */ elink_init_mod_abs_int(sc, &sc->link_vars, sc->devinfo.chip_id, sc->devinfo.shmem_base, sc->devinfo.shmem2_base, SC_PORT(sc)); bxe_init_def_sb(sc); bxe_update_dsb_idx(sc); bxe_init_sp_ring(sc); bxe_init_eq_ring(sc); bxe_init_internal(sc, load_code); bxe_pf_init(sc); bxe_stats_init(sc); /* flush all before enabling interrupts */ mb(); bxe_int_enable(sc); /* check for SPIO5 */ bxe_attn_int_deasserted0(sc, REG_RD(sc, (MISC_REG_AEU_AFTER_INVERT_1_FUNC_0 + SC_PORT(sc)*4)) & AEU_INPUTS_ATTN_BITS_SPIO5); } static inline void bxe_init_objs(struct bxe_softc *sc) { /* mcast rules must be added to tx if tx switching is enabled */ ecore_obj_type o_type = (sc->flags & BXE_TX_SWITCHING) ? ECORE_OBJ_TYPE_RX_TX : ECORE_OBJ_TYPE_RX; /* RX_MODE controlling object */ ecore_init_rx_mode_obj(sc, &sc->rx_mode_obj); /* multicast configuration controlling object */ ecore_init_mcast_obj(sc, &sc->mcast_obj, sc->fp[0].cl_id, sc->fp[0].index, SC_FUNC(sc), SC_FUNC(sc), BXE_SP(sc, mcast_rdata), BXE_SP_MAPPING(sc, mcast_rdata), ECORE_FILTER_MCAST_PENDING, &sc->sp_state, o_type); /* Setup CAM credit pools */ ecore_init_mac_credit_pool(sc, &sc->macs_pool, SC_FUNC(sc), CHIP_IS_E1x(sc) ? VNICS_PER_PORT(sc) : VNICS_PER_PATH(sc)); ecore_init_vlan_credit_pool(sc, &sc->vlans_pool, SC_ABS_FUNC(sc) >> 1, CHIP_IS_E1x(sc) ? VNICS_PER_PORT(sc) : VNICS_PER_PATH(sc)); /* RSS configuration object */ ecore_init_rss_config_obj(sc, &sc->rss_conf_obj, sc->fp[0].cl_id, sc->fp[0].index, SC_FUNC(sc), SC_FUNC(sc), BXE_SP(sc, rss_rdata), BXE_SP_MAPPING(sc, rss_rdata), ECORE_FILTER_RSS_CONF_PENDING, &sc->sp_state, ECORE_OBJ_TYPE_RX); } /* * Initialize the function. This must be called before sending CLIENT_SETUP * for the first client.
*/ static inline int bxe_func_start(struct bxe_softc *sc) { struct ecore_func_state_params func_params = { NULL }; struct ecore_func_start_params *start_params = &func_params.params.start; /* Prepare parameters for function state transitions */ bit_set(&func_params.ramrod_flags, RAMROD_COMP_WAIT); func_params.f_obj = &sc->func_obj; func_params.cmd = ECORE_F_CMD_START; /* Function parameters */ start_params->mf_mode = sc->devinfo.mf_info.mf_mode; start_params->sd_vlan_tag = OVLAN(sc); if (CHIP_IS_E2(sc) || CHIP_IS_E3(sc)) { start_params->network_cos_mode = STATIC_COS; } else { /* CHIP_IS_E1X */ start_params->network_cos_mode = FW_WRR; } //start_params->gre_tunnel_mode = 0; //start_params->gre_tunnel_rss = 0; return (ecore_func_state_change(sc, &func_params)); } static int bxe_set_power_state(struct bxe_softc *sc, uint8_t state) { uint16_t pmcsr; /* If there is no power capability, silently succeed */ if (!(sc->devinfo.pcie_cap_flags & BXE_PM_CAPABLE_FLAG)) { BLOGW(sc, "No power capability\n"); return (0); } pmcsr = pci_read_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), 2); switch (state) { case PCI_PM_D0: pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), ((pmcsr & ~PCIM_PSTAT_DMASK) | PCIM_PSTAT_PME), 2); if (pmcsr & PCIM_PSTAT_DMASK) { /* delay required during transition out of D3hot */ DELAY(20000); } break; case PCI_PM_D3hot: /* XXX if there are other clients above don't shut down the power */ /* don't shut down the power for emulation and FPGA */ if (CHIP_REV_IS_SLOW(sc)) { return (0); } pmcsr &= ~PCIM_PSTAT_DMASK; pmcsr |= PCIM_PSTAT_D3; if (sc->wol) { pmcsr |= PCIM_PSTAT_PMEENABLE; } pci_write_config(sc->dev, (sc->devinfo.pcie_pm_cap_reg + PCIR_POWER_STATUS), pmcsr, 4); /* * No more memory access after this point until device is brought back * to D0 state. */ break; default: BLOGE(sc, "Can't support PCI power state = 0x%x pmcsr 0x%x\n", state, pmcsr); return (-1); } return (0); } /* return true if we succeeded in acquiring the lock */ static uint8_t bxe_trylock_hw_lock(struct bxe_softc *sc, uint32_t resource) { uint32_t lock_status; uint32_t resource_bit = (1 << resource); int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; BLOGD(sc, DBG_LOAD, "Trying to take a resource lock 0x%x\n", resource); /* Validating that the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { BLOGD(sc, DBG_LOAD, "resource(0x%x) > HW_LOCK_MAX_RESOURCE_VALUE(0x%x)\n", resource, HW_LOCK_MAX_RESOURCE_VALUE); return (FALSE); } if (func <= 5) { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_1 + func*8); } else { hw_lock_control_reg = (MISC_REG_DRIVER_CONTROL_7 + (func - 6)*8); } /* try to acquire the lock */ REG_WR(sc, hw_lock_control_reg + 4, resource_bit); lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { return (TRUE); } BLOGE(sc, "Failed to get a resource lock 0x%x func %d " "lock_status 0x%x resource_bit 0x%x\n", resource, func, lock_status, resource_bit); return (FALSE); } /* * Get the recovery leader resource id according to the engine this function * belongs to. Currently only 2 engines are supported.
*/ static int bxe_get_leader_lock_resource(struct bxe_softc *sc) { if (SC_PATH(sc)) { return (HW_LOCK_RESOURCE_RECOVERY_LEADER_1); } else { return (HW_LOCK_RESOURCE_RECOVERY_LEADER_0); } } /* try to acquire a leader lock for current engine */ static uint8_t bxe_trylock_leader_lock(struct bxe_softc *sc) { return (bxe_trylock_hw_lock(sc, bxe_get_leader_lock_resource(sc))); } static int bxe_release_leader_lock(struct bxe_softc *sc) { return (bxe_release_hw_lock(sc, bxe_get_leader_lock_resource(sc))); } /* close gates #2, #3 and #4 */ static void bxe_set_234_gates(struct bxe_softc *sc, uint8_t close) { uint32_t val; /* gates #2 and #4a are closed/opened for "not E1" only */ if (!CHIP_IS_E1(sc)) { /* #4 */ REG_WR(sc, PXP_REG_HST_DISCARD_DOORBELLS, !!close); /* #2 */ REG_WR(sc, PXP_REG_HST_DISCARD_INTERNAL_WRITES, !!close); } /* #3 */ if (CHIP_IS_E1x(sc)) { /* prevent interrupts from HC on both ports */ val = REG_RD(sc, HC_REG_CONFIG_1); REG_WR(sc, HC_REG_CONFIG_1, (!close) ? (val | HC_CONFIG_1_REG_BLOCK_DISABLE_1) : (val & ~(uint32_t)HC_CONFIG_1_REG_BLOCK_DISABLE_1)); val = REG_RD(sc, HC_REG_CONFIG_0); REG_WR(sc, HC_REG_CONFIG_0, (!close) ? (val | HC_CONFIG_0_REG_BLOCK_DISABLE_0) : (val & ~(uint32_t)HC_CONFIG_0_REG_BLOCK_DISABLE_0)); } else { /* Prevent incoming interrupts in IGU */ val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION); REG_WR(sc, IGU_REG_BLOCK_CONFIGURATION, (!close) ? (val | IGU_BLOCK_CONFIGURATION_REG_BLOCK_ENABLE) : (val & ~(uint32_t)IGU_BLOCK_CONFIGURATION_REG_BLOCK_ENABLE)); } BLOGD(sc, DBG_LOAD, "%s gates #2, #3 and #4\n", close ? "closing" : "opening"); wmb(); } /* poll for pending writes bit, it should get cleared in no more than 1s */ static int bxe_er_poll_igu_vq(struct bxe_softc *sc) { uint32_t cnt = 1000; uint32_t pend_bits = 0; do { pend_bits = REG_RD(sc, IGU_REG_PENDING_BITS_STATUS); if (pend_bits == 0) { break; } DELAY(1000); } while (--cnt > 0); if (cnt == 0) { BLOGE(sc, "Still pending IGU requests bits=0x%08x!\n", pend_bits); return (-1); } return (0); } #define SHARED_MF_CLP_MAGIC 0x80000000 /* 'magic' bit */ static void bxe_clp_reset_prep(struct bxe_softc *sc, uint32_t *magic_val) { /* Do some magic... */ uint32_t val = MFCFG_RD(sc, shared_mf_config.clp_mb); *magic_val = val & SHARED_MF_CLP_MAGIC; MFCFG_WR(sc, shared_mf_config.clp_mb, val | SHARED_MF_CLP_MAGIC); } /* restore the value of the 'magic' bit */ static void bxe_clp_reset_done(struct bxe_softc *sc, uint32_t magic_val) { /* Restore the 'magic' bit value... 
*/ uint32_t val = MFCFG_RD(sc, shared_mf_config.clp_mb); MFCFG_WR(sc, shared_mf_config.clp_mb, (val & (~SHARED_MF_CLP_MAGIC)) | magic_val); } /* prepare for MCP reset, takes care of CLP configurations */ static void bxe_reset_mcp_prep(struct bxe_softc *sc, uint32_t *magic_val) { uint32_t shmem; uint32_t validity_offset; /* set `magic' bit in order to save MF config */ if (!CHIP_IS_E1(sc)) { bxe_clp_reset_prep(sc, magic_val); } /* get shmem offset */ shmem = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); validity_offset = offsetof(struct shmem_region, validity_map[SC_PORT(sc)]); /* Clear validity map flags */ if (shmem > 0) { REG_WR(sc, shmem + validity_offset, 0); } } #define MCP_TIMEOUT 5000 /* 5 seconds (in ms) */ #define MCP_ONE_TIMEOUT 100 /* 100 ms */ static void bxe_mcp_wait_one(struct bxe_softc *sc) { /* special handling for emulation and FPGA (10 times longer) */ if (CHIP_REV_IS_SLOW(sc)) { DELAY((MCP_ONE_TIMEOUT*10) * 1000); } else { DELAY((MCP_ONE_TIMEOUT) * 1000); } } /* initialize shmem_base and waits for validity signature to appear */ static int bxe_init_shmem(struct bxe_softc *sc) { int cnt = 0; uint32_t val = 0; do { sc->devinfo.shmem_base = sc->link_params.shmem_base = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); if (sc->devinfo.shmem_base) { val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]); if (val & SHR_MEM_VALIDITY_MB) return (0); } bxe_mcp_wait_one(sc); } while (cnt++ < (MCP_TIMEOUT / MCP_ONE_TIMEOUT)); BLOGE(sc, "BAD MCP validity signature\n"); return (-1); } static int bxe_reset_mcp_comp(struct bxe_softc *sc, uint32_t magic_val) { int rc = bxe_init_shmem(sc); /* Restore the `magic' bit value */ if (!CHIP_IS_E1(sc)) { bxe_clp_reset_done(sc, magic_val); } return (rc); } static void bxe_pxp_prep(struct bxe_softc *sc) { if (!CHIP_IS_E1(sc)) { REG_WR(sc, PXP2_REG_RD_START_INIT, 0); REG_WR(sc, PXP2_REG_RQ_RBC_DONE, 0); wmb(); } } /* * Reset the whole chip except for: * - PCIE core * - PCI Glue, PSWHST, PXP/PXP2 RF (all controlled by one reset bit) * - IGU * - MISC (including AEU) * - GRC * - RBCN, RBCP */ static void bxe_process_kill_chip_reset(struct bxe_softc *sc, uint8_t global) { uint32_t not_reset_mask1, reset_mask1, not_reset_mask2, reset_mask2; uint32_t global_bits2, stay_reset2; /* * Bits that have to be set in reset_mask2 if we want to reset 'global' * (per chip) blocks. */ global_bits2 = MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_CMN_CPU | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_CMN_CORE; /* * Don't reset the following blocks. * Important: per port blocks (such as EMAC, BMAC, UMAC) can't be * reset, as in 4 port device they might still be owned * by the MCP (there is only one leader per path). 
*/ not_reset_mask1 = MISC_REGISTERS_RESET_REG_1_RST_HC | MISC_REGISTERS_RESET_REG_1_RST_PXPV | MISC_REGISTERS_RESET_REG_1_RST_PXP; not_reset_mask2 = MISC_REGISTERS_RESET_REG_2_RST_PCI_MDIO | MISC_REGISTERS_RESET_REG_2_RST_EMAC0_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_EMAC1_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_MISC_CORE | MISC_REGISTERS_RESET_REG_2_RST_RBCN | MISC_REGISTERS_RESET_REG_2_RST_GRC | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_RESET_REG_HARD_CORE | MISC_REGISTERS_RESET_REG_2_RST_MCP_N_HARD_CORE_RST_B | MISC_REGISTERS_RESET_REG_2_RST_ATC | MISC_REGISTERS_RESET_REG_2_PGLC | MISC_REGISTERS_RESET_REG_2_RST_BMAC0 | MISC_REGISTERS_RESET_REG_2_RST_BMAC1 | MISC_REGISTERS_RESET_REG_2_RST_EMAC0 | MISC_REGISTERS_RESET_REG_2_RST_EMAC1 | MISC_REGISTERS_RESET_REG_2_UMAC0 | MISC_REGISTERS_RESET_REG_2_UMAC1; /* * Keep the following blocks in reset: * - all xxMACs are handled by the elink code. */ stay_reset2 = MISC_REGISTERS_RESET_REG_2_XMAC | MISC_REGISTERS_RESET_REG_2_XMAC_SOFT; /* Full reset masks according to the chip */ reset_mask1 = 0xffffffff; if (CHIP_IS_E1(sc)) reset_mask2 = 0xffff; else if (CHIP_IS_E1H(sc)) reset_mask2 = 0x1ffff; else if (CHIP_IS_E2(sc)) reset_mask2 = 0xfffff; else /* CHIP_IS_E3 */ reset_mask2 = 0x3ffffff; /* Don't reset global blocks unless we need to */ if (!global) reset_mask2 &= ~global_bits2; /* * In case of attention in the QM, we need to reset PXP * (MISC_REGISTERS_RESET_REG_2_RST_PXP_RQ_RD_WR) before QM * because otherwise QM reset would release 'close the gates' shortly * before resetting the PXP, then the PSWRQ would send a write * request to PGLUE. Then when PXP is reset, PGLUE would try to * read the payload data from PSWWR, but PSWWR would not * respond. The write queue in PGLUE would get stuck, dmae commands * would not return. Therefore it's important to reset the second * reset register (containing the * MISC_REGISTERS_RESET_REG_2_RST_PXP_RQ_RD_WR bit) before the * first one (containing the MISC_REGISTERS_RESET_REG_1_RST_QM * bit). */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, reset_mask2 & (~not_reset_mask2)); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, reset_mask1 & (~not_reset_mask1)); mb(); wmb(); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, reset_mask2 & (~stay_reset2)); mb(); wmb(); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, reset_mask1); wmb(); } static int bxe_process_kill(struct bxe_softc *sc, uint8_t global) { int cnt = 1000; uint32_t val = 0; uint32_t sr_cnt, blk_cnt, port_is_idle_0, port_is_idle_1, pgl_exp_rom2; uint32_t tags_63_32 = 0; /* Empty the Tetris buffer, wait for 1s */ do { sr_cnt = REG_RD(sc, PXP2_REG_RD_SR_CNT); blk_cnt = REG_RD(sc, PXP2_REG_RD_BLK_CNT); port_is_idle_0 = REG_RD(sc, PXP2_REG_RD_PORT_IS_IDLE_0); port_is_idle_1 = REG_RD(sc, PXP2_REG_RD_PORT_IS_IDLE_1); pgl_exp_rom2 = REG_RD(sc, PXP2_REG_PGL_EXP_ROM2); if (CHIP_IS_E3(sc)) { tags_63_32 = REG_RD(sc, PGLUE_B_REG_TAGS_63_32); } if ((sr_cnt == 0x7e) && (blk_cnt == 0xa0) && ((port_is_idle_0 & 0x1) == 0x1) && ((port_is_idle_1 & 0x1) == 0x1) && (pgl_exp_rom2 == 0xffffffff) && (!CHIP_IS_E3(sc) || (tags_63_32 == 0xffffffff))) break; DELAY(1000); } while (cnt-- > 0); if (cnt <= 0) { BLOGE(sc, "ERROR: Tetris buffer didn't get empty or there " "are still outstanding read requests after 1s!
" "sr_cnt=0x%08x, blk_cnt=0x%08x, port_is_idle_0=0x%08x, " "port_is_idle_1=0x%08x, pgl_exp_rom2=0x%08x\n", sr_cnt, blk_cnt, port_is_idle_0, port_is_idle_1, pgl_exp_rom2); return (-1); } mb(); /* Close gates #2, #3 and #4 */ bxe_set_234_gates(sc, TRUE); /* Poll for IGU VQs for 57712 and newer chips */ if (!CHIP_IS_E1x(sc) && bxe_er_poll_igu_vq(sc)) { return (-1); } /* XXX indicate that "process kill" is in progress to MCP */ /* clear "unprepared" bit */ REG_WR(sc, MISC_REG_UNPREPARED, 0); mb(); /* Make sure all is written to the chip before the reset */ wmb(); /* * Wait for 1ms to empty GLUE and PCI-E core queues, * PSWHST, GRC and PSWRD Tetris buffer. */ DELAY(1000); /* Prepare to chip reset: */ /* MCP */ if (global) { bxe_reset_mcp_prep(sc, &val); } /* PXP */ bxe_pxp_prep(sc); mb(); /* reset the chip */ bxe_process_kill_chip_reset(sc, global); mb(); /* clear errors in PGB */ if (!CHIP_IS_E1(sc)) REG_WR(sc, PGLUE_B_REG_LATCHED_ERRORS_CLR, 0x7f); /* Recover after reset: */ /* MCP */ if (global && bxe_reset_mcp_comp(sc, val)) { return (-1); } /* XXX add resetting the NO_MCP mode DB here */ /* Open the gates #2, #3 and #4 */ bxe_set_234_gates(sc, FALSE); /* XXX * IGU/AEU preparation bring back the AEU/IGU to a reset state * re-enable attentions */ return (0); } static int bxe_leader_reset(struct bxe_softc *sc) { int rc = 0; uint8_t global = bxe_reset_is_global(sc); uint32_t load_code; /* * If not going to reset MCP, load "fake" driver to reset HW while * driver is owner of the HW. */ if (!global && !BXE_NOMCP(sc)) { load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_REQ, DRV_MSG_CODE_LOAD_REQ_WITH_LFA); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; goto exit_leader_reset; } if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) && (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) { BLOGE(sc, "MCP unexpected response, aborting\n"); rc = -1; goto exit_leader_reset2; } load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; goto exit_leader_reset2; } } /* try to recover after the failure */ if (bxe_process_kill(sc, global)) { BLOGE(sc, "Something bad occurred on engine %d!\n", SC_PATH(sc)); rc = -1; goto exit_leader_reset2; } /* * Clear the RESET_IN_PROGRESS and RESET_GLOBAL bits and update the driver * state. */ bxe_set_reset_done(sc); if (global) { bxe_clear_reset_global(sc); } exit_leader_reset2: /* unload "fake driver" if it was loaded */ if (!global && !BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, 0); } exit_leader_reset: sc->is_leader = 0; bxe_release_leader_lock(sc); mb(); return (rc); } /* * prepare INIT transition, parameters configured: * - HC configuration * - Queue's CDU context */ static void bxe_pf_q_prep_init(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_queue_init_params *init_params) { uint8_t cos; int cxt_index, cxt_offset; bxe_set_bit(ECORE_Q_FLG_HC, &init_params->rx.flags); bxe_set_bit(ECORE_Q_FLG_HC, &init_params->tx.flags); bxe_set_bit(ECORE_Q_FLG_HC_EN, &init_params->rx.flags); bxe_set_bit(ECORE_Q_FLG_HC_EN, &init_params->tx.flags); /* HC rate */ init_params->rx.hc_rate = sc->hc_rx_ticks ? (1000000 / sc->hc_rx_ticks) : 0; init_params->tx.hc_rate = sc->hc_tx_ticks ? 
(1000000 / sc->hc_tx_ticks) : 0; /* FW SB ID */ init_params->rx.fw_sb_id = init_params->tx.fw_sb_id = fp->fw_sb_id; /* CQ index among the SB indices */ init_params->rx.sb_cq_index = HC_INDEX_ETH_RX_CQ_CONS; init_params->tx.sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS; /* set maximum number of COSs supported by this queue */ init_params->max_cos = sc->max_cos; BLOGD(sc, DBG_LOAD, "fp %d setting queue params max cos to %d\n", fp->index, init_params->max_cos); /* set the context pointers queue object */ for (cos = FIRST_TX_COS_INDEX; cos < init_params->max_cos; cos++) { /* XXX change index/cid here if ever support multiple tx CoS */ /* fp->txdata[cos]->cid */ cxt_index = fp->index / ILT_PAGE_CIDS; cxt_offset = fp->index - (cxt_index * ILT_PAGE_CIDS); init_params->cxts[cos] = &sc->context[cxt_index].vcxt[cxt_offset].eth; } } /* set flags that are common for the Tx-only and not normal connections */ static unsigned long bxe_get_common_flags(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t zero_stats) { unsigned long flags = 0; /* PF driver will always initialize the Queue to an ACTIVE state */ bxe_set_bit(ECORE_Q_FLG_ACTIVE, &flags); /* * tx only connections collect statistics (on the same index as the * parent connection). The statistics are zeroed when the parent * connection is initialized. */ bxe_set_bit(ECORE_Q_FLG_STATS, &flags); if (zero_stats) { bxe_set_bit(ECORE_Q_FLG_ZERO_STATS, &flags); } /* * tx only connections can support tx-switching, though their * CoS-ness doesn't survive the loopback */ if (sc->flags & BXE_TX_SWITCHING) { bxe_set_bit(ECORE_Q_FLG_TX_SWITCH, &flags); } bxe_set_bit(ECORE_Q_FLG_PCSUM_ON_PKT, &flags); return (flags); } static unsigned long bxe_get_q_flags(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t leading) { unsigned long flags = 0; if (IS_MF_SD(sc)) { bxe_set_bit(ECORE_Q_FLG_OV, &flags); } if (if_getcapenable(sc->ifp) & IFCAP_LRO) { bxe_set_bit(ECORE_Q_FLG_TPA, &flags); #if __FreeBSD_version >= 800000 bxe_set_bit(ECORE_Q_FLG_TPA_IPV6, &flags); #endif } if (leading) { bxe_set_bit(ECORE_Q_FLG_LEADING_RSS, &flags); bxe_set_bit(ECORE_Q_FLG_MCAST, &flags); } bxe_set_bit(ECORE_Q_FLG_VLAN, &flags); /* merge with common flags */ return (flags | bxe_get_common_flags(sc, fp, TRUE)); } static void bxe_pf_q_prep_general(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_general_setup_params *gen_init, uint8_t cos) { gen_init->stat_id = bxe_stats_id(fp); gen_init->spcl_id = fp->cl_id; gen_init->mtu = sc->mtu; gen_init->cos = cos; } static void bxe_pf_rx_q_prep(struct bxe_softc *sc, struct bxe_fastpath *fp, struct rxq_pause_params *pause, struct ecore_rxq_setup_params *rxq_init) { uint8_t max_sge = 0; uint16_t sge_sz = 0; uint16_t tpa_agg_size = 0; pause->sge_th_lo = SGE_TH_LO(sc); pause->sge_th_hi = SGE_TH_HI(sc); /* validate SGE ring has enough to cross high threshold */ if (sc->dropless_fc && (pause->sge_th_hi + FW_PREFETCH_CNT) > (RX_SGE_USABLE_PER_PAGE * RX_SGE_NUM_PAGES)) { BLOGW(sc, "sge ring threshold limit\n"); } /* minimum max_aggregation_size is 2*MTU (two full buffers) */ tpa_agg_size = (2 * sc->mtu); if (tpa_agg_size < sc->max_aggregation_size) { tpa_agg_size = sc->max_aggregation_size; } max_sge = SGE_PAGE_ALIGN(sc->mtu) >> SGE_PAGE_SHIFT; max_sge = ((max_sge + PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >> PAGES_PER_SGE_SHIFT; sge_sz = (uint16_t)min(SGE_PAGES, 0xffff); /* pause - not for e1 */ if (!CHIP_IS_E1(sc)) { pause->bd_th_lo = BD_TH_LO(sc); pause->bd_th_hi = BD_TH_HI(sc); pause->rcq_th_lo = RCQ_TH_LO(sc); pause->rcq_th_hi = 
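/*
 * Worked example of the SGE sizing above, assuming 4 KB SGE pages and
 * one page per SGE: a 9000-byte MTU page-aligns to 12288 bytes, i.e. 3
 * pages, so up to 3 SGEs may be consumed per aggregated packet. A
 * hedged sketch of that bound (example_ name is illustrative only):
 */
#if 0
static uint8_t
example_max_sge_per_pkt(uint16_t mtu, uint16_t sge_page_size)
{
        /* round the MTU up to whole SGE pages */
        return ((mtu + sge_page_size - 1) / sge_page_size);
}
#endif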
RCQ_TH_HI(sc); /* validate rings have enough entries to cross high thresholds */ if (sc->dropless_fc && pause->bd_th_hi + FW_PREFETCH_CNT > sc->rx_ring_size) { BLOGW(sc, "rx bd ring threshold limit\n"); } if (sc->dropless_fc && pause->rcq_th_hi + FW_PREFETCH_CNT > RCQ_NUM_PAGES * RCQ_USABLE_PER_PAGE) { BLOGW(sc, "rcq ring threshold limit\n"); } pause->pri_map = 1; } /* rxq setup */ rxq_init->dscr_map = fp->rx_dma.paddr; rxq_init->sge_map = fp->rx_sge_dma.paddr; rxq_init->rcq_map = fp->rcq_dma.paddr; rxq_init->rcq_np_map = (fp->rcq_dma.paddr + BCM_PAGE_SIZE); /* * This should be the maximum number of data bytes that may be * placed on the BD (not including padding). */ rxq_init->buf_sz = (fp->rx_buf_size - IP_HEADER_ALIGNMENT_PADDING); rxq_init->cl_qzone_id = fp->cl_qzone_id; rxq_init->tpa_agg_sz = tpa_agg_size; rxq_init->sge_buf_sz = sge_sz; rxq_init->max_sges_pkt = max_sge; rxq_init->rss_engine_id = SC_FUNC(sc); rxq_init->mcast_engine_id = SC_FUNC(sc); /* * Maximum number of simultaneous TPA aggregations for this Queue. * For PF Clients it should be the maximum available number. * VF driver(s) may want to define it to a smaller value. */ rxq_init->max_tpa_queues = MAX_AGG_QS(sc); rxq_init->cache_line_log = BXE_RX_ALIGN_SHIFT; rxq_init->fw_sb_id = fp->fw_sb_id; rxq_init->sb_cq_index = HC_INDEX_ETH_RX_CQ_CONS; /* * configure silent vlan removal * if multi function mode is afex, then mask default vlan */ if (IS_MF_AFEX(sc)) { rxq_init->silent_removal_value = sc->devinfo.mf_info.afex_def_vlan_tag; rxq_init->silent_removal_mask = EVL_VLID_MASK; } } static void bxe_pf_tx_q_prep(struct bxe_softc *sc, struct bxe_fastpath *fp, struct ecore_txq_setup_params *txq_init, uint8_t cos) { /* * XXX If multiple CoS is ever supported then each fastpath structure * will need to maintain tx producer/consumer/dma/etc values *per* CoS.
* fp->txdata[cos]->tx_dma.paddr; */ txq_init->dscr_map = fp->tx_dma.paddr; txq_init->sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS + cos; txq_init->traffic_type = LLFC_TRAFFIC_TYPE_NW; txq_init->fw_sb_id = fp->fw_sb_id; /* * set the TSS leading client id for TX classification to the * leading RSS client id */ txq_init->tss_leading_cl_id = BXE_FP(sc, 0, cl_id); } /* * This function performs 2 steps in a queue state machine: * 1) RESET->INIT * 2) INIT->SETUP */ static int bxe_setup_queue(struct bxe_softc *sc, struct bxe_fastpath *fp, uint8_t leading) { struct ecore_queue_state_params q_params = { NULL }; struct ecore_queue_setup_params *setup_params = &q_params.params.setup; int rc; BLOGD(sc, DBG_LOAD, "setting up queue %d\n", fp->index); bxe_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); q_params.q_obj = &BXE_SP_OBJ(sc, fp).q_obj; /* we want to wait for completion in this context */ bxe_set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); /* prepare the INIT parameters */ bxe_pf_q_prep_init(sc, fp, &q_params.params.init); /* Set the command */ q_params.cmd = ECORE_Q_CMD_INIT; /* Change the state to INIT */ rc = ecore_queue_state_change(sc, &q_params); if (rc) { BLOGE(sc, "Queue(%d) INIT failed rc = %d\n", fp->index, rc); return (rc); } BLOGD(sc, DBG_LOAD, "init complete\n"); /* now move the Queue to the SETUP state */ memset(setup_params, 0, sizeof(*setup_params)); /* set Queue flags */ setup_params->flags = bxe_get_q_flags(sc, fp, leading); /* set general SETUP parameters */ bxe_pf_q_prep_general(sc, fp, &setup_params->gen_params, FIRST_TX_COS_INDEX); bxe_pf_rx_q_prep(sc, fp, &setup_params->pause_params, &setup_params->rxq_params); bxe_pf_tx_q_prep(sc, fp, &setup_params->txq_params, FIRST_TX_COS_INDEX); /* Set the command */ q_params.cmd = ECORE_Q_CMD_SETUP; /* change the state to SETUP */ rc = ecore_queue_state_change(sc, &q_params); if (rc) { BLOGE(sc, "Queue(%d) SETUP failed (rc = %d)\n", fp->index, rc); return (rc); } return (rc); } static int bxe_setup_leading(struct bxe_softc *sc) { return (bxe_setup_queue(sc, &sc->fp[0], TRUE)); } static int bxe_config_rss_pf(struct bxe_softc *sc, struct ecore_rss_config_obj *rss_obj, uint8_t config_hash) { struct ecore_config_rss_params params = { NULL }; int i; /* * Although RSS is meaningless when there is a single HW queue we * still need it enabled in order to have HW Rx hash generated.
*/ params.rss_obj = rss_obj; bxe_set_bit(RAMROD_COMP_WAIT, &params.ramrod_flags); bxe_set_bit(ECORE_RSS_MODE_REGULAR, &params.rss_flags); /* RSS configuration */ bxe_set_bit(ECORE_RSS_IPV4, &params.rss_flags); bxe_set_bit(ECORE_RSS_IPV4_TCP, &params.rss_flags); bxe_set_bit(ECORE_RSS_IPV6, &params.rss_flags); bxe_set_bit(ECORE_RSS_IPV6_TCP, &params.rss_flags); if (rss_obj->udp_rss_v4) { bxe_set_bit(ECORE_RSS_IPV4_UDP, &params.rss_flags); } if (rss_obj->udp_rss_v6) { bxe_set_bit(ECORE_RSS_IPV6_UDP, &params.rss_flags); } /* Hash bits */ params.rss_result_mask = MULTI_MASK; memcpy(params.ind_table, rss_obj->ind_table, sizeof(params.ind_table)); if (config_hash) { /* RSS keys */ for (i = 0; i < sizeof(params.rss_key) / 4; i++) { params.rss_key[i] = arc4random(); } bxe_set_bit(ECORE_RSS_SET_SRCH, &params.rss_flags); } return (ecore_config_rss(sc, &params)); } static int bxe_config_rss_eth(struct bxe_softc *sc, uint8_t config_hash) { return (bxe_config_rss_pf(sc, &sc->rss_conf_obj, config_hash)); } static int bxe_init_rss_pf(struct bxe_softc *sc) { uint8_t num_eth_queues = BXE_NUM_ETH_QUEUES(sc); int i; /* * Prepare the initial contents of the indirection table if * RSS is enabled */ for (i = 0; i < sizeof(sc->rss_conf_obj.ind_table); i++) { sc->rss_conf_obj.ind_table[i] = (sc->fp->cl_id + (i % num_eth_queues)); } if (sc->udp_rss) { sc->rss_conf_obj.udp_rss_v4 = sc->rss_conf_obj.udp_rss_v6 = 1; } /* * For 57710 and 57711 SEARCHER configuration (rss_keys) is * per-port, so if explicit configuration is needed, do it only * for a PMF. * * For 57712 and newer it's a per-function configuration. */ return (bxe_config_rss_eth(sc, sc->port.pmf || !CHIP_IS_E1x(sc))); } static int bxe_set_mac_one(struct bxe_softc *sc, uint8_t *mac, struct ecore_vlan_mac_obj *obj, uint8_t set, int mac_type, unsigned long *ramrod_flags) { struct ecore_vlan_mac_ramrod_params ramrod_param; int rc; memset(&ramrod_param, 0, sizeof(ramrod_param)); /* fill in general parameters */ ramrod_param.vlan_mac_obj = obj; ramrod_param.ramrod_flags = *ramrod_flags; /* fill a user request section if needed */ if (!bxe_test_bit(RAMROD_CONT, ramrod_flags)) { memcpy(ramrod_param.user_req.u.mac.mac, mac, ETH_ALEN); bxe_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags); /* Set the command: ADD or DEL */ ramrod_param.user_req.cmd = (set) ? ECORE_VLAN_MAC_ADD : ECORE_VLAN_MAC_DEL; } rc = ecore_config_vlan_mac(sc, &ramrod_param); if (rc == ECORE_EXISTS) { BLOGD(sc, DBG_SP, "Failed to schedule ADD operations (EEXIST)\n"); /* do not treat adding same MAC as error */ rc = 0; } else if (rc < 0) { BLOGE(sc, "%s MAC failed (%d)\n", (set ?
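/*
 * Sketch of the indirection-table fill done in bxe_init_rss_pf() above:
 * entry i maps to client (cl_id + (i % num_queues)), spreading RX flows
 * round-robin across the ethernet queues (example_ name is illustrative
 * only):
 */
#if 0
static void
example_fill_ind_table(uint8_t *table, int table_len, uint8_t base_cl_id,
                       int num_queues)
{
        int i;

        for (i = 0; i < table_len; i++) {
                table[i] = (base_cl_id + (i % num_queues));
        }
}
#endif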
"Set" : "Delete"), rc); } return (rc); } static int bxe_set_eth_mac(struct bxe_softc *sc, uint8_t set) { unsigned long ramrod_flags = 0; BLOGD(sc, DBG_LOAD, "Adding Ethernet MAC\n"); bxe_set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Eth MAC is set on RSS leading client (fp[0]) */ return (bxe_set_mac_one(sc, sc->link_params.mac_addr, &sc->sp_objs->mac_obj, set, ECORE_ETH_MAC, &ramrod_flags)); } static int bxe_get_cur_phy_idx(struct bxe_softc *sc) { uint32_t sel_phy_idx = 0; if (sc->link_params.num_phys <= 1) { return (ELINK_INT_PHY); } if (sc->link_vars.link_up) { sel_phy_idx = ELINK_EXT_PHY1; /* In case link is SERDES, check if the ELINK_EXT_PHY2 is the one */ if ((sc->link_vars.link_status & LINK_STATUS_SERDES_LINK) && (sc->link_params.phy[ELINK_EXT_PHY2].supported & ELINK_SUPPORTED_FIBRE)) sel_phy_idx = ELINK_EXT_PHY2; } else { switch (elink_phy_selection(&sc->link_params)) { case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY: sel_phy_idx = ELINK_EXT_PHY1; break; case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY: case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY: sel_phy_idx = ELINK_EXT_PHY2; break; } } return (sel_phy_idx); } static int bxe_get_link_cfg_idx(struct bxe_softc *sc) { uint32_t sel_phy_idx = bxe_get_cur_phy_idx(sc); /* * The selected activated PHY is always after swapping (in case PHY * swapping is enabled). So when swapping is enabled, we need to reverse * the configuration */ if (sc->link_params.multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED) { if (sel_phy_idx == ELINK_EXT_PHY1) sel_phy_idx = ELINK_EXT_PHY2; else if (sel_phy_idx == ELINK_EXT_PHY2) sel_phy_idx = ELINK_EXT_PHY1; } return (ELINK_LINK_CONFIG_IDX(sel_phy_idx)); } static void bxe_set_requested_fc(struct bxe_softc *sc) { /* * Initialize link parameters structure variables * It is recommended to turn off RX FC for jumbo frames * for better performance */ if (CHIP_IS_E1x(sc) && (sc->mtu > 5000)) { sc->link_params.req_fc_auto_adv = ELINK_FLOW_CTRL_TX; } else { sc->link_params.req_fc_auto_adv = ELINK_FLOW_CTRL_BOTH; } } static void bxe_calc_fc_adv(struct bxe_softc *sc) { uint8_t cfg_idx = bxe_get_link_cfg_idx(sc); sc->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause | ADVERTISED_Pause); switch (sc->link_vars.ieee_fc & MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) { case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH: sc->port.advertising[cfg_idx] |= (ADVERTISED_Asym_Pause | ADVERTISED_Pause); break; case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC: sc->port.advertising[cfg_idx] |= ADVERTISED_Asym_Pause; break; default: break; } } static uint16_t bxe_get_mf_speed(struct bxe_softc *sc) { uint16_t line_speed = sc->link_vars.line_speed; if (IS_MF(sc)) { uint16_t maxCfg = bxe_extract_max_cfg(sc, sc->devinfo.mf_info.mf_config[SC_VN(sc)]); /* calculate the current MAX line speed limit for the MF devices */ if (IS_MF_SI(sc)) { line_speed = (line_speed * maxCfg) / 100; } else { /* SD mode */ uint16_t vn_max_rate = maxCfg * 100; if (vn_max_rate < line_speed) { line_speed = vn_max_rate; } } } return (line_speed); } static void bxe_fill_report_data(struct bxe_softc *sc, struct bxe_link_report_data *data) { uint16_t line_speed = bxe_get_mf_speed(sc); memset(data, 0, sizeof(*data)); /* fill the report data with the effective line speed */ data->line_speed = line_speed; /* Link is down */ if (!sc->link_vars.link_up || (sc->flags & BXE_MF_FUNC_DIS)) { bxe_set_bit(BXE_LINK_REPORT_LINK_DOWN, &data->link_report_flags); } /* Full DUPLEX */ if 
(sc->link_vars.duplex == DUPLEX_FULL) { bxe_set_bit(BXE_LINK_REPORT_FULL_DUPLEX, &data->link_report_flags); } /* Rx Flow Control is ON */ if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_RX) { bxe_set_bit(BXE_LINK_REPORT_RX_FC_ON, &data->link_report_flags); } /* Tx Flow Control is ON */ if (sc->link_vars.flow_ctrl & ELINK_FLOW_CTRL_TX) { bxe_set_bit(BXE_LINK_REPORT_TX_FC_ON, &data->link_report_flags); } } /* report link status to OS, should be called under phy_lock */ static void bxe_link_report_locked(struct bxe_softc *sc) { struct bxe_link_report_data cur_data; /* reread mf_cfg */ if (IS_PF(sc) && !CHIP_IS_E1(sc)) { bxe_read_mf_cfg(sc); } /* Read the current link report info */ bxe_fill_report_data(sc, &cur_data); /* Don't report link down or exactly the same link status twice */ if (!memcmp(&cur_data, &sc->last_reported_link, sizeof(cur_data)) || (bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &sc->last_reported_link.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &cur_data.link_report_flags))) { return; } ELINK_DEBUG_P2(sc, "Change in link status : cur_data = %x, last_reported_link = %x\n", cur_data.link_report_flags, sc->last_reported_link.link_report_flags); sc->link_cnt++; ELINK_DEBUG_P1(sc, "link status change count = %x\n", sc->link_cnt); /* report new link params and remember the state for the next time */ memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data)); if (bxe_test_bit(BXE_LINK_REPORT_LINK_DOWN, &cur_data.link_report_flags)) { if_link_state_change(sc->ifp, LINK_STATE_DOWN); } else { const char *duplex; const char *flow; if (bxe_test_and_clear_bit(BXE_LINK_REPORT_FULL_DUPLEX, &cur_data.link_report_flags)) { duplex = "full"; ELINK_DEBUG_P0(sc, "link set to full duplex\n"); } else { duplex = "half"; ELINK_DEBUG_P0(sc, "link set to half duplex\n"); } /* * Handle the FC at the end so that only these flags would be * possibly set. This way we may easily check if there is no FC * enabled. */ if (cur_data.link_report_flags) { if (bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - receive & transmit"; } else if (bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && !bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - receive"; } else if (!bxe_test_bit(BXE_LINK_REPORT_RX_FC_ON, &cur_data.link_report_flags) && bxe_test_bit(BXE_LINK_REPORT_TX_FC_ON, &cur_data.link_report_flags)) { flow = "ON - transmit"; } else { flow = "none"; /* possible? 
*/ } } else { flow = "none"; } if_link_state_change(sc->ifp, LINK_STATE_UP); BLOGI(sc, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n", cur_data.line_speed, duplex, flow); } } static void bxe_link_report(struct bxe_softc *sc) { bxe_acquire_phy_lock(sc); bxe_link_report_locked(sc); bxe_release_phy_lock(sc); } static void bxe_link_status_update(struct bxe_softc *sc) { if (sc->state != BXE_STATE_OPEN) { return; } if (IS_PF(sc) && !CHIP_REV_IS_SLOW(sc)) { elink_link_status_update(&sc->link_params, &sc->link_vars); } else { sc->port.supported[0] |= (ELINK_SUPPORTED_10baseT_Half | ELINK_SUPPORTED_10baseT_Full | ELINK_SUPPORTED_100baseT_Half | ELINK_SUPPORTED_100baseT_Full | ELINK_SUPPORTED_1000baseT_Full | ELINK_SUPPORTED_2500baseX_Full | ELINK_SUPPORTED_10000baseT_Full | ELINK_SUPPORTED_TP | ELINK_SUPPORTED_FIBRE | ELINK_SUPPORTED_Autoneg | ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause); sc->port.advertising[0] = sc->port.supported[0]; sc->link_params.sc = sc; sc->link_params.port = SC_PORT(sc); sc->link_params.req_duplex[0] = DUPLEX_FULL; sc->link_params.req_flow_ctrl[0] = ELINK_FLOW_CTRL_NONE; sc->link_params.req_line_speed[0] = SPEED_10000; sc->link_params.speed_cap_mask[0] = 0x7f0000; sc->link_params.switch_cfg = ELINK_SWITCH_CFG_10G; if (CHIP_REV_IS_FPGA(sc)) { sc->link_vars.mac_type = ELINK_MAC_TYPE_EMAC; sc->link_vars.line_speed = ELINK_SPEED_1000; sc->link_vars.link_status = (LINK_STATUS_LINK_UP | LINK_STATUS_SPEED_AND_DUPLEX_1000TFD); } else { sc->link_vars.mac_type = ELINK_MAC_TYPE_BMAC; sc->link_vars.line_speed = ELINK_SPEED_10000; sc->link_vars.link_status = (LINK_STATUS_LINK_UP | LINK_STATUS_SPEED_AND_DUPLEX_10GTFD); } sc->link_vars.link_up = 1; sc->link_vars.duplex = DUPLEX_FULL; sc->link_vars.flow_ctrl = ELINK_FLOW_CTRL_NONE; if (IS_PF(sc)) { REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + sc->link_params.port*4, 0); bxe_stats_handle(sc, STATS_EVENT_LINK_UP); bxe_link_report(sc); } } if (IS_PF(sc)) { if (sc->link_vars.link_up) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } else { bxe_stats_handle(sc, STATS_EVENT_STOP); } bxe_link_report(sc); } else { bxe_link_report(sc); bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } } static int bxe_initial_phy_init(struct bxe_softc *sc, int load_mode) { int rc, cfg_idx = bxe_get_link_cfg_idx(sc); uint16_t req_line_speed = sc->link_params.req_line_speed[cfg_idx]; struct elink_params *lp = &sc->link_params; bxe_set_requested_fc(sc); if (CHIP_REV_IS_SLOW(sc)) { uint32_t bond = CHIP_BOND_ID(sc); uint32_t feat = 0; if (CHIP_IS_E2(sc) && CHIP_IS_MODE_4_PORT(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; } else if (bond & 0x4) { if (CHIP_IS_E3(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC; } else { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; } } else if (bond & 0x8) { if (CHIP_IS_E3(sc)) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC; } else { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; } } /* disable EMAC for E3 and above */ if (bond & 0x2) { feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; } sc->link_params.feature_config_flags |= feat; } bxe_acquire_phy_lock(sc); if (load_mode == LOAD_DIAG) { lp->loopback_mode = ELINK_LOOPBACK_XGXS; /* Prefer doing PHY loopback at 10G speed, if possible */ if (lp->req_line_speed[cfg_idx] < ELINK_SPEED_10000) { if (lp->speed_cap_mask[cfg_idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) { lp->req_line_speed[cfg_idx] = ELINK_SPEED_10000; } else { lp->req_line_speed[cfg_idx] = ELINK_SPEED_1000; } } } if (load_mode == LOAD_LOOPBACK_EXT) { lp->loopback_mode = ELINK_LOOPBACK_EXT; } rc = 
elink_phy_init(&sc->link_params, &sc->link_vars); bxe_release_phy_lock(sc); bxe_calc_fc_adv(sc); if (sc->link_vars.link_up) { bxe_stats_handle(sc, STATS_EVENT_LINK_UP); bxe_link_report(sc); } if (!CHIP_REV_IS_SLOW(sc)) { bxe_periodic_start(sc); } sc->link_params.req_line_speed[cfg_idx] = req_line_speed; return (rc); } /* must be called under IF_ADDR_LOCK */ static int bxe_init_mcast_macs_list(struct bxe_softc *sc, struct ecore_mcast_ramrod_params *p) { if_t ifp = sc->ifp; int mc_count = 0; struct ifmultiaddr *ifma; struct ecore_mcast_list_elem *mc_mac; CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) { continue; } mc_count++; } ECORE_LIST_INIT(&p->mcast_list); p->mcast_list_len = 0; if (!mc_count) { return (0); } mc_mac = malloc(sizeof(*mc_mac) * mc_count, M_DEVBUF, (M_NOWAIT | M_ZERO)); if (!mc_mac) { BLOGE(sc, "Failed to allocate temp mcast list\n"); return (-1); } bzero(mc_mac, (sizeof(*mc_mac) * mc_count)); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) { continue; } mc_mac->mac = (uint8_t *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr); ECORE_LIST_PUSH_TAIL(&mc_mac->link, &p->mcast_list); BLOGD(sc, DBG_LOAD, "Setting MCAST %02X:%02X:%02X:%02X:%02X:%02X and mc_count %d\n", mc_mac->mac[0], mc_mac->mac[1], mc_mac->mac[2], mc_mac->mac[3], mc_mac->mac[4], mc_mac->mac[5], mc_count); mc_mac++; } p->mcast_list_len = mc_count; return (0); } static void bxe_free_mcast_macs_list(struct ecore_mcast_ramrod_params *p) { struct ecore_mcast_list_elem *mc_mac = ECORE_LIST_FIRST_ENTRY(&p->mcast_list, struct ecore_mcast_list_elem, link); if (mc_mac) { /* only a single free as all mc_macs are in the same heap array */ free(mc_mac, M_DEVBUF); } } static int bxe_set_mc_list(struct bxe_softc *sc) { struct ecore_mcast_ramrod_params rparam = { NULL }; int rc = 0; rparam.mcast_obj = &sc->mcast_obj; BXE_MCAST_LOCK(sc); /* first, clear all configured multicast MACs */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL); if (rc < 0) { BLOGE(sc, "Failed to clear multicast configuration: %d\n", rc); /* Manual backport parts of FreeBSD upstream r284470. 
*/ BXE_MCAST_UNLOCK(sc); return (rc); } /* configure a new MACs list */ rc = bxe_init_mcast_macs_list(sc, &rparam); if (rc) { BLOGE(sc, "Failed to create mcast MACs list (%d)\n", rc); BXE_MCAST_UNLOCK(sc); return (rc); } /* Now add the new MACs */ rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_ADD); if (rc < 0) { BLOGE(sc, "Failed to set new mcast config (%d)\n", rc); } bxe_free_mcast_macs_list(&rparam); BXE_MCAST_UNLOCK(sc); return (rc); } static int bxe_set_uc_list(struct bxe_softc *sc) { if_t ifp = sc->ifp; struct ecore_vlan_mac_obj *mac_obj = &sc->sp_objs->mac_obj; struct ifaddr *ifa; unsigned long ramrod_flags = 0; int rc; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_addr_rlock(ifp); #endif /* first schedule a cleanup up of old configuration */ rc = bxe_del_all_macs(sc, mac_obj, ECORE_UC_LIST_MAC, FALSE); if (rc < 0) { BLOGE(sc, "Failed to schedule delete of all ETH MACs (%d)\n", rc); #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif return (rc); } ifa = if_getifaddr(ifp); /* XXX Is this structure */ while (ifa) { if (ifa->ifa_addr->sa_family != AF_LINK) { ifa = CK_STAILQ_NEXT(ifa, ifa_link); continue; } rc = bxe_set_mac_one(sc, (uint8_t *)LLADDR((struct sockaddr_dl *)ifa->ifa_addr), mac_obj, TRUE, ECORE_UC_LIST_MAC, &ramrod_flags); if (rc == -EEXIST) { BLOGD(sc, DBG_SP, "Failed to schedule ADD operations (EEXIST)\n"); /* do not treat adding same MAC as an error */ rc = 0; } else if (rc < 0) { BLOGE(sc, "Failed to schedule ADD operations (%d)\n", rc); #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif return (rc); } ifa = CK_STAILQ_NEXT(ifa, ifa_link); } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_addr_runlock(ifp); #endif /* Execute the pending commands */ bit_set(&ramrod_flags, RAMROD_CONT); return (bxe_set_mac_one(sc, NULL, mac_obj, FALSE /* don't care */, ECORE_UC_LIST_MAC, &ramrod_flags)); } static void bxe_set_rx_mode(struct bxe_softc *sc) { if_t ifp = sc->ifp; uint32_t rx_mode = BXE_RX_MODE_NORMAL; if (sc->state != BXE_STATE_OPEN) { BLOGD(sc, DBG_SP, "state is %x, returning\n", sc->state); return; } BLOGD(sc, DBG_SP, "if_flags(ifp)=0x%x\n", if_getflags(sc->ifp)); if (if_getflags(ifp) & IFF_PROMISC) { rx_mode = BXE_RX_MODE_PROMISC; } else if ((if_getflags(ifp) & IFF_ALLMULTI) || ((if_getamcount(ifp) > BXE_MAX_MULTICAST) && CHIP_IS_E1(sc))) { rx_mode = BXE_RX_MODE_ALLMULTI; } else { if (IS_PF(sc)) { /* some multicasts */ if (bxe_set_mc_list(sc) < 0) { rx_mode = BXE_RX_MODE_ALLMULTI; } if (bxe_set_uc_list(sc) < 0) { rx_mode = BXE_RX_MODE_PROMISC; } } } sc->rx_mode = rx_mode; /* schedule the rx_mode command */ if (bxe_test_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state)) { BLOGD(sc, DBG_LOAD, "Scheduled setting rx_mode with ECORE...\n"); bxe_set_bit(ECORE_FILTER_RX_MODE_SCHED, &sc->sp_state); return; } if (IS_PF(sc)) { bxe_set_storm_rx_mode(sc); } } /* update flags in shmem */ static void bxe_update_drv_flags(struct bxe_softc *sc, uint32_t flags, uint32_t set) { uint32_t drv_flags; if (SHMEM2_HAS(sc, drv_flags)) { bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_DRV_FLAGS); drv_flags = SHMEM2_RD(sc, drv_flags); if (set) { SET_FLAGS(drv_flags, flags); } else { RESET_FLAGS(drv_flags, flags); } SHMEM2_WR(sc, drv_flags, drv_flags); BLOGD(sc, DBG_LOAD, "drv_flags 0x%08x\n", drv_flags); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_DRV_FLAGS); } } /* periodic timer callout routine, only runs when the interface is up */ static void bxe_periodic_callout_func(void *xsc) { struct bxe_softc *sc = 
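/*
 * Condensed restatement of the RX filter decision implemented in
 * bxe_set_rx_mode() above (a sketch derived from this code only):
 * promiscuous wins outright, and failures to program the multicast or
 * unicast lists fall back to the next more permissive mode.
 */
#if 0
static uint32_t
example_rx_mode(int promisc, int allmulti, int mc_ok, int uc_ok)
{
        if (promisc || !uc_ok) {
                return (BXE_RX_MODE_PROMISC);
        }
        if (allmulti || !mc_ok) {
                return (BXE_RX_MODE_ALLMULTI);
        }
        return (BXE_RX_MODE_NORMAL);
}
#endif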
(struct bxe_softc *)xsc; int i; if (!BXE_CORE_TRYLOCK(sc)) { /* just bail and try again next time */ if ((sc->state == BXE_STATE_OPEN) && (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)) { /* schedule the next periodic callout */ callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } return; } if ((sc->state != BXE_STATE_OPEN) || (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_STOP)) { BLOGW(sc, "periodic callout exit (state=0x%x)\n", sc->state); BXE_CORE_UNLOCK(sc); return; } /* Check for TX timeouts on any fastpath. */ FOR_EACH_QUEUE(sc, i) { if (bxe_watchdog(sc, &sc->fp[i]) != 0) { /* Ruh-Roh, chip was reset! */ break; } } if (!CHIP_REV_IS_SLOW(sc)) { /* * This barrier is needed to ensure the ordering between the writing * to the sc->port.pmf in the bxe_nic_load() or bxe_pmf_update() and * the reading here. */ mb(); if (sc->port.pmf) { bxe_acquire_phy_lock(sc); elink_period_func(&sc->link_params, &sc->link_vars); bxe_release_phy_lock(sc); } } if (IS_PF(sc) && !(sc->flags & BXE_NO_PULSE)) { int mb_idx = SC_FW_MB_IDX(sc); uint32_t drv_pulse; uint32_t mcp_pulse; ++sc->fw_drv_pulse_wr_seq; sc->fw_drv_pulse_wr_seq &= DRV_PULSE_SEQ_MASK; drv_pulse = sc->fw_drv_pulse_wr_seq; bxe_drv_pulse(sc); mcp_pulse = (SHMEM_RD(sc, func_mb[mb_idx].mcp_pulse_mb) & MCP_PULSE_SEQ_MASK); /* * The delta between driver pulse and mcp response should * be 1 (before mcp response) or 0 (after mcp response). */ if ((drv_pulse != mcp_pulse) && (drv_pulse != ((mcp_pulse + 1) & MCP_PULSE_SEQ_MASK))) { /* someone lost a heartbeat... */ BLOGE(sc, "drv_pulse (0x%x) != mcp_pulse (0x%x)\n", drv_pulse, mcp_pulse); } } /* state is BXE_STATE_OPEN */ bxe_stats_handle(sc, STATS_EVENT_UPDATE); BXE_CORE_UNLOCK(sc); if ((sc->state == BXE_STATE_OPEN) && (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)) { /* schedule the next periodic callout */ callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } } static void bxe_periodic_start(struct bxe_softc *sc) { atomic_store_rel_long(&sc->periodic_flags, PERIODIC_GO); callout_reset(&sc->periodic_callout, hz, bxe_periodic_callout_func, sc); } static void bxe_periodic_stop(struct bxe_softc *sc) { atomic_store_rel_long(&sc->periodic_flags, PERIODIC_STOP); callout_drain(&sc->periodic_callout); } /* start the controller */ static __noinline int bxe_nic_load(struct bxe_softc *sc, int load_mode) { uint32_t val; int load_code = 0; int i, rc = 0; BXE_CORE_LOCK_ASSERT(sc); BLOGD(sc, DBG_LOAD, "Starting NIC load...\n"); sc->state = BXE_STATE_OPENING_WAITING_LOAD; if (IS_PF(sc)) { /* must be called before memory allocation and HW init */ bxe_ilt_set_info(sc); } sc->last_reported_link_state = LINK_STATE_UNKNOWN; bxe_set_fp_rx_buf_size(sc); if (bxe_alloc_fp_buffers(sc) != 0) { BLOGE(sc, "Failed to allocate fastpath memory\n"); sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (bxe_alloc_mem(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (bxe_alloc_fw_stats_mem(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENOMEM; goto bxe_nic_load_error0; } if (IS_PF(sc)) { /* set pf load just before approaching the MCP */ bxe_set_pf_load(sc); /* if MCP exists send load request and analyze response */ if (!BXE_NOMCP(sc)) { /* attempt to load pf */ if (bxe_nic_load_request(sc, &load_code) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error1; } /* what did the MCP say? 
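* (The load response presumably tells this function how much initialization it owns, i.e. common, port, or function level; bxe_nic_load_analyze_req() validates it before bxe_init_hw() is called with the same load_code.)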
*/ if (bxe_nic_load_analyze_req(sc, load_code) != 0) { bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } } else { BLOGI(sc, "Device has no MCP!\n"); load_code = bxe_nic_load_no_mcp(sc); } /* mark PMF if applicable */ bxe_nic_load_pmf(sc, load_code); /* Init Function state controlling object */ bxe_init_func_obj(sc); /* Initialize HW */ if (bxe_init_hw(sc, load_code) != 0) { BLOGE(sc, "HW init failed\n"); bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } } /* set ALWAYS_ALIVE bit in shmem */ sc->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; bxe_drv_pulse(sc); sc->flags |= BXE_NO_PULSE; /* attach interrupts */ if (bxe_interrupt_attach(sc) != 0) { sc->state = BXE_STATE_CLOSED; rc = ENXIO; goto bxe_nic_load_error2; } bxe_nic_init(sc, load_code); /* Init per-function objects */ if (IS_PF(sc)) { bxe_init_objs(sc); // XXX bxe_iov_nic_init(sc); /* set AFEX default VLAN tag to an invalid value */ sc->devinfo.mf_info.afex_def_vlan_tag = -1; // XXX bxe_nic_load_afex_dcc(sc, load_code); sc->state = BXE_STATE_OPENING_WAITING_PORT; rc = bxe_func_start(sc); if (rc) { BLOGE(sc, "Function start failed! rc = %d\n", rc); bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } /* send LOAD_DONE command to MCP */ if (!BXE_NOMCP(sc)) { load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0); if (!load_code) { BLOGE(sc, "MCP response failure, aborting\n"); sc->state = BXE_STATE_ERROR; rc = ENXIO; goto bxe_nic_load_error3; } } rc = bxe_setup_leading(sc); if (rc) { BLOGE(sc, "Setup leading failed! rc = %d\n", rc); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } FOR_EACH_NONDEFAULT_ETH_QUEUE(sc, i) { rc = bxe_setup_queue(sc, &sc->fp[i], FALSE); if (rc) { BLOGE(sc, "Queue(%d) setup failed rc = %d\n", i, rc); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } rc = bxe_init_rss_pf(sc); if (rc) { BLOGE(sc, "PF RSS init failed\n"); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } /* XXX VF */ /* now when Clients are configured we are ready to work */ sc->state = BXE_STATE_OPEN; /* Configure a ucast MAC */ if (IS_PF(sc)) { rc = bxe_set_eth_mac(sc, TRUE); } if (rc) { BLOGE(sc, "Setting Ethernet MAC failed rc = %d\n", rc); sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } if (sc->port.pmf) { rc = bxe_initial_phy_init(sc, /* XXX load_mode */LOAD_OPEN); if (rc) { sc->state = BXE_STATE_ERROR; goto bxe_nic_load_error3; } } sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_BOOT_FROM_SAN; /* start fast path */ /* Initialize Rx filter */ bxe_set_rx_mode(sc); /* start the Tx */ switch (/* XXX load_mode */LOAD_OPEN) { case LOAD_NORMAL: case LOAD_OPEN: break; case LOAD_DIAG: case LOAD_LOOPBACK_EXT: sc->state = BXE_STATE_DIAG; break; default: break; } if (sc->port.pmf) { bxe_update_drv_flags(sc, 1 << DRV_FLAGS_PORT_MASK, 0); } else { bxe_link_status_update(sc); } /* start the periodic timer callout */ bxe_periodic_start(sc); if (IS_PF(sc) && SHMEM2_HAS(sc, drv_capabilities_flag)) { /* mark driver is loaded in shmem2 */ val = SHMEM2_RD(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)]); SHMEM2_WR(sc, drv_capabilities_flag[SC_FW_MB_IDX(sc)], (val | DRV_FLAGS_CAPABILITIES_LOADED_SUPPORTED | DRV_FLAGS_CAPABILITIES_LOADED_L2)); } /* wait for all pending SP commands to complete */ if (IS_PF(sc) && !bxe_wait_sp_comp(sc, ~0x0UL)) { BLOGE(sc, "Timeout waiting for all SPs to complete!\n"); bxe_periodic_stop(sc); 
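/* Stop the periodic callout before unloading so it cannot fire again and touch the hardware while the NIC is being torn down below. */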
bxe_nic_unload(sc, UNLOAD_CLOSE, FALSE); return (ENXIO); } /* Tell the stack the driver is running! */ if_setdrvflags(sc->ifp, IFF_DRV_RUNNING); BLOGD(sc, DBG_LOAD, "NIC successfully loaded\n"); return (0); bxe_nic_load_error3: if (IS_PF(sc)) { bxe_int_disable_sync(sc, 1); /* clean out queued objects */ bxe_squeeze_objects(sc); } bxe_interrupt_detach(sc); bxe_nic_load_error2: if (IS_PF(sc) && !BXE_NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, 0); } sc->port.pmf = 0; bxe_nic_load_error1: /* clear pf_load status, as it was already set */ if (IS_PF(sc)) { bxe_clear_pf_load(sc); } bxe_nic_load_error0: bxe_free_fw_stats_mem(sc); bxe_free_fp_buffers(sc); bxe_free_mem(sc); return (rc); } static int bxe_init_locked(struct bxe_softc *sc) { int other_engine = SC_PATH(sc) ? 0 : 1; uint8_t other_load_status, load_status; uint8_t global = FALSE; int rc; BXE_CORE_LOCK_ASSERT(sc); /* check if the driver is already running */ if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) { BLOGD(sc, DBG_LOAD, "Init called while driver is running!\n"); return (0); } bxe_set_power_state(sc, PCI_PM_D0); /* * If parity occurred during the unload, then attentions and/or * RECOVERY_IN_PROGRESS may still be set. If so we want the first function * loaded on the current engine to complete the recovery. Parity recovery * is only relevant for the PF driver. */ if (IS_PF(sc)) { other_load_status = bxe_get_load_status(sc, other_engine); load_status = bxe_get_load_status(sc, SC_PATH(sc)); if (!bxe_reset_is_done(sc, SC_PATH(sc)) || bxe_chk_parity_attn(sc, &global, TRUE)) { do { /* * If there are attentions and they are in global blocks, set * the GLOBAL_RESET bit regardless of whether it will be this * function that will complete the recovery or not. */ if (global) { bxe_set_reset_global(sc); } /* * Only the first function on the current engine should try * to recover in open. In case of attentions in global blocks * only the first in the chip should try to recover. */ if ((!load_status && (!global || !other_load_status)) && bxe_trylock_leader_lock(sc) && !bxe_leader_reset(sc)) { BLOGI(sc, "Recovered during init\n"); break; } /* recovery has failed... */ bxe_set_power_state(sc, PCI_PM_D3hot); sc->recovery_state = BXE_RECOVERY_FAILED; BLOGE(sc, "Recovery flow hasn't properly " "completed yet, try again later. " "If you still see this message after a " "few retries then power cycle is required.\n"); rc = ENXIO; goto bxe_init_locked_done; } while (0); } } sc->recovery_state = BXE_RECOVERY_DONE; rc = bxe_nic_load(sc, LOAD_OPEN); bxe_init_locked_done: if (rc) { /* Tell the stack the driver is NOT running! */ BLOGE(sc, "Initialization failed, " "stack notified driver is NOT running!\n"); if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING); } return (rc); } static int bxe_stop_locked(struct bxe_softc *sc) { BXE_CORE_LOCK_ASSERT(sc); return (bxe_nic_unload(sc, UNLOAD_NORMAL, TRUE)); } /* * Handles controller initialization when called from an unlocked routine. * ifconfig calls this function.
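* (This is the if_init hook registered via if_setinitfn() in bxe_init_ifnet().)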
* * Returns: * void */ static void bxe_init(void *xsc) { struct bxe_softc *sc = (struct bxe_softc *)xsc; BXE_CORE_LOCK(sc); bxe_init_locked(sc); BXE_CORE_UNLOCK(sc); } static int bxe_init_ifnet(struct bxe_softc *sc) { if_t ifp; int capabilities; /* ifconfig entrypoint for media type/status reporting */ ifmedia_init(&sc->ifmedia, IFM_IMASK, bxe_ifmedia_update, bxe_ifmedia_status); /* set the default interface values */ ifmedia_add(&sc->ifmedia, (IFM_ETHER | IFM_FDX | sc->media), 0, NULL); ifmedia_add(&sc->ifmedia, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&sc->ifmedia, (IFM_ETHER | IFM_AUTO)); sc->ifmedia.ifm_media = sc->ifmedia.ifm_cur->ifm_media; /* XXX ? */ BLOGI(sc, "IFMEDIA flags : %x\n", sc->ifmedia.ifm_media); /* allocate the ifnet structure */ if ((ifp = if_gethandle(IFT_ETHER)) == NULL) { BLOGE(sc, "Interface allocation failed!\n"); return (ENXIO); } if_setsoftc(ifp, sc); if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); if_setflags(ifp, (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST)); if_setioctlfn(ifp, bxe_ioctl); if_setstartfn(ifp, bxe_tx_start); if_setgetcounterfn(ifp, bxe_get_counter); #if __FreeBSD_version >= 901504 if_settransmitfn(ifp, bxe_tx_mq_start); if_setqflushfn(ifp, bxe_mq_flush); #endif #ifdef FreeBSD8_0 if_settimer(ifp, 0); #endif if_setinitfn(ifp, bxe_init); if_setmtu(ifp, sc->mtu); if_sethwassist(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6)); capabilities = #if __FreeBSD_version < 700000 (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_LRO); #else (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWCSUM | IFCAP_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_LRO | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_WOL_MAGIC); #endif if_setcapabilitiesbit(ifp, capabilities, 0); /* XXX */ if_setcapenable(ifp, if_getcapabilities(ifp)); if_setbaudrate(ifp, IF_Gbps(10)); /* XXX */ if_setsendqlen(ifp, sc->tx_ring_size); if_setsendqready(ifp); /* XXX */ sc->ifp = ifp; /* attach to the Ethernet interface list */ ether_ifattach(ifp, sc->link_params.mac_addr); /* Attach driver netdump methods. 
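* NETDUMP_SET() wires the bxe netdump callbacks into the ifnet so a kernel core dump can be transmitted through this interface.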
*/ NETDUMP_SET(ifp, bxe); return (0); } static void bxe_deallocate_bars(struct bxe_softc *sc) { int i; for (i = 0; i < MAX_BARS; i++) { if (sc->bar[i].resource != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->bar[i].rid, sc->bar[i].resource); BLOGD(sc, DBG_LOAD, "Released PCI BAR%d [%02x] memory\n", i, PCIR_BAR(i)); } } } static int bxe_allocate_bars(struct bxe_softc *sc) { u_int flags; int i; memset(sc->bar, 0, sizeof(sc->bar)); for (i = 0; i < MAX_BARS; i++) { /* memory resources reside at BARs 0, 2, 4 */ /* Run `pciconf -lb` to see mappings */ if ((i != 0) && (i != 2) && (i != 4)) { continue; } sc->bar[i].rid = PCIR_BAR(i); flags = RF_ACTIVE; if (i == 0) { flags |= RF_SHAREABLE; } if ((sc->bar[i].resource = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->bar[i].rid, flags)) == NULL) { return (0); } sc->bar[i].tag = rman_get_bustag(sc->bar[i].resource); sc->bar[i].handle = rman_get_bushandle(sc->bar[i].resource); sc->bar[i].kva = (vm_offset_t)rman_get_virtual(sc->bar[i].resource); - BLOGI(sc, "PCI BAR%d [%02x] memory allocated: %p-%p (%jd) -> %p\n", + BLOGI(sc, "PCI BAR%d [%02x] memory allocated: %#jx-%#jx (%jd) -> %#jx\n", i, PCIR_BAR(i), - (void *)rman_get_start(sc->bar[i].resource), - (void *)rman_get_end(sc->bar[i].resource), + rman_get_start(sc->bar[i].resource), + rman_get_end(sc->bar[i].resource), rman_get_size(sc->bar[i].resource), - (void *)sc->bar[i].kva); + (uintmax_t)sc->bar[i].kva); } return (0); } static void bxe_get_function_num(struct bxe_softc *sc) { uint32_t val = 0; /* * Read the ME register to get the function number. The ME register * holds the relative-function number and absolute-function number. The * absolute-function number appears only in E2 and above. Before that * these bits always contained zero, therefore we cannot blindly use them. */ val = REG_RD(sc, BAR_ME_REGISTER); sc->pfunc_rel = (uint8_t)((val & ME_REG_PF_NUM) >> ME_REG_PF_NUM_SHIFT); sc->path_id = (uint8_t)((val & ME_REG_ABS_PF_NUM) >> ME_REG_ABS_PF_NUM_SHIFT) & 1; if (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) { sc->pfunc_abs = ((sc->pfunc_rel << 1) | sc->path_id); } else { sc->pfunc_abs = (sc->pfunc_rel | sc->path_id); } BLOGD(sc, DBG_LOAD, "Relative function %d, Absolute function %d, Path %d\n", sc->pfunc_rel, sc->pfunc_abs, sc->path_id); } static uint32_t bxe_get_shmem_mf_cfg_base(struct bxe_softc *sc) { uint32_t shmem2_size; uint32_t offset; uint32_t mf_cfg_offset_value; /* Non 57712 */ offset = (SHMEM_RD(sc, func_mb) + (MAX_FUNC_NUM * sizeof(struct drv_func_mb))); /* 57712 plus */ if (sc->devinfo.shmem2_base != 0) { shmem2_size = SHMEM2_RD(sc, size); if (shmem2_size > offsetof(struct shmem2_region, mf_cfg_addr)) { mf_cfg_offset_value = SHMEM2_RD(sc, mf_cfg_addr); if (SHMEM_MF_CFG_ADDR_NONE != mf_cfg_offset_value) { offset = mf_cfg_offset_value; } } } return (offset); } static uint32_t bxe_pcie_capability_read(struct bxe_softc *sc, int reg, int width) { int pcie_reg; /* ensure PCIe capability is enabled */ if (pci_find_cap(sc->dev, PCIY_EXPRESS, &pcie_reg) == 0) { if (pcie_reg != 0) { BLOGD(sc, DBG_LOAD, "PCIe capability at 0x%04x\n", pcie_reg); return (pci_read_config(sc->dev, (pcie_reg + reg), width)); } } BLOGE(sc, "PCIe capability NOT FOUND!!!\n"); return (0); } static uint8_t bxe_is_pcie_pending(struct bxe_softc *sc) { return (bxe_pcie_capability_read(sc, PCIR_EXPRESS_DEVICE_STA, 2) & PCIM_EXP_STA_TRANSACTION_PND); } /* * Walk the PCI capabilities list for the device to find what features are * supported.
These capabilities may be enabled/disabled by firmware so it's * best to walk the list rather than make assumptions. */ static void bxe_probe_pci_caps(struct bxe_softc *sc) { uint16_t link_status; int reg; /* check if PCI Power Management is enabled */ if (pci_find_cap(sc->dev, PCIY_PMG, &reg) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found PM capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_PM_CAPABLE_FLAG; sc->devinfo.pcie_pm_cap_reg = (uint16_t)reg; } } link_status = bxe_pcie_capability_read(sc, PCIR_EXPRESS_LINK_STA, 2); /* handle PCIe 2.0 workarounds for 57710 */ if (CHIP_IS_E1(sc)) { /* workaround for 57710 errata E4_57710_27462 */ sc->devinfo.pcie_link_speed = (REG_RD(sc, 0x3d04) & (1 << 24)) ? 2 : 1; /* workaround for 57710 errata E4_57710_27488 */ sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4); if (sc->devinfo.pcie_link_speed > 1) { sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4) >> 1; } } else { sc->devinfo.pcie_link_speed = (link_status & PCIM_LINK_STA_SPEED); sc->devinfo.pcie_link_width = ((link_status & PCIM_LINK_STA_WIDTH) >> 4); } BLOGD(sc, DBG_LOAD, "PCIe link speed=%d width=%d\n", sc->devinfo.pcie_link_speed, sc->devinfo.pcie_link_width); sc->devinfo.pcie_cap_flags |= BXE_PCIE_CAPABLE_FLAG; sc->devinfo.pcie_pcie_cap_reg = (uint16_t)reg; /* check if MSI capability is enabled */ if (pci_find_cap(sc->dev, PCIY_MSI, &reg) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found MSI capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_MSI_CAPABLE_FLAG; sc->devinfo.pcie_msi_cap_reg = (uint16_t)reg; } } /* check if MSI-X capability is enabled */ if (pci_find_cap(sc->dev, PCIY_MSIX, &reg) == 0) { if (reg != 0) { BLOGD(sc, DBG_LOAD, "Found MSI-X capability at 0x%04x\n", reg); sc->devinfo.pcie_cap_flags |= BXE_MSIX_CAPABLE_FLAG; sc->devinfo.pcie_msix_cap_reg = (uint16_t)reg; } } } static int bxe_get_shmem_mf_cfg_info_sd(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val; /* get the outer vlan if we're in switch-dependent mode */ val = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); mf_info->ext_id = (uint16_t)val; mf_info->multi_vnics_mode = 1; if (!VALID_OVLAN(mf_info->ext_id)) { BLOGE(sc, "Invalid VLAN (%d)\n", mf_info->ext_id); return (1); } /* get the capabilities */ if ((mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_PROTOCOL_MASK) == FUNC_MF_CFG_PROTOCOL_ISCSI) { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_ISCSI; } else if ((mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_PROTOCOL_MASK) == FUNC_MF_CFG_PROTOCOL_FCOE) { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_FCOE; } else { mf_info->mf_protos_supported |= MF_PROTO_SUPPORT_ETHERNET; } mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static uint32_t bxe_get_shmem_ext_proto_support_flags(struct bxe_softc *sc) { uint32_t retval = 0; uint32_t val; val = MFCFG_RD(sc, func_ext_config[SC_ABS_FUNC(sc)].func_cfg); if (val & MACP_FUNC_CFG_FLAGS_ENABLED) { if (val & MACP_FUNC_CFG_FLAGS_ETHERNET) { retval |= MF_PROTO_SUPPORT_ETHERNET; } if (val & MACP_FUNC_CFG_FLAGS_ISCSI_OFFLOAD) { retval |= MF_PROTO_SUPPORT_ISCSI; } if (val & MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD) { retval |= MF_PROTO_SUPPORT_FCOE; } } return (retval); } static int bxe_get_shmem_mf_cfg_info_si(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val; /* * There is no outer vlan if we're in switch-independent mode. * If the mac is valid then assume multi-function.
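* (Validity is presumably judged from the MACP_FUNC_CFG_FLAGS bits read from func_ext_config just below.)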
*/ val = MFCFG_RD(sc, func_ext_config[SC_ABS_FUNC(sc)].func_cfg); mf_info->multi_vnics_mode = ((val & MACP_FUNC_CFG_FLAGS_MASK) != 0); mf_info->mf_protos_supported = bxe_get_shmem_ext_proto_support_flags(sc); mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static int bxe_get_shmem_mf_cfg_info_niv(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t e1hov_tag; uint32_t func_config; uint32_t niv_config; mf_info->multi_vnics_mode = 1; e1hov_tag = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); func_config = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); niv_config = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].afex_config); mf_info->ext_id = (uint16_t)((e1hov_tag & FUNC_MF_CFG_E1HOV_TAG_MASK) >> FUNC_MF_CFG_E1HOV_TAG_SHIFT); mf_info->default_vlan = (uint16_t)((e1hov_tag & FUNC_MF_CFG_AFEX_VLAN_MASK) >> FUNC_MF_CFG_AFEX_VLAN_SHIFT); mf_info->niv_allowed_priorities = (uint8_t)((niv_config & FUNC_MF_CFG_AFEX_COS_FILTER_MASK) >> FUNC_MF_CFG_AFEX_COS_FILTER_SHIFT); mf_info->niv_default_cos = (uint8_t)((func_config & FUNC_MF_CFG_TRANSMIT_PRIORITY_MASK) >> FUNC_MF_CFG_TRANSMIT_PRIORITY_SHIFT); mf_info->afex_vlan_mode = ((niv_config & FUNC_MF_CFG_AFEX_VLAN_MODE_MASK) >> FUNC_MF_CFG_AFEX_VLAN_MODE_SHIFT); mf_info->niv_mba_enabled = ((niv_config & FUNC_MF_CFG_AFEX_MBA_ENABLED_MASK) >> FUNC_MF_CFG_AFEX_MBA_ENABLED_SHIFT); mf_info->mf_protos_supported = bxe_get_shmem_ext_proto_support_flags(sc); mf_info->vnics_per_port = (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) ? 2 : 4; return (0); } static int bxe_check_valid_mf_cfg(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t mf_cfg1; uint32_t mf_cfg2; uint32_t ovlan1; uint32_t ovlan2; uint8_t i, j; BLOGD(sc, DBG_LOAD, "MF config parameters for function %d\n", SC_PORT(sc)); BLOGD(sc, DBG_LOAD, "\tmf_config=0x%x\n", mf_info->mf_config[SC_VN(sc)]); BLOGD(sc, DBG_LOAD, "\tmulti_vnics_mode=%d\n", mf_info->multi_vnics_mode); BLOGD(sc, DBG_LOAD, "\tvnics_per_port=%d\n", mf_info->vnics_per_port); BLOGD(sc, DBG_LOAD, "\tovlan/vifid=%d\n", mf_info->ext_id); BLOGD(sc, DBG_LOAD, "\tmin_bw=%d/%d/%d/%d\n", mf_info->min_bw[0], mf_info->min_bw[1], mf_info->min_bw[2], mf_info->min_bw[3]); BLOGD(sc, DBG_LOAD, "\tmax_bw=%d/%d/%d/%d\n", mf_info->max_bw[0], mf_info->max_bw[1], mf_info->max_bw[2], mf_info->max_bw[3]); BLOGD(sc, DBG_LOAD, "\tmac_addr: %s\n", sc->mac_addr_str); /* various MF mode sanity checks... */ if (mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_HIDE) { BLOGE(sc, "Enumerated function %d is marked as hidden\n", SC_PORT(sc)); return (1); } if ((mf_info->vnics_per_port > 1) && !mf_info->multi_vnics_mode) { BLOGE(sc, "vnics_per_port=%d multi_vnics_mode=%d\n", mf_info->vnics_per_port, mf_info->multi_vnics_mode); return (1); } if (mf_info->mf_mode == MULTI_FUNCTION_SD) { /* vnic id > 0 must have valid ovlan in switch-dependent mode */ if ((SC_VN(sc) > 0) && !VALID_OVLAN(OVLAN(sc))) { BLOGE(sc, "mf_mode=SD vnic_id=%d ovlan=%d\n", SC_VN(sc), OVLAN(sc)); return (1); } if (!VALID_OVLAN(OVLAN(sc)) && mf_info->multi_vnics_mode) { BLOGE(sc, "mf_mode=SD multi_vnics_mode=%d ovlan=%d\n", mf_info->multi_vnics_mode, OVLAN(sc)); return (1); } /* * Verify all functions are either MF or SF mode. If MF, make * sure that all non-hidden functions have a valid ovlan. If SF, * make sure that all non-hidden functions have an invalid ovlan.
*/ FOREACH_ABS_FUNC_IN_PORT(sc, i) { mf_cfg1 = MFCFG_RD(sc, func_mf_config[i].config); ovlan1 = MFCFG_RD(sc, func_mf_config[i].e1hov_tag); if (!(mf_cfg1 & FUNC_MF_CFG_FUNC_HIDE) && (((mf_info->multi_vnics_mode) && !VALID_OVLAN(ovlan1)) || ((!mf_info->multi_vnics_mode) && VALID_OVLAN(ovlan1)))) { BLOGE(sc, "mf_mode=SD function %d MF config " "mismatch, multi_vnics_mode=%d ovlan=%d\n", i, mf_info->multi_vnics_mode, ovlan1); return (1); } } /* Verify all funcs on the same port each have a different ovlan. */ FOREACH_ABS_FUNC_IN_PORT(sc, i) { mf_cfg1 = MFCFG_RD(sc, func_mf_config[i].config); ovlan1 = MFCFG_RD(sc, func_mf_config[i].e1hov_tag); /* iterate from the next function on the port to the max func */ for (j = i + 2; j < MAX_FUNC_NUM; j += 2) { mf_cfg2 = MFCFG_RD(sc, func_mf_config[j].config); ovlan2 = MFCFG_RD(sc, func_mf_config[j].e1hov_tag); if (!(mf_cfg1 & FUNC_MF_CFG_FUNC_HIDE) && VALID_OVLAN(ovlan1) && !(mf_cfg2 & FUNC_MF_CFG_FUNC_HIDE) && VALID_OVLAN(ovlan2) && (ovlan1 == ovlan2)) { BLOGE(sc, "mf_mode=SD functions %d and %d " "have the same ovlan (%d)\n", i, j, ovlan1); return (1); } } } } /* MULTI_FUNCTION_SD */ return (0); } static int bxe_get_mf_cfg_info(struct bxe_softc *sc) { struct bxe_mf_info *mf_info = &sc->devinfo.mf_info; uint32_t val, mac_upper; uint8_t i, vnic; /* initialize mf_info defaults */ mf_info->vnics_per_port = 1; mf_info->multi_vnics_mode = FALSE; mf_info->path_has_ovlan = FALSE; mf_info->mf_mode = SINGLE_FUNCTION; if (!CHIP_IS_MF_CAP(sc)) { return (0); } if (sc->devinfo.mf_cfg_base == SHMEM_MF_CFG_ADDR_NONE) { BLOGE(sc, "Invalid mf_cfg_base!\n"); return (1); } /* get the MF mode (switch dependent / independent / single-function) */ val = SHMEM_RD(sc, dev_info.shared_feature_config.config); switch (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK) { case SHARED_FEAT_CFG_FORCE_SF_MODE_SWITCH_INDEPT: mac_upper = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); /* check for legal upper mac bytes */ if (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT) { mf_info->mf_mode = MULTI_FUNCTION_SI; } else { BLOGE(sc, "Invalid config for Switch Independent mode\n"); } break; case SHARED_FEAT_CFG_FORCE_SF_MODE_MF_ALLOWED: case SHARED_FEAT_CFG_FORCE_SF_MODE_SPIO4: /* get outer vlan configuration */ val = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].e1hov_tag); if ((val & FUNC_MF_CFG_E1HOV_TAG_MASK) != FUNC_MF_CFG_E1HOV_TAG_DEFAULT) { mf_info->mf_mode = MULTI_FUNCTION_SD; } else { BLOGE(sc, "Invalid config for Switch Dependent mode\n"); } break; case SHARED_FEAT_CFG_FORCE_SF_MODE_FORCED_SF: /* not in MF mode, vnics_per_port=1 and multi_vnics_mode=FALSE */ return (0); case SHARED_FEAT_CFG_FORCE_SF_MODE_AFEX_MODE: /* * Mark MF mode as NIV if MCP version includes NPAR-SD support * and the MAC address is valid. */ mac_upper = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); if ((SHMEM2_HAS(sc, afex_driver_support)) && (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT)) { mf_info->mf_mode = MULTI_FUNCTION_AFEX; } else { BLOGE(sc, "Invalid config for AFEX mode\n"); } break; default: BLOGE(sc, "Unknown MF mode (0x%08x)\n", (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK)); return (1); } /* set path mf_mode (which could be different than function mf_mode) */ if (mf_info->mf_mode == MULTI_FUNCTION_SD) { mf_info->path_has_ovlan = TRUE; } else if (mf_info->mf_mode == SINGLE_FUNCTION) { /* * Decide on path multi vnics mode. 
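* (The result is recorded in path_has_ovlan rather than multi_vnics_mode.)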
If we're not in MF mode and in * 4-port mode, this is good enough to check vnic-0 of the other port * on the same path */ if (CHIP_PORT_MODE(sc) == CHIP_4_PORT_MODE) { uint8_t other_port = !(PORT_ID(sc) & 1); uint8_t abs_func_other_port = (SC_PATH(sc) + (2 * other_port)); val = MFCFG_RD(sc, func_mf_config[abs_func_other_port].e1hov_tag); mf_info->path_has_ovlan = VALID_OVLAN((uint16_t)val) ? 1 : 0; } } if (mf_info->mf_mode == SINGLE_FUNCTION) { /* invalid MF config */ if (SC_VN(sc) >= 1) { BLOGE(sc, "VNIC ID >= 1 in SF mode\n"); return (1); } return (0); } /* get the MF configuration */ mf_info->mf_config[SC_VN(sc)] = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].config); switch(mf_info->mf_mode) { case MULTI_FUNCTION_SD: bxe_get_shmem_mf_cfg_info_sd(sc); break; case MULTI_FUNCTION_SI: bxe_get_shmem_mf_cfg_info_si(sc); break; case MULTI_FUNCTION_AFEX: bxe_get_shmem_mf_cfg_info_niv(sc); break; default: BLOGE(sc, "Get MF config failed (mf_mode=0x%08x)\n", mf_info->mf_mode); return (1); } /* get the congestion management parameters */ vnic = 0; FOREACH_ABS_FUNC_IN_PORT(sc, i) { /* get min/max bw */ val = MFCFG_RD(sc, func_mf_config[i].config); mf_info->min_bw[vnic] = ((val & FUNC_MF_CFG_MIN_BW_MASK) >> FUNC_MF_CFG_MIN_BW_SHIFT); mf_info->max_bw[vnic] = ((val & FUNC_MF_CFG_MAX_BW_MASK) >> FUNC_MF_CFG_MAX_BW_SHIFT); vnic++; } return (bxe_check_valid_mf_cfg(sc)); } static int bxe_get_shmem_info(struct bxe_softc *sc) { int port; uint32_t mac_hi, mac_lo, val; port = SC_PORT(sc); mac_hi = mac_lo = 0; sc->link_params.sc = sc; sc->link_params.port = port; /* get the hardware config info */ sc->devinfo.hw_config = SHMEM_RD(sc, dev_info.shared_hw_config.config); sc->devinfo.hw_config2 = SHMEM_RD(sc, dev_info.shared_hw_config.config2); sc->link_params.hw_led_mode = ((sc->devinfo.hw_config & SHARED_HW_CFG_LED_MODE_MASK) >> SHARED_HW_CFG_LED_MODE_SHIFT); /* get the port feature config */ sc->port.config = SHMEM_RD(sc, dev_info.port_feature_config[port].config); /* get the link params */ sc->link_params.speed_cap_mask[0] = SHMEM_RD(sc, dev_info.port_hw_config[port].speed_capability_mask); sc->link_params.speed_cap_mask[1] = SHMEM_RD(sc, dev_info.port_hw_config[port].speed_capability_mask2); /* get the lane config */ sc->link_params.lane_config = SHMEM_RD(sc, dev_info.port_hw_config[port].lane_config); /* get the link config */ val = SHMEM_RD(sc, dev_info.port_feature_config[port].link_config); sc->port.link_config[ELINK_INT_PHY] = val; sc->link_params.switch_cfg = (val & PORT_FEATURE_CONNECTED_SWITCH_MASK); sc->port.link_config[ELINK_EXT_PHY1] = SHMEM_RD(sc, dev_info.port_feature_config[port].link_config2); /* get the override preemphasis flag and enable it or turn it off */ val = SHMEM_RD(sc, dev_info.shared_feature_config.config); if (val & SHARED_FEAT_CFG_OVERRIDE_PREEMPHASIS_CFG_ENABLED) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED; } else { sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED; } /* get the initial value of the link params */ sc->link_params.multi_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].multi_phy_config); /* get external phy info */ sc->port.ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); /* get the multifunction configuration */ bxe_get_mf_cfg_info(sc); /* get the mac address */ if (IS_MF(sc)) { mac_hi = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_upper); mac_lo = MFCFG_RD(sc, func_mf_config[SC_ABS_FUNC(sc)].mac_lower); } else { mac_hi = 
SHMEM_RD(sc, dev_info.port_hw_config[port].mac_upper); mac_lo = SHMEM_RD(sc, dev_info.port_hw_config[port].mac_lower); } if ((mac_lo == 0) && (mac_hi == 0)) { *sc->mac_addr_str = 0; BLOGE(sc, "No Ethernet address programmed!\n"); } else { sc->link_params.mac_addr[0] = (uint8_t)(mac_hi >> 8); sc->link_params.mac_addr[1] = (uint8_t)(mac_hi); sc->link_params.mac_addr[2] = (uint8_t)(mac_lo >> 24); sc->link_params.mac_addr[3] = (uint8_t)(mac_lo >> 16); sc->link_params.mac_addr[4] = (uint8_t)(mac_lo >> 8); sc->link_params.mac_addr[5] = (uint8_t)(mac_lo); snprintf(sc->mac_addr_str, sizeof(sc->mac_addr_str), "%02x:%02x:%02x:%02x:%02x:%02x", sc->link_params.mac_addr[0], sc->link_params.mac_addr[1], sc->link_params.mac_addr[2], sc->link_params.mac_addr[3], sc->link_params.mac_addr[4], sc->link_params.mac_addr[5]); BLOGD(sc, DBG_LOAD, "Ethernet address: %s\n", sc->mac_addr_str); } return (0); } static void bxe_get_tunable_params(struct bxe_softc *sc) { /* sanity checks */ if ((bxe_interrupt_mode != INTR_MODE_INTX) && (bxe_interrupt_mode != INTR_MODE_MSI) && (bxe_interrupt_mode != INTR_MODE_MSIX)) { BLOGW(sc, "invalid interrupt_mode value (%d)\n", bxe_interrupt_mode); bxe_interrupt_mode = INTR_MODE_MSIX; } if ((bxe_queue_count < 0) || (bxe_queue_count > MAX_RSS_CHAINS)) { BLOGW(sc, "invalid queue_count value (%d)\n", bxe_queue_count); bxe_queue_count = 0; } if ((bxe_max_rx_bufs < 1) || (bxe_max_rx_bufs > RX_BD_USABLE)) { if (bxe_max_rx_bufs == 0) { bxe_max_rx_bufs = RX_BD_USABLE; } else { BLOGW(sc, "invalid max_rx_bufs (%d)\n", bxe_max_rx_bufs); bxe_max_rx_bufs = 2048; } } if ((bxe_hc_rx_ticks < 1) || (bxe_hc_rx_ticks > 100)) { BLOGW(sc, "invalid hc_rx_ticks (%d)\n", bxe_hc_rx_ticks); bxe_hc_rx_ticks = 25; } if ((bxe_hc_tx_ticks < 1) || (bxe_hc_tx_ticks > 100)) { BLOGW(sc, "invalid hc_tx_ticks (%d)\n", bxe_hc_tx_ticks); bxe_hc_tx_ticks = 50; } if (bxe_max_aggregation_size == 0) { bxe_max_aggregation_size = TPA_AGG_SIZE; } if (bxe_max_aggregation_size > 0xffff) { BLOGW(sc, "invalid max_aggregation_size (%d)\n", bxe_max_aggregation_size); bxe_max_aggregation_size = TPA_AGG_SIZE; } if ((bxe_mrrs < -1) || (bxe_mrrs > 3)) { BLOGW(sc, "invalid mrrs (%d)\n", bxe_mrrs); bxe_mrrs = -1; } if ((bxe_autogreeen < 0) || (bxe_autogreeen > 2)) { BLOGW(sc, "invalid autogreeen (%d)\n", bxe_autogreeen); bxe_autogreeen = 0; } if ((bxe_udp_rss < 0) || (bxe_udp_rss > 1)) { BLOGW(sc, "invalid udp_rss (%d)\n", bxe_udp_rss); bxe_udp_rss = 0; } /* pull in user settings */ sc->interrupt_mode = bxe_interrupt_mode; sc->max_rx_bufs = bxe_max_rx_bufs; sc->hc_rx_ticks = bxe_hc_rx_ticks; sc->hc_tx_ticks = bxe_hc_tx_ticks; sc->max_aggregation_size = bxe_max_aggregation_size; sc->mrrs = bxe_mrrs; sc->autogreeen = bxe_autogreeen; sc->udp_rss = bxe_udp_rss; if (bxe_interrupt_mode == INTR_MODE_INTX) { sc->num_queues = 1; } else { /* INTR_MODE_MSI or INTR_MODE_MSIX */ sc->num_queues = min((bxe_queue_count ? 
bxe_queue_count : mp_ncpus), MAX_RSS_CHAINS); if (sc->num_queues > mp_ncpus) { sc->num_queues = mp_ncpus; } } BLOGD(sc, DBG_LOAD, "User Config: " "debug=0x%lx " "interrupt_mode=%d " "queue_count=%d " "hc_rx_ticks=%d " "hc_tx_ticks=%d " "rx_budget=%d " "max_aggregation_size=%d " "mrrs=%d " "autogreeen=%d " "udp_rss=%d\n", bxe_debug, sc->interrupt_mode, sc->num_queues, sc->hc_rx_ticks, sc->hc_tx_ticks, bxe_rx_budget, sc->max_aggregation_size, sc->mrrs, sc->autogreeen, sc->udp_rss); } static int bxe_media_detect(struct bxe_softc *sc) { int port_type; uint32_t phy_idx = bxe_get_cur_phy_idx(sc); switch (sc->link_params.phy[phy_idx].media_type) { case ELINK_ETH_PHY_SFPP_10G_FIBER: case ELINK_ETH_PHY_XFP_FIBER: BLOGI(sc, "Found 10Gb Fiber media.\n"); sc->media = IFM_10G_SR; port_type = PORT_FIBRE; break; case ELINK_ETH_PHY_SFP_1G_FIBER: BLOGI(sc, "Found 1Gb Fiber media.\n"); sc->media = IFM_1000_SX; port_type = PORT_FIBRE; break; case ELINK_ETH_PHY_KR: case ELINK_ETH_PHY_CX4: BLOGI(sc, "Found 10GBase-CX4 media.\n"); sc->media = IFM_10G_CX4; port_type = PORT_FIBRE; break; case ELINK_ETH_PHY_DA_TWINAX: BLOGI(sc, "Found 10Gb Twinax media.\n"); sc->media = IFM_10G_TWINAX; port_type = PORT_DA; break; case ELINK_ETH_PHY_BASE_T: if (sc->link_params.speed_cap_mask[0] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) { BLOGI(sc, "Found 10GBase-T media.\n"); sc->media = IFM_10G_T; port_type = PORT_TP; } else { BLOGI(sc, "Found 1000Base-T media.\n"); sc->media = IFM_1000_T; port_type = PORT_TP; } break; case ELINK_ETH_PHY_NOT_PRESENT: BLOGI(sc, "Media not present.\n"); sc->media = 0; port_type = PORT_OTHER; break; case ELINK_ETH_PHY_UNSPECIFIED: default: BLOGI(sc, "Unknown media!\n"); sc->media = 0; port_type = PORT_OTHER; break; } return port_type; } /* extract a register field using the companion fname##_MASK and fname##_SHIFT constants */ #define GET_FIELD(value, fname) \ (((value) & (fname##_MASK)) >> (fname##_SHIFT)) #define IGU_FID(val) GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID) #define IGU_VEC(val) GET_FIELD((val), IGU_REG_MAPPING_MEMORY_VECTOR) static int bxe_get_igu_cam_info(struct bxe_softc *sc) { int pfid = SC_FUNC(sc); int igu_sb_id; uint32_t val; uint8_t fid, igu_sb_cnt = 0; sc->igu_base_sb = 0xff; if (CHIP_INT_MODE_IS_BC(sc)) { int vn = SC_VN(sc); igu_sb_cnt = sc->igu_sb_cnt; sc->igu_base_sb = ((CHIP_IS_MODE_4_PORT(sc) ? pfid : vn) * FP_SB_MAX_E1x); sc->igu_dsb_id = (E1HVN_MAX * FP_SB_MAX_E1x + (CHIP_IS_MODE_4_PORT(sc) ? pfid : vn)); return (0); } /* IGU in normal mode - read CAM */ for (igu_sb_id = 0; igu_sb_id < IGU_REG_MAPPING_MEMORY_SIZE; igu_sb_id++) { val = REG_RD(sc, IGU_REG_MAPPING_MEMORY + igu_sb_id * 4); if (!(val & IGU_REG_MAPPING_MEMORY_VALID)) { continue; } fid = IGU_FID(val); if ((fid & IGU_FID_ENCODE_IS_PF)) { if ((fid & IGU_FID_PF_NUM_MASK) != pfid) { continue; } if (IGU_VEC(val) == 0) { /* default status block */ sc->igu_dsb_id = igu_sb_id; } else { if (sc->igu_base_sb == 0xff) { sc->igu_base_sb = igu_sb_id; } igu_sb_cnt++; } } } /* * Due to new PF resource allocation by MFW T7.4 and above, the number of * CAM entries may not equal the value advertised in PCI config space. The * driver should use the minimum of the two as the actual status block * count. */ sc->igu_sb_cnt = min(sc->igu_sb_cnt, igu_sb_cnt); if (igu_sb_cnt == 0) { BLOGE(sc, "CAM configuration error\n"); return (-1); } return (0); } /* * Gather various information from the device config space, the device itself, * shmem, and the user input.
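* This covers the PCI IDs, chip id and revision, port mode, shmem base addresses, bootcode version, flash size, PCI capabilities, and the IGU mode and CAM.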
*/ static int bxe_get_device_info(struct bxe_softc *sc) { uint32_t val; int rc; /* Get the data for the device */ sc->devinfo.vendor_id = pci_get_vendor(sc->dev); sc->devinfo.device_id = pci_get_device(sc->dev); sc->devinfo.subvendor_id = pci_get_subvendor(sc->dev); sc->devinfo.subdevice_id = pci_get_subdevice(sc->dev); /* get the chip revision (chip metal comes from pci config space) */ sc->devinfo.chip_id = sc->link_params.chip_id = (((REG_RD(sc, MISC_REG_CHIP_NUM) & 0xffff) << 16) | ((REG_RD(sc, MISC_REG_CHIP_REV) & 0xf) << 12) | (((REG_RD(sc, PCICFG_OFFSET + PCI_ID_VAL3) >> 24) & 0xf) << 4) | ((REG_RD(sc, MISC_REG_BOND_ID) & 0xf) << 0)); /* force 57811 according to MISC register */ if (REG_RD(sc, MISC_REG_CHIP_TYPE) & MISC_REG_CHIP_TYPE_57811_MASK) { if (CHIP_IS_57810(sc)) { sc->devinfo.chip_id = ((CHIP_NUM_57811 << 16) | (sc->devinfo.chip_id & 0x0000ffff)); } else if (CHIP_IS_57810_MF(sc)) { sc->devinfo.chip_id = ((CHIP_NUM_57811_MF << 16) | (sc->devinfo.chip_id & 0x0000ffff)); } sc->devinfo.chip_id |= 0x1; } BLOGD(sc, DBG_LOAD, "chip_id=0x%08x (num=0x%04x rev=0x%01x metal=0x%02x bond=0x%01x)\n", sc->devinfo.chip_id, ((sc->devinfo.chip_id >> 16) & 0xffff), ((sc->devinfo.chip_id >> 12) & 0xf), ((sc->devinfo.chip_id >> 4) & 0xff), ((sc->devinfo.chip_id >> 0) & 0xf)); val = (REG_RD(sc, 0x2874) & 0x55); if ((sc->devinfo.chip_id & 0x1) || (CHIP_IS_E1(sc) && val) || (CHIP_IS_E1H(sc) && (val == 0x55))) { sc->flags |= BXE_ONE_PORT_FLAG; BLOGD(sc, DBG_LOAD, "single port device\n"); } /* set the doorbell size */ sc->doorbell_size = (1 << BXE_DB_SHIFT); /* determine whether the device is in 2 port or 4 port mode */ sc->devinfo.chip_port_mode = CHIP_PORT_MODE_NONE; /* E1 & E1h*/ if (CHIP_IS_E2E3(sc)) { /* * Read port4mode_en_ovwr[0]: * If 1, four port mode is in port4mode_en_ovwr[1]. * If 0, four port mode is in port4mode_en[0]. */ val = REG_RD(sc, MISC_REG_PORT4MODE_EN_OVWR); if (val & 1) { val = ((val >> 1) & 1); } else { val = REG_RD(sc, MISC_REG_PORT4MODE_EN); } sc->devinfo.chip_port_mode = (val) ? CHIP_4_PORT_MODE : CHIP_2_PORT_MODE; BLOGD(sc, DBG_LOAD, "Port mode = %s\n", (val) ? "4" : "2"); } /* get the function and path info for the device */ bxe_get_function_num(sc); /* get the shared memory base address */ sc->devinfo.shmem_base = sc->link_params.shmem_base = REG_RD(sc, MISC_REG_SHARED_MEM_ADDR); sc->devinfo.shmem2_base = REG_RD(sc, (SC_PATH(sc) ? 
MISC_REG_GENERIC_CR_1 : MISC_REG_GENERIC_CR_0)); BLOGD(sc, DBG_LOAD, "shmem_base=0x%08x, shmem2_base=0x%08x\n", sc->devinfo.shmem_base, sc->devinfo.shmem2_base); if (!sc->devinfo.shmem_base) { /* this should ONLY prevent upcoming shmem reads */ BLOGI(sc, "MCP not active\n"); sc->flags |= BXE_NO_MCP_FLAG; return (0); } /* make sure the shared memory contents are valid */ val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]); if ((val & (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) != (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) { BLOGE(sc, "Invalid SHMEM validity signature: 0x%08x\n", val); return (0); } BLOGD(sc, DBG_LOAD, "Valid SHMEM validity signature: 0x%08x\n", val); /* get the bootcode version */ sc->devinfo.bc_ver = SHMEM_RD(sc, dev_info.bc_rev); snprintf(sc->devinfo.bc_ver_str, sizeof(sc->devinfo.bc_ver_str), "%d.%d.%d", ((sc->devinfo.bc_ver >> 24) & 0xff), ((sc->devinfo.bc_ver >> 16) & 0xff), ((sc->devinfo.bc_ver >> 8) & 0xff)); BLOGD(sc, DBG_LOAD, "Bootcode version: %s\n", sc->devinfo.bc_ver_str); /* get the bootcode shmem address */ sc->devinfo.mf_cfg_base = bxe_get_shmem_mf_cfg_base(sc); BLOGD(sc, DBG_LOAD, "mf_cfg_base=0x%08x\n", sc->devinfo.mf_cfg_base); /* clean indirect addresses as they're not used */ pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, 0, 4); if (IS_PF(sc)) { REG_WR(sc, PXP2_REG_PGL_ADDR_88_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_8C_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_90_F0, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_94_F0, 0); if (CHIP_IS_E1x(sc)) { REG_WR(sc, PXP2_REG_PGL_ADDR_88_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_8C_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_90_F1, 0); REG_WR(sc, PXP2_REG_PGL_ADDR_94_F1, 0); } /* * Enable internal target-read (in case we are probed after PF * FLR). Must be done prior to any BAR read access. Only for * 57712 and up */ if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); } } /* get the nvram size */ val = REG_RD(sc, MCP_REG_MCPR_NVM_CFG4); sc->devinfo.flash_size = (NVRAM_1MB_SIZE << (val & MCPR_NVM_CFG4_FLASH_SIZE)); BLOGD(sc, DBG_LOAD, "nvram flash size: %d\n", sc->devinfo.flash_size); /* get PCI capabilities */ bxe_probe_pci_caps(sc); bxe_set_power_state(sc, PCI_PM_D0); /* get various configuration parameters from shmem */ bxe_get_shmem_info(sc); if (sc->devinfo.pcie_msix_cap_reg != 0) { val = pci_read_config(sc->dev, (sc->devinfo.pcie_msix_cap_reg + PCIR_MSIX_CTRL), 2); sc->igu_sb_cnt = (val & PCIM_MSIXCTRL_TABLE_SIZE); } else { sc->igu_sb_cnt = 1; } sc->igu_base_addr = BAR_IGU_INTMEM; /* initialize IGU parameters */ if (CHIP_IS_E1x(sc)) { sc->devinfo.int_block = INT_BLOCK_HC; sc->igu_dsb_id = DEF_SB_IGU_ID; sc->igu_base_sb = 0; } else { sc->devinfo.int_block = INT_BLOCK_IGU; /* do not allow device reset during IGU info processing */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET); val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION); if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) { int tout = 5000; BLOGD(sc, DBG_LOAD, "FORCING IGU Normal Mode\n"); val &= ~(IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN); REG_WR(sc, IGU_REG_BLOCK_CONFIGURATION, val); REG_WR(sc, IGU_REG_RESET_MEMORIES, 0x7f); while (tout && REG_RD(sc, IGU_REG_RESET_MEMORIES)) { tout--; DELAY(1000); } if (REG_RD(sc, IGU_REG_RESET_MEMORIES)) { BLOGD(sc, DBG_LOAD, "FORCING IGU Normal Mode failed!!!\n"); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); return (-1); } } if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) { BLOGD(sc, DBG_LOAD, "IGU Backward Compatible Mode\n"); sc->devinfo.int_block |= INT_BLOCK_MODE_BW_COMP; } else {
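/* the backward-compatibility bit is clear at this point, either because it came up that way or because it was just forced off above */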
BLOGD(sc, DBG_LOAD, "IGU Normal Mode\n"); } rc = bxe_get_igu_cam_info(sc); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); if (rc) { return (rc); } } /* * Get base FW non-default (fast path) status block ID. This value is * used to initialize the fw_sb_id saved on the fp/queue structure to * determine the id used by the FW. */ if (CHIP_IS_E1x(sc)) { sc->base_fw_ndsb = ((SC_PORT(sc) * FP_SB_MAX_E1x) + SC_L_ID(sc)); } else { /* * 57712+ - We currently use one FW SB per IGU SB (Rx and Tx of * the same queue are indicated on the same IGU SB). So we prefer * FW and IGU SBs to be the same value. */ sc->base_fw_ndsb = sc->igu_base_sb; } BLOGD(sc, DBG_LOAD, "igu_dsb_id=%d igu_base_sb=%d igu_sb_cnt=%d base_fw_ndsb=%d\n", sc->igu_dsb_id, sc->igu_base_sb, sc->igu_sb_cnt, sc->base_fw_ndsb); elink_phy_probe(&sc->link_params); return (0); } static void bxe_link_settings_supported(struct bxe_softc *sc, uint32_t switch_cfg) { uint32_t cfg_size = 0; uint32_t idx; uint8_t port = SC_PORT(sc); /* aggregation of supported attributes of all external phys */ sc->port.supported[0] = 0; sc->port.supported[1] = 0; switch (sc->link_params.num_phys) { case 1: sc->port.supported[0] = sc->link_params.phy[ELINK_INT_PHY].supported; cfg_size = 1; break; case 2: sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY1].supported; cfg_size = 1; break; case 3: if (sc->link_params.multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED) { sc->port.supported[1] = sc->link_params.phy[ELINK_EXT_PHY1].supported; sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY2].supported; } else { sc->port.supported[0] = sc->link_params.phy[ELINK_EXT_PHY1].supported; sc->port.supported[1] = sc->link_params.phy[ELINK_EXT_PHY2].supported; } cfg_size = 2; break; } if (!(sc->port.supported[0] || sc->port.supported[1])) { BLOGE(sc, "Invalid phy config in NVRAM (PHY1=0x%08x PHY2=0x%08x)\n", SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config), SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config2)); return; } if (CHIP_IS_E3(sc)) sc->port.phy_addr = REG_RD(sc, MISC_REG_WC0_CTRL_PHY_ADDR); else { switch (switch_cfg) { case ELINK_SWITCH_CFG_1G: sc->port.phy_addr = REG_RD(sc, NIG_REG_SERDES0_CTRL_PHY_ADDR + port*0x10); break; case ELINK_SWITCH_CFG_10G: sc->port.phy_addr = REG_RD(sc, NIG_REG_XGXS0_CTRL_PHY_ADDR + port*0x18); break; default: BLOGE(sc, "Invalid switch config in link_config=0x%08x\n", sc->port.link_config[0]); return; } } BLOGD(sc, DBG_LOAD, "PHY addr 0x%08x\n", sc->port.phy_addr); /* mask what we support according to speed_cap_mask per configuration */ for (idx = 0; idx < cfg_size; idx++) { if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10baseT_Half; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_100baseT_Half; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_100baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_1000baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_2500baseX_Full; } if (!(sc->link_params.speed_cap_mask[idx] & 
PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_10000baseT_Full; } if (!(sc->link_params.speed_cap_mask[idx] & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) { sc->port.supported[idx] &= ~ELINK_SUPPORTED_20000baseKR2_Full; } } BLOGD(sc, DBG_LOAD, "PHY supported 0=0x%08x 1=0x%08x\n", sc->port.supported[0], sc->port.supported[1]); ELINK_DEBUG_P2(sc, "PHY supported 0=0x%08x 1=0x%08x\n", sc->port.supported[0], sc->port.supported[1]); } static void bxe_link_settings_requested(struct bxe_softc *sc) { uint32_t link_config; uint32_t idx; uint32_t cfg_size = 0; sc->port.advertising[0] = 0; sc->port.advertising[1] = 0; switch (sc->link_params.num_phys) { case 1: case 2: cfg_size = 1; break; case 3: cfg_size = 2; break; } for (idx = 0; idx < cfg_size; idx++) { sc->link_params.req_duplex[idx] = DUPLEX_FULL; link_config = sc->port.link_config[idx]; switch (link_config & PORT_FEATURE_LINK_SPEED_MASK) { case PORT_FEATURE_LINK_SPEED_AUTO: if (sc->port.supported[idx] & ELINK_SUPPORTED_Autoneg) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_AUTO_NEG; sc->port.advertising[idx] |= sc->port.supported[idx]; if (sc->link_params.phy[ELINK_EXT_PHY1].type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) sc->port.advertising[idx] |= (ELINK_SUPPORTED_100baseT_Half | ELINK_SUPPORTED_100baseT_Full); } else { /* force 10G, no AN */ sc->link_params.req_line_speed[idx] = ELINK_SPEED_10000; sc->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); continue; } break; case PORT_FEATURE_LINK_SPEED_10M_FULL: if (sc->port.supported[idx] & ELINK_SUPPORTED_10baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10; sc->port.advertising[idx] |= (ADVERTISED_10baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_10M_HALF: if (sc->port.supported[idx] & ELINK_SUPPORTED_10baseT_Half) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10; sc->link_params.req_duplex[idx] = DUPLEX_HALF; sc->port.advertising[idx] |= (ADVERTISED_10baseT_Half | ADVERTISED_TP); ELINK_DEBUG_P1(sc, "driver requesting DUPLEX_HALF req_duplex = %x!\n", sc->link_params.req_duplex[idx]); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_100M_FULL: if (sc->port.supported[idx] & ELINK_SUPPORTED_100baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_100; sc->port.advertising[idx] |= (ADVERTISED_100baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_100M_HALF: if (sc->port.supported[idx] & ELINK_SUPPORTED_100baseT_Half) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_100; sc->link_params.req_duplex[idx] = DUPLEX_HALF; sc->port.advertising[idx] |= (ADVERTISED_100baseT_Half | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_1G: if (sc->port.supported[idx] & ELINK_SUPPORTED_1000baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_1000; sc->port.advertising[idx] |= (ADVERTISED_1000baseT_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " 
"speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_2_5G: if (sc->port.supported[idx] & ELINK_SUPPORTED_2500baseX_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_2500; sc->port.advertising[idx] |= (ADVERTISED_2500baseX_Full | ADVERTISED_TP); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_10G_CX4: if (sc->port.supported[idx] & ELINK_SUPPORTED_10000baseT_Full) { sc->link_params.req_line_speed[idx] = ELINK_SPEED_10000; sc->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); } else { BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); return; } break; case PORT_FEATURE_LINK_SPEED_20G: sc->link_params.req_line_speed[idx] = ELINK_SPEED_20000; break; default: BLOGE(sc, "Invalid NVRAM config link_config=0x%08x " "speed_cap_mask=0x%08x\n", link_config, sc->link_params.speed_cap_mask[idx]); sc->link_params.req_line_speed[idx] = ELINK_SPEED_AUTO_NEG; sc->port.advertising[idx] = sc->port.supported[idx]; break; } sc->link_params.req_flow_ctrl[idx] = (link_config & PORT_FEATURE_FLOW_CONTROL_MASK); if (sc->link_params.req_flow_ctrl[idx] == ELINK_FLOW_CTRL_AUTO) { if (!(sc->port.supported[idx] & ELINK_SUPPORTED_Autoneg)) { sc->link_params.req_flow_ctrl[idx] = ELINK_FLOW_CTRL_NONE; } else { bxe_set_requested_fc(sc); } } BLOGD(sc, DBG_LOAD, "req_line_speed=%d req_duplex=%d " "req_flow_ctrl=0x%x advertising=0x%x\n", sc->link_params.req_line_speed[idx], sc->link_params.req_duplex[idx], sc->link_params.req_flow_ctrl[idx], sc->port.advertising[idx]); ELINK_DEBUG_P3(sc, "req_line_speed=%d req_duplex=%d " "advertising=0x%x\n", sc->link_params.req_line_speed[idx], sc->link_params.req_duplex[idx], sc->port.advertising[idx]); } } static void bxe_get_phy_info(struct bxe_softc *sc) { uint8_t port = SC_PORT(sc); uint32_t config = sc->port.config; uint32_t eee_mode; /* shmem data already read in bxe_get_shmem_info() */ ELINK_DEBUG_P3(sc, "lane_config=0x%08x speed_cap_mask0=0x%08x " "link_config0=0x%08x\n", sc->link_params.lane_config, sc->link_params.speed_cap_mask[0], sc->port.link_config[0]); bxe_link_settings_supported(sc, sc->link_params.switch_cfg); bxe_link_settings_requested(sc); if (sc->autogreeen == AUTO_GREEN_FORCE_ON) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } else if (sc->autogreeen == AUTO_GREEN_FORCE_OFF) { sc->link_params.feature_config_flags &= ~ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } else if (config & PORT_FEAT_CFG_AUTOGREEEN_ENABLED) { sc->link_params.feature_config_flags |= ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED; } /* configure link feature according to nvram value */ eee_mode = (((SHMEM_RD(sc, dev_info.port_feature_config[port].eee_power_mode)) & PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >> PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT); if (eee_mode != PORT_FEAT_CFG_EEE_POWER_MODE_DISABLED) { sc->link_params.eee_mode = (ELINK_EEE_MODE_ADV_LPI | ELINK_EEE_MODE_ENABLE_LPI | ELINK_EEE_MODE_OUTPUT_TIME); } else { sc->link_params.eee_mode = 0; } /* get the media type */ bxe_media_detect(sc); ELINK_DEBUG_P1(sc, "detected media type\n", sc->media); } static void bxe_get_params(struct bxe_softc *sc) { /* get user tunable params */ bxe_get_tunable_params(sc); /* select the RX and TX ring sizes */ sc->tx_ring_size = TX_BD_USABLE; sc->rx_ring_size = RX_BD_USABLE; /* 
XXX disable WoL */ sc->wol = 0; } static void bxe_set_modes_bitmap(struct bxe_softc *sc) { uint32_t flags = 0; if (CHIP_REV_IS_FPGA(sc)) { SET_FLAGS(flags, MODE_FPGA); } else if (CHIP_REV_IS_EMUL(sc)) { SET_FLAGS(flags, MODE_EMUL); } else { SET_FLAGS(flags, MODE_ASIC); } if (CHIP_IS_MODE_4_PORT(sc)) { SET_FLAGS(flags, MODE_PORT4); } else { SET_FLAGS(flags, MODE_PORT2); } if (CHIP_IS_E2(sc)) { SET_FLAGS(flags, MODE_E2); } else if (CHIP_IS_E3(sc)) { SET_FLAGS(flags, MODE_E3); if (CHIP_REV(sc) == CHIP_REV_Ax) { SET_FLAGS(flags, MODE_E3_A0); } else /*if (CHIP_REV(sc) == CHIP_REV_Bx)*/ { SET_FLAGS(flags, MODE_E3_B0 | MODE_COS3); } } if (IS_MF(sc)) { SET_FLAGS(flags, MODE_MF); switch (sc->devinfo.mf_info.mf_mode) { case MULTI_FUNCTION_SD: SET_FLAGS(flags, MODE_MF_SD); break; case MULTI_FUNCTION_SI: SET_FLAGS(flags, MODE_MF_SI); break; case MULTI_FUNCTION_AFEX: SET_FLAGS(flags, MODE_MF_AFEX); break; } } else { SET_FLAGS(flags, MODE_SF); } #if defined(__LITTLE_ENDIAN) SET_FLAGS(flags, MODE_LITTLE_ENDIAN); #else /* __BIG_ENDIAN */ SET_FLAGS(flags, MODE_BIG_ENDIAN); #endif INIT_MODE_FLAGS(sc) = flags; } static int bxe_alloc_hsi_mem(struct bxe_softc *sc) { struct bxe_fastpath *fp; bus_addr_t busaddr; int max_agg_queues; int max_segments; bus_size_t max_size; bus_size_t max_seg_size; char buf[32]; int rc; int i, j; /* XXX zero out all vars here and call bxe_alloc_hsi_mem on error */ /* allocate the parent bus DMA tag */ rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ BUS_SPACE_MAXSIZE_32BIT, /* max map size */ BUS_SPACE_UNRESTRICTED, /* num discontinuous */ BUS_SPACE_MAXSIZE_32BIT, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &sc->parent_dma_tag); /* returned dma tag */ if (rc != 0) { BLOGE(sc, "Failed to alloc parent DMA tag (%d)!\n", rc); return (1); } /************************/ /* DEFAULT STATUS BLOCK */ /************************/ if (bxe_dma_alloc(sc, sizeof(struct host_sp_status_block), &sc->def_sb_dma, "default status block") != 0) { /* XXX */ bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->def_sb = (struct host_sp_status_block *)sc->def_sb_dma.vaddr; /***************/ /* EVENT QUEUE */ /***************/ if (bxe_dma_alloc(sc, BCM_PAGE_SIZE, &sc->eq_dma, "event queue") != 0) { /* XXX */ bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->eq = (union event_ring_elem * )sc->eq_dma.vaddr; /*************/ /* SLOW PATH */ /*************/ if (bxe_dma_alloc(sc, sizeof(struct bxe_slowpath), &sc->sp_dma, "slow path") != 0) { /* XXX */ bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->sp = (struct bxe_slowpath *)sc->sp_dma.vaddr; /*******************/ /* SLOW PATH QUEUE */ /*******************/ if (bxe_dma_alloc(sc, BCM_PAGE_SIZE, &sc->spq_dma, "slow path queue") != 0) { /* XXX */ bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->spq = (struct eth_spe *)sc->spq_dma.vaddr; /***************************/ /* FW DECOMPRESSION BUFFER */ /***************************/ if (bxe_dma_alloc(sc, FW_BUF_SIZE, &sc->gz_buf_dma, "fw decompression buffer") != 0) { /* XXX 
*/ bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } sc->gz_buf = (void *)sc->gz_buf_dma.vaddr; if ((sc->gz_strm = malloc(sizeof(*sc->gz_strm), M_DEVBUF, M_NOWAIT)) == NULL) { /* XXX */ bxe_dma_free(sc, &sc->gz_buf_dma); sc->gz_buf = NULL; bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); return (1); } /*************/ /* FASTPATHS */ /*************/ /* allocate DMA memory for each fastpath structure */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->sc = sc; fp->index = i; /*******************/ /* FP STATUS BLOCK */ /*******************/ snprintf(buf, sizeof(buf), "fp %d status block", i); if (bxe_dma_alloc(sc, sizeof(union bxe_host_hc_status_block), &fp->sb_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { if (CHIP_IS_E2E3(sc)) { fp->status_block.e2_sb = (struct host_hc_status_block_e2 *)fp->sb_dma.vaddr; } else { fp->status_block.e1x_sb = (struct host_hc_status_block_e1x *)fp->sb_dma.vaddr; } } /******************/ /* FP TX BD CHAIN */ /******************/ snprintf(buf, sizeof(buf), "fp %d tx bd chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * TX_BD_NUM_PAGES), &fp->tx_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->tx_chain = (union eth_tx_bd_types *)fp->tx_dma.vaddr; } /* link together the tx bd chain pages */ for (j = 1; j <= TX_BD_NUM_PAGES; j++) { /* index into the tx bd chain array to last entry per page */ struct eth_tx_next_bd *tx_next_bd = &fp->tx_chain[TX_BD_TOTAL_PER_PAGE * j - 1].next_bd; /* point to the next page and wrap from last page */ busaddr = (fp->tx_dma.paddr + (BCM_PAGE_SIZE * (j % TX_BD_NUM_PAGES))); tx_next_bd->addr_hi = htole32(U64_HI(busaddr)); tx_next_bd->addr_lo = htole32(U64_LO(busaddr)); } /******************/ /* FP RX BD CHAIN */ /******************/ snprintf(buf, sizeof(buf), "fp %d rx bd chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RX_BD_NUM_PAGES), &fp->rx_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rx_chain = (struct eth_rx_bd *)fp->rx_dma.vaddr; } /* link together the rx bd chain pages */ for (j = 1; j <= RX_BD_NUM_PAGES; j++) { /* index into the rx bd chain array to last entry per page */ struct eth_rx_bd *rx_bd = &fp->rx_chain[RX_BD_TOTAL_PER_PAGE * j - 2]; /* point to the next page and wrap from last page */ busaddr = (fp->rx_dma.paddr + (BCM_PAGE_SIZE * (j % RX_BD_NUM_PAGES))); rx_bd->addr_hi = htole32(U64_HI(busaddr)); rx_bd->addr_lo = htole32(U64_LO(busaddr)); } /*******************/ /* FP RX RCQ CHAIN */ /*******************/ snprintf(buf, sizeof(buf), "fp %d rcq chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RCQ_NUM_PAGES), &fp->rcq_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rcq_chain = (union eth_rx_cqe *)fp->rcq_dma.vaddr; } /* link together the rcq chain pages */ for (j = 1; j <= RCQ_NUM_PAGES; j++) { /* index into the rcq chain array to last entry per page */ struct 
eth_rx_cqe_next_page *rx_cqe_next = (struct eth_rx_cqe_next_page *) &fp->rcq_chain[RCQ_TOTAL_PER_PAGE * j - 1]; /* point to the next page and wrap from last page */ busaddr = (fp->rcq_dma.paddr + (BCM_PAGE_SIZE * (j % RCQ_NUM_PAGES))); rx_cqe_next->addr_hi = htole32(U64_HI(busaddr)); rx_cqe_next->addr_lo = htole32(U64_LO(busaddr)); } /*******************/ /* FP RX SGE CHAIN */ /*******************/ snprintf(buf, sizeof(buf), "fp %d sge chain", i); if (bxe_dma_alloc(sc, (BCM_PAGE_SIZE * RX_SGE_NUM_PAGES), &fp->rx_sge_dma, buf) != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to alloc %s\n", buf); return (1); } else { fp->rx_sge_chain = (struct eth_rx_sge *)fp->rx_sge_dma.vaddr; } /* link together the sge chain pages */ for (j = 1; j <= RX_SGE_NUM_PAGES; j++) { /* index into the rcq chain array to last entry per page */ struct eth_rx_sge *rx_sge = &fp->rx_sge_chain[RX_SGE_TOTAL_PER_PAGE * j - 2]; /* point to the next page and wrap from last page */ busaddr = (fp->rx_sge_dma.paddr + (BCM_PAGE_SIZE * (j % RX_SGE_NUM_PAGES))); rx_sge->addr_hi = htole32(U64_HI(busaddr)); rx_sge->addr_lo = htole32(U64_LO(busaddr)); } /***********************/ /* FP TX MBUF DMA MAPS */ /***********************/ /* set required sizes before mapping to conserve resources */ if (if_getcapenable(sc->ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) { max_size = BXE_TSO_MAX_SIZE; max_segments = BXE_TSO_MAX_SEGMENTS; max_seg_size = BXE_TSO_MAX_SEG_SIZE; } else { max_size = (MCLBYTES * BXE_MAX_SEGMENTS); max_segments = BXE_MAX_SEGMENTS; max_seg_size = MCLBYTES; } /* create a dma tag for the tx mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ max_size, /* max map size */ max_segments, /* num discontinuous */ max_seg_size, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->tx_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d tx mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for each of the tx mbuf clusters */ for (j = 0; j < TX_BD_TOTAL; j++) { if (bus_dmamap_create(fp->tx_mbuf_tag, BUS_DMA_NOWAIT, &fp->tx_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d tx mbuf %d' (%d)\n", i, j, rc); return (1); } } /***********************/ /* FP RX MBUF DMA MAPS */ /***********************/ /* create a dma tag for the rx mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ MJUM9BYTES, /* max map size */ 1, /* num discontinuous */ MJUM9BYTES, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->rx_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d rx mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for each of the rx mbuf clusters */ for (j = 0; j < RX_BD_TOTAL; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx mbuf %d' 
(%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx mbuf cluster */ if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx mbuf' (%d)\n", i, rc); return (1); } /***************************/ /* FP RX SGE MBUF DMA MAPS */ /***************************/ /* create a dma tag for the rx sge mbufs */ rc = bus_dma_tag_create(sc->parent_dma_tag, /* parent tag */ 1, /* alignment */ 0, /* boundary limit */ BUS_SPACE_MAXADDR, /* restricted low */ BUS_SPACE_MAXADDR, /* restricted hi */ NULL, /* addr filter() */ NULL, /* addr filter() arg */ BCM_PAGE_SIZE, /* max map size */ 1, /* num discontinuous */ BCM_PAGE_SIZE, /* max seg size */ 0, /* flags */ NULL, /* lock() */ NULL, /* lock() arg */ &fp->rx_sge_mbuf_tag); /* returned dma tag */ if (rc != 0) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma tag for " "'fp %d rx sge mbufs' (%d)\n", i, rc); return (1); } /* create dma maps for the rx sge mbuf clusters */ for (j = 0; j < RX_SGE_TOTAL; j++) { if (bus_dmamap_create(fp->rx_sge_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_sge_mbuf_chain[j].m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx sge mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx sge mbuf cluster */ if (bus_dmamap_create(fp->rx_sge_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_sge_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx sge mbuf' (%d)\n", i, rc); return (1); } /***************************/ /* FP RX TPA MBUF DMA MAPS */ /***************************/ /* create dma maps for the rx tpa mbuf clusters */ max_agg_queues = MAX_AGG_QS(sc); for (j = 0; j < max_agg_queues; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_tpa_info[j].bd.m_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d rx tpa mbuf %d' (%d)\n", i, j, rc); return (1); } } /* create dma map for the spare rx tpa mbuf cluster */ if (bus_dmamap_create(fp->rx_mbuf_tag, BUS_DMA_NOWAIT, &fp->rx_tpa_info_mbuf_spare_map)) { /* XXX unwind and free previous fastpath allocations */ BLOGE(sc, "Failed to create dma map for " "'fp %d spare rx tpa mbuf' (%d)\n", i, rc); return (1); } bxe_init_sge_ring_bit_mask(fp); } return (0); } static void bxe_free_hsi_mem(struct bxe_softc *sc) { struct bxe_fastpath *fp; int max_agg_queues; int i, j; if (sc->parent_dma_tag == NULL) { return; /* assume nothing was allocated */ } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; /*******************/ /* FP STATUS BLOCK */ /*******************/ bxe_dma_free(sc, &fp->sb_dma); memset(&fp->status_block, 0, sizeof(fp->status_block)); /******************/ /* FP TX BD CHAIN */ /******************/ bxe_dma_free(sc, &fp->tx_dma); fp->tx_chain = NULL; /******************/ /* FP RX BD CHAIN */ /******************/ bxe_dma_free(sc, &fp->rx_dma); fp->rx_chain = NULL; /*******************/ /* FP RX RCQ CHAIN */ /*******************/ bxe_dma_free(sc, &fp->rcq_dma); fp->rcq_chain = NULL; /*******************/ /* FP RX SGE CHAIN */ /*******************/ bxe_dma_free(sc, &fp->rx_sge_dma); fp->rx_sge_chain = NULL; /***********************/ /* FP TX MBUF DMA MAPS */ /***********************/ if (fp->tx_mbuf_tag != NULL) { for (j = 0; j < TX_BD_TOTAL; j++) { if 
(fp->tx_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->tx_mbuf_tag, fp->tx_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->tx_mbuf_tag, fp->tx_mbuf_chain[j].m_map); } } bus_dma_tag_destroy(fp->tx_mbuf_tag); fp->tx_mbuf_tag = NULL; } /***********************/ /* FP RX MBUF DMA MAPS */ /***********************/ if (fp->rx_mbuf_tag != NULL) { for (j = 0; j < RX_BD_TOTAL; j++) { if (fp->rx_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_mbuf_chain[j].m_map); } } if (fp->rx_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_mbuf_spare_map); } /***************************/ /* FP RX TPA MBUF DMA MAPS */ /***************************/ max_agg_queues = MAX_AGG_QS(sc); for (j = 0; j < max_agg_queues; j++) { if (fp->rx_tpa_info[j].bd.m_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info[j].bd.m_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_tpa_info[j].bd.m_map); } } if (fp->rx_tpa_info_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map); bus_dmamap_destroy(fp->rx_mbuf_tag, fp->rx_tpa_info_mbuf_spare_map); } bus_dma_tag_destroy(fp->rx_mbuf_tag); fp->rx_mbuf_tag = NULL; } /***************************/ /* FP RX SGE MBUF DMA MAPS */ /***************************/ if (fp->rx_sge_mbuf_tag != NULL) { for (j = 0; j < RX_SGE_TOTAL; j++) { if (fp->rx_sge_mbuf_chain[j].m_map != NULL) { bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[j].m_map); bus_dmamap_destroy(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_chain[j].m_map); } } if (fp->rx_sge_mbuf_spare_map != NULL) { bus_dmamap_unload(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map); bus_dmamap_destroy(fp->rx_sge_mbuf_tag, fp->rx_sge_mbuf_spare_map); } bus_dma_tag_destroy(fp->rx_sge_mbuf_tag); fp->rx_sge_mbuf_tag = NULL; } } /***************************/ /* FW DECOMPRESSION BUFFER */ /***************************/ bxe_dma_free(sc, &sc->gz_buf_dma); sc->gz_buf = NULL; free(sc->gz_strm, M_DEVBUF); sc->gz_strm = NULL; /*******************/ /* SLOW PATH QUEUE */ /*******************/ bxe_dma_free(sc, &sc->spq_dma); sc->spq = NULL; /*************/ /* SLOW PATH */ /*************/ bxe_dma_free(sc, &sc->sp_dma); sc->sp = NULL; /***************/ /* EVENT QUEUE */ /***************/ bxe_dma_free(sc, &sc->eq_dma); sc->eq = NULL; /************************/ /* DEFAULT STATUS BLOCK */ /************************/ bxe_dma_free(sc, &sc->def_sb_dma); sc->def_sb = NULL; bus_dma_tag_destroy(sc->parent_dma_tag); sc->parent_dma_tag = NULL; } /* * Previous driver DMAE transaction may have occurred when pre-boot stage * ended and boot began. This would invalidate the addresses of the * transaction, resulting in was-error bit set in the PCI causing all * hw-to-host PCIe transactions to timeout. 
If this happened we want to clear * the interrupt which detected this from the pglueb and the was-done bit */ static void bxe_prev_interrupted_dmae(struct bxe_softc *sc) { uint32_t val; if (!CHIP_IS_E1x(sc)) { val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS); if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) { BLOGD(sc, DBG_LOAD, "Clearing 'was-error' bit that was set in pglueb"); REG_WR(sc, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, 1 << SC_FUNC(sc)); } } } static int bxe_prev_mcp_done(struct bxe_softc *sc) { uint32_t rc = bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE, DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET); if (!rc) { BLOGE(sc, "MCP response failure, aborting\n"); return (-1); } return (0); } static struct bxe_prev_list_node * bxe_prev_path_get_entry(struct bxe_softc *sc) { struct bxe_prev_list_node *tmp; LIST_FOREACH(tmp, &bxe_prev_list, node) { if ((sc->pcie_bus == tmp->bus) && (sc->pcie_device == tmp->slot) && (SC_PATH(sc) == tmp->path)) { return (tmp); } } return (NULL); } static uint8_t bxe_prev_is_path_marked(struct bxe_softc *sc) { struct bxe_prev_list_node *tmp; int rc = FALSE; mtx_lock(&bxe_prev_mtx); tmp = bxe_prev_path_get_entry(sc); if (tmp) { if (tmp->aer) { BLOGD(sc, DBG_LOAD, "Path %d/%d/%d was marked by AER\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } else { rc = TRUE; BLOGD(sc, DBG_LOAD, "Path %d/%d/%d was already cleaned from previous drivers\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } } mtx_unlock(&bxe_prev_mtx); return (rc); } static int bxe_prev_mark_path(struct bxe_softc *sc, uint8_t after_undi) { struct bxe_prev_list_node *tmp; mtx_lock(&bxe_prev_mtx); /* Check whether the entry for this path already exists */ tmp = bxe_prev_path_get_entry(sc); if (tmp) { if (!tmp->aer) { BLOGD(sc, DBG_LOAD, "Re-marking AER in path %d/%d/%d\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); } else { BLOGD(sc, DBG_LOAD, "Removing AER indication from path %d/%d/%d\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); tmp->aer = 0; } mtx_unlock(&bxe_prev_mtx); return (0); } mtx_unlock(&bxe_prev_mtx); /* Create an entry for this path and add it */ tmp = malloc(sizeof(struct bxe_prev_list_node), M_DEVBUF, (M_NOWAIT | M_ZERO)); if (!tmp) { BLOGE(sc, "Failed to allocate 'bxe_prev_list_node'\n"); return (-1); } tmp->bus = sc->pcie_bus; tmp->slot = sc->pcie_device; tmp->path = SC_PATH(sc); tmp->aer = 0; tmp->undi = after_undi ? 
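/*
 * Illustrative aside: bxe_prev_path_get_entry() above is a plain
 * queue(3) list scan keyed on (bus, slot, path). A standalone sketch of
 * the same idiom, with hypothetical types and key:
 *
 *   struct node { int key; LIST_ENTRY(node) link; };
 *   static LIST_HEAD(, node) head = LIST_HEAD_INITIALIZER(head);
 *
 *   static struct node *
 *   node_find(int key)
 *   {
 *       struct node *n;
 *       LIST_FOREACH(n, &head, link) {
 *           if (n->key == key)
 *               return (n);
 *       }
 *       return (NULL);
 *   }
 *
 * As with bxe_prev_list, the scan must run under the mutex that guards
 * insertions (bxe_prev_mtx here).
 */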
(1 << SC_PORT(sc)) : 0; mtx_lock(&bxe_prev_mtx); BLOGD(sc, DBG_LOAD, "Marked path %d/%d/%d - finished previous unload\n", sc->pcie_bus, sc->pcie_device, SC_PATH(sc)); LIST_INSERT_HEAD(&bxe_prev_list, tmp, node); mtx_unlock(&bxe_prev_mtx); return (0); } static int bxe_do_flr(struct bxe_softc *sc) { int i; /* only E2 and onwards support FLR */ if (CHIP_IS_E1x(sc)) { BLOGD(sc, DBG_LOAD, "FLR not supported in E1/E1H\n"); return (-1); } /* only bootcode REQ_BC_VER_4_INITIATE_FLR and onwards support flr */ if (sc->devinfo.bc_ver < REQ_BC_VER_4_INITIATE_FLR) { BLOGD(sc, DBG_LOAD, "FLR not supported by BC_VER: 0x%08x\n", sc->devinfo.bc_ver); return (-1); } /* Wait for Transaction Pending bit clean */ for (i = 0; i < 4; i++) { if (i) { DELAY(((1 << (i - 1)) * 100) * 1000); } if (!bxe_is_pcie_pending(sc)) { goto clear; } } BLOGE(sc, "PCIE transaction is not cleared, " "proceeding with reset anyway\n"); clear: BLOGD(sc, DBG_LOAD, "Initiating FLR\n"); bxe_fw_command(sc, DRV_MSG_CODE_INITIATE_FLR, 0); return (0); } struct bxe_mac_vals { uint32_t xmac_addr; uint32_t xmac_val; uint32_t emac_addr; uint32_t emac_val; uint32_t umac_addr; uint32_t umac_val; uint32_t bmac_addr; uint32_t bmac_val[2]; }; static void bxe_prev_unload_close_mac(struct bxe_softc *sc, struct bxe_mac_vals *vals) { uint32_t val, base_addr, offset, mask, reset_reg; uint8_t mac_stopped = FALSE; uint8_t port = SC_PORT(sc); uint32_t wb_data[2]; /* reset addresses as they also mark which values were changed */ vals->bmac_addr = 0; vals->umac_addr = 0; vals->xmac_addr = 0; vals->emac_addr = 0; reset_reg = REG_RD(sc, MISC_REG_RESET_REG_2); if (!CHIP_IS_E3(sc)) { val = REG_RD(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4); mask = MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port; if ((mask & reset_reg) && val) { BLOGD(sc, DBG_LOAD, "Disable BMAC Rx\n"); base_addr = SC_PORT(sc) ? NIG_REG_INGRESS_BMAC1_MEM : NIG_REG_INGRESS_BMAC0_MEM; offset = CHIP_IS_E2(sc) ? BIGMAC2_REGISTER_BMAC_CONTROL : BIGMAC_REGISTER_BMAC_CONTROL; /* * use rd/wr since we cannot use dmae. This is safe * since MCP won't access the bus due to the request * to unload, and no function on the path can be * loaded at this time. */ wb_data[0] = REG_RD(sc, base_addr + offset); wb_data[1] = REG_RD(sc, base_addr + offset + 0x4); vals->bmac_addr = base_addr + offset; vals->bmac_val[0] = wb_data[0]; vals->bmac_val[1] = wb_data[1]; wb_data[0] &= ~ELINK_BMAC_CONTROL_RX_ENABLE; REG_WR(sc, vals->bmac_addr, wb_data[0]); REG_WR(sc, vals->bmac_addr + 0x4, wb_data[1]); } BLOGD(sc, DBG_LOAD, "Disable EMAC Rx\n"); vals->emac_addr = NIG_REG_NIG_EMAC0_EN + SC_PORT(sc)*4; vals->emac_val = REG_RD(sc, vals->emac_addr); REG_WR(sc, vals->emac_addr, 0); mac_stopped = TRUE; } else { if (reset_reg & MISC_REGISTERS_RESET_REG_2_XMAC) { BLOGD(sc, DBG_LOAD, "Disable XMAC Rx\n"); base_addr = SC_PORT(sc) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; val = REG_RD(sc, base_addr + XMAC_REG_PFC_CTRL_HI); REG_WR(sc, base_addr + XMAC_REG_PFC_CTRL_HI, val & ~(1 << 1)); REG_WR(sc, base_addr + XMAC_REG_PFC_CTRL_HI, val | (1 << 1)); vals->xmac_addr = base_addr + XMAC_REG_CTRL; vals->xmac_val = REG_RD(sc, vals->xmac_addr); REG_WR(sc, vals->xmac_addr, 0); mac_stopped = TRUE; } mask = MISC_REGISTERS_RESET_REG_2_UMAC0 << port; if (mask & reset_reg) { BLOGD(sc, DBG_LOAD, "Disable UMAC Rx\n"); base_addr = SC_PORT(sc) ? 
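/*
 * Illustrative aside: the transaction-pending wait in bxe_do_flr() above
 * uses exponential backoff -- no delay before the first probe, then
 * 100ms, 200ms and 400ms between retries. A minimal sketch of that
 * schedule, assuming a hypothetical pending() predicate:
 *
 *   int i;
 *   for (i = 0; i < 4; i++) {
 *       if (i)
 *           DELAY(((1 << (i - 1)) * 100) * 1000); // usec
 *       if (!pending())
 *           break;
 *   }
 */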
GRCBASE_UMAC1 : GRCBASE_UMAC0; vals->umac_addr = base_addr + UMAC_REG_COMMAND_CONFIG; vals->umac_val = REG_RD(sc, vals->umac_addr); REG_WR(sc, vals->umac_addr, 0); mac_stopped = TRUE; } } if (mac_stopped) { DELAY(20000); } } #define BXE_PREV_UNDI_PROD_ADDR(p) (BAR_TSTRORM_INTMEM + 0x1508 + ((p) << 4)) #define BXE_PREV_UNDI_RCQ(val) ((val) & 0xffff) #define BXE_PREV_UNDI_BD(val) ((val) >> 16 & 0xffff) #define BXE_PREV_UNDI_PROD(rcq, bd) ((bd) << 16 | (rcq)) static void bxe_prev_unload_undi_inc(struct bxe_softc *sc, uint8_t port, uint8_t inc) { uint16_t rcq, bd; uint32_t tmp_reg = REG_RD(sc, BXE_PREV_UNDI_PROD_ADDR(port)); rcq = BXE_PREV_UNDI_RCQ(tmp_reg) + inc; bd = BXE_PREV_UNDI_BD(tmp_reg) + inc; tmp_reg = BXE_PREV_UNDI_PROD(rcq, bd); REG_WR(sc, BXE_PREV_UNDI_PROD_ADDR(port), tmp_reg); BLOGD(sc, DBG_LOAD, "UNDI producer [%d] rings bd -> 0x%04x, rcq -> 0x%04x\n", port, bd, rcq); } static int bxe_prev_unload_common(struct bxe_softc *sc) { uint32_t reset_reg, tmp_reg = 0, rc; uint8_t prev_undi = FALSE; struct bxe_mac_vals mac_vals; uint32_t timer_count = 1000; uint32_t prev_brb; /* * It is possible a previous function received 'common' answer, * but hasn't loaded yet, therefore creating a scenario of * multiple functions receiving 'common' on the same path. */ BLOGD(sc, DBG_LOAD, "Common unload Flow\n"); memset(&mac_vals, 0, sizeof(mac_vals)); if (bxe_prev_is_path_marked(sc)) { return (bxe_prev_mcp_done(sc)); } reset_reg = REG_RD(sc, MISC_REG_RESET_REG_1); /* Reset should be performed after BRB is emptied */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_BRB1) { /* Close the MAC Rx to prevent BRB from filling up */ bxe_prev_unload_close_mac(sc, &mac_vals); /* close LLH filters towards the BRB */ elink_set_rx_filter(&sc->link_params, 0); /* * Check if the UNDI driver was previously loaded. 
* UNDI driver initializes CID offset for normal bell to 0x7 */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_DORQ) { tmp_reg = REG_RD(sc, DORQ_REG_NORM_CID_OFST); if (tmp_reg == 0x7) { BLOGD(sc, DBG_LOAD, "UNDI previously loaded\n"); prev_undi = TRUE; /* clear the UNDI indication */ REG_WR(sc, DORQ_REG_NORM_CID_OFST, 0); /* clear possible idle check errors */ REG_RD(sc, NIG_REG_NIG_INT_STS_CLR_0); } } /* wait until BRB is empty */ tmp_reg = REG_RD(sc, BRB1_REG_NUM_OF_FULL_BLOCKS); while (timer_count) { prev_brb = tmp_reg; tmp_reg = REG_RD(sc, BRB1_REG_NUM_OF_FULL_BLOCKS); if (!tmp_reg) { break; } BLOGD(sc, DBG_LOAD, "BRB still has 0x%08x\n", tmp_reg); /* reset timer as long as BRB actually gets emptied */ if (prev_brb > tmp_reg) { timer_count = 1000; } else { timer_count--; } /* If UNDI resides in memory, manually increment it */ if (prev_undi) { bxe_prev_unload_undi_inc(sc, SC_PORT(sc), 1); } DELAY(10); } if (!timer_count) { BLOGE(sc, "Failed to empty BRB\n"); } } /* No packets are in the pipeline, path is ready for reset */ bxe_reset_common(sc); if (mac_vals.xmac_addr) { REG_WR(sc, mac_vals.xmac_addr, mac_vals.xmac_val); } if (mac_vals.umac_addr) { REG_WR(sc, mac_vals.umac_addr, mac_vals.umac_val); } if (mac_vals.emac_addr) { REG_WR(sc, mac_vals.emac_addr, mac_vals.emac_val); } if (mac_vals.bmac_addr) { REG_WR(sc, mac_vals.bmac_addr, mac_vals.bmac_val[0]); REG_WR(sc, mac_vals.bmac_addr + 4, mac_vals.bmac_val[1]); } rc = bxe_prev_mark_path(sc, prev_undi); if (rc) { bxe_prev_mcp_done(sc); return (rc); } return (bxe_prev_mcp_done(sc)); } static int bxe_prev_unload_uncommon(struct bxe_softc *sc) { int rc; BLOGD(sc, DBG_LOAD, "Uncommon unload Flow\n"); /* Test if previous unload process was already finished for this path */ if (bxe_prev_is_path_marked(sc)) { return (bxe_prev_mcp_done(sc)); } BLOGD(sc, DBG_LOAD, "Path is unmarked\n"); /* * If function has FLR capabilities, and existing FW version matches * the one required, then FLR will be sufficient to clean any residue * left by previous driver */ rc = bxe_nic_load_analyze_req(sc, FW_MSG_CODE_DRV_LOAD_FUNCTION); if (!rc) { /* fw version is good */ BLOGD(sc, DBG_LOAD, "FW version matches our own, attempting FLR\n"); rc = bxe_do_flr(sc); } if (!rc) { /* FLR was performed */ BLOGD(sc, DBG_LOAD, "FLR successful\n"); return (0); } BLOGD(sc, DBG_LOAD, "Could not FLR\n"); /* Close the MCP request, return failure*/ rc = bxe_prev_mcp_done(sc); if (!rc) { rc = BXE_PREV_WAIT_NEEDED; } return (rc); } static int bxe_prev_unload(struct bxe_softc *sc) { int time_counter = 10; uint32_t fw, hw_lock_reg, hw_lock_val; uint32_t rc = 0; /* * Clear HW from errors which may have resulted from an interrupted * DMAE transaction. */ bxe_prev_interrupted_dmae(sc); /* Release previously held locks */ hw_lock_reg = (SC_FUNC(sc) <= 5) ? 
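/*
 * Illustrative aside: the BRB drain loop in bxe_prev_unload_common()
 * above uses a progress-aware timeout -- the countdown is restored to
 * its full value whenever the number of full blocks actually drops, so
 * only a genuinely stalled queue times out. A minimal sketch, assuming
 * a hypothetical read_level() accessor:
 *
 *   uint32_t prev, cur = read_level();
 *   uint32_t timer = 1000;
 *   while (timer && cur) {
 *       prev = cur;
 *       cur = read_level();
 *       timer = (prev > cur) ? 1000 : timer - 1; // progress resets timer
 *       DELAY(10);
 *   }
 */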
(MISC_REG_DRIVER_CONTROL_1 + SC_FUNC(sc) * 8) : (MISC_REG_DRIVER_CONTROL_7 + (SC_FUNC(sc) - 6) * 8); hw_lock_val = (REG_RD(sc, hw_lock_reg)); if (hw_lock_val) { if (hw_lock_val & HW_LOCK_RESOURCE_NVRAM) { BLOGD(sc, DBG_LOAD, "Releasing previously held NVRAM lock\n"); REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << SC_PORT(sc))); } BLOGD(sc, DBG_LOAD, "Releasing previously held HW lock\n"); REG_WR(sc, hw_lock_reg, 0xffffffff); } else { BLOGD(sc, DBG_LOAD, "No need to release HW/NVRAM locks\n"); } if (MCPR_ACCESS_LOCK_LOCK & REG_RD(sc, MCP_REG_MCPR_ACCESS_LOCK)) { BLOGD(sc, DBG_LOAD, "Releasing previously held ALR\n"); REG_WR(sc, MCP_REG_MCPR_ACCESS_LOCK, 0); } do { /* Lock MCP using an unload request */ fw = bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS, 0); if (!fw) { BLOGE(sc, "MCP response failure, aborting\n"); rc = -1; break; } if (fw == FW_MSG_CODE_DRV_UNLOAD_COMMON) { rc = bxe_prev_unload_common(sc); break; } /* non-common reply from MCP might require looping */ rc = bxe_prev_unload_uncommon(sc); if (rc != BXE_PREV_WAIT_NEEDED) { break; } DELAY(20000); } while (--time_counter); if (!time_counter || rc) { BLOGE(sc, "Failed to unload previous driver!" " time_counter %d rc %d\n", time_counter, rc); rc = -1; } return (rc); } void bxe_dcbx_set_state(struct bxe_softc *sc, uint8_t dcb_on, uint32_t dcbx_enabled) { if (!CHIP_IS_E1x(sc)) { sc->dcb_state = dcb_on; sc->dcbx_enabled = dcbx_enabled; } else { sc->dcb_state = FALSE; sc->dcbx_enabled = BXE_DCBX_ENABLED_INVALID; } BLOGD(sc, DBG_LOAD, "DCB state [%s:%s]\n", dcb_on ? "ON" : "OFF", (dcbx_enabled == BXE_DCBX_ENABLED_OFF) ? "user-mode" : (dcbx_enabled == BXE_DCBX_ENABLED_ON_NEG_OFF) ? "on-chip static" : (dcbx_enabled == BXE_DCBX_ENABLED_ON_NEG_ON) ? "on-chip with negotiation" : "invalid"); } /* must be called after sriov-enable */ static int bxe_set_qm_cid_count(struct bxe_softc *sc) { int cid_count = BXE_L2_MAX_CID(sc); if (IS_SRIOV(sc)) { cid_count += BXE_VF_CIDS; } if (CNIC_SUPPORT(sc)) { cid_count += CNIC_CID_MAX; } return (roundup(cid_count, QM_CID_ROUND)); } static void bxe_init_multi_cos(struct bxe_softc *sc) { int pri, cos; uint32_t pri_map = 0; /* XXX change to user config */ for (pri = 0; pri < BXE_MAX_PRIORITY; pri++) { cos = ((pri_map & (0xf << (pri * 4))) >> (pri * 4)); if (cos < sc->max_cos) { sc->prio_to_cos[pri] = cos; } else { BLOGW(sc, "Invalid COS %d for priority %d " "(max COS is %d), setting to 0\n", cos, pri, (sc->max_cos - 1)); sc->prio_to_cos[pri] = 0; } } } static int bxe_sysctl_state(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc; int error, result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) { return (error); } if (result == 1) { uint32_t temp; sc = (struct bxe_softc *)arg1; BLOGI(sc, "... dumping driver state ...\n"); temp = SHMEM2_RD(sc, temperature_in_half_celsius); BLOGI(sc, "\t Device Temperature = %d Celsius\n", (temp/2)); } return (error); } static int bxe_sysctl_eth_stat(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t *eth_stats = (uint32_t *)&sc->eth_stats; uint32_t *offset; uint64_t value = 0; int index = (int)arg2; if (index >= BXE_NUM_ETH_STATS) { BLOGE(sc, "bxe_eth_stats index out of range (%d)\n", index); return (-1); } offset = (eth_stats + bxe_eth_stats_arr[index].offset); switch (bxe_eth_stats_arr[index].size) { case 4: value = (uint64_t)*offset; break; case 8: value = HILO_U64(*offset, *(offset + 1)); break; default: BLOGE(sc, "Invalid bxe_eth_stats size (index=%d size=%d)\n", index, bxe_eth_stats_arr[index].size); return (-1); } return (sysctl_handle_64(oidp, &value, 0, req)); } static int bxe_sysctl_eth_q_stat(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t *eth_stats; uint32_t *offset; uint64_t value = 0; uint32_t q_stat = (uint32_t)arg2; uint32_t fp_index = ((q_stat >> 16) & 0xffff); uint32_t index = (q_stat & 0xffff); eth_stats = (uint32_t *)&sc->fp[fp_index].eth_q_stats; if (index >= BXE_NUM_ETH_Q_STATS) { BLOGE(sc, "bxe_eth_q_stats index out of range (%d)\n", index); return (-1); } offset = (eth_stats + bxe_eth_q_stats_arr[index].offset); switch (bxe_eth_q_stats_arr[index].size) { case 4: value = (uint64_t)*offset; break; case 8: value = HILO_U64(*offset, *(offset + 1)); break; default: BLOGE(sc, "Invalid bxe_eth_q_stats size (index=%d size=%d)\n", index, bxe_eth_q_stats_arr[index].size); return (-1); } return (sysctl_handle_64(oidp, &value, 0, req)); } static void bxe_force_link_reset(struct bxe_softc *sc) { bxe_acquire_phy_lock(sc); elink_link_reset(&sc->link_params, &sc->link_vars, 1); bxe_release_phy_lock(sc); } static int bxe_sysctl_pauseparam(SYSCTL_HANDLER_ARGS) { struct bxe_softc *sc = (struct bxe_softc *)arg1; uint32_t cfg_idx = bxe_get_link_cfg_idx(sc); int rc = 0; int error; int result; error = sysctl_handle_int(oidp, &sc->bxe_pause_param, 0, req); if (error || !req->newptr) { return (error); } if ((sc->bxe_pause_param < 0) || (sc->bxe_pause_param > 8)) { BLOGW(sc, "invalid pause param (%d) - use integers between 1 & 8\n", sc->bxe_pause_param); sc->bxe_pause_param = 8; } result = (sc->bxe_pause_param << PORT_FEATURE_FLOW_CONTROL_SHIFT); if((result & 0x400) && !(sc->port.supported[cfg_idx] & ELINK_SUPPORTED_Autoneg)) { BLOGW(sc, "Does not support Autoneg pause_param %d\n", sc->bxe_pause_param); return -EINVAL; } if(IS_MF(sc)) return 0; sc->link_params.req_flow_ctrl[cfg_idx] = ELINK_FLOW_CTRL_AUTO; if(result & ELINK_FLOW_CTRL_RX) sc->link_params.req_flow_ctrl[cfg_idx] |= ELINK_FLOW_CTRL_RX; if(result & ELINK_FLOW_CTRL_TX) sc->link_params.req_flow_ctrl[cfg_idx] |= ELINK_FLOW_CTRL_TX; if(sc->link_params.req_flow_ctrl[cfg_idx] == ELINK_FLOW_CTRL_AUTO) sc->link_params.req_flow_ctrl[cfg_idx] = ELINK_FLOW_CTRL_NONE; if(result & 0x400) { if (sc->link_params.req_line_speed[cfg_idx] == ELINK_SPEED_AUTO_NEG) { sc->link_params.req_flow_ctrl[cfg_idx] = ELINK_FLOW_CTRL_AUTO; } sc->link_params.req_fc_auto_adv = 0; if (result & ELINK_FLOW_CTRL_RX) sc->link_params.req_fc_auto_adv |= ELINK_FLOW_CTRL_RX; if (result & ELINK_FLOW_CTRL_TX) sc->link_params.req_fc_auto_adv |= ELINK_FLOW_CTRL_TX; if (!sc->link_params.req_fc_auto_adv) sc->link_params.req_fc_auto_adv |= ELINK_FLOW_CTRL_NONE; } if (IS_PF(sc)) { if (sc->link_vars.link_up) { bxe_stats_handle(sc, STATS_EVENT_STOP); } if
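/*
 * Illustrative aside: the stat handlers above export both 32- and
 * 64-bit counters through one code path; a 64-bit counter is stored as
 * two consecutive 32-bit words (high word first) and reassembled, which
 * is what HILO_U64 does. A minimal equivalent:
 *
 *   static inline uint64_t
 *   hilo_u64(uint32_t hi, uint32_t lo)
 *   {
 *       return (((uint64_t)hi << 32) | lo);
 *   }
 */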
(if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) { bxe_force_link_reset(sc); bxe_acquire_phy_lock(sc); rc = elink_phy_init(&sc->link_params, &sc->link_vars); bxe_release_phy_lock(sc); bxe_calc_fc_adv(sc); } } return rc; } static void bxe_add_sysctls(struct bxe_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *queue_top, *queue; struct sysctl_oid_list *queue_top_children, *queue_children; char queue_num_buf[32]; uint32_t q_stat; int i, j; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "version", CTLFLAG_RD, BXE_DRIVER_VERSION, 0, "version"); snprintf(sc->fw_ver_str, sizeof(sc->fw_ver_str), "%d.%d.%d.%d", BCM_5710_FW_MAJOR_VERSION, BCM_5710_FW_MINOR_VERSION, BCM_5710_FW_REVISION_VERSION, BCM_5710_FW_ENGINEERING_VERSION); snprintf(sc->mf_mode_str, sizeof(sc->mf_mode_str), "%s", ((sc->devinfo.mf_info.mf_mode == SINGLE_FUNCTION) ? "Single" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SD) ? "MF-SD" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_SI) ? "MF-SI" : (sc->devinfo.mf_info.mf_mode == MULTI_FUNCTION_AFEX) ? "MF-AFEX" : "Unknown")); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "mf_vnics", CTLFLAG_RD, &sc->devinfo.mf_info.vnics_per_port, 0, "multifunction vnics per port"); snprintf(sc->pci_link_str, sizeof(sc->pci_link_str), "%s x%d", ((sc->devinfo.pcie_link_speed == 1) ? "2.5GT/s" : (sc->devinfo.pcie_link_speed == 2) ? "5.0GT/s" : (sc->devinfo.pcie_link_speed == 4) ? "8.0GT/s" : "???GT/s"), sc->devinfo.pcie_link_width); sc->debug = bxe_debug; #if __FreeBSD_version >= 900000 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bc_version", CTLFLAG_RD, sc->devinfo.bc_ver_str, 0, "bootcode version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "fw_version", CTLFLAG_RD, sc->fw_ver_str, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mf_mode", CTLFLAG_RD, sc->mf_mode_str, 0, "multifunction mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mac_addr", CTLFLAG_RD, sc->mac_addr_str, 0, "mac address"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pci_link", CTLFLAG_RD, sc->pci_link_str, 0, "pci link status"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "debug", CTLFLAG_RW, &sc->debug, "debug logging mode"); #else SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bc_version", CTLFLAG_RD, &sc->devinfo.bc_ver_str, 0, "bootcode version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "fw_version", CTLFLAG_RD, &sc->fw_ver_str, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mf_mode", CTLFLAG_RD, &sc->mf_mode_str, 0, "multifunction mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "mac_addr", CTLFLAG_RD, &sc->mac_addr_str, 0, "mac address"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pci_link", CTLFLAG_RD, &sc->pci_link_str, 0, "pci link status"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "debug", CTLFLAG_RW, &sc->debug, 0, "debug logging mode"); #endif /* #if __FreeBSD_version >= 900000 */ sc->trigger_grcdump = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "trigger_grcdump", CTLFLAG_RW, &sc->trigger_grcdump, 0, "trigger grcdump should be invoked" " before collecting grcdump"); sc->grcdump_started = 0; sc->grcdump_done = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "grcdump_done", CTLFLAG_RD, &sc->grcdump_done, 0, "set by driver when grcdump is done"); sc->rx_budget = bxe_rx_budget; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_budget", CTLFLAG_RW, &sc->rx_budget, 0, "rx processing budget"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_param", 
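/*
 * Illustrative aside: the per-queue stat sysctls registered below pack
 * two indices into the single arg2 slot -- queue number in the high 16
 * bits, stat index in the low 16 -- and bxe_sysctl_eth_q_stat() unpacks
 * them again. A minimal sketch of the packing:
 *
 *   uint32_t q_stat  = ((uint32_t)queue << 16) | stat;  // pack
 *   uint32_t queue_i = (q_stat >> 16) & 0xffff;         // unpack
 *   uint32_t stat_i  = q_stat & 0xffff;
 *
 * This keeps one handler for every queue/stat pair instead of one
 * handler per counter.
 */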
CTLTYPE_UINT | CTLFLAG_RW, sc, 0, bxe_sysctl_pauseparam, "IU", "need pause frames- DEF:0/TX:1/RX:2/BOTH:3/AUTO:4/AUTOTX:5/AUTORX:6/AUTORXTX:7/NONE:8"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "state", CTLTYPE_UINT | CTLFLAG_RW, sc, 0, bxe_sysctl_state, "IU", "dump driver state"); for (i = 0; i < BXE_NUM_ETH_STATS; i++) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, bxe_eth_stats_arr[i].string, CTLTYPE_U64 | CTLFLAG_RD, sc, i, bxe_sysctl_eth_stat, "LU", bxe_eth_stats_arr[i].string); } /* add a new parent node for all queues "dev.bxe.#.queue" */ queue_top = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "queue", CTLFLAG_RD, NULL, "queue"); queue_top_children = SYSCTL_CHILDREN(queue_top); for (i = 0; i < sc->num_queues; i++) { /* add a new parent node for a single queue "dev.bxe.#.queue.#" */ snprintf(queue_num_buf, sizeof(queue_num_buf), "%d", i); queue = SYSCTL_ADD_NODE(ctx, queue_top_children, OID_AUTO, queue_num_buf, CTLFLAG_RD, NULL, "single queue"); queue_children = SYSCTL_CHILDREN(queue); for (j = 0; j < BXE_NUM_ETH_Q_STATS; j++) { q_stat = ((i << 16) | j); SYSCTL_ADD_PROC(ctx, queue_children, OID_AUTO, bxe_eth_q_stats_arr[j].string, CTLTYPE_U64 | CTLFLAG_RD, sc, q_stat, bxe_sysctl_eth_q_stat, "LU", bxe_eth_q_stats_arr[j].string); } } } static int bxe_alloc_buf_rings(struct bxe_softc *sc) { #if __FreeBSD_version >= 901504 int i; struct bxe_fastpath *fp; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; fp->tx_br = buf_ring_alloc(BXE_BR_SIZE, M_DEVBUF, M_NOWAIT, &fp->tx_mtx); if (fp->tx_br == NULL) return (-1); } #endif return (0); } static void bxe_free_buf_rings(struct bxe_softc *sc) { #if __FreeBSD_version >= 901504 int i; struct bxe_fastpath *fp; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (fp->tx_br) { buf_ring_free(fp->tx_br, M_DEVBUF); fp->tx_br = NULL; } } #endif } static void bxe_init_fp_mutexs(struct bxe_softc *sc) { int i; struct bxe_fastpath *fp; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; snprintf(fp->tx_mtx_name, sizeof(fp->tx_mtx_name), "bxe%d_fp%d_tx_lock", sc->unit, i); mtx_init(&fp->tx_mtx, fp->tx_mtx_name, NULL, MTX_DEF); snprintf(fp->rx_mtx_name, sizeof(fp->rx_mtx_name), "bxe%d_fp%d_rx_lock", sc->unit, i); mtx_init(&fp->rx_mtx, fp->rx_mtx_name, NULL, MTX_DEF); } } static void bxe_destroy_fp_mutexs(struct bxe_softc *sc) { int i; struct bxe_fastpath *fp; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if (mtx_initialized(&fp->tx_mtx)) { mtx_destroy(&fp->tx_mtx); } if (mtx_initialized(&fp->rx_mtx)) { mtx_destroy(&fp->rx_mtx); } } } /* * Device attach function. * * Allocates device resources, performs secondary chip identification, and * initializes driver instance variables. This function is called from driver * load after a successful probe. 
* * Returns: * 0 = Success, >0 = Failure */ static int bxe_attach(device_t dev) { struct bxe_softc *sc; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting attach...\n"); sc->state = BXE_STATE_CLOSED; sc->dev = dev; sc->unit = device_get_unit(dev); BLOGD(sc, DBG_LOAD, "softc = %p\n", sc); sc->pcie_bus = pci_get_bus(dev); sc->pcie_device = pci_get_slot(dev); sc->pcie_func = pci_get_function(dev); /* enable bus master capability */ pci_enable_busmaster(dev); /* get the BARs */ if (bxe_allocate_bars(sc) != 0) { return (ENXIO); } /* initialize the mutexes */ bxe_init_mutexes(sc); /* prepare the periodic callout */ callout_init(&sc->periodic_callout, 0); /* prepare the chip taskqueue */ sc->chip_tq_flags = CHIP_TQ_NONE; snprintf(sc->chip_tq_name, sizeof(sc->chip_tq_name), "bxe%d_chip_tq", sc->unit); TASK_INIT(&sc->chip_tq_task, 0, bxe_handle_chip_tq, sc); sc->chip_tq = taskqueue_create(sc->chip_tq_name, M_NOWAIT, taskqueue_thread_enqueue, &sc->chip_tq); taskqueue_start_threads(&sc->chip_tq, 1, PWAIT, /* lower priority */ "%s", sc->chip_tq_name); /* get device info and set params */ if (bxe_get_device_info(sc) != 0) { BLOGE(sc, "getting device info\n"); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* get final misc params */ bxe_get_params(sc); /* set the default MTU (changed via ifconfig) */ sc->mtu = ETHERMTU; bxe_set_modes_bitmap(sc); /* XXX * If in AFEX mode and the function is configured for FCoE * then bail... no L2 allowed. */ /* get phy settings from shmem and 'and' against admin settings */ bxe_get_phy_info(sc); /* initialize the FreeBSD ifnet interface */ if (bxe_init_ifnet(sc) != 0) { bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } if (bxe_add_cdev(sc) != 0) { if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate device interrupts */ if (bxe_interrupt_alloc(sc) != 0) { bxe_del_cdev(sc); if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } bxe_init_fp_mutexs(sc); if (bxe_alloc_buf_rings(sc) != 0) { bxe_free_buf_rings(sc); bxe_interrupt_free(sc); bxe_del_cdev(sc); if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate ilt */ if (bxe_alloc_ilt_mem(sc) != 0) { bxe_free_buf_rings(sc); bxe_interrupt_free(sc); bxe_del_cdev(sc); if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* allocate the host hardware/software hsi structures */ if (bxe_alloc_hsi_mem(sc) != 0) { bxe_free_ilt_mem(sc); bxe_free_buf_rings(sc); bxe_interrupt_free(sc); bxe_del_cdev(sc); if (sc->ifp != NULL) { ether_ifdetach(sc->ifp); } ifmedia_removeall(&sc->ifmedia); bxe_release_mutexes(sc); bxe_deallocate_bars(sc); pci_disable_busmaster(dev); return (ENXIO); } /* need to reset chip if UNDI was active */ if (IS_PF(sc) && !BXE_NOMCP(sc)) { /* init fw_seq */ sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); BLOGD(sc, DBG_LOAD, "prev unload fw_seq 0x%04x\n", sc->fw_seq); bxe_prev_unload(sc); } #if 1 /* XXX */ bxe_dcbx_set_state(sc, FALSE, BXE_DCBX_ENABLED_OFF); #else if (SHMEM2_HAS(sc, 
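/*
 * Illustrative aside: bxe_attach() above unwinds in strict reverse
 * order of acquisition on every failure path, repeating the teardown
 * calls inline at each exit. The same discipline is often written with
 * goto labels; a sketch with hypothetical steps:
 *
 *   if (step_a() != 0)
 *       return (ENXIO);
 *   if (step_b() != 0)
 *       goto fail_a;
 *   if (step_c() != 0)
 *       goto fail_b;
 *   return (0);
 *   fail_b: undo_b();
 *   fail_a: undo_a();
 *   return (ENXIO);
 *
 * The inline style used here trades repetition for having each error
 * path fully visible at the point of failure.
 */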
dcbx_lldp_params_offset) && SHMEM2_HAS(sc, dcbx_lldp_dcbx_stat_offset) && SHMEM2_RD(sc, dcbx_lldp_params_offset) && SHMEM2_RD(sc, dcbx_lldp_dcbx_stat_offset)) { bxe_dcbx_set_state(sc, TRUE, BXE_DCBX_ENABLED_ON_NEG_ON); bxe_dcbx_init_params(sc); } else { bxe_dcbx_set_state(sc, FALSE, BXE_DCBX_ENABLED_OFF); } #endif /* calculate qm_cid_count */ sc->qm_cid_count = bxe_set_qm_cid_count(sc); BLOGD(sc, DBG_LOAD, "qm_cid_count=%d\n", sc->qm_cid_count); sc->max_cos = 1; bxe_init_multi_cos(sc); bxe_add_sysctls(sc); return (0); } /* * Device detach function. * * Stops the controller, resets the controller, and releases resources. * * Returns: * 0 = Success, >0 = Failure */ static int bxe_detach(device_t dev) { struct bxe_softc *sc; if_t ifp; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting detach...\n"); ifp = sc->ifp; if (ifp != NULL && if_vlantrunkinuse(ifp)) { BLOGE(sc, "Cannot detach while VLANs are in use.\n"); return(EBUSY); } bxe_del_cdev(sc); /* stop the periodic callout */ bxe_periodic_stop(sc); /* stop the chip taskqueue */ atomic_store_rel_long(&sc->chip_tq_flags, CHIP_TQ_NONE); if (sc->chip_tq) { taskqueue_drain(sc->chip_tq, &sc->chip_tq_task); taskqueue_free(sc->chip_tq); sc->chip_tq = NULL; } /* stop and reset the controller if it was open */ if (sc->state != BXE_STATE_CLOSED) { BXE_CORE_LOCK(sc); bxe_nic_unload(sc, UNLOAD_CLOSE, TRUE); sc->state = BXE_STATE_DISABLED; BXE_CORE_UNLOCK(sc); } /* release the network interface */ if (ifp != NULL) { ether_ifdetach(ifp); } ifmedia_removeall(&sc->ifmedia); /* XXX do the following based on driver state... */ /* free the host hardware/software hsi structures */ bxe_free_hsi_mem(sc); /* free ilt */ bxe_free_ilt_mem(sc); bxe_free_buf_rings(sc); /* release the interrupts */ bxe_interrupt_free(sc); /* Release the mutexes*/ bxe_destroy_fp_mutexs(sc); bxe_release_mutexes(sc); /* Release the PCIe BAR mapped memory */ bxe_deallocate_bars(sc); /* Release the FreeBSD interface. */ if (sc->ifp != NULL) { if_free(sc->ifp); } pci_disable_busmaster(dev); return (0); } /* * Device shutdown function. * * Stops and resets the controller. * * Returns: * Nothing */ static int bxe_shutdown(device_t dev) { struct bxe_softc *sc; sc = device_get_softc(dev); BLOGD(sc, DBG_LOAD, "Starting shutdown...\n"); /* stop the periodic callout */ bxe_periodic_stop(sc); BXE_CORE_LOCK(sc); bxe_nic_unload(sc, UNLOAD_NORMAL, FALSE); BXE_CORE_UNLOCK(sc); return (0); } void bxe_igu_ack_sb(struct bxe_softc *sc, uint8_t igu_sb_id, uint8_t segment, uint16_t index, uint8_t op, uint8_t update) { uint32_t igu_addr = sc->igu_base_addr; igu_addr += (IGU_CMD_INT_ACK_BASE + igu_sb_id)*8; bxe_igu_ack_sb_gen(sc, igu_sb_id, segment, index, op, update, igu_addr); } static void bxe_igu_clear_sb_gen(struct bxe_softc *sc, uint8_t func, uint8_t idu_sb_id, uint8_t is_pf) { uint32_t data, ctl, cnt = 100; uint32_t igu_addr_data = IGU_REG_COMMAND_REG_32LSB_DATA; uint32_t igu_addr_ctl = IGU_REG_COMMAND_REG_CTRL; uint32_t igu_addr_ack = IGU_REG_CSTORM_TYPE_0_SB_CLEANUP + (idu_sb_id/32)*4; uint32_t sb_bit = 1 << (idu_sb_id%32); uint32_t func_encode = func | (is_pf ? 
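/*
 * Illustrative aside: the IGU cleanup command assembled below is a
 * packed control word -- register address, function id, and command
 * type each shifted into its own field. The encoding, using the shift
 * macros this code already references:
 *
 *   uint32_t ctl = (addr_encode << IGU_CTRL_REG_ADDRESS_SHIFT) |
 *                  (func_encode << IGU_CTRL_REG_FID_SHIFT)     |
 *                  (IGU_CTRL_CMD_TYPE_WR << IGU_CTRL_REG_TYPE_SHIFT);
 */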
1 : 0) << IGU_FID_ENCODE_IS_PF_SHIFT; uint32_t addr_encode = IGU_CMD_E2_PROD_UPD_BASE + idu_sb_id; /* Not supported in BC mode */ if (CHIP_INT_MODE_IS_BC(sc)) { return; } data = ((IGU_USE_REGISTER_cstorm_type_0_sb_cleanup << IGU_REGULAR_CLEANUP_TYPE_SHIFT) | IGU_REGULAR_CLEANUP_SET | IGU_REGULAR_BCLEANUP); ctl = ((addr_encode << IGU_CTRL_REG_ADDRESS_SHIFT) | (func_encode << IGU_CTRL_REG_FID_SHIFT) | (IGU_CTRL_CMD_TYPE_WR << IGU_CTRL_REG_TYPE_SHIFT)); BLOGD(sc, DBG_LOAD, "write 0x%08x to IGU(via GRC) addr 0x%x\n", data, igu_addr_data); REG_WR(sc, igu_addr_data, data); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); BLOGD(sc, DBG_LOAD, "write 0x%08x to IGU(via GRC) addr 0x%x\n", ctl, igu_addr_ctl); REG_WR(sc, igu_addr_ctl, ctl); bus_space_barrier(sc->bar[BAR0].tag, sc->bar[BAR0].handle, 0, 0, BUS_SPACE_BARRIER_WRITE); mb(); /* wait for clean up to finish */ while (!(REG_RD(sc, igu_addr_ack) & sb_bit) && --cnt) { DELAY(20000); } if (!(REG_RD(sc, igu_addr_ack) & sb_bit)) { BLOGD(sc, DBG_LOAD, "Unable to finish IGU cleanup: " "idu_sb_id %d offset %d bit %d (cnt %d)\n", idu_sb_id, idu_sb_id/32, idu_sb_id%32, cnt); } } static void bxe_igu_clear_sb(struct bxe_softc *sc, uint8_t idu_sb_id) { bxe_igu_clear_sb_gen(sc, SC_FUNC(sc), idu_sb_id, TRUE /*PF*/); } /*******************/ /* ECORE CALLBACKS */ /*******************/ static void bxe_reset_common(struct bxe_softc *sc) { uint32_t val = 0x1400; /* reset_common */ REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR), 0xd3ffff7f); if (CHIP_IS_E3(sc)) { val |= MISC_REGISTERS_RESET_REG_2_MSTAT0; val |= MISC_REGISTERS_RESET_REG_2_MSTAT1; } REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR), val); } static void bxe_common_init_phy(struct bxe_softc *sc) { uint32_t shmem_base[2]; uint32_t shmem2_base[2]; /* Avoid common init in case MFW supports LFA */ if (SHMEM2_RD(sc, size) > (uint32_t)offsetof(struct shmem2_region, lfa_host_addr[SC_PORT(sc)])) { return; } shmem_base[0] = sc->devinfo.shmem_base; shmem2_base[0] = sc->devinfo.shmem2_base; if (!CHIP_IS_E1x(sc)) { shmem_base[1] = SHMEM2_RD(sc, other_shmem_base_addr); shmem2_base[1] = SHMEM2_RD(sc, other_shmem2_base_addr); } bxe_acquire_phy_lock(sc); elink_common_init_phy(sc, shmem_base, shmem2_base, sc->devinfo.chip_id, 0); bxe_release_phy_lock(sc); } static void bxe_pf_disable(struct bxe_softc *sc) { uint32_t val = REG_RD(sc, IGU_REG_PF_CONFIGURATION); val &= ~IGU_PF_CONF_FUNC_EN; REG_WR(sc, IGU_REG_PF_CONFIGURATION, val); REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); REG_WR(sc, CFC_REG_WEAK_ENABLE_PF, 0); } static void bxe_init_pxp(struct bxe_softc *sc) { uint16_t devctl; int r_order, w_order; devctl = bxe_pcie_capability_read(sc, PCIR_EXPRESS_DEVICE_CTL, 2); BLOGD(sc, DBG_LOAD, "read 0x%08x from devctl\n", devctl); w_order = ((devctl & PCIM_EXP_CTL_MAX_PAYLOAD) >> 5); if (sc->mrrs == -1) { r_order = ((devctl & PCIM_EXP_CTL_MAX_READ_REQUEST) >> 12); } else { BLOGD(sc, DBG_LOAD, "forcing read order to %d\n", sc->mrrs); r_order = sc->mrrs; } ecore_init_pxp_arb(sc, r_order, w_order); } static uint32_t bxe_get_pretend_reg(struct bxe_softc *sc) { uint32_t base = PXP2_REG_PGL_PRETEND_FUNC_F0; uint32_t stride = (PXP2_REG_PGL_PRETEND_FUNC_F1 - base); return (base + (SC_ABS_FUNC(sc)) * stride); } /* * Called only on E1H or E2. * When pretending to be PF, the pretend value is the function number 0..7. * When pretending to be VF, the pretend val is the PF-num:VF-valid:ABS-VFID * combination. 
*/ static int bxe_pretend_func(struct bxe_softc *sc, uint16_t pretend_func_val) { uint32_t pretend_reg; if (CHIP_IS_E1H(sc) && (pretend_func_val > E1H_FUNC_MAX)) { return (-1); } /* get my own pretend register */ pretend_reg = bxe_get_pretend_reg(sc); REG_WR(sc, pretend_reg, pretend_func_val); REG_RD(sc, pretend_reg); return (0); } static void bxe_iov_init_dmae(struct bxe_softc *sc) { return; } static void bxe_iov_init_dq(struct bxe_softc *sc) { return; } /* send a NIG loopback debug packet */ static void bxe_lb_pckt(struct bxe_softc *sc) { uint32_t wb_write[3]; /* Ethernet source and destination addresses */ wb_write[0] = 0x55555555; wb_write[1] = 0x55555555; wb_write[2] = 0x20; /* SOP */ REG_WR_DMAE(sc, NIG_REG_DEBUG_PACKET_LB, wb_write, 3); /* NON-IP protocol */ wb_write[0] = 0x09000000; wb_write[1] = 0x55555555; wb_write[2] = 0x10; /* EOP, eop_bvalid = 0 */ REG_WR_DMAE(sc, NIG_REG_DEBUG_PACKET_LB, wb_write, 3); } /* * Some of the internal memories are not directly readable from the driver. * To test them we send debug packets. */ static int bxe_int_mem_test(struct bxe_softc *sc) { int factor; int count, i; uint32_t val = 0; if (CHIP_REV_IS_FPGA(sc)) { factor = 120; } else if (CHIP_REV_IS_EMUL(sc)) { factor = 200; } else { factor = 1; } /* disable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x0); REG_WR(sc, TCM_REG_PRS_IFEN, 0x0); REG_WR(sc, CFC_REG_DEBUG0, 0x1); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x0); /* write 0 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x0); /* send Ethernet packet */ bxe_lb_pckt(sc); /* TODO do i reset NIG statistic? */ /* Wait until NIG register shows 1 packet of size 0x10 */ count = 1000 * factor; while (count) { bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); if (val == 0x10) { break; } DELAY(10000); count--; } if (val != 0x10) { BLOGE(sc, "NIG timeout val=0x%x\n", val); return (-1); } /* wait until PRS register shows 1 packet */ count = (1000 * factor); while (count) { val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val == 1) { break; } DELAY(10000); count--; } if (val != 0x1) { BLOGE(sc, "PRS timeout val=0x%x\n", val); return (-2); } /* Reset and init BRB, PRS */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x03); DELAY(50000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03); DELAY(50000); ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); /* Disable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x0); REG_WR(sc, TCM_REG_PRS_IFEN, 0x0); REG_WR(sc, CFC_REG_DEBUG0, 0x1); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x0); /* Write 0 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x0); /* send 10 Ethernet packets */ for (i = 0; i < 10; i++) { bxe_lb_pckt(sc); } /* Wait until NIG register shows 10+1 packets of size 11*0x10 = 0xb0 */ count = (1000 * factor); while (count) { bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); if (val == 0xb0) { break; } DELAY(10000); count--; } if (val != 0xb0) { BLOGE(sc, "NIG timeout val=0x%x\n", val); return (-3); } /* Wait until PRS register shows 2 packets */ val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val != 2) { BLOGE(sc, "PRS timeout val=0x%x\n", val); } /* Write 1 to parser credits for CFC search request */ REG_WR(sc, PRS_REG_CFC_SEARCH_INITIAL_CREDIT, 0x1); /* Wait until PRS register shows 3 packets */ DELAY(10000 * factor); /* Wait until NIG register shows 1 packet of size 
0x10 */ val = REG_RD(sc, PRS_REG_NUM_OF_PACKETS); if (val != 3) { BLOGE(sc, "PRS timeout val=0x%x\n", val); } /* clear NIG EOP FIFO */ for (i = 0; i < 11; i++) { REG_RD(sc, NIG_REG_INGRESS_EOP_LB_FIFO); } val = REG_RD(sc, NIG_REG_INGRESS_EOP_LB_EMPTY); if (val != 1) { BLOGE(sc, "clear of NIG failed val=0x%x\n", val); return (-4); } /* Reset and init BRB, PRS, NIG */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x03); DELAY(50000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x03); DELAY(50000); ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); if (!CNIC_SUPPORT(sc)) { /* set NIC mode */ REG_WR(sc, PRS_REG_NIC_MODE, 1); } /* Enable inputs of parser neighbor blocks */ REG_WR(sc, TSDM_REG_ENABLE_IN1, 0x7fffffff); REG_WR(sc, TCM_REG_PRS_IFEN, 0x1); REG_WR(sc, CFC_REG_DEBUG0, 0x0); REG_WR(sc, NIG_REG_PRS_REQ_IN_EN, 0x1); return (0); } static void bxe_setup_fan_failure_detection(struct bxe_softc *sc) { int is_required; uint32_t val; int port; is_required = 0; val = (SHMEM_RD(sc, dev_info.shared_hw_config.config2) & SHARED_HW_CFG_FAN_FAILURE_MASK); if (val == SHARED_HW_CFG_FAN_FAILURE_ENABLED) { is_required = 1; } /* * The fan failure mechanism is usually related to the PHY type since * the power consumption of the board is affected by the PHY. Currently, * fan is required for most designs with SFX7101, BCM8727 and BCM8481. */ else if (val == SHARED_HW_CFG_FAN_FAILURE_PHY_TYPE) { for (port = PORT_0; port < PORT_MAX; port++) { is_required |= elink_fan_failure_det_req(sc, sc->devinfo.shmem_base, sc->devinfo.shmem2_base, port); } } BLOGD(sc, DBG_LOAD, "fan detection setting: %d\n", is_required); if (is_required == 0) { return; } /* Fan failure is indicated by SPIO 5 */ bxe_set_spio(sc, MISC_SPIO_SPIO5, MISC_SPIO_INPUT_HI_Z); /* set to active low mode */ val = REG_RD(sc, MISC_REG_SPIO_INT); val |= (MISC_SPIO_SPIO5 << MISC_SPIO_INT_OLD_SET_POS); REG_WR(sc, MISC_REG_SPIO_INT, val); /* enable interrupt to signal the IGU */ val = REG_RD(sc, MISC_REG_SPIO_EVENT_EN); val |= MISC_SPIO_SPIO5; REG_WR(sc, MISC_REG_SPIO_EVENT_EN, val); } static void bxe_enable_blocks_attention(struct bxe_softc *sc) { uint32_t val; REG_WR(sc, PXP_REG_PXP_INT_MASK_0, 0); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PXP_REG_PXP_INT_MASK_1, 0x40); } else { REG_WR(sc, PXP_REG_PXP_INT_MASK_1, 0); } REG_WR(sc, DORQ_REG_DORQ_INT_MASK, 0); REG_WR(sc, CFC_REG_CFC_INT_MASK, 0); /* * mask read length error interrupts in brb for parser * (parsing unit and 'checksum and crc' unit) * these errors are legal (PU reads fixed length and CAC can cause * read length error on truncated packets) */ REG_WR(sc, BRB1_REG_BRB1_INT_MASK, 0xFC00); REG_WR(sc, QM_REG_QM_INT_MASK, 0); REG_WR(sc, TM_REG_TM_INT_MASK, 0); REG_WR(sc, XSDM_REG_XSDM_INT_MASK_0, 0); REG_WR(sc, XSDM_REG_XSDM_INT_MASK_1, 0); REG_WR(sc, XCM_REG_XCM_INT_MASK, 0); /* REG_WR(sc, XSEM_REG_XSEM_INT_MASK_0, 0); */ /* REG_WR(sc, XSEM_REG_XSEM_INT_MASK_1, 0); */ REG_WR(sc, USDM_REG_USDM_INT_MASK_0, 0); REG_WR(sc, USDM_REG_USDM_INT_MASK_1, 0); REG_WR(sc, UCM_REG_UCM_INT_MASK, 0); /* REG_WR(sc, USEM_REG_USEM_INT_MASK_0, 0); */ /* REG_WR(sc, USEM_REG_USEM_INT_MASK_1, 0); */ REG_WR(sc, GRCBASE_UPB + PB_REG_PB_INT_MASK, 0); REG_WR(sc, CSDM_REG_CSDM_INT_MASK_0, 0); REG_WR(sc, CSDM_REG_CSDM_INT_MASK_1, 0); REG_WR(sc, CCM_REG_CCM_INT_MASK, 0); /* REG_WR(sc, CSEM_REG_CSEM_INT_MASK_0, 0); */ /* REG_WR(sc, CSEM_REG_CSEM_INT_MASK_1, 0); */ val = (PXP2_PXP2_INT_MASK_0_REG_PGL_CPL_AFT | PXP2_PXP2_INT_MASK_0_REG_PGL_CPL_OF | 
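/*
 * Illustrative aside: the attention masks in this function are composed
 * as a base OR of bits common to all chips, with revision-specific bits
 * OR-ed in before the single register write. A minimal sketch of the
 * pattern, with hypothetical bit names:
 *
 *   uint32_t mask = BIT_A | BIT_B;       // common to all revisions
 *   if (!CHIP_IS_E1x(sc))
 *       mask |= BIT_C | BIT_D;           // E2 and later only
 *   REG_WR(sc, SOME_INT_MASK_REG, mask);
 */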
PXP2_PXP2_INT_MASK_0_REG_PGL_PCIE_ATTN); if (!CHIP_IS_E1x(sc)) { val |= (PXP2_PXP2_INT_MASK_0_REG_PGL_READ_BLOCKED | PXP2_PXP2_INT_MASK_0_REG_PGL_WRITE_BLOCKED); } REG_WR(sc, PXP2_REG_PXP2_INT_MASK_0, val); REG_WR(sc, TSDM_REG_TSDM_INT_MASK_0, 0); REG_WR(sc, TSDM_REG_TSDM_INT_MASK_1, 0); REG_WR(sc, TCM_REG_TCM_INT_MASK, 0); /* REG_WR(sc, TSEM_REG_TSEM_INT_MASK_0, 0); */ if (!CHIP_IS_E1x(sc)) { /* enable VFC attentions: bits 11 and 12, bits 31:13 reserved */ REG_WR(sc, TSEM_REG_TSEM_INT_MASK_1, 0x07ff); } REG_WR(sc, CDU_REG_CDU_INT_MASK, 0); REG_WR(sc, DMAE_REG_DMAE_INT_MASK, 0); /* REG_WR(sc, MISC_REG_MISC_INT_MASK, 0); */ REG_WR(sc, PBF_REG_PBF_INT_MASK, 0x18); /* bit 3,4 masked */ } /** * bxe_init_hw_common - initialize the HW at the COMMON phase. * * @sc: driver handle */ static int bxe_init_hw_common(struct bxe_softc *sc) { uint8_t abs_func_id; uint32_t val; BLOGD(sc, DBG_LOAD, "starting common init for func %d\n", SC_ABS_FUNC(sc)); /* * take the RESET lock to protect undi_unload flow from accessing * registers while we are resetting the chip */ bxe_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET); bxe_reset_common(sc); REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET), 0xffffffff); val = 0xfffc; if (CHIP_IS_E3(sc)) { val |= MISC_REGISTERS_RESET_REG_2_MSTAT0; val |= MISC_REGISTERS_RESET_REG_2_MSTAT1; } REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET), val); bxe_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET); ecore_init_block(sc, BLOCK_MISC, PHASE_COMMON); BLOGD(sc, DBG_LOAD, "after misc block init\n"); if (!CHIP_IS_E1x(sc)) { /* * In 4-port or 2-port mode we need to turn off master-enable for * everyone. After that we turn it back on for self. So, we disregard * multi-function, and always disable all functions on the given path, * this means 0,2,4,6 for path 0 and 1,3,5,7 for path 1 */ for (abs_func_id = SC_PATH(sc); abs_func_id < (E2_FUNC_MAX * 2); abs_func_id += 2) { if (abs_func_id == SC_ABS_FUNC(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); continue; } bxe_pretend_func(sc, abs_func_id); /* clear pf enable */ bxe_pf_disable(sc); bxe_pretend_func(sc, SC_ABS_FUNC(sc)); } } BLOGD(sc, DBG_LOAD, "after pf disable\n"); ecore_init_block(sc, BLOCK_PXP, PHASE_COMMON); if (CHIP_IS_E1(sc)) { /* * enable HW interrupt from PXP on USDM overflow * bit 16 on INT_MASK_0 */ REG_WR(sc, PXP_REG_PXP_INT_MASK_0, 0); } ecore_init_block(sc, BLOCK_PXP2, PHASE_COMMON); bxe_init_pxp(sc); #ifdef __BIG_ENDIAN REG_WR(sc, PXP2_REG_RQ_QM_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_TM_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_SRC_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_CDU_ENDIAN_M, 1); REG_WR(sc, PXP2_REG_RQ_DBG_ENDIAN_M, 1); /* make sure this value is 0 */ REG_WR(sc, PXP2_REG_RQ_HC_ENDIAN_M, 0); //REG_WR(sc, PXP2_REG_RD_PBF_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_QM_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_TM_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_SRC_SWAP_MODE, 1); REG_WR(sc, PXP2_REG_RD_CDURD_SWAP_MODE, 1); #endif ecore_ilt_init_page_size(sc, INITOP_SET); if (CHIP_REV_IS_FPGA(sc) && CHIP_IS_E1H(sc)) { REG_WR(sc, PXP2_REG_PGL_TAGS_LIMIT, 0x1); } /* let the HW do its magic... */ DELAY(100000); /* finish PXP init */ val = REG_RD(sc, PXP2_REG_RQ_CFG_DONE); if (val != 1) { BLOGE(sc, "PXP2 CFG failed PXP2_REG_RQ_CFG_DONE val = 0x%x\n", val); return (-1); } val = REG_RD(sc, PXP2_REG_RD_INIT_DONE); if (val != 1) { BLOGE(sc, "PXP2 RD_INIT failed val = 0x%x\n", val); return (-1); } BLOGD(sc, DBG_LOAD, "after pxp init\n"); /* * Timer bug workaround for E2 only.
We need to set the entire ILT to have * entries with value "0" and valid bit on. This needs to be done by the * first PF that is loaded in a path (i.e. common phase) */ if (!CHIP_IS_E1x(sc)) { /* * In E2 there is a bug in the timers block that can cause function 6 / 7 * (i.e. vnic3) to start even if it is marked as "scan-off". * This occurs when a different function (func2,3) is being marked * as "scan-off". Real-life scenario for example: if a driver is being * load-unloaded while func6,7 are down. This will cause the timer to access * the ilt, translate to a logical address and send a request to read/write. * Since the ilt for the function that is down is not valid, this will cause * a translation error which is unrecoverable. * The Workaround is intended to make sure that when this happens nothing * fatal will occur. The workaround: * 1. First PF driver which loads on a path will: * a. After taking the chip out of reset, by using pretend, * it will write "0" to the following registers of * the other vnics. * REG_WR(pdev, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); * REG_WR(pdev, CFC_REG_WEAK_ENABLE_PF,0); * REG_WR(pdev, CFC_REG_STRONG_ENABLE_PF,0); * And for itself it will write '1' to * PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER to enable * dmae-operations (writing to pram for example.) * note: can be done for only function 6,7 but cleaner this * way. * b. Write zero+valid to the entire ILT. * c. Init the first_timers_ilt_entry, last_timers_ilt_entry of * VNIC3 (of that port). The range allocated will be the * entire ILT. This is needed to prevent ILT range error. * 2. Any PF driver load flow: * a. ILT update with the physical addresses of the allocated * logical pages. * b. Wait 20msec. - note that this timeout is needed to make * sure there are no requests in one of the PXP internal * queues with "old" ILT addresses. * c. PF enable in the PGLC. * d. Clear the was_error of the PF in the PGLC. (could have * occurred while driver was down) * e. PF enable in the CFC (WEAK + STRONG) * f. Timers scan enable * 3. PF driver unload flow: * a. Clear the Timers scan_en. * b. Polling for scan_on=0 for that PF. * c. Clear the PF enable bit in the PXP. * d. Clear the PF enable in the CFC (WEAK + STRONG) * e. Write zero+valid to all ILT entries (The valid bit must * stay set) * f. If this is VNIC 3 of a port then also init * first_timers_ilt_entry to zero and last_timers_ilt_entry * to the last entry in the ILT. * * Notes: * Currently the PF error in the PGLC is non-recoverable. * In the future there will be a recovery routine for this error. * Currently attention is masked. * Having an MCP lock on the load/unload process does not guarantee that * there is no Timer disable during Func6/7 enable. This is because the * Timers scan is currently being cleared by the MCP on FLR. * Step 2.d can be done only for PF6/7 and the driver can also check if * there is an error before clearing it. But the flow above is simpler and * more general. * All ILT entries are written by zero+valid and not just PF6/7 * ILT entries since in the future the ILT entries allocation for * PF-s might be dynamic.
*/ struct ilt_client_info ilt_cli; struct ecore_ilt ilt; memset(&ilt_cli, 0, sizeof(struct ilt_client_info)); memset(&ilt, 0, sizeof(struct ecore_ilt)); /* initialize dummy TM client */ ilt_cli.start = 0; ilt_cli.end = ILT_NUM_PAGE_ENTRIES - 1; ilt_cli.client_num = ILT_CLIENT_TM; /* * Step 1: set zeroes to all ilt page entries with valid bit on * Step 2: set the timers first/last ilt entry to point * to the entire range to prevent ILT range error for 3rd/4th * vnic (this code assumes existence of the vnic) * * both steps performed by call to ecore_ilt_client_init_op() * with dummy TM client * * we must use pretend since PXP2_REG_RQ_##blk##_FIRST_ILT * and its sibling are split registers */ bxe_pretend_func(sc, (SC_PATH(sc) + 6)); ecore_ilt_client_init_op_ilt(sc, &ilt, &ilt_cli, INITOP_CLEAR); bxe_pretend_func(sc, SC_ABS_FUNC(sc)); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN, BXE_PXP_DRAM_ALIGN); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN_RD, BXE_PXP_DRAM_ALIGN); REG_WR(sc, PXP2_REG_RQ_DRAM_ALIGN_SEL, 1); } REG_WR(sc, PXP2_REG_RQ_DISABLE_INPUTS, 0); REG_WR(sc, PXP2_REG_RD_DISABLE_INPUTS, 0); if (!CHIP_IS_E1x(sc)) { int factor = CHIP_REV_IS_EMUL(sc) ? 1000 : (CHIP_REV_IS_FPGA(sc) ? 400 : 0); ecore_init_block(sc, BLOCK_PGLUE_B, PHASE_COMMON); ecore_init_block(sc, BLOCK_ATC, PHASE_COMMON); /* let the HW do its magic... */ do { DELAY(200000); val = REG_RD(sc, ATC_REG_ATC_INIT_DONE); } while (factor-- && (val != 1)); if (val != 1) { BLOGE(sc, "ATC_INIT failed val = 0x%x\n", val); return (-1); } } BLOGD(sc, DBG_LOAD, "after pglue and atc init\n"); ecore_init_block(sc, BLOCK_DMAE, PHASE_COMMON); bxe_iov_init_dmae(sc); /* clean the DMAE memory */ sc->dmae_ready = 1; ecore_init_fill(sc, TSEM_REG_PRAM, 0, 8, 1); ecore_init_block(sc, BLOCK_TCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_UCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CCM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XCM, PHASE_COMMON); bxe_read_dmae(sc, XSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, CSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, TSEM_REG_PASSIVE_BUFFER, 3); bxe_read_dmae(sc, USEM_REG_PASSIVE_BUFFER, 3); ecore_init_block(sc, BLOCK_QM, PHASE_COMMON); /* QM queues pointers table */ ecore_qm_init_ptr_table(sc, sc->qm_cid_count, INITOP_SET); /* soft reset pulse */ REG_WR(sc, QM_REG_SOFT_RESET, 1); REG_WR(sc, QM_REG_SOFT_RESET, 0); if (CNIC_SUPPORT(sc)) ecore_init_block(sc, BLOCK_TM, PHASE_COMMON); ecore_init_block(sc, BLOCK_DORQ, PHASE_COMMON); REG_WR(sc, DORQ_REG_DPM_CID_OFST, BXE_DB_SHIFT); if (!CHIP_REV_IS_SLOW(sc)) { /* enable hw interrupt from doorbell Q */ REG_WR(sc, DORQ_REG_DORQ_INT_MASK, 0); } ecore_init_block(sc, BLOCK_BRB1, PHASE_COMMON); ecore_init_block(sc, BLOCK_PRS, PHASE_COMMON); REG_WR(sc, PRS_REG_A_PRSU_20, 0xf); if (!CHIP_IS_E1(sc)) { REG_WR(sc, PRS_REG_E1HOV_MODE, sc->devinfo.mf_info.path_has_ovlan); } if (!CHIP_IS_E1x(sc) && !CHIP_IS_E3B0(sc)) { if (IS_MF_AFEX(sc)) { /* * configure that AFEX and VLAN headers must be * received in AFEX mode */ REG_WR(sc, PRS_REG_HDRS_AFTER_BASIC, 0xE); REG_WR(sc, PRS_REG_MUST_HAVE_HDRS, 0xA); REG_WR(sc, PRS_REG_HDRS_AFTER_TAG_0, 0x6); REG_WR(sc, PRS_REG_TAG_ETHERTYPE_0, 0x8926); REG_WR(sc, PRS_REG_TAG_LEN_0, 0x4); } else { /* * Bit-map indicating which L2 hdrs may appear * after the basic Ethernet header */ REG_WR(sc, PRS_REG_HDRS_AFTER_BASIC, sc->devinfo.mf_info.path_has_ovlan ?
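/*
 * Illustrative aside: the pretend mechanism used above makes subsequent
 * GRC accesses appear to come from another function, and must always be
 * bracketed by a restore to the caller's own function. The bracket, as
 * used in this file:
 *
 *   bxe_pretend_func(sc, other_func);        // act as other_func
 *   // ... touch the other function's registers ...
 *   bxe_pretend_func(sc, SC_ABS_FUNC(sc));   // restore own identity
 *
 * bxe_pretend_func() reads the pretend register back after writing it,
 * so the new identity is in effect before the next access.
 */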
7 : 6); } } ecore_init_block(sc, BLOCK_TSDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CSDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_USDM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XSDM, PHASE_COMMON); if (!CHIP_IS_E1x(sc)) { /* reset VFC memories */ REG_WR(sc, TSEM_REG_FAST_MEMORY + VFC_REG_MEMORIES_RST, VFC_MEMORIES_RST_REG_CAM_RST | VFC_MEMORIES_RST_REG_RAM_RST); REG_WR(sc, XSEM_REG_FAST_MEMORY + VFC_REG_MEMORIES_RST, VFC_MEMORIES_RST_REG_CAM_RST | VFC_MEMORIES_RST_REG_RAM_RST); DELAY(20000); } ecore_init_block(sc, BLOCK_TSEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_USEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_CSEM, PHASE_COMMON); ecore_init_block(sc, BLOCK_XSEM, PHASE_COMMON); /* sync semi rtc */ REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0x80000000); REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0x80000000); ecore_init_block(sc, BLOCK_UPB, PHASE_COMMON); ecore_init_block(sc, BLOCK_XPB, PHASE_COMMON); ecore_init_block(sc, BLOCK_PBF, PHASE_COMMON); if (!CHIP_IS_E1x(sc)) { if (IS_MF_AFEX(sc)) { /* * configure that AFEX and VLAN headers must be * sent in AFEX mode */ REG_WR(sc, PBF_REG_HDRS_AFTER_BASIC, 0xE); REG_WR(sc, PBF_REG_MUST_HAVE_HDRS, 0xA); REG_WR(sc, PBF_REG_HDRS_AFTER_TAG_0, 0x6); REG_WR(sc, PBF_REG_TAG_ETHERTYPE_0, 0x8926); REG_WR(sc, PBF_REG_TAG_LEN_0, 0x4); } else { REG_WR(sc, PBF_REG_HDRS_AFTER_BASIC, sc->devinfo.mf_info.path_has_ovlan ? 7 : 6); } } REG_WR(sc, SRC_REG_SOFT_RST, 1); ecore_init_block(sc, BLOCK_SRC, PHASE_COMMON); if (CNIC_SUPPORT(sc)) { REG_WR(sc, SRC_REG_KEYSEARCH_0, 0x63285672); REG_WR(sc, SRC_REG_KEYSEARCH_1, 0x24b8f2cc); REG_WR(sc, SRC_REG_KEYSEARCH_2, 0x223aef9b); REG_WR(sc, SRC_REG_KEYSEARCH_3, 0x26001e3a); REG_WR(sc, SRC_REG_KEYSEARCH_4, 0x7ae91116); REG_WR(sc, SRC_REG_KEYSEARCH_5, 0x5ce5230b); REG_WR(sc, SRC_REG_KEYSEARCH_6, 0x298d8adf); REG_WR(sc, SRC_REG_KEYSEARCH_7, 0x6eb0ff09); REG_WR(sc, SRC_REG_KEYSEARCH_8, 0x1830f82f); REG_WR(sc, SRC_REG_KEYSEARCH_9, 0x01e46be7); } REG_WR(sc, SRC_REG_SOFT_RST, 0); if (sizeof(union cdu_context) != 1024) { /* we currently assume that a context is 1024 bytes */ BLOGE(sc, "please adjust the size of cdu_context(%ld)\n", (long)sizeof(union cdu_context)); } ecore_init_block(sc, BLOCK_CDU, PHASE_COMMON); val = (4 << 24) + (0 << 12) + 1024; REG_WR(sc, CDU_REG_CDU_GLOBAL_PARAMS, val); ecore_init_block(sc, BLOCK_CFC, PHASE_COMMON); REG_WR(sc, CFC_REG_INIT_REG, 0x7FF); /* enable context validation interrupt from CFC */ REG_WR(sc, CFC_REG_CFC_INT_MASK, 0); /* set the thresholds to prevent CFC/CDU race */ REG_WR(sc, CFC_REG_DEBUG0, 0x20020000); ecore_init_block(sc, BLOCK_HC, PHASE_COMMON); if (!CHIP_IS_E1x(sc) && BXE_NOMCP(sc)) { REG_WR(sc, IGU_REG_RESET_MEMORIES, 0x36); } ecore_init_block(sc, BLOCK_IGU, PHASE_COMMON); ecore_init_block(sc, BLOCK_MISC_AEU, PHASE_COMMON); /* Reset PCIE errors for debug */ REG_WR(sc, 0x2814, 0xffffffff); REG_WR(sc, 0x3820, 0xffffffff); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_CONTROL_5, (PXPCS_TL_CONTROL_5_ERR_UNSPPORT1 | PXPCS_TL_CONTROL_5_ERR_UNSPPORT)); REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_FUNC345_STAT, (PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT4 | PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT3 | PXPCS_TL_FUNC345_STAT_ERR_UNSPPORT2)); REG_WR(sc, PCICFG_OFFSET + PXPCS_TL_FUNC678_STAT, (PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT7 | PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT6 | PXPCS_TL_FUNC678_STAT_ERR_UNSPPORT5)); } ecore_init_block(sc, BLOCK_NIG, PHASE_COMMON); if (!CHIP_IS_E1(sc)) { /* in E3 this done in per-port section */ if (!CHIP_IS_E3(sc)) REG_WR(sc, 
NIG_REG_LLH_MF_MODE, IS_MF(sc)); } if (CHIP_IS_E1H(sc)) { /* not applicable for E2 (and above ...) */ REG_WR(sc, NIG_REG_LLH_E1HOV_MODE, IS_MF_SD(sc)); } if (CHIP_REV_IS_SLOW(sc)) { DELAY(200000); } /* finish CFC init */ val = reg_poll(sc, CFC_REG_LL_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC LL_INIT failed val=0x%x\n", val); return (-1); } val = reg_poll(sc, CFC_REG_AC_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC AC_INIT failed val=0x%x\n", val); return (-1); } val = reg_poll(sc, CFC_REG_CAM_INIT_DONE, 1, 100, 10); if (val != 1) { BLOGE(sc, "CFC CAM_INIT failed val=0x%x\n", val); return (-1); } REG_WR(sc, CFC_REG_DEBUG0, 0); if (CHIP_IS_E1(sc)) { /* read NIG statistic to see if this is our first up since powerup */ bxe_read_dmae(sc, NIG_REG_STAT2_BRB_OCTET, 2); val = *BXE_SP(sc, wb_data[0]); /* do internal memory self test */ if ((val == 0) && bxe_int_mem_test(sc)) { BLOGE(sc, "internal mem self test failed val=0x%x\n", val); return (-1); } } bxe_setup_fan_failure_detection(sc); /* clear PXP2 attentions */ REG_RD(sc, PXP2_REG_PXP2_INT_STS_CLR_0); bxe_enable_blocks_attention(sc); if (!CHIP_REV_IS_SLOW(sc)) { ecore_enable_blocks_parity(sc); } if (!BXE_NOMCP(sc)) { if (CHIP_IS_E1x(sc)) { bxe_common_init_phy(sc); } } return (0); } /** * bxe_init_hw_common_chip - init HW at the COMMON_CHIP phase. * * @sc: driver handle */ static int bxe_init_hw_common_chip(struct bxe_softc *sc) { int rc = bxe_init_hw_common(sc); if (rc) { BLOGE(sc, "bxe_init_hw_common failed rc=%d\n", rc); return (rc); } /* In E2 2-PORT mode, same ext phy is used for the two paths */ if (!BXE_NOMCP(sc)) { bxe_common_init_phy(sc); } return (0); } static int bxe_init_hw_port(struct bxe_softc *sc) { int port = SC_PORT(sc); int init_phase = port ? PHASE_PORT1 : PHASE_PORT0; uint32_t low, high; uint32_t val; BLOGD(sc, DBG_LOAD, "starting port init for port %d\n", port); REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0); ecore_init_block(sc, BLOCK_MISC, init_phase); ecore_init_block(sc, BLOCK_PXP, init_phase); ecore_init_block(sc, BLOCK_PXP2, init_phase); /* * Timers bug workaround: disables the pf_master bit in pglue at * common phase, we need to enable it here before any dmae access are * attempted. Therefore we manually added the enable-master to the * port phase (it also happens in the function phase) */ if (!CHIP_IS_E1x(sc)) { REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); } ecore_init_block(sc, BLOCK_ATC, init_phase); ecore_init_block(sc, BLOCK_DMAE, init_phase); ecore_init_block(sc, BLOCK_PGLUE_B, init_phase); ecore_init_block(sc, BLOCK_QM, init_phase); ecore_init_block(sc, BLOCK_TCM, init_phase); ecore_init_block(sc, BLOCK_UCM, init_phase); ecore_init_block(sc, BLOCK_CCM, init_phase); ecore_init_block(sc, BLOCK_XCM, init_phase); /* QM cid (connection) count */ ecore_qm_init_cid_count(sc, sc->qm_cid_count, INITOP_SET); if (CNIC_SUPPORT(sc)) { ecore_init_block(sc, BLOCK_TM, init_phase); REG_WR(sc, TM_REG_LIN0_SCAN_TIME + port*4, 20); REG_WR(sc, TM_REG_LIN0_MAX_ACTIVE_CID + port*4, 31); } ecore_init_block(sc, BLOCK_DORQ, init_phase); ecore_init_block(sc, BLOCK_BRB1, init_phase); if (CHIP_IS_E1(sc) || CHIP_IS_E1H(sc)) { if (IS_MF(sc)) { low = (BXE_ONE_PORT(sc) ? 160 : 246); } else if (sc->mtu > 4096) { if (BXE_ONE_PORT(sc)) { low = 160; } else { val = sc->mtu; /* (24*1024 + val*4)/256 */ low = (96 + (val / 64) + ((val % 64) ? 1 : 0)); } } else { low = (BXE_ONE_PORT(sc) ? 
80 : 160); } high = (low + 56); /* 14*1024/256 */ REG_WR(sc, BRB1_REG_PAUSE_LOW_THRESHOLD_0 + port*4, low); REG_WR(sc, BRB1_REG_PAUSE_HIGH_THRESHOLD_0 + port*4, high); } if (CHIP_IS_MODE_4_PORT(sc)) { REG_WR(sc, SC_PORT(sc) ? BRB1_REG_MAC_GUARANTIED_1 : BRB1_REG_MAC_GUARANTIED_0, 40); } ecore_init_block(sc, BLOCK_PRS, init_phase); if (CHIP_IS_E3B0(sc)) { if (IS_MF_AFEX(sc)) { /* configure headers for AFEX mode */ REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_BASIC_PORT_1 : PRS_REG_HDRS_AFTER_BASIC_PORT_0, 0xE); REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_TAG_0_PORT_1 : PRS_REG_HDRS_AFTER_TAG_0_PORT_0, 0x6); REG_WR(sc, SC_PORT(sc) ? PRS_REG_MUST_HAVE_HDRS_PORT_1 : PRS_REG_MUST_HAVE_HDRS_PORT_0, 0xA); } else { /* Ovlan exists only if we are in multi-function + * switch-dependent mode, in switch-independent there * is no ovlan headers */ REG_WR(sc, SC_PORT(sc) ? PRS_REG_HDRS_AFTER_BASIC_PORT_1 : PRS_REG_HDRS_AFTER_BASIC_PORT_0, (sc->devinfo.mf_info.path_has_ovlan ? 7 : 6)); } } ecore_init_block(sc, BLOCK_TSDM, init_phase); ecore_init_block(sc, BLOCK_CSDM, init_phase); ecore_init_block(sc, BLOCK_USDM, init_phase); ecore_init_block(sc, BLOCK_XSDM, init_phase); ecore_init_block(sc, BLOCK_TSEM, init_phase); ecore_init_block(sc, BLOCK_USEM, init_phase); ecore_init_block(sc, BLOCK_CSEM, init_phase); ecore_init_block(sc, BLOCK_XSEM, init_phase); ecore_init_block(sc, BLOCK_UPB, init_phase); ecore_init_block(sc, BLOCK_XPB, init_phase); ecore_init_block(sc, BLOCK_PBF, init_phase); if (CHIP_IS_E1x(sc)) { /* configure PBF to work without PAUSE mtu 9000 */ REG_WR(sc, PBF_REG_P0_PAUSE_ENABLE + port*4, 0); /* update threshold */ REG_WR(sc, PBF_REG_P0_ARB_THRSH + port*4, (9040/16)); /* update init credit */ REG_WR(sc, PBF_REG_P0_INIT_CRD + port*4, (9040/16) + 553 - 22); /* probe changes */ REG_WR(sc, PBF_REG_INIT_P0 + port*4, 1); DELAY(50); REG_WR(sc, PBF_REG_INIT_P0 + port*4, 0); } if (CNIC_SUPPORT(sc)) { ecore_init_block(sc, BLOCK_SRC, init_phase); } ecore_init_block(sc, BLOCK_CDU, init_phase); ecore_init_block(sc, BLOCK_CFC, init_phase); if (CHIP_IS_E1(sc)) { REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } ecore_init_block(sc, BLOCK_HC, init_phase); ecore_init_block(sc, BLOCK_IGU, init_phase); ecore_init_block(sc, BLOCK_MISC_AEU, init_phase); /* init aeu_mask_attn_func_0/1: * - SF mode: bits 3-7 are masked. only bits 0-2 are in use * - MF mode: bit 3 is masked. bits 0-2 are in use as in SF * bits 4-7 are used for "per vn group attention" */ val = IS_MF(sc) ? 0xF7 : 0x7; /* Enable DCBX attention for all but E1 */ val |= CHIP_IS_E1(sc) ? 0 : 0x10; REG_WR(sc, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4, val); ecore_init_block(sc, BLOCK_NIG, init_phase); if (!CHIP_IS_E1x(sc)) { /* Bit-map indicating which L2 hdrs may appear after the * basic Ethernet header */ if (IS_MF_AFEX(sc)) { REG_WR(sc, SC_PORT(sc) ? NIG_REG_P1_HDRS_AFTER_BASIC : NIG_REG_P0_HDRS_AFTER_BASIC, 0xE); } else { REG_WR(sc, SC_PORT(sc) ? NIG_REG_P1_HDRS_AFTER_BASIC : NIG_REG_P0_HDRS_AFTER_BASIC, IS_MF_SD(sc) ? 7 : 6); } if (CHIP_IS_E3(sc)) { REG_WR(sc, SC_PORT(sc) ? NIG_REG_LLH1_MF_MODE : NIG_REG_LLH_MF_MODE, IS_MF(sc)); } } if (!CHIP_IS_E3(sc)) { REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port*4, 1); } if (!CHIP_IS_E1(sc)) { /* 0x2 disable mf_ov, 0x1 enable */ REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK_MF + port*4, (IS_MF_SD(sc) ? 
0x1 : 0x2)); if (!CHIP_IS_E1x(sc)) { val = 0; switch (sc->devinfo.mf_info.mf_mode) { case MULTI_FUNCTION_SD: val = 1; break; case MULTI_FUNCTION_SI: case MULTI_FUNCTION_AFEX: val = 2; break; } REG_WR(sc, (SC_PORT(sc) ? NIG_REG_LLH1_CLS_TYPE : NIG_REG_LLH0_CLS_TYPE), val); } REG_WR(sc, NIG_REG_LLFC_ENABLE_0 + port*4, 0); REG_WR(sc, NIG_REG_LLFC_OUT_EN_0 + port*4, 0); REG_WR(sc, NIG_REG_PAUSE_ENABLE_0 + port*4, 1); } /* If SPIO5 is set to generate interrupts, enable it for this port */ val = REG_RD(sc, MISC_REG_SPIO_EVENT_EN); if (val & MISC_SPIO_SPIO5) { uint32_t reg_addr = (port ? MISC_REG_AEU_ENABLE1_FUNC_1_OUT_0 : MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0); val = REG_RD(sc, reg_addr); val |= AEU_INPUTS_ATTN_BITS_SPIO5; REG_WR(sc, reg_addr, val); } return (0); } static uint32_t bxe_flr_clnup_reg_poll(struct bxe_softc *sc, uint32_t reg, uint32_t expected, uint32_t poll_count) { uint32_t cur_cnt = poll_count; uint32_t val; while ((val = REG_RD(sc, reg)) != expected && cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); } return (val); } static int bxe_flr_clnup_poll_hw_counter(struct bxe_softc *sc, uint32_t reg, char *msg, uint32_t poll_cnt) { uint32_t val = bxe_flr_clnup_reg_poll(sc, reg, 0, poll_cnt); if (val != 0) { BLOGE(sc, "%s usage count=%d\n", msg, val); return (1); } return (0); } /* Common routines with VF FLR cleanup */ static uint32_t bxe_flr_clnup_poll_count(struct bxe_softc *sc) { /* adjust polling timeout */ if (CHIP_REV_IS_EMUL(sc)) { return (FLR_POLL_CNT * 2000); } if (CHIP_REV_IS_FPGA(sc)) { return (FLR_POLL_CNT * 120); } return (FLR_POLL_CNT); } static int bxe_poll_hw_usage_counters(struct bxe_softc *sc, uint32_t poll_cnt) { /* wait for CFC PF usage-counter to zero (includes all the VFs) */ if (bxe_flr_clnup_poll_hw_counter(sc, CFC_REG_NUM_LCIDS_INSIDE_PF, "CFC PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for DQ PF usage-counter to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, DORQ_REG_PF_USAGE_CNT, "DQ PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for QM PF usage-counter to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, QM_REG_PF_USG_CNT_0 + 4*SC_FUNC(sc), "QM PF usage counter timed out", poll_cnt)) { return (1); } /* Wait for Timer PF usage-counters to zero (until DQ cleanup) */ if (bxe_flr_clnup_poll_hw_counter(sc, TM_REG_LIN0_VNIC_UC + 4*SC_PORT(sc), "Timers VNIC usage counter timed out", poll_cnt)) { return (1); } if (bxe_flr_clnup_poll_hw_counter(sc, TM_REG_LIN0_NUM_SCANS + 4*SC_PORT(sc), "Timers NUM_SCANS usage counter timed out", poll_cnt)) { return (1); } /* Wait for the DMAE PF usage counter to zero */ if (bxe_flr_clnup_poll_hw_counter(sc, dmae_reg_go_c[INIT_DMAE_C(sc)], "DMAE command register timed out", poll_cnt)) { return (1); } return (0); } #define OP_GEN_PARAM(param) \ (((param) << SDM_OP_GEN_COMP_PARAM_SHIFT) & SDM_OP_GEN_COMP_PARAM) #define OP_GEN_TYPE(type) \ (((type) << SDM_OP_GEN_COMP_TYPE_SHIFT) & SDM_OP_GEN_COMP_TYPE) #define OP_GEN_AGG_VECT(index) \ (((index) << SDM_OP_GEN_AGG_VECT_IDX_SHIFT) & SDM_OP_GEN_AGG_VECT_IDX) static int bxe_send_final_clnup(struct bxe_softc *sc, uint8_t clnup_func, uint32_t poll_cnt) { uint32_t op_gen_command = 0; uint32_t comp_addr = (BAR_CSTRORM_INTMEM + CSTORM_FINAL_CLEANUP_COMPLETE_OFFSET(clnup_func)); int ret = 0; if (REG_RD(sc, comp_addr)) { BLOGE(sc, "Cleanup complete was not 0 before sending\n"); return (1); } op_gen_command |= OP_GEN_PARAM(XSTORM_AGG_INT_FINAL_CLEANUP_INDEX); op_gen_command |= OP_GEN_TYPE(XSTORM_AGG_INT_FINAL_CLEANUP_COMP_TYPE); op_gen_command |=
OP_GEN_AGG_VECT(clnup_func); op_gen_command |= 1 << SDM_OP_GEN_AGG_VECT_IDX_VALID_SHIFT; BLOGD(sc, DBG_LOAD, "sending FW Final cleanup\n"); REG_WR(sc, XSDM_REG_OPERATION_GEN, op_gen_command); if (bxe_flr_clnup_reg_poll(sc, comp_addr, 1, poll_cnt) != 1) { BLOGE(sc, "FW final cleanup did not succeed\n"); BLOGD(sc, DBG_LOAD, "At timeout completion address contained %x\n", (REG_RD(sc, comp_addr))); bxe_panic(sc, ("FLR cleanup failed\n")); return (1); } /* Zero completion for nxt FLR */ REG_WR(sc, comp_addr, 0); return (ret); } static void bxe_pbf_pN_buf_flushed(struct bxe_softc *sc, struct pbf_pN_buf_regs *regs, uint32_t poll_count) { uint32_t init_crd, crd, crd_start, crd_freed, crd_freed_start; uint32_t cur_cnt = poll_count; crd_freed = crd_freed_start = REG_RD(sc, regs->crd_freed); crd = crd_start = REG_RD(sc, regs->crd); init_crd = REG_RD(sc, regs->init_crd); BLOGD(sc, DBG_LOAD, "INIT CREDIT[%d] : %x\n", regs->pN, init_crd); BLOGD(sc, DBG_LOAD, "CREDIT[%d] : s:%x\n", regs->pN, crd); BLOGD(sc, DBG_LOAD, "CREDIT_FREED[%d]: s:%x\n", regs->pN, crd_freed); while ((crd != init_crd) && ((uint32_t)((int32_t)crd_freed - (int32_t)crd_freed_start) < (init_crd - crd_start))) { if (cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); crd = REG_RD(sc, regs->crd); crd_freed = REG_RD(sc, regs->crd_freed); } else { BLOGD(sc, DBG_LOAD, "PBF tx buffer[%d] timed out\n", regs->pN); BLOGD(sc, DBG_LOAD, "CREDIT[%d] : c:%x\n", regs->pN, crd); BLOGD(sc, DBG_LOAD, "CREDIT_FREED[%d]: c:%x\n", regs->pN, crd_freed); break; } } BLOGD(sc, DBG_LOAD, "Waited %d*%d usec for PBF tx buffer[%d]\n", poll_count-cur_cnt, FLR_WAIT_INTERVAL, regs->pN); } static void bxe_pbf_pN_cmd_flushed(struct bxe_softc *sc, struct pbf_pN_cmd_regs *regs, uint32_t poll_count) { uint32_t occup, to_free, freed, freed_start; uint32_t cur_cnt = poll_count; occup = to_free = REG_RD(sc, regs->lines_occup); freed = freed_start = REG_RD(sc, regs->lines_freed); BLOGD(sc, DBG_LOAD, "OCCUPANCY[%d] : s:%x\n", regs->pN, occup); BLOGD(sc, DBG_LOAD, "LINES_FREED[%d] : s:%x\n", regs->pN, freed); while (occup && ((uint32_t)((int32_t)freed - (int32_t)freed_start) < to_free)) { if (cur_cnt--) { DELAY(FLR_WAIT_INTERVAL); occup = REG_RD(sc, regs->lines_occup); freed = REG_RD(sc, regs->lines_freed); } else { BLOGD(sc, DBG_LOAD, "PBF cmd queue[%d] timed out\n", regs->pN); BLOGD(sc, DBG_LOAD, "OCCUPANCY[%d] : s:%x\n", regs->pN, occup); BLOGD(sc, DBG_LOAD, "LINES_FREED[%d] : s:%x\n", regs->pN, freed); break; } } BLOGD(sc, DBG_LOAD, "Waited %d*%d usec for PBF cmd queue[%d]\n", poll_count - cur_cnt, FLR_WAIT_INTERVAL, regs->pN); } static void bxe_tx_hw_flushed(struct bxe_softc *sc, uint32_t poll_count) { struct pbf_pN_cmd_regs cmd_regs[] = { {0, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_Q0 : PBF_REG_P0_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_Q0 : PBF_REG_P0_TQ_LINES_FREED_CNT}, {1, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_Q1 : PBF_REG_P1_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_Q1 : PBF_REG_P1_TQ_LINES_FREED_CNT}, {4, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_OCCUPANCY_LB_Q : PBF_REG_P4_TQ_OCCUPANCY, (CHIP_IS_E3B0(sc)) ? PBF_REG_TQ_LINES_FREED_CNT_LB_Q : PBF_REG_P4_TQ_LINES_FREED_CNT} }; struct pbf_pN_buf_regs buf_regs[] = { {0, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_Q0 : PBF_REG_P0_INIT_CRD , (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_Q0 : PBF_REG_P0_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_Q0 : PBF_REG_P0_INTERNAL_CRD_FREED_CNT}, {1, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_Q1 : PBF_REG_P1_INIT_CRD, (CHIP_IS_E3B0(sc)) ? 
PBF_REG_CREDIT_Q1 : PBF_REG_P1_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_Q1 : PBF_REG_P1_INTERNAL_CRD_FREED_CNT}, {4, (CHIP_IS_E3B0(sc)) ? PBF_REG_INIT_CRD_LB_Q : PBF_REG_P4_INIT_CRD, (CHIP_IS_E3B0(sc)) ? PBF_REG_CREDIT_LB_Q : PBF_REG_P4_CREDIT, (CHIP_IS_E3B0(sc)) ? PBF_REG_INTERNAL_CRD_FREED_CNT_LB_Q : PBF_REG_P4_INTERNAL_CRD_FREED_CNT}, }; int i; /* Verify the command queues are flushed P0, P1, P4 */ for (i = 0; i < ARRAY_SIZE(cmd_regs); i++) { bxe_pbf_pN_cmd_flushed(sc, &cmd_regs[i], poll_count); } /* Verify the transmission buffers are flushed P0, P1, P4 */ for (i = 0; i < ARRAY_SIZE(buf_regs); i++) { bxe_pbf_pN_buf_flushed(sc, &buf_regs[i], poll_count); } } static void bxe_hw_enable_status(struct bxe_softc *sc) { uint32_t val; val = REG_RD(sc, CFC_REG_WEAK_ENABLE_PF); BLOGD(sc, DBG_LOAD, "CFC_REG_WEAK_ENABLE_PF is 0x%x\n", val); val = REG_RD(sc, PBF_REG_DISABLE_PF); BLOGD(sc, DBG_LOAD, "PBF_REG_DISABLE_PF is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSI_EN); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSI_EN is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_EN); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSIX_EN is 0x%x\n", val); val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_FUNC_MASK); BLOGD(sc, DBG_LOAD, "IGU_REG_PCI_PF_MSIX_FUNC_MASK is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR is 0x%x\n", val); val = REG_RD(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER); BLOGD(sc, DBG_LOAD, "PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER is 0x%x\n", val); } static int bxe_pf_flr_clnup(struct bxe_softc *sc) { uint32_t poll_cnt = bxe_flr_clnup_poll_count(sc); BLOGD(sc, DBG_LOAD, "Cleanup after FLR PF[%d]\n", SC_ABS_FUNC(sc)); /* Re-enable PF target read access */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); /* Poll HW usage counters */ BLOGD(sc, DBG_LOAD, "Polling usage counters\n"); if (bxe_poll_hw_usage_counters(sc, poll_cnt)) { return (-1); } /* Zero the igu 'trailing edge' and 'leading edge' */ /* Send the FW cleanup command */ if (bxe_send_final_clnup(sc, (uint8_t)SC_FUNC(sc), poll_cnt)) { return (-1); } /* ATC cleanup */ /* Verify TX hw is flushed */ bxe_tx_hw_flushed(sc, poll_cnt); /* Wait 100ms (not adjusted according to platform) */ DELAY(100000); /* Verify no pending pci transactions */ if (bxe_is_pcie_pending(sc)) { BLOGE(sc, "PCIE Transactions still pending\n"); } /* Debug */ bxe_hw_enable_status(sc); /* * Master enable - Due to WB DMAE writes performed before this * register is re-initialized as part of the regular function init */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); return (0); } static int bxe_init_hw_func(struct bxe_softc *sc) { int port = SC_PORT(sc); int func = SC_FUNC(sc); int init_phase = PHASE_PF0 + func; struct ecore_ilt *ilt = sc->ilt; uint16_t cdu_ilt_start; uint32_t addr, val; uint32_t main_mem_base, main_mem_size, main_mem_prty_clr; int i, main_mem_width, rc; BLOGD(sc, DBG_LOAD, "starting func init for func %d\n", func); /* FLR cleanup */ if (!CHIP_IS_E1x(sc)) { rc = bxe_pf_flr_clnup(sc); if (rc) { BLOGE(sc, "FLR cleanup failed!\n"); // XXX bxe_fw_dump(sc); // XXX bxe_idle_chk(sc); return (rc); } } /* set MSI reconfigure capability */ if (sc->devinfo.int_block == INT_BLOCK_HC) { addr = (port ? 
HC_REG_CONFIG_1 : HC_REG_CONFIG_0); val = REG_RD(sc, addr); val |= HC_CONFIG_0_REG_MSI_ATTN_EN_0; REG_WR(sc, addr, val); } ecore_init_block(sc, BLOCK_PXP, init_phase); ecore_init_block(sc, BLOCK_PXP2, init_phase); ilt = sc->ilt; cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start; for (i = 0; i < L2_ILT_LINES(sc); i++) { ilt->lines[cdu_ilt_start + i].page = sc->context[i].vcxt; ilt->lines[cdu_ilt_start + i].page_mapping = sc->context[i].vcxt_dma.paddr; ilt->lines[cdu_ilt_start + i].size = sc->context[i].size; } ecore_ilt_init_op(sc, INITOP_SET); /* Set NIC mode */ REG_WR(sc, PRS_REG_NIC_MODE, 1); BLOGD(sc, DBG_LOAD, "NIC MODE configured\n"); if (!CHIP_IS_E1x(sc)) { uint32_t pf_conf = IGU_PF_CONF_FUNC_EN; /* Turn on a single ISR mode in IGU if driver is going to use * INT#x or MSI */ if (sc->interrupt_mode != INTR_MODE_MSIX) { pf_conf |= IGU_PF_CONF_SINGLE_ISR_EN; } /* * Timers workaround bug: function init part. * Need to wait 20msec after initializing ILT, * needed to make sure there are no requests in * one of the PXP internal queues with "old" ILT addresses */ DELAY(20000); /* * Master enable - Due to WB DMAE writes performed before this * register is re-initialized as part of the regular function * init */ REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 1); /* Enable the function in IGU */ REG_WR(sc, IGU_REG_PF_CONFIGURATION, pf_conf); } sc->dmae_ready = 1; ecore_init_block(sc, BLOCK_PGLUE_B, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, func); ecore_init_block(sc, BLOCK_ATC, init_phase); ecore_init_block(sc, BLOCK_DMAE, init_phase); ecore_init_block(sc, BLOCK_NIG, init_phase); ecore_init_block(sc, BLOCK_SRC, init_phase); ecore_init_block(sc, BLOCK_MISC, init_phase); ecore_init_block(sc, BLOCK_TCM, init_phase); ecore_init_block(sc, BLOCK_UCM, init_phase); ecore_init_block(sc, BLOCK_CCM, init_phase); ecore_init_block(sc, BLOCK_XCM, init_phase); ecore_init_block(sc, BLOCK_TSEM, init_phase); ecore_init_block(sc, BLOCK_USEM, init_phase); ecore_init_block(sc, BLOCK_CSEM, init_phase); ecore_init_block(sc, BLOCK_XSEM, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, QM_REG_PF_EN, 1); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, TSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, USEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, CSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); REG_WR(sc, XSEM_REG_VFPF_ERR_NUM, BXE_MAX_NUM_OF_VFS + func); } ecore_init_block(sc, BLOCK_QM, init_phase); ecore_init_block(sc, BLOCK_TM, init_phase); ecore_init_block(sc, BLOCK_DORQ, init_phase); bxe_iov_init_dq(sc); ecore_init_block(sc, BLOCK_BRB1, init_phase); ecore_init_block(sc, BLOCK_PRS, init_phase); ecore_init_block(sc, BLOCK_TSDM, init_phase); ecore_init_block(sc, BLOCK_CSDM, init_phase); ecore_init_block(sc, BLOCK_USDM, init_phase); ecore_init_block(sc, BLOCK_XSDM, init_phase); ecore_init_block(sc, BLOCK_UPB, init_phase); ecore_init_block(sc, BLOCK_XPB, init_phase); ecore_init_block(sc, BLOCK_PBF, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, PBF_REG_DISABLE_PF, 0); ecore_init_block(sc, BLOCK_CDU, init_phase); ecore_init_block(sc, BLOCK_CFC, init_phase); if (!CHIP_IS_E1x(sc)) REG_WR(sc, CFC_REG_WEAK_ENABLE_PF, 1); if (IS_MF(sc)) { REG_WR(sc, NIG_REG_LLH0_FUNC_EN + port*8, 1); REG_WR(sc, NIG_REG_LLH0_FUNC_VLAN_ID + port*8, OVLAN(sc)); } ecore_init_block(sc, BLOCK_MISC_AEU, init_phase); /* HC init per function */ if (sc->devinfo.int_block == INT_BLOCK_HC) { if (CHIP_IS_E1H(sc)) { REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); REG_WR(sc, HC_REG_LEADING_EDGE_0 
+ port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } ecore_init_block(sc, BLOCK_HC, init_phase); } else { int num_segs, sb_idx, prod_offset; REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_12 + func*4, 0); if (!CHIP_IS_E1x(sc)) { REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, 0); REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, 0); } ecore_init_block(sc, BLOCK_IGU, init_phase); if (!CHIP_IS_E1x(sc)) { int dsb_idx = 0; /** * Producer memory: * E2 mode: addresses 0-135 map to the mapping memory; * 136 - PF0 default prod; 137 - PF1 default prod; * 138 - PF2 default prod; 139 - PF3 default prod; * 140 - PF0 attn prod; 141 - PF1 attn prod; * 142 - PF2 attn prod; 143 - PF3 attn prod; * 144-147 reserved. * * E1.5 mode - In backward compatible mode; * for non default SB; each even line in the memory * holds the U producer and each odd line holds * the C producer. The first 128 producers are for * NDSB (PF0 - 0-31; PF1 - 32-63 and so on). The last 20 * producers are for the DSB for each PF. * Each PF has five segments: (the order inside each * segment is PF0; PF1; PF2; PF3) - 128-131 U prods; * 132-135 C prods; 136-139 X prods; 140-143 T prods; * 144-147 attn prods; */ /* non-default-status-blocks */ num_segs = CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_NDSB_NUM_SEGS : IGU_NORM_NDSB_NUM_SEGS; for (sb_idx = 0; sb_idx < sc->igu_sb_cnt; sb_idx++) { prod_offset = (sc->igu_base_sb + sb_idx) * num_segs; for (i = 0; i < num_segs; i++) { addr = IGU_REG_PROD_CONS_MEMORY + (prod_offset + i) * 4; REG_WR(sc, addr, 0); } /* send consumer update with value 0 */ bxe_ack_sb(sc, sc->igu_base_sb + sb_idx, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_igu_clear_sb(sc, sc->igu_base_sb + sb_idx); } /* default-status-blocks */ num_segs = CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_DSB_NUM_SEGS : IGU_NORM_DSB_NUM_SEGS; if (CHIP_IS_MODE_4_PORT(sc)) dsb_idx = SC_FUNC(sc); else dsb_idx = SC_VN(sc); prod_offset = (CHIP_INT_MODE_IS_BC(sc) ? IGU_BC_BASE_DSB_PROD + dsb_idx : IGU_NORM_BASE_DSB_PROD + dsb_idx); /* * igu prods come in chunks of E1HVN_MAX (4) - * it does not matter what the current chip mode is */ for (i = 0; i < (num_segs * E1HVN_MAX); i += E1HVN_MAX) { addr = IGU_REG_PROD_CONS_MEMORY + (prod_offset + i)*4; REG_WR(sc, addr, 0); } /* send consumer update with 0 */ if (CHIP_INT_MODE_IS_BC(sc)) { bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, CSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, XSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, TSTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, 0, IGU_INT_NOP, 1); } else { bxe_ack_sb(sc, sc->igu_dsb_id, USTORM_ID, 0, IGU_INT_NOP, 1); bxe_ack_sb(sc, sc->igu_dsb_id, ATTENTION_ID, 0, IGU_INT_NOP, 1); } bxe_igu_clear_sb(sc, sc->igu_dsb_id); /* !!!
these should become driver const once rf-tool supports split-68 const */ REG_WR(sc, IGU_REG_SB_INT_BEFORE_MASK_LSB, 0); REG_WR(sc, IGU_REG_SB_INT_BEFORE_MASK_MSB, 0); REG_WR(sc, IGU_REG_SB_MASK_LSB, 0); REG_WR(sc, IGU_REG_SB_MASK_MSB, 0); REG_WR(sc, IGU_REG_PBA_STATUS_LSB, 0); REG_WR(sc, IGU_REG_PBA_STATUS_MSB, 0); } } /* Reset PCIE errors for debug */ REG_WR(sc, 0x2114, 0xffffffff); REG_WR(sc, 0x2120, 0xffffffff); if (CHIP_IS_E1x(sc)) { main_mem_size = HC_REG_MAIN_MEMORY_SIZE / 2; /*dwords*/ main_mem_base = HC_REG_MAIN_MEMORY + SC_PORT(sc) * (main_mem_size * 4); main_mem_prty_clr = HC_REG_HC_PRTY_STS_CLR; main_mem_width = 8; val = REG_RD(sc, main_mem_prty_clr); if (val) { BLOGD(sc, DBG_LOAD, "Parity errors in HC block during function init (0x%x)!\n", val); } /* Clear "false" parity errors in MSI-X table */ for (i = main_mem_base; i < main_mem_base + main_mem_size * 4; i += main_mem_width) { bxe_read_dmae(sc, i, main_mem_width / 4); bxe_write_dmae(sc, BXE_SP_MAPPING(sc, wb_data), i, main_mem_width / 4); } /* Clear HC parity attention */ REG_RD(sc, main_mem_prty_clr); } #if 1 /* Enable STORMs SP logging */ REG_WR8(sc, BAR_USTRORM_INTMEM + USTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_TSTRORM_INTMEM + TSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); REG_WR8(sc, BAR_XSTRORM_INTMEM + XSTORM_RECORD_SLOW_PATH_OFFSET(SC_FUNC(sc)), 1); #endif elink_phy_probe(&sc->link_params); return (0); } static void bxe_link_reset(struct bxe_softc *sc) { if (!BXE_NOMCP(sc)) { bxe_acquire_phy_lock(sc); elink_lfa_reset(&sc->link_params, &sc->link_vars); bxe_release_phy_lock(sc); } else { if (!CHIP_REV_IS_SLOW(sc)) { BLOGW(sc, "Bootcode is missing - cannot reset link\n"); } } } static void bxe_reset_port(struct bxe_softc *sc) { int port = SC_PORT(sc); uint32_t val; ELINK_DEBUG_P0(sc, "bxe_reset_port called\n"); /* reset physical Link */ bxe_link_reset(sc); REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port*4, 0); /* Do not rcv packets to BRB */ REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK + port*4, 0x0); /* Do not direct rcv packets that are not for MCP to the BRB */ REG_WR(sc, (port ? NIG_REG_LLH1_BRB1_NOT_MCP : NIG_REG_LLH0_BRB1_NOT_MCP), 0x0); /* Configure AEU */ REG_WR(sc, MISC_REG_AEU_MASK_ATTN_FUNC_0 + port*4, 0); DELAY(100000); /* Check for BRB port occupancy */ val = REG_RD(sc, BRB1_REG_PORT_NUM_OCC_BLOCKS_0 + port*4); if (val) { BLOGD(sc, DBG_LOAD, "BRB1 is not empty, %d blocks are occupied\n", val); } /* TODO: Close Doorbell port? 
*/ } static void bxe_ilt_wr(struct bxe_softc *sc, uint32_t index, bus_addr_t addr) { int reg; uint32_t wb_write[2]; if (CHIP_IS_E1(sc)) { reg = PXP2_REG_RQ_ONCHIP_AT + index*8; } else { reg = PXP2_REG_RQ_ONCHIP_AT_B0 + index*8; } wb_write[0] = ONCHIP_ADDR1(addr); wb_write[1] = ONCHIP_ADDR2(addr); REG_WR_DMAE(sc, reg, wb_write, 2); } static void bxe_clear_func_ilt(struct bxe_softc *sc, uint32_t func) { uint32_t i, base = FUNC_ILT_BASE(func); for (i = base; i < base + ILT_PER_FUNC; i++) { bxe_ilt_wr(sc, i, 0); } } static void bxe_reset_func(struct bxe_softc *sc) { struct bxe_fastpath *fp; int port = SC_PORT(sc); int func = SC_FUNC(sc); int i; /* Disable the function in the FW */ REG_WR8(sc, BAR_XSTRORM_INTMEM + XSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_TSTRORM_INTMEM + TSTORM_FUNC_EN_OFFSET(func), 0); REG_WR8(sc, BAR_USTRORM_INTMEM + USTORM_FUNC_EN_OFFSET(func), 0); /* FP SBs */ FOR_EACH_ETH_QUEUE(sc, i) { fp = &sc->fp[i]; REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_STATE_OFFSET(fp->fw_sb_id), SB_DISABLED); } /* SP SB */ REG_WR8(sc, BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_DATA_STATE_OFFSET(func), SB_DISABLED); for (i = 0; i < XSTORM_SPQ_DATA_SIZE / 4; i++) { REG_WR(sc, BAR_XSTRORM_INTMEM + XSTORM_SPQ_DATA_OFFSET(func), 0); } /* Configure IGU */ if (sc->devinfo.int_block == INT_BLOCK_HC) { REG_WR(sc, HC_REG_LEADING_EDGE_0 + port*8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port*8, 0); } else { REG_WR(sc, IGU_REG_LEADING_EDGE_LATCH, 0); REG_WR(sc, IGU_REG_TRAILING_EDGE_LATCH, 0); } if (CNIC_LOADED(sc)) { /* Disable Timer scan */ REG_WR(sc, TM_REG_EN_LINEAR0_TIMER + port*4, 0); /* * Wait for at least 10ms and up to 2 second for the timers * scan to complete */ for (i = 0; i < 200; i++) { DELAY(10000); if (!REG_RD(sc, TM_REG_LIN0_SCAN_ON + port*4)) break; } } /* Clear ILT */ bxe_clear_func_ilt(sc, func); /* * Timers workaround bug for E2: if this is vnic-3, * we need to set the entire ilt range for this timers. */ if (!CHIP_IS_E1x(sc) && SC_VN(sc) == 3) { struct ilt_client_info ilt_cli; /* use dummy TM client */ memset(&ilt_cli, 0, sizeof(struct ilt_client_info)); ilt_cli.start = 0; ilt_cli.end = ILT_NUM_PAGE_ENTRIES - 1; ilt_cli.client_num = ILT_CLIENT_TM; ecore_ilt_boundry_init_op(sc, &ilt_cli, 0, INITOP_CLEAR); } /* this assumes that reset_port() called before reset_func()*/ if (!CHIP_IS_E1x(sc)) { bxe_pf_disable(sc); } sc->dmae_ready = 0; } static int bxe_gunzip_init(struct bxe_softc *sc) { return (0); } static void bxe_gunzip_end(struct bxe_softc *sc) { return; } static int bxe_init_firmware(struct bxe_softc *sc) { if (CHIP_IS_E1(sc)) { ecore_init_e1_firmware(sc); sc->iro_array = e1_iro_arr; } else if (CHIP_IS_E1H(sc)) { ecore_init_e1h_firmware(sc); sc->iro_array = e1h_iro_arr; } else if (!CHIP_IS_E1x(sc)) { ecore_init_e2_firmware(sc); sc->iro_array = e2_iro_arr; } else { BLOGE(sc, "Unsupported chip revision\n"); return (-1); } return (0); } static void bxe_release_firmware(struct bxe_softc *sc) { /* Do nothing */ return; } static int ecore_gunzip(struct bxe_softc *sc, const uint8_t *zbuf, int len) { /* XXX : Implement... 
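 * (Left as a stub in this driver: the body below only logs
 * "ECORE_GUNZIP NOT IMPLEMENTED" and returns FALSE.)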
*/ BLOGD(sc, DBG_LOAD, "ECORE_GUNZIP NOT IMPLEMENTED\n"); return (FALSE); } static void ecore_reg_wr_ind(struct bxe_softc *sc, uint32_t addr, uint32_t val) { bxe_reg_wr_ind(sc, addr, val); } static void ecore_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { bxe_write_dmae_phys_len(sc, phys_addr, addr, len); } void ecore_storm_memset_struct(struct bxe_softc *sc, uint32_t addr, size_t size, uint32_t *data) { uint8_t i; for (i = 0; i < size/4; i++) { REG_WR(sc, addr + (i * 4), data[i]); } } /* * character device - ioctl interface definitions */ #include "bxe_dump.h" #include "bxe_ioctl.h" #include static int bxe_eioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td); static struct cdevsw bxe_cdevsw = { .d_version = D_VERSION, .d_ioctl = bxe_eioctl, .d_name = "bxecnic", }; #define BXE_PATH(sc) (CHIP_IS_E1x(sc) ? 0 : (sc->pcie_func & 1)) #define DUMP_ALL_PRESETS 0x1FFF #define DUMP_MAX_PRESETS 13 #define IS_E1_REG(chips) ((chips & DUMP_CHIP_E1) == DUMP_CHIP_E1) #define IS_E1H_REG(chips) ((chips & DUMP_CHIP_E1H) == DUMP_CHIP_E1H) #define IS_E2_REG(chips) ((chips & DUMP_CHIP_E2) == DUMP_CHIP_E2) #define IS_E3A0_REG(chips) ((chips & DUMP_CHIP_E3A0) == DUMP_CHIP_E3A0) #define IS_E3B0_REG(chips) ((chips & DUMP_CHIP_E3B0) == DUMP_CHIP_E3B0) #define IS_REG_IN_PRESET(presets, idx) \ ((presets & (1 << (idx-1))) == (1 << (idx-1))) static int bxe_get_preset_regs_len(struct bxe_softc *sc, uint32_t preset) { if (CHIP_IS_E1(sc)) return dump_num_registers[0][preset-1]; else if (CHIP_IS_E1H(sc)) return dump_num_registers[1][preset-1]; else if (CHIP_IS_E2(sc)) return dump_num_registers[2][preset-1]; else if (CHIP_IS_E3A0(sc)) return dump_num_registers[3][preset-1]; else if (CHIP_IS_E3B0(sc)) return dump_num_registers[4][preset-1]; else return 0; } static int bxe_get_total_regs_len32(struct bxe_softc *sc) { uint32_t preset_idx; int regdump_len32 = 0; /* Calculate the total preset regs length */ for (preset_idx = 1; preset_idx <= DUMP_MAX_PRESETS; preset_idx++) { regdump_len32 += bxe_get_preset_regs_len(sc, preset_idx); } return regdump_len32; } static const uint32_t * __bxe_get_page_addr_ar(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return page_vals_e2; else if (CHIP_IS_E3(sc)) return page_vals_e3; else return NULL; } static uint32_t __bxe_get_page_reg_num(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return PAGE_MODE_VALUES_E2; else if (CHIP_IS_E3(sc)) return PAGE_MODE_VALUES_E3; else return 0; } static const uint32_t * __bxe_get_page_write_ar(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return page_write_regs_e2; else if (CHIP_IS_E3(sc)) return page_write_regs_e3; else return NULL; } static uint32_t __bxe_get_page_write_num(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return PAGE_WRITE_REGS_E2; else if (CHIP_IS_E3(sc)) return PAGE_WRITE_REGS_E3; else return 0; } static const struct reg_addr * __bxe_get_page_read_ar(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return page_read_regs_e2; else if (CHIP_IS_E3(sc)) return page_read_regs_e3; else return NULL; } static uint32_t __bxe_get_page_read_num(struct bxe_softc *sc) { if (CHIP_IS_E2(sc)) return PAGE_READ_REGS_E2; else if (CHIP_IS_E3(sc)) return PAGE_READ_REGS_E3; else return 0; } static bool bxe_is_reg_in_chip(struct bxe_softc *sc, const struct reg_addr *reg_info) { if (CHIP_IS_E1(sc)) return IS_E1_REG(reg_info->chips); else if (CHIP_IS_E1H(sc)) return IS_E1H_REG(reg_info->chips); else if (CHIP_IS_E2(sc)) return IS_E2_REG(reg_info->chips); else if (CHIP_IS_E3A0(sc)) return 
IS_E3A0_REG(reg_info->chips); else if (CHIP_IS_E3B0(sc)) return IS_E3B0_REG(reg_info->chips); else return 0; } static bool bxe_is_wreg_in_chip(struct bxe_softc *sc, const struct wreg_addr *wreg_info) { if (CHIP_IS_E1(sc)) return IS_E1_REG(wreg_info->chips); else if (CHIP_IS_E1H(sc)) return IS_E1H_REG(wreg_info->chips); else if (CHIP_IS_E2(sc)) return IS_E2_REG(wreg_info->chips); else if (CHIP_IS_E3A0(sc)) return IS_E3A0_REG(wreg_info->chips); else if (CHIP_IS_E3B0(sc)) return IS_E3B0_REG(wreg_info->chips); else return 0; } /** * bxe_read_pages_regs - read "paged" registers * * @sc device handle * @p output buffer * * Reads "paged" memories: memories that may only be read by first writing to a * specific address ("write address") and then reading from a specific address * ("read address"). There may be more than one write address per "page" and * more than one read address per write address. */ static void bxe_read_pages_regs(struct bxe_softc *sc, uint32_t *p, uint32_t preset) { uint32_t i, j, k, n; /* addresses of the paged registers */ const uint32_t *page_addr = __bxe_get_page_addr_ar(sc); /* number of paged registers */ int num_pages = __bxe_get_page_reg_num(sc); /* write addresses */ const uint32_t *write_addr = __bxe_get_page_write_ar(sc); /* number of write addresses */ int write_num = __bxe_get_page_write_num(sc); /* read addresses info */ const struct reg_addr *read_addr = __bxe_get_page_read_ar(sc); /* number of read addresses */ int read_num = __bxe_get_page_read_num(sc); uint32_t addr, size; for (i = 0; i < num_pages; i++) { for (j = 0; j < write_num; j++) { REG_WR(sc, write_addr[j], page_addr[i]); for (k = 0; k < read_num; k++) { if (IS_REG_IN_PRESET(read_addr[k].presets, preset)) { size = read_addr[k].size; for (n = 0; n < size; n++) { addr = read_addr[k].addr + n*4; *p++ = REG_RD(sc, addr); } } } } } return; } static int bxe_get_preset_regs(struct bxe_softc *sc, uint32_t *p, uint32_t preset) { uint32_t i, j, addr; const struct wreg_addr *wreg_addr_p = NULL; if (CHIP_IS_E1(sc)) wreg_addr_p = &wreg_addr_e1; else if (CHIP_IS_E1H(sc)) wreg_addr_p = &wreg_addr_e1h; else if (CHIP_IS_E2(sc)) wreg_addr_p = &wreg_addr_e2; else if (CHIP_IS_E3A0(sc)) wreg_addr_p = &wreg_addr_e3; else if (CHIP_IS_E3B0(sc)) wreg_addr_p = &wreg_addr_e3b0; else return (-1); /* Read the idle_chk registers */ for (i = 0; i < IDLE_REGS_COUNT; i++) { if (bxe_is_reg_in_chip(sc, &idle_reg_addrs[i]) && IS_REG_IN_PRESET(idle_reg_addrs[i].presets, preset)) { for (j = 0; j < idle_reg_addrs[i].size; j++) *p++ = REG_RD(sc, idle_reg_addrs[i].addr + j*4); } } /* Read the regular registers */ for (i = 0; i < REGS_COUNT; i++) { if (bxe_is_reg_in_chip(sc, &reg_addrs[i]) && IS_REG_IN_PRESET(reg_addrs[i].presets, preset)) { for (j = 0; j < reg_addrs[i].size; j++) *p++ = REG_RD(sc, reg_addrs[i].addr + j*4); } } /* Read the CAM registers */ if (bxe_is_wreg_in_chip(sc, wreg_addr_p) && IS_REG_IN_PRESET(wreg_addr_p->presets, preset)) { for (i = 0; i < wreg_addr_p->size; i++) { *p++ = REG_RD(sc, wreg_addr_p->addr + i*4); /* In case of wreg_addr register, read additional registers from read_regs array */ for (j = 0; j < wreg_addr_p->read_regs_count; j++) { addr = *(wreg_addr_p->read_regs); *p++ = REG_RD(sc, addr + j*4); } } } /* Paged registers are supported in E2 & E3 only */ if (CHIP_IS_E2(sc) || CHIP_IS_E3(sc)) { /* Read "paged" registers */ bxe_read_pages_regs(sc, p, preset); } return 0; } int bxe_grc_dump(struct bxe_softc *sc) { int rval = 0; uint32_t preset_idx; uint8_t *buf; uint32_t size; struct dump_header *d_hdr; uint32_t i;
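/*
 * Layout of the dump written below: a struct dump_header followed by the
 * register blocks of each preset, in preset order; presets containing
 * IORs (2, 5, 8 and 11) are skipped when the buffer is filled.
 */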
uint32_t reg_val; uint32_t reg_addr; uint32_t cmd_offset; struct ecore_ilt *ilt = SC_ILT(sc); struct bxe_fastpath *fp; struct ilt_client_info *ilt_cli; int grc_dump_size; if (sc->grcdump_done || sc->grcdump_started) return (rval); sc->grcdump_started = 1; BLOGI(sc, "Started collecting grcdump\n"); grc_dump_size = (bxe_get_total_regs_len32(sc) * sizeof(uint32_t)) + sizeof(struct dump_header); sc->grc_dump = malloc(grc_dump_size, M_DEVBUF, M_NOWAIT); if (sc->grc_dump == NULL) { BLOGW(sc, "Unable to allocate memory for grcdump collection\n"); return(ENOMEM); } /* Disable parity attentions as long as following dump may * cause false alarms by reading never written registers. We * will re-enable parity attentions right after the dump. */ /* Disable parity on path 0 */ bxe_pretend_func(sc, 0); ecore_disable_blocks_parity(sc); /* Disable parity on path 1 */ bxe_pretend_func(sc, 1); ecore_disable_blocks_parity(sc); /* Return to current function */ bxe_pretend_func(sc, SC_ABS_FUNC(sc)); buf = sc->grc_dump; d_hdr = sc->grc_dump; d_hdr->header_size = (sizeof(struct dump_header) >> 2) - 1; d_hdr->version = BNX2X_DUMP_VERSION; d_hdr->preset = DUMP_ALL_PRESETS; if (CHIP_IS_E1(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E1; } else if (CHIP_IS_E1H(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E1H; } else if (CHIP_IS_E2(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E2 | (BXE_PATH(sc) ? DUMP_PATH_1 : DUMP_PATH_0); } else if (CHIP_IS_E3A0(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E3A0 | (BXE_PATH(sc) ? DUMP_PATH_1 : DUMP_PATH_0); } else if (CHIP_IS_E3B0(sc)) { d_hdr->dump_meta_data = DUMP_CHIP_E3B0 | (BXE_PATH(sc) ? DUMP_PATH_1 : DUMP_PATH_0); } buf += sizeof(struct dump_header); for (preset_idx = 1; preset_idx <= DUMP_MAX_PRESETS; preset_idx++) { /* Skip presets with IOR */ if ((preset_idx == 2) || (preset_idx == 5) || (preset_idx == 8) || (preset_idx == 11)) continue; rval = bxe_get_preset_regs(sc, (uint32_t *)buf, preset_idx); if (rval) break; size = bxe_get_preset_regs_len(sc, preset_idx) * (sizeof (uint32_t)); buf += size; } bxe_pretend_func(sc, 0); ecore_clear_blocks_parity(sc); ecore_enable_blocks_parity(sc); bxe_pretend_func(sc, 1); ecore_clear_blocks_parity(sc); ecore_enable_blocks_parity(sc); /* Return to current function */ bxe_pretend_func(sc, SC_ABS_FUNC(sc)); if(sc->state == BXE_STATE_OPEN) { if(sc->fw_stats_req != NULL) { BLOGI(sc, "fw stats start_paddr %#jx end_paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)sc->fw_stats_req_mapping, (uintmax_t)sc->fw_stats_data_mapping, sc->fw_stats_req, (sc->fw_stats_req_size + sc->fw_stats_data_size)); } if(sc->def_sb != NULL) { BLOGI(sc, "def_status_block paddr %p vaddr %p size 0x%zx\n", (void *)sc->def_sb_dma.paddr, sc->def_sb, sizeof(struct host_sp_status_block)); } if(sc->eq_dma.vaddr != NULL) { BLOGI(sc, "event_queue paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)sc->eq_dma.paddr, sc->eq_dma.vaddr, BCM_PAGE_SIZE); } if(sc->sp_dma.vaddr != NULL) { BLOGI(sc, "slow path paddr %#jx vaddr %p size 0x%zx\n", (uintmax_t)sc->sp_dma.paddr, sc->sp_dma.vaddr, sizeof(struct bxe_slowpath)); } if(sc->spq_dma.vaddr != NULL) { BLOGI(sc, "slow path queue paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)sc->spq_dma.paddr, sc->spq_dma.vaddr, BCM_PAGE_SIZE); } if(sc->gz_buf_dma.vaddr != NULL) { BLOGI(sc, "fw_buf paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)sc->gz_buf_dma.paddr, sc->gz_buf_dma.vaddr, FW_BUF_SIZE); } for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; if(fp->sb_dma.vaddr != NULL && fp->tx_dma.vaddr != NULL && fp->rx_dma.vaddr != NULL && fp->rcq_dma.vaddr != NULL && 
fp->rx_sge_dma.vaddr != NULL) { BLOGI(sc, "FP status block fp %d paddr %#jx vaddr %p size 0x%zx\n", i, (uintmax_t)fp->sb_dma.paddr, fp->sb_dma.vaddr, sizeof(union bxe_host_hc_status_block)); BLOGI(sc, "TX BD CHAIN fp %d paddr %#jx vaddr %p size 0x%x\n", i, (uintmax_t)fp->tx_dma.paddr, fp->tx_dma.vaddr, (BCM_PAGE_SIZE * TX_BD_NUM_PAGES)); BLOGI(sc, "RX BD CHAIN fp %d paddr %#jx vaddr %p size 0x%x\n", i, (uintmax_t)fp->rx_dma.paddr, fp->rx_dma.vaddr, (BCM_PAGE_SIZE * RX_BD_NUM_PAGES)); BLOGI(sc, "RX RCQ CHAIN fp %d paddr %#jx vaddr %p size 0x%zx\n", i, (uintmax_t)fp->rcq_dma.paddr, fp->rcq_dma.vaddr, (BCM_PAGE_SIZE * RCQ_NUM_PAGES)); BLOGI(sc, "RX SGE CHAIN fp %d paddr %#jx vaddr %p size 0x%x\n", i, (uintmax_t)fp->rx_sge_dma.paddr, fp->rx_sge_dma.vaddr, (BCM_PAGE_SIZE * RX_SGE_NUM_PAGES)); } } if(ilt != NULL ) { ilt_cli = &ilt->clients[1]; if(ilt->lines != NULL) { for (i = ilt_cli->start; i <= ilt_cli->end; i++) { BLOGI(sc, "ECORE_ILT paddr %#jx vaddr %p size 0x%x\n", (uintmax_t)(((struct bxe_dma *)((&ilt->lines[i])->page))->paddr), ((struct bxe_dma *)((&ilt->lines[i])->page))->vaddr, BCM_PAGE_SIZE); } } } cmd_offset = DMAE_REG_CMD_MEM; for (i = 0; i < 224; i++) { reg_addr = (cmd_offset +(i * 4)); reg_val = REG_RD(sc, reg_addr); BLOGI(sc, "DMAE_REG_CMD_MEM i=%d reg_addr 0x%x reg_val 0x%08x\n",i, reg_addr, reg_val); } } BLOGI(sc, "Collection of grcdump done\n"); sc->grcdump_done = 1; return(rval); } static int bxe_add_cdev(struct bxe_softc *sc) { sc->eeprom = malloc(BXE_EEPROM_MAX_DATA_LEN, M_DEVBUF, M_NOWAIT); if (sc->eeprom == NULL) { BLOGW(sc, "Unable to alloc for eeprom size buffer\n"); return (-1); } sc->ioctl_dev = make_dev(&bxe_cdevsw, sc->ifp->if_dunit, UID_ROOT, GID_WHEEL, 0600, "%s", if_name(sc->ifp)); if (sc->ioctl_dev == NULL) { free(sc->eeprom, M_DEVBUF); sc->eeprom = NULL; return (-1); } sc->ioctl_dev->si_drv1 = sc; return (0); } static void bxe_del_cdev(struct bxe_softc *sc) { if (sc->ioctl_dev != NULL) destroy_dev(sc->ioctl_dev); if (sc->eeprom != NULL) { free(sc->eeprom, M_DEVBUF); sc->eeprom = NULL; } sc->ioctl_dev = NULL; return; } static bool bxe_is_nvram_accessible(struct bxe_softc *sc) { if ((if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) == 0) return FALSE; return TRUE; } static int bxe_wr_eeprom(struct bxe_softc *sc, void *data, uint32_t offset, uint32_t len) { int rval = 0; if(!bxe_is_nvram_accessible(sc)) { BLOGW(sc, "Cannot access eeprom when interface is down\n"); return (-EAGAIN); } rval = bxe_nvram_write(sc, offset, (uint8_t *)data, len); return (rval); } static int bxe_rd_eeprom(struct bxe_softc *sc, void *data, uint32_t offset, uint32_t len) { int rval = 0; if(!bxe_is_nvram_accessible(sc)) { BLOGW(sc, "Cannot access eeprom when interface is down\n"); return (-EAGAIN); } rval = bxe_nvram_read(sc, offset, (uint8_t *)data, len); return (rval); } static int bxe_eeprom_rd_wr(struct bxe_softc *sc, bxe_eeprom_t *eeprom) { int rval = 0; switch (eeprom->eeprom_cmd) { case BXE_EEPROM_CMD_SET_EEPROM: rval = copyin(eeprom->eeprom_data, sc->eeprom, eeprom->eeprom_data_len); if (rval) break; rval = bxe_wr_eeprom(sc, sc->eeprom, eeprom->eeprom_offset, eeprom->eeprom_data_len); break; case BXE_EEPROM_CMD_GET_EEPROM: rval = bxe_rd_eeprom(sc, sc->eeprom, eeprom->eeprom_offset, eeprom->eeprom_data_len); if (rval) { break; } rval = copyout(sc->eeprom, eeprom->eeprom_data, eeprom->eeprom_data_len); break; default: rval = EINVAL; break; } if (rval) { BLOGW(sc, "ioctl cmd %d failed rval %d\n", eeprom->eeprom_cmd, rval); } return (rval); } static int bxe_get_settings(struct bxe_softc *sc, 
bxe_dev_setting_t *dev_p) { uint32_t ext_phy_config; int port = SC_PORT(sc); int cfg_idx = bxe_get_link_cfg_idx(sc); dev_p->supported = sc->port.supported[cfg_idx] | (sc->port.supported[cfg_idx ^ 1] & (ELINK_SUPPORTED_TP | ELINK_SUPPORTED_FIBRE)); dev_p->advertising = sc->port.advertising[cfg_idx]; if(sc->link_params.phy[bxe_get_cur_phy_idx(sc)].media_type == ELINK_ETH_PHY_SFP_1G_FIBER) { dev_p->supported = ~(ELINK_SUPPORTED_10000baseT_Full); dev_p->advertising &= ~(ADVERTISED_10000baseT_Full); } if ((sc->state == BXE_STATE_OPEN) && sc->link_vars.link_up && !(sc->flags & BXE_MF_FUNC_DIS)) { dev_p->duplex = sc->link_vars.duplex; if (IS_MF(sc) && !BXE_NOMCP(sc)) dev_p->speed = bxe_get_mf_speed(sc); else dev_p->speed = sc->link_vars.line_speed; } else { dev_p->duplex = DUPLEX_UNKNOWN; dev_p->speed = SPEED_UNKNOWN; } dev_p->port = bxe_media_detect(sc); ext_phy_config = SHMEM_RD(sc, dev_info.port_hw_config[port].external_phy_config); if((ext_phy_config & PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK) == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT) dev_p->phy_address = sc->port.phy_addr; else if(((ext_phy_config & PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK) != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE) && ((ext_phy_config & PORT_HW_CFG_XGXS_EXT_PHY_TYPE_MASK) != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN)) dev_p->phy_address = ELINK_XGXS_EXT_PHY_ADDR(ext_phy_config); else dev_p->phy_address = 0; if(sc->link_params.req_line_speed[cfg_idx] == ELINK_SPEED_AUTO_NEG) dev_p->autoneg = AUTONEG_ENABLE; else dev_p->autoneg = AUTONEG_DISABLE; return 0; } static int bxe_eioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct bxe_softc *sc; int rval = 0; device_t pci_dev; bxe_grcdump_t *dump = NULL; int grc_dump_size; bxe_drvinfo_t *drv_infop = NULL; bxe_dev_setting_t *dev_p; bxe_dev_setting_t dev_set; bxe_get_regs_t *reg_p; bxe_reg_rdw_t *reg_rdw_p; bxe_pcicfg_rdw_t *cfg_rdw_p; bxe_perm_mac_addr_t *mac_addr_p; if ((sc = (struct bxe_softc *)dev->si_drv1) == NULL) return ENXIO; pci_dev= sc->dev; dump = (bxe_grcdump_t *)data; switch(cmd) { case BXE_GRC_DUMP_SIZE: dump->pci_func = sc->pcie_func; dump->grcdump_size = (bxe_get_total_regs_len32(sc) * sizeof(uint32_t)) + sizeof(struct dump_header); break; case BXE_GRC_DUMP: grc_dump_size = (bxe_get_total_regs_len32(sc) * sizeof(uint32_t)) + sizeof(struct dump_header); if ((!sc->trigger_grcdump) || (dump->grcdump == NULL) || (dump->grcdump_size < grc_dump_size)) { rval = EINVAL; break; } if((sc->trigger_grcdump) && (!sc->grcdump_done) && (!sc->grcdump_started)) { rval = bxe_grc_dump(sc); } if((!rval) && (sc->grcdump_done) && (sc->grcdump_started) && (sc->grc_dump != NULL)) { dump->grcdump_dwords = grc_dump_size >> 2; rval = copyout(sc->grc_dump, dump->grcdump, grc_dump_size); free(sc->grc_dump, M_DEVBUF); sc->grc_dump = NULL; sc->grcdump_started = 0; sc->grcdump_done = 0; } break; case BXE_DRV_INFO: drv_infop = (bxe_drvinfo_t *)data; snprintf(drv_infop->drv_name, BXE_DRV_NAME_LENGTH, "%s", "bxe"); snprintf(drv_infop->drv_version, BXE_DRV_VERSION_LENGTH, "v:%s", BXE_DRIVER_VERSION); snprintf(drv_infop->mfw_version, BXE_MFW_VERSION_LENGTH, "%s", sc->devinfo.bc_ver_str); snprintf(drv_infop->stormfw_version, BXE_STORMFW_VERSION_LENGTH, "%s", sc->fw_ver_str); drv_infop->eeprom_dump_len = sc->devinfo.flash_size; drv_infop->reg_dump_len = (bxe_get_total_regs_len32(sc) * sizeof(uint32_t)) + sizeof(struct dump_header); snprintf(drv_infop->bus_info, BXE_BUS_INFO_LENGTH, "%d:%d:%d", sc->pcie_bus, sc->pcie_device, sc->pcie_func); break; case BXE_DEV_SETTING: dev_p = 
(bxe_dev_setting_t *)data; bxe_get_settings(sc, &dev_set); dev_p->supported = dev_set.supported; dev_p->advertising = dev_set.advertising; dev_p->speed = dev_set.speed; dev_p->duplex = dev_set.duplex; dev_p->port = dev_set.port; dev_p->phy_address = dev_set.phy_address; dev_p->autoneg = dev_set.autoneg; break; case BXE_GET_REGS: reg_p = (bxe_get_regs_t *)data; grc_dump_size = reg_p->reg_buf_len; if((!sc->grcdump_done) && (!sc->grcdump_started)) { bxe_grc_dump(sc); } if((sc->grcdump_done) && (sc->grcdump_started) && (sc->grc_dump != NULL)) { rval = copyout(sc->grc_dump, reg_p->reg_buf, grc_dump_size); free(sc->grc_dump, M_DEVBUF); sc->grc_dump = NULL; sc->grcdump_started = 0; sc->grcdump_done = 0; } break; case BXE_RDW_REG: reg_rdw_p = (bxe_reg_rdw_t *)data; if((reg_rdw_p->reg_cmd == BXE_READ_REG_CMD) && (reg_rdw_p->reg_access_type == BXE_REG_ACCESS_DIRECT)) reg_rdw_p->reg_val = REG_RD(sc, reg_rdw_p->reg_id); if((reg_rdw_p->reg_cmd == BXE_WRITE_REG_CMD) && (reg_rdw_p->reg_access_type == BXE_REG_ACCESS_DIRECT)) REG_WR(sc, reg_rdw_p->reg_id, reg_rdw_p->reg_val); break; case BXE_RDW_PCICFG: cfg_rdw_p = (bxe_pcicfg_rdw_t *)data; if(cfg_rdw_p->cfg_cmd == BXE_READ_PCICFG) { cfg_rdw_p->cfg_val = pci_read_config(sc->dev, cfg_rdw_p->cfg_id, cfg_rdw_p->cfg_width); } else if(cfg_rdw_p->cfg_cmd == BXE_WRITE_PCICFG) { pci_write_config(sc->dev, cfg_rdw_p->cfg_id, cfg_rdw_p->cfg_val, cfg_rdw_p->cfg_width); } else { BLOGW(sc, "BXE_RDW_PCICFG ioctl wrong cmd passed\n"); } break; case BXE_MAC_ADDR: mac_addr_p = (bxe_perm_mac_addr_t *)data; snprintf(mac_addr_p->mac_addr_str, sizeof(sc->mac_addr_str), "%s", sc->mac_addr_str); break; case BXE_EEPROM: rval = bxe_eeprom_rd_wr(sc, (bxe_eeprom_t *)data); break; default: break; } return (rval); } #ifdef NETDUMP static void bxe_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) { struct bxe_softc *sc; sc = if_getsoftc(ifp); BXE_CORE_LOCK(sc); *nrxr = sc->num_queues; *ncl = NETDUMP_MAX_IN_FLIGHT; *clsize = sc->fp[0].mbuf_alloc_size; BXE_CORE_UNLOCK(sc); } static void bxe_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused) { } static int bxe_netdump_transmit(struct ifnet *ifp, struct mbuf *m) { struct bxe_softc *sc; int error; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || !sc->link_vars.link_up) return (ENOENT); error = bxe_tx_encap(&sc->fp[0], &m); if (error != 0 && m != NULL) m_freem(m); return (error); } static int bxe_netdump_poll(struct ifnet *ifp, int count) { struct bxe_softc *sc; int i; sc = if_getsoftc(ifp); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || !sc->link_vars.link_up) return (ENOENT); for (i = 0; i < sc->num_queues; i++) (void)bxe_rxeof(sc, &sc->fp[i]); (void)bxe_txeof(sc, &sc->fp[0]); return (0); } #endif /* NETDUMP */ Index: projects/pnfs-planb-server/sys/dev/drm2/i915/intel_display.c =================================================================== --- projects/pnfs-planb-server/sys/dev/drm2/i915/intel_display.c (revision 334966) +++ projects/pnfs-planb-server/sys/dev/drm2/i915/intel_display.c (revision 334967) @@ -1,9560 +1,9568 @@ /* * Copyright © 2006-2007 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom 
the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Authors: * Eric Anholt */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include bool intel_pipe_has_type(struct drm_crtc *crtc, int type); static void intel_increase_pllclock(struct drm_crtc *crtc); static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on); typedef struct { /* given values */ int n; int m1, m2; int p1, p2; /* derived values */ int dot; int vco; int m; int p; } intel_clock_t; typedef struct { int min, max; } intel_range_t; typedef struct { int dot_limit; int p2_slow, p2_fast; } intel_p2_t; #define INTEL_P2_NUM 2 typedef struct intel_limit intel_limit_t; struct intel_limit { intel_range_t dot, vco, n, m, m1, m2, p, p1; intel_p2_t p2; bool (* find_pll)(const intel_limit_t *, struct drm_crtc *, int, int, intel_clock_t *, intel_clock_t *); }; /* FDI */ #define IRONLAKE_FDI_FREQ 2700000 /* in kHz for mode->clock */ int intel_pch_rawclk(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; WARN_ON(!HAS_PCH_SPLIT(dev)); return I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK; } static bool intel_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock); static bool intel_g4x_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock); static bool intel_find_pll_g4x_dp(const intel_limit_t *, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock); static bool intel_find_pll_ironlake_dp(const intel_limit_t *, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock); static bool intel_vlv_find_best_pll(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock); static inline u32 /* units of 100MHz */ intel_fdi_link_freq(struct drm_device *dev) { if (IS_GEN5(dev)) { struct drm_i915_private *dev_priv = dev->dev_private; return (I915_READ(FDI_PLL_BIOS_0) & FDI_PLL_FB_CLOCK_MASK) + 2; } else return 27; } static const intel_limit_t intel_limits_i8xx_dvo = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 930000, .max = 1400000 }, .n = { .min = 3, .max = 16 }, .m = { .min = 96, .max = 140 }, .m1 = { .min = 18, .max = 26 }, .m2 = { .min = 6, .max = 16 }, .p = { .min = 4, .max = 128 }, .p1 = { .min = 2, .max = 33 }, .p2 = { .dot_limit = 165000, .p2_slow = 4, .p2_fast = 2 }, .find_pll = intel_find_best_PLL, }; static const intel_limit_t intel_limits_i8xx_lvds = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 930000, .max = 1400000 }, .n = { .min = 3, .max = 16 }, .m = { .min = 96, .max = 140 }, .m1 = { .min = 18, .max = 26 }, .m2 = { .min = 6, .max = 16 }, .p = { .min 
= 4, .max = 128 }, .p1 = { .min = 1, .max = 6 }, .p2 = { .dot_limit = 165000, .p2_slow = 14, .p2_fast = 7 }, .find_pll = intel_find_best_PLL, }; static const intel_limit_t intel_limits_i9xx_sdvo = { .dot = { .min = 20000, .max = 400000 }, .vco = { .min = 1400000, .max = 2800000 }, .n = { .min = 1, .max = 6 }, .m = { .min = 70, .max = 120 }, .m1 = { .min = 8, .max = 18 }, .m2 = { .min = 3, .max = 7 }, .p = { .min = 5, .max = 80 }, .p1 = { .min = 1, .max = 8 }, .p2 = { .dot_limit = 200000, .p2_slow = 10, .p2_fast = 5 }, .find_pll = intel_find_best_PLL, }; static const intel_limit_t intel_limits_i9xx_lvds = { .dot = { .min = 20000, .max = 400000 }, .vco = { .min = 1400000, .max = 2800000 }, .n = { .min = 1, .max = 6 }, .m = { .min = 70, .max = 120 }, .m1 = { .min = 10, .max = 22 }, .m2 = { .min = 5, .max = 9 }, .p = { .min = 7, .max = 98 }, .p1 = { .min = 1, .max = 8 }, .p2 = { .dot_limit = 112000, .p2_slow = 14, .p2_fast = 7 }, .find_pll = intel_find_best_PLL, }; static const intel_limit_t intel_limits_g4x_sdvo = { .dot = { .min = 25000, .max = 270000 }, .vco = { .min = 1750000, .max = 3500000}, .n = { .min = 1, .max = 4 }, .m = { .min = 104, .max = 138 }, .m1 = { .min = 17, .max = 23 }, .m2 = { .min = 5, .max = 11 }, .p = { .min = 10, .max = 30 }, .p1 = { .min = 1, .max = 3}, .p2 = { .dot_limit = 270000, .p2_slow = 10, .p2_fast = 10 }, .find_pll = intel_g4x_find_best_PLL, }; static const intel_limit_t intel_limits_g4x_hdmi = { .dot = { .min = 22000, .max = 400000 }, .vco = { .min = 1750000, .max = 3500000}, .n = { .min = 1, .max = 4 }, .m = { .min = 104, .max = 138 }, .m1 = { .min = 16, .max = 23 }, .m2 = { .min = 5, .max = 11 }, .p = { .min = 5, .max = 80 }, .p1 = { .min = 1, .max = 8}, .p2 = { .dot_limit = 165000, .p2_slow = 10, .p2_fast = 5 }, .find_pll = intel_g4x_find_best_PLL, }; static const intel_limit_t intel_limits_g4x_single_channel_lvds = { .dot = { .min = 20000, .max = 115000 }, .vco = { .min = 1750000, .max = 3500000 }, .n = { .min = 1, .max = 3 }, .m = { .min = 104, .max = 138 }, .m1 = { .min = 17, .max = 23 }, .m2 = { .min = 5, .max = 11 }, .p = { .min = 28, .max = 112 }, .p1 = { .min = 2, .max = 8 }, .p2 = { .dot_limit = 0, .p2_slow = 14, .p2_fast = 14 }, .find_pll = intel_g4x_find_best_PLL, }; static const intel_limit_t intel_limits_g4x_dual_channel_lvds = { .dot = { .min = 80000, .max = 224000 }, .vco = { .min = 1750000, .max = 3500000 }, .n = { .min = 1, .max = 3 }, .m = { .min = 104, .max = 138 }, .m1 = { .min = 17, .max = 23 }, .m2 = { .min = 5, .max = 11 }, .p = { .min = 14, .max = 42 }, .p1 = { .min = 2, .max = 6 }, .p2 = { .dot_limit = 0, .p2_slow = 7, .p2_fast = 7 }, .find_pll = intel_g4x_find_best_PLL, }; static const intel_limit_t intel_limits_g4x_display_port = { .dot = { .min = 161670, .max = 227000 }, .vco = { .min = 1750000, .max = 3500000}, .n = { .min = 1, .max = 2 }, .m = { .min = 97, .max = 108 }, .m1 = { .min = 0x10, .max = 0x12 }, .m2 = { .min = 0x05, .max = 0x06 }, .p = { .min = 10, .max = 20 }, .p1 = { .min = 1, .max = 2}, .p2 = { .dot_limit = 0, .p2_slow = 10, .p2_fast = 10 }, .find_pll = intel_find_pll_g4x_dp, }; static const intel_limit_t intel_limits_pineview_sdvo = { .dot = { .min = 20000, .max = 400000}, .vco = { .min = 1700000, .max = 3500000 }, /* Pineview's Ncounter is a ring counter */ .n = { .min = 3, .max = 6 }, .m = { .min = 2, .max = 256 }, /* Pineview only has one combined m divider, which we treat as m2. 
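* (Editor's illustration, not part of the original comment: with the one combined divider, pineview_clock() below computes m = m2 + 2, vco = refclk * m / n and dot = vco / p. A hypothetical example: refclk = 96000 kHz, n = 3, m2 = 100, p1 = 2, p2 = 5 gives vco = 96000 * 102 / 3 = 3264000 kHz and dot = 3264000 / 10 = 326400 kHz, which lands inside the .vco and .dot ranges above.)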
*/ .m1 = { .min = 0, .max = 0 }, .m2 = { .min = 0, .max = 254 }, .p = { .min = 5, .max = 80 }, .p1 = { .min = 1, .max = 8 }, .p2 = { .dot_limit = 200000, .p2_slow = 10, .p2_fast = 5 }, .find_pll = intel_find_best_PLL, }; static const intel_limit_t intel_limits_pineview_lvds = { .dot = { .min = 20000, .max = 400000 }, .vco = { .min = 1700000, .max = 3500000 }, .n = { .min = 3, .max = 6 }, .m = { .min = 2, .max = 256 }, .m1 = { .min = 0, .max = 0 }, .m2 = { .min = 0, .max = 254 }, .p = { .min = 7, .max = 112 }, .p1 = { .min = 1, .max = 8 }, .p2 = { .dot_limit = 112000, .p2_slow = 14, .p2_fast = 14 }, .find_pll = intel_find_best_PLL, }; /* Ironlake / Sandybridge * * We calculate clock using (register_value + 2) for N/M1/M2, so here * the range value for them is (actual_value - 2). */ static const intel_limit_t intel_limits_ironlake_dac = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 5 }, .m = { .min = 79, .max = 127 }, .m1 = { .min = 12, .max = 22 }, .m2 = { .min = 5, .max = 9 }, .p = { .min = 5, .max = 80 }, .p1 = { .min = 1, .max = 8 }, .p2 = { .dot_limit = 225000, .p2_slow = 10, .p2_fast = 5 }, .find_pll = intel_g4x_find_best_PLL, }; static const intel_limit_t intel_limits_ironlake_single_lvds = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, .m = { .min = 79, .max = 118 }, .m1 = { .min = 12, .max = 22 }, .m2 = { .min = 5, .max = 9 }, .p = { .min = 28, .max = 112 }, .p1 = { .min = 2, .max = 8 }, .p2 = { .dot_limit = 225000, .p2_slow = 14, .p2_fast = 14 }, .find_pll = intel_g4x_find_best_PLL, }; static const intel_limit_t intel_limits_ironlake_dual_lvds = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, .m = { .min = 79, .max = 127 }, .m1 = { .min = 12, .max = 22 }, .m2 = { .min = 5, .max = 9 }, .p = { .min = 14, .max = 56 }, .p1 = { .min = 2, .max = 8 }, .p2 = { .dot_limit = 225000, .p2_slow = 7, .p2_fast = 7 }, .find_pll = intel_g4x_find_best_PLL, }; /* LVDS 100mhz refclk limits. 
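* (These _100m variants are what intel_ironlake_limit() below selects when it is called with refclk == 100000, i.e. the 100 MHz reference expressed in kHz, as with mode->clock.)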
*/ static const intel_limit_t intel_limits_ironlake_single_lvds_100m = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 2 }, .m = { .min = 79, .max = 126 }, .m1 = { .min = 12, .max = 22 }, .m2 = { .min = 5, .max = 9 }, .p = { .min = 28, .max = 112 }, .p1 = { .min = 2, .max = 8 }, .p2 = { .dot_limit = 225000, .p2_slow = 14, .p2_fast = 14 }, .find_pll = intel_g4x_find_best_PLL, }; static const intel_limit_t intel_limits_ironlake_dual_lvds_100m = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000 }, .n = { .min = 1, .max = 3 }, .m = { .min = 79, .max = 126 }, .m1 = { .min = 12, .max = 22 }, .m2 = { .min = 5, .max = 9 }, .p = { .min = 14, .max = 42 }, .p1 = { .min = 2, .max = 6 }, .p2 = { .dot_limit = 225000, .p2_slow = 7, .p2_fast = 7 }, .find_pll = intel_g4x_find_best_PLL, }; static const intel_limit_t intel_limits_ironlake_display_port = { .dot = { .min = 25000, .max = 350000 }, .vco = { .min = 1760000, .max = 3510000}, .n = { .min = 1, .max = 2 }, .m = { .min = 81, .max = 90 }, .m1 = { .min = 12, .max = 22 }, .m2 = { .min = 5, .max = 9 }, .p = { .min = 10, .max = 20 }, .p1 = { .min = 1, .max = 2}, .p2 = { .dot_limit = 0, .p2_slow = 10, .p2_fast = 10 }, .find_pll = intel_find_pll_ironlake_dp, }; static const intel_limit_t intel_limits_vlv_dac = { .dot = { .min = 25000, .max = 270000 }, .vco = { .min = 4000000, .max = 6000000 }, .n = { .min = 1, .max = 7 }, .m = { .min = 22, .max = 450 }, /* guess */ .m1 = { .min = 2, .max = 3 }, .m2 = { .min = 11, .max = 156 }, .p = { .min = 10, .max = 30 }, .p1 = { .min = 2, .max = 3 }, .p2 = { .dot_limit = 270000, .p2_slow = 2, .p2_fast = 20 }, .find_pll = intel_vlv_find_best_pll, }; static const intel_limit_t intel_limits_vlv_hdmi = { .dot = { .min = 20000, .max = 165000 }, .vco = { .min = 4000000, .max = 5994000}, .n = { .min = 1, .max = 7 }, .m = { .min = 60, .max = 300 }, /* guess */ .m1 = { .min = 2, .max = 3 }, .m2 = { .min = 11, .max = 156 }, .p = { .min = 10, .max = 30 }, .p1 = { .min = 2, .max = 3 }, .p2 = { .dot_limit = 270000, .p2_slow = 2, .p2_fast = 20 }, .find_pll = intel_vlv_find_best_pll, }; static const intel_limit_t intel_limits_vlv_dp = { .dot = { .min = 25000, .max = 270000 }, .vco = { .min = 4000000, .max = 6000000 }, .n = { .min = 1, .max = 7 }, .m = { .min = 22, .max = 450 }, .m1 = { .min = 2, .max = 3 }, .m2 = { .min = 11, .max = 156 }, .p = { .min = 10, .max = 30 }, .p1 = { .min = 2, .max = 3 }, .p2 = { .dot_limit = 270000, .p2_slow = 2, .p2_fast = 20 }, .find_pll = intel_vlv_find_best_pll, }; u32 intel_dpio_read(struct drm_i915_private *dev_priv, int reg) { u32 val = 0; sx_xlock(&dev_priv->dpio_lock); if (wait_for_atomic_us((I915_READ(DPIO_PKT) & DPIO_BUSY) == 0, 100)) { DRM_ERROR("DPIO idle wait timed out\n"); goto out_unlock; } I915_WRITE(DPIO_REG, reg); I915_WRITE(DPIO_PKT, DPIO_RID | DPIO_OP_READ | DPIO_PORTID | DPIO_BYTE); if (wait_for_atomic_us((I915_READ(DPIO_PKT) & DPIO_BUSY) == 0, 100)) { DRM_ERROR("DPIO read wait timed out\n"); goto out_unlock; } val = I915_READ(DPIO_DATA); out_unlock: sx_xunlock(&dev_priv->dpio_lock); return val; } static void intel_dpio_write(struct drm_i915_private *dev_priv, int reg, u32 val) { sx_xlock(&dev_priv->dpio_lock); if (wait_for_atomic_us((I915_READ(DPIO_PKT) & DPIO_BUSY) == 0, 100)) { DRM_ERROR("DPIO idle wait timed out\n"); goto out_unlock; } I915_WRITE(DPIO_DATA, val); I915_WRITE(DPIO_REG, reg); I915_WRITE(DPIO_PKT, DPIO_RID | DPIO_OP_WRITE | DPIO_PORTID | DPIO_BYTE); if 
(wait_for_atomic_us((I915_READ(DPIO_PKT) & DPIO_BUSY) == 0, 100)) DRM_ERROR("DPIO write wait timed out\n"); out_unlock: sx_xunlock(&dev_priv->dpio_lock); } static void vlv_init_dpio(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; /* Reset the DPIO config */ I915_WRITE(DPIO_CTL, 0); POSTING_READ(DPIO_CTL); I915_WRITE(DPIO_CTL, 1); POSTING_READ(DPIO_CTL); } static int intel_dual_link_lvds_callback(const struct dmi_system_id *id) { DRM_INFO("Forcing lvds to dual link mode on %s\n", id->ident); return 1; } static const struct dmi_system_id intel_dual_link_lvds[] = { { .callback = intel_dual_link_lvds_callback, .ident = "Apple MacBook Pro (Core i5/i7 Series)", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro8,2"), }, }, { } /* terminating entry */ }; static bool is_dual_link_lvds(struct drm_i915_private *dev_priv, unsigned int reg) { unsigned int val; /* use the module option value if specified */ if (i915_lvds_channel_mode > 0) return i915_lvds_channel_mode == 2; if (dmi_check_system(intel_dual_link_lvds)) return true; if (dev_priv->lvds_val) val = dev_priv->lvds_val; else { /* BIOS should set the proper LVDS register value at boot, but * in reality, it doesn't set the value when the lid is closed; * we need to check "the value to be set" in VBT when LVDS * register is uninitialized. */ val = I915_READ(reg); if (!(val & ~(LVDS_PIPE_MASK | LVDS_DETECTED))) val = dev_priv->bios_lvds_val; dev_priv->lvds_val = val; } return (val & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP; } static const intel_limit_t *intel_ironlake_limit(struct drm_crtc *crtc, int refclk) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; const intel_limit_t *limit; if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { if (is_dual_link_lvds(dev_priv, PCH_LVDS)) { /* LVDS dual channel */ if (refclk == 100000) limit = &intel_limits_ironlake_dual_lvds_100m; else limit = &intel_limits_ironlake_dual_lvds; } else { if (refclk == 100000) limit = &intel_limits_ironlake_single_lvds_100m; else limit = &intel_limits_ironlake_single_lvds; } } else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) || intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) limit = &intel_limits_ironlake_display_port; else limit = &intel_limits_ironlake_dac; return limit; } static const intel_limit_t *intel_g4x_limit(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; const intel_limit_t *limit; if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { if (is_dual_link_lvds(dev_priv, LVDS)) /* LVDS with dual channel */ limit = &intel_limits_g4x_dual_channel_lvds; else /* LVDS with single channel */ limit = &intel_limits_g4x_single_channel_lvds; } else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI) || intel_pipe_has_type(crtc, INTEL_OUTPUT_ANALOG)) { limit = &intel_limits_g4x_hdmi; } else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO)) { limit = &intel_limits_g4x_sdvo; } else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) { limit = &intel_limits_g4x_display_port; } else /* The option is for other outputs */ limit = &intel_limits_i9xx_sdvo; return limit; } static const intel_limit_t *intel_limit(struct drm_crtc *crtc, int refclk) { struct drm_device *dev = crtc->dev; const intel_limit_t *limit; if (HAS_PCH_SPLIT(dev)) limit = intel_ironlake_limit(crtc, refclk); else if (IS_G4X(dev)) { limit = intel_g4x_limit(crtc); } else if (IS_PINEVIEW(dev)) { if (intel_pipe_has_type(crtc,
INTEL_OUTPUT_LVDS)) limit = &intel_limits_pineview_lvds; else limit = &intel_limits_pineview_sdvo; } else if (IS_VALLEYVIEW(dev)) { if (intel_pipe_has_type(crtc, INTEL_OUTPUT_ANALOG)) limit = &intel_limits_vlv_dac; else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI)) limit = &intel_limits_vlv_hdmi; else limit = &intel_limits_vlv_dp; } else if (!IS_GEN2(dev)) { if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) limit = &intel_limits_i9xx_lvds; else limit = &intel_limits_i9xx_sdvo; } else { if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) limit = &intel_limits_i8xx_lvds; else limit = &intel_limits_i8xx_dvo; } return limit; } /* m1 is reserved as 0 in Pineview, n is a ring counter */ static void pineview_clock(int refclk, intel_clock_t *clock) { clock->m = clock->m2 + 2; clock->p = clock->p1 * clock->p2; clock->vco = refclk * clock->m / clock->n; clock->dot = clock->vco / clock->p; } static void intel_clock(struct drm_device *dev, int refclk, intel_clock_t *clock) { if (IS_PINEVIEW(dev)) { pineview_clock(refclk, clock); return; } clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2); clock->p = clock->p1 * clock->p2; clock->vco = refclk * clock->m / (clock->n + 2); clock->dot = clock->vco / clock->p; } /** * Returns whether any output on the specified pipe is of the specified type */ bool intel_pipe_has_type(struct drm_crtc *crtc, int type) { struct drm_device *dev = crtc->dev; struct intel_encoder *encoder; for_each_encoder_on_crtc(dev, crtc, encoder) if (encoder->type == type) return true; return false; } #define INTELPllInvalid(s) do { /* DRM_DEBUG(s); */ return false; } while (0) /** * Returns whether the given set of divisors are valid for a given refclk with * the given connectors. */ static bool intel_PLL_is_valid(struct drm_device *dev, const intel_limit_t *limit, const intel_clock_t *clock) { if (clock->p1 < limit->p1.min || limit->p1.max < clock->p1) INTELPllInvalid("p1 out of range\n"); if (clock->p < limit->p.min || limit->p.max < clock->p) INTELPllInvalid("p out of range\n"); if (clock->m2 < limit->m2.min || limit->m2.max < clock->m2) INTELPllInvalid("m2 out of range\n"); if (clock->m1 < limit->m1.min || limit->m1.max < clock->m1) INTELPllInvalid("m1 out of range\n"); if (clock->m1 <= clock->m2 && !IS_PINEVIEW(dev)) INTELPllInvalid("m1 <= m2\n"); if (clock->m < limit->m.min || limit->m.max < clock->m) INTELPllInvalid("m out of range\n"); if (clock->n < limit->n.min || limit->n.max < clock->n) INTELPllInvalid("n out of range\n"); if (clock->vco < limit->vco.min || limit->vco.max < clock->vco) INTELPllInvalid("vco out of range\n"); /* XXX: We may need to be checking "Dot clock" depending on the multiplier, * connector, etc., rather than just a single range. */ if (clock->dot < limit->dot.min || limit->dot.max < clock->dot) INTELPllInvalid("dot out of range\n"); return true; } static bool intel_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; intel_clock_t clock; int err = target; if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) && (I915_READ(LVDS)) != 0) { /* * For LVDS, if the panel is on, just rely on its current * settings for dual-channel. We haven't figured out how to * reliably set up different single/dual channel state, if we * even can. 
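* (As a sketch of the search below: every (m1, m2, n, p1) combination permitted by the limit table is tried, skipping m2 >= m1 except on Pineview; the derived clock is filled in by intel_clock(), invalid divisor sets are rejected by intel_PLL_is_valid(), and the candidate whose dot clock sits closest to target wins. Because err starts at target, the function reports success only if some valid candidate got strictly closer than that.)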
*/ if (is_dual_link_lvds(dev_priv, LVDS)) clock.p2 = limit->p2.p2_fast; else clock.p2 = limit->p2.p2_slow; } else { if (target < limit->p2.dot_limit) clock.p2 = limit->p2.p2_slow; else clock.p2 = limit->p2.p2_fast; } memset(best_clock, 0, sizeof(*best_clock)); for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) { for (clock.m2 = limit->m2.min; clock.m2 <= limit->m2.max; clock.m2++) { /* m1 is always 0 in Pineview */ if (clock.m2 >= clock.m1 && !IS_PINEVIEW(dev)) break; for (clock.n = limit->n.min; clock.n <= limit->n.max; clock.n++) { for (clock.p1 = limit->p1.min; clock.p1 <= limit->p1.max; clock.p1++) { int this_err; intel_clock(dev, refclk, &clock); if (!intel_PLL_is_valid(dev, limit, &clock)) continue; if (match_clock && clock.p != match_clock->p) continue; this_err = abs(clock.dot - target); if (this_err < err) { *best_clock = clock; err = this_err; } } } } } return (err != target); } static bool intel_g4x_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; intel_clock_t clock; int max_n; bool found; /* approximately equals target * 0.00585 */ int err_most = (target >> 8) + (target >> 9); found = false; if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { int lvds_reg; if (HAS_PCH_SPLIT(dev)) lvds_reg = PCH_LVDS; else lvds_reg = LVDS; if ((I915_READ(lvds_reg) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) clock.p2 = limit->p2.p2_fast; else clock.p2 = limit->p2.p2_slow; } else { if (target < limit->p2.dot_limit) clock.p2 = limit->p2.p2_slow; else clock.p2 = limit->p2.p2_fast; } memset(best_clock, 0, sizeof(*best_clock)); max_n = limit->n.max; /* based on hardware requirement, prefer smaller n to precision */ for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) { /* based on hardware requirement, prefer larger m1,m2 */ for (clock.m1 = limit->m1.max; clock.m1 >= limit->m1.min; clock.m1--) { for (clock.m2 = limit->m2.max; clock.m2 >= limit->m2.min; clock.m2--) { for (clock.p1 = limit->p1.max; clock.p1 >= limit->p1.min; clock.p1--) { int this_err; intel_clock(dev, refclk, &clock); if (!intel_PLL_is_valid(dev, limit, &clock)) continue; if (match_clock && clock.p != match_clock->p) continue; this_err = abs(clock.dot - target); if (this_err < err_most) { *best_clock = clock; err_most = this_err; max_n = clock.n; found = true; } } } } } return found; } static bool intel_find_pll_ironlake_dp(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock) { struct drm_device *dev = crtc->dev; intel_clock_t clock; if (target < 200000) { clock.n = 1; clock.p1 = 2; clock.p2 = 10; clock.m1 = 12; clock.m2 = 9; } else { clock.n = 2; clock.p1 = 1; clock.p2 = 10; clock.m1 = 14; clock.m2 = 8; } intel_clock(dev, refclk, &clock); memcpy(best_clock, &clock, sizeof(intel_clock_t)); return true; } /* DisplayPort has only two frequencies, 162MHz and 270MHz */ static bool intel_find_pll_g4x_dp(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock) { intel_clock_t clock; if (target < 200000) { clock.p1 = 2; clock.p2 = 10; clock.n = 2; clock.m1 = 23; clock.m2 = 8; } else { clock.p1 = 1; clock.p2 = 10; clock.n = 1; clock.m1 = 14; clock.m2 = 2; } clock.m = 5 * (clock.m1 + 2) + (clock.m2 + 2); clock.p = (clock.p1 * clock.p2); clock.dot = 96000 * clock.m / (clock.n + 2) /
clock.p; clock.vco = 0; memcpy(best_clock, &clock, sizeof(intel_clock_t)); return true; } static bool intel_vlv_find_best_pll(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock) { u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2; u32 m, n, fastclk; u32 updrate, minupdate, fracbits, p; unsigned long bestppm, ppm, absppm; int dotclk, flag; flag = 0; dotclk = target * 1000; bestppm = 1000000; ppm = absppm = 0; fastclk = dotclk / (2*100); updrate = 0; minupdate = 19200; fracbits = 1; n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0; bestm1 = bestm2 = bestp1 = bestp2 = 0; /* based on hardware requirement, prefer smaller n to precision */ for (n = limit->n.min; n <= ((refclk) / minupdate); n++) { updrate = refclk / n; for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) { for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) { if (p2 > 10) p2 = p2 - 1; p = p1 * p2; /* based on hardware requirement, prefer bigger m1,m2 values */ for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) { m2 = (((2*(fastclk * p * n / m1 )) + refclk) / (2*refclk)); m = m1 * m2; vco = updrate * m; if (vco >= limit->vco.min && vco < limit->vco.max) { ppm = 1000000 * ((vco / p) - fastclk) / fastclk; absppm = (ppm > 0) ? ppm : (-ppm); if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) { bestppm = 0; flag = 1; } if (absppm < bestppm - 10) { bestppm = absppm; flag = 1; } if (flag) { bestn = n; bestm1 = m1; bestm2 = m2; bestp1 = p1; bestp2 = p2; flag = 0; } } } } } } best_clock->n = bestn; best_clock->m1 = bestm1; best_clock->m2 = bestm2; best_clock->p1 = bestp1; best_clock->p2 = bestp2; return true; } enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); return intel_crtc->cpu_transcoder; } static void ironlake_wait_for_vblank(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = dev->dev_private; u32 frame, frame_reg = PIPEFRAME(pipe); frame = I915_READ(frame_reg); if (wait_for(I915_READ_NOTRACE(frame_reg) != frame, 50)) DRM_DEBUG_KMS("vblank wait timed out\n"); } /** * intel_wait_for_vblank - wait for vblank on a given pipe * @dev: drm device * @pipe: pipe to wait for * * Wait for vblank to occur on a given pipe. Needed for various bits of * mode setting code. */ void intel_wait_for_vblank(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = dev->dev_private; int pipestat_reg = PIPESTAT(pipe); if (INTEL_INFO(dev)->gen >= 5) { ironlake_wait_for_vblank(dev, pipe); return; } /* Clear existing vblank status. Note this will clear any other * sticky status fields as well. * * This races with i915_driver_irq_handler() with the result * that either function could miss a vblank event. Here it is not * fatal, as we will either wait upon the next vblank interrupt or * timeout. Generally speaking intel_wait_for_vblank() is only * called during modeset at which time the GPU should be idle and * should *not* be performing page flips and thus not waiting on * vblanks... * Currently, the result of us stealing a vblank from the irq * handler is that a single frame will be skipped during swapbuffers. 
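* (Gen5 and newer take the ironlake_wait_for_vblank() path above, which polls the PIPEFRAME frame counter instead of the shared interrupt status bits, so the race described here does not apply there.)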
*/ I915_WRITE(pipestat_reg, I915_READ(pipestat_reg) | PIPE_VBLANK_INTERRUPT_STATUS); /* Wait for vblank interrupt bit to set */ if (wait_for(I915_READ(pipestat_reg) & PIPE_VBLANK_INTERRUPT_STATUS, 50)) DRM_DEBUG_KMS("vblank wait timed out\n"); } /* * intel_wait_for_pipe_off - wait for pipe to turn off * @dev: drm device * @pipe: pipe to wait for * * After disabling a pipe, we can't wait for vblank in the usual way, * spinning on the vblank interrupt status bit, since we won't actually * see an interrupt when the pipe is disabled. * * On Gen4 and above: * wait for the pipe register state bit to turn off * * Otherwise: * wait for the display line value to settle (it usually * ends up stopping at the start of the next frame). * */ void intel_wait_for_pipe_off(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = dev->dev_private; enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); if (INTEL_INFO(dev)->gen >= 4) { int reg = PIPECONF(cpu_transcoder); /* Wait for the Pipe State to go off */ if (wait_for((I915_READ(reg) & I965_PIPECONF_ACTIVE) == 0, 100)) WARN(1, "pipe_off wait timed out\n"); } else { u32 last_line, line_mask; int reg = PIPEDSL(pipe); unsigned long timeout = jiffies + msecs_to_jiffies(100); if (IS_GEN2(dev)) line_mask = DSL_LINEMASK_GEN2; else line_mask = DSL_LINEMASK_GEN3; /* Wait for the display line to settle */ do { last_line = I915_READ(reg) & line_mask; mdelay(5); } while (((I915_READ(reg) & line_mask) != last_line) && time_after(timeout, jiffies)); if (time_after(jiffies, timeout)) WARN(1, "pipe_off wait timed out\n"); } } static const char *state_string(bool enabled) { return enabled ? "on" : "off"; } /* Only for pre-ILK configs */ static void assert_pll(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { int reg; u32 val; bool cur_state; reg = DPLL(pipe); val = I915_READ(reg); cur_state = !!(val & DPLL_VCO_ENABLE); WARN(cur_state != state, "PLL state assertion failure (expected %s, current %s)\n", state_string(state), state_string(cur_state)); } #define assert_pll_enabled(d, p) assert_pll(d, p, true) #define assert_pll_disabled(d, p) assert_pll(d, p, false) /* For ILK+ */ static void assert_pch_pll(struct drm_i915_private *dev_priv, struct intel_pch_pll *pll, struct intel_crtc *crtc, bool state) { u32 val; bool cur_state; if (HAS_PCH_LPT(dev_priv->dev)) { DRM_DEBUG_DRIVER("LPT detected: skipping PCH PLL test\n"); return; } if (WARN (!pll, "asserting PCH PLL %s with no PLL\n", state_string(state))) return; val = I915_READ(pll->pll_reg); cur_state = !!(val & DPLL_VCO_ENABLE); WARN(cur_state != state, "PCH PLL state for reg %x assertion failure (expected %s, current %s), val=%08x\n", pll->pll_reg, state_string(state), state_string(cur_state), val); /* Make sure the selected PLL is correctly attached to the transcoder */ if (crtc && HAS_PCH_CPT(dev_priv->dev)) { u32 pch_dpll; pch_dpll = I915_READ(PCH_DPLL_SEL); cur_state = pll->pll_reg == _PCH_DPLL_B; if (!WARN(((pch_dpll >> (4 * crtc->pipe)) & 1) != cur_state, "PLL[%d] not attached to this transcoder %d: %08x\n", cur_state, crtc->pipe, pch_dpll)) { cur_state = !!(val >> (4*crtc->pipe + 3)); WARN(cur_state != state, "PLL[%d] not %s on this transcoder %d: %08x\n", pll->pll_reg == _PCH_DPLL_B, state_string(state), crtc->pipe, val); } } } #define assert_pch_pll_enabled(d, p, c) assert_pch_pll(d, p, c, true) #define assert_pch_pll_disabled(d, p, c) assert_pch_pll(d, p, c, false) static void assert_fdi_tx(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { int 
reg; u32 val; bool cur_state; enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); if (IS_HASWELL(dev_priv->dev)) { /* On Haswell, DDI is used instead of FDI_TX_CTL */ reg = TRANS_DDI_FUNC_CTL(cpu_transcoder); val = I915_READ(reg); cur_state = !!(val & TRANS_DDI_FUNC_ENABLE); } else { reg = FDI_TX_CTL(pipe); val = I915_READ(reg); cur_state = !!(val & FDI_TX_ENABLE); } WARN(cur_state != state, "FDI TX state assertion failure (expected %s, current %s)\n", state_string(state), state_string(cur_state)); } #define assert_fdi_tx_enabled(d, p) assert_fdi_tx(d, p, true) #define assert_fdi_tx_disabled(d, p) assert_fdi_tx(d, p, false) static void assert_fdi_rx(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { int reg; u32 val; bool cur_state; reg = FDI_RX_CTL(pipe); val = I915_READ(reg); cur_state = !!(val & FDI_RX_ENABLE); WARN(cur_state != state, "FDI RX state assertion failure (expected %s, current %s)\n", state_string(state), state_string(cur_state)); } #define assert_fdi_rx_enabled(d, p) assert_fdi_rx(d, p, true) #define assert_fdi_rx_disabled(d, p) assert_fdi_rx(d, p, false) static void assert_fdi_tx_pll_enabled(struct drm_i915_private *dev_priv, enum pipe pipe) { int reg; u32 val; /* ILK FDI PLL is always enabled */ if (dev_priv->info->gen == 5) return; /* On Haswell, DDI ports are responsible for the FDI PLL setup */ if (IS_HASWELL(dev_priv->dev)) return; reg = FDI_TX_CTL(pipe); val = I915_READ(reg); WARN(!(val & FDI_TX_PLL_ENABLE), "FDI TX PLL assertion failure, should be active but is disabled\n"); } static void assert_fdi_rx_pll_enabled(struct drm_i915_private *dev_priv, enum pipe pipe) { int reg; u32 val; reg = FDI_RX_CTL(pipe); val = I915_READ(reg); WARN(!(val & FDI_RX_PLL_ENABLE), "FDI RX PLL assertion failure, should be active but is disabled\n"); } static void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe) { int pp_reg, lvds_reg; u32 val; enum pipe panel_pipe = PIPE_A; bool locked = true; if (HAS_PCH_SPLIT(dev_priv->dev)) { pp_reg = PCH_PP_CONTROL; lvds_reg = PCH_LVDS; } else { pp_reg = PP_CONTROL; lvds_reg = LVDS; } val = I915_READ(pp_reg); if (!(val & PANEL_POWER_ON) || ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS)) locked = false; if (I915_READ(lvds_reg) & LVDS_PIPEB_SELECT) panel_pipe = PIPE_B; WARN(panel_pipe == pipe && locked, "panel assertion failure, pipe %c regs locked\n", pipe_name(pipe)); } void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { int reg; u32 val; bool cur_state; enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); /* if we need the pipe A quirk it must be always on */ if (pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) state = true; reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); cur_state = !!(val & PIPECONF_ENABLE); WARN(cur_state != state, "pipe %c assertion failure (expected %s, current %s)\n", pipe_name(pipe), state_string(state), state_string(cur_state)); } static void assert_plane(struct drm_i915_private *dev_priv, enum plane plane, bool state) { int reg; u32 val; bool cur_state; reg = DSPCNTR(plane); val = I915_READ(reg); cur_state = !!(val & DISPLAY_PLANE_ENABLE); WARN(cur_state != state, "plane %c assertion failure (expected %s, current %s)\n", plane_name(plane), state_string(state), state_string(cur_state)); } #define assert_plane_enabled(d, p) assert_plane(d, p, true) #define assert_plane_disabled(d, p) assert_plane(d, p, false) static void assert_planes_disabled(struct drm_i915_private *dev_priv, enum pipe 
pipe) { int reg, i; u32 val; int cur_pipe; /* Planes are fixed to pipes on ILK+ */ if (HAS_PCH_SPLIT(dev_priv->dev)) { reg = DSPCNTR(pipe); val = I915_READ(reg); WARN((val & DISPLAY_PLANE_ENABLE), "plane %c assertion failure, should be disabled but is not\n", plane_name(pipe)); return; } /* Need to check both planes against the pipe */ for (i = 0; i < 2; i++) { reg = DSPCNTR(i); val = I915_READ(reg); cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >> DISPPLANE_SEL_PIPE_SHIFT; WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe, "plane %c assertion failure, should be off on pipe %c but is still active\n", plane_name(i), pipe_name(pipe)); } } static void assert_pch_refclk_enabled(struct drm_i915_private *dev_priv) { u32 val; bool enabled; if (HAS_PCH_LPT(dev_priv->dev)) { DRM_DEBUG_DRIVER("LPT does not have a PCH refclk, skipping check\n"); return; } val = I915_READ(PCH_DREF_CONTROL); enabled = !!(val & (DREF_SSC_SOURCE_MASK | DREF_NONSPREAD_SOURCE_MASK | DREF_SUPERSPREAD_SOURCE_MASK)); WARN(!enabled, "PCH refclk assertion failure, should be active but is disabled\n"); } static void assert_transcoder_disabled(struct drm_i915_private *dev_priv, enum pipe pipe) { int reg; u32 val; bool enabled; reg = TRANSCONF(pipe); val = I915_READ(reg); enabled = !!(val & TRANS_ENABLE); WARN(enabled, "transcoder assertion failed, should be off on pipe %c but is still active\n", pipe_name(pipe)); } static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, u32 port_sel, u32 val) { if ((val & DP_PORT_EN) == 0) return false; if (HAS_PCH_CPT(dev_priv->dev)) { u32 trans_dp_ctl_reg = TRANS_DP_CTL(pipe); u32 trans_dp_ctl = I915_READ(trans_dp_ctl_reg); if ((trans_dp_ctl & TRANS_DP_PORT_SEL_MASK) != port_sel) return false; } else { if ((val & DP_PIPE_MASK) != (pipe << 30)) return false; } return true; } static bool hdmi_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, u32 val) { if ((val & PORT_ENABLE) == 0) return false; if (HAS_PCH_CPT(dev_priv->dev)) { if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) return false; } else { if ((val & TRANSCODER_MASK) != TRANSCODER(pipe)) return false; } return true; } static bool lvds_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, u32 val) { if ((val & LVDS_PORT_EN) == 0) return false; if (HAS_PCH_CPT(dev_priv->dev)) { if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) return false; } else { if ((val & LVDS_PIPE_MASK) != LVDS_PIPE(pipe)) return false; } return true; } static bool adpa_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, u32 val) { if ((val & ADPA_DAC_ENABLE) == 0) return false; if (HAS_PCH_CPT(dev_priv->dev)) { if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) return false; } else { if ((val & ADPA_PIPE_SELECT_MASK) != ADPA_PIPE_SELECT(pipe)) return false; } return true; } static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 port_sel) { u32 val = I915_READ(reg); WARN(dp_pipe_enabled(dev_priv, pipe, port_sel, val), "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", reg, pipe_name(pipe)); WARN(HAS_PCH_IBX(dev_priv->dev) && (val & DP_PORT_EN) == 0 && (val & DP_PIPEB_SELECT), "IBX PCH dp port still using transcoder B\n"); } static void assert_pch_hdmi_disabled(struct drm_i915_private *dev_priv, enum pipe pipe, int reg) { u32 val = I915_READ(reg); WARN(hdmi_pipe_enabled(dev_priv, pipe, val), "PCH HDMI (0x%08x) enabled on transcoder %c, should be disabled\n", reg, pipe_name(pipe)); WARN(HAS_PCH_IBX(dev_priv->dev) && (val &
PORT_ENABLE) == 0 && (val & SDVO_PIPE_B_SELECT), "IBX PCH hdmi port still using transcoder B\n"); } static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv, enum pipe pipe) { int reg; u32 val; assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_B, TRANS_DP_PORT_SEL_B); assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_C, TRANS_DP_PORT_SEL_C); assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_D, TRANS_DP_PORT_SEL_D); reg = PCH_ADPA; val = I915_READ(reg); WARN(adpa_pipe_enabled(dev_priv, pipe, val), "PCH VGA enabled on transcoder %c, should be disabled\n", pipe_name(pipe)); reg = PCH_LVDS; val = I915_READ(reg); WARN(lvds_pipe_enabled(dev_priv, pipe, val), "PCH LVDS enabled on transcoder %c, should be disabled\n", pipe_name(pipe)); assert_pch_hdmi_disabled(dev_priv, pipe, HDMIB); assert_pch_hdmi_disabled(dev_priv, pipe, HDMIC); assert_pch_hdmi_disabled(dev_priv, pipe, HDMID); } /** * intel_enable_pll - enable a PLL * @dev_priv: i915 private structure * @pipe: pipe PLL to enable * * Enable @pipe's PLL so we can start pumping pixels from a plane. Check to * make sure the PLL reg is writable first though, since the panel write * protect mechanism may be enabled. * * Note! This is for pre-ILK only. * * Unfortunately needed by dvo_ns2501 since the dvo depends on it running. */ static void intel_enable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) { int reg; u32 val; /* No really, not for ILK+ */ BUG_ON(!IS_VALLEYVIEW(dev_priv->dev) && dev_priv->info->gen >= 5); /* PLL is protected by panel, make sure we can write it */ if (IS_MOBILE(dev_priv->dev) && !IS_I830(dev_priv->dev)) assert_panel_unlocked(dev_priv, pipe); reg = DPLL(pipe); val = I915_READ(reg); val |= DPLL_VCO_ENABLE; /* We do this three times for luck */ I915_WRITE(reg, val); POSTING_READ(reg); udelay(150); /* wait for warmup */ I915_WRITE(reg, val); POSTING_READ(reg); udelay(150); /* wait for warmup */ I915_WRITE(reg, val); POSTING_READ(reg); udelay(150); /* wait for warmup */ } /** * intel_disable_pll - disable a PLL * @dev_priv: i915 private structure * @pipe: pipe PLL to disable * * Disable the PLL for @pipe, making sure the pipe is off first. * * Note! This is for pre-ILK only. 
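* (On ILK+ the PLL of interest sits behind the PCH instead; those are handled, with refcounting on pll->active, by ironlake_enable_pch_pll() and intel_disable_pch_pll() further down.)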
*/ static void intel_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) { int reg; u32 val; /* Don't disable pipe A or pipe A PLLs if needed */ if (pipe == PIPE_A && (dev_priv->quirks & QUIRK_PIPEA_FORCE)) return; /* Make sure the pipe isn't still relying on us */ assert_pipe_disabled(dev_priv, pipe); reg = DPLL(pipe); val = I915_READ(reg); val &= ~DPLL_VCO_ENABLE; I915_WRITE(reg, val); POSTING_READ(reg); } /* SBI access */ static void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, enum intel_sbi_destination destination) { u32 tmp; sx_xlock(&dev_priv->dpio_lock); if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0, 100)) { DRM_ERROR("timeout waiting for SBI to become ready\n"); goto out_unlock; } I915_WRITE(SBI_ADDR, (reg << 16)); I915_WRITE(SBI_DATA, value); if (destination == SBI_ICLK) tmp = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRWR; else tmp = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IOWR; I915_WRITE(SBI_CTL_STAT, SBI_BUSY | tmp); if (wait_for((I915_READ(SBI_CTL_STAT) & (SBI_BUSY | SBI_RESPONSE_FAIL)) == 0, 100)) { DRM_ERROR("timeout waiting for SBI to complete write transaction\n"); goto out_unlock; } out_unlock: sx_xunlock(&dev_priv->dpio_lock); } static u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, enum intel_sbi_destination destination) { u32 value = 0; sx_xlock(&dev_priv->dpio_lock); if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0, 100)) { DRM_ERROR("timeout waiting for SBI to become ready\n"); goto out_unlock; } I915_WRITE(SBI_ADDR, (reg << 16)); if (destination == SBI_ICLK) value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD; else value = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD; I915_WRITE(SBI_CTL_STAT, value | SBI_BUSY); if (wait_for((I915_READ(SBI_CTL_STAT) & (SBI_BUSY | SBI_RESPONSE_FAIL)) == 0, 100)) { DRM_ERROR("timeout waiting for SBI to complete read transaction\n"); goto out_unlock; } value = I915_READ(SBI_DATA); out_unlock: sx_xunlock(&dev_priv->dpio_lock); return value; } /** * ironlake_enable_pch_pll - enable PCH PLL * @dev_priv: i915 private structure * @pipe: pipe PLL to enable * * The PCH PLL needs to be enabled before the PCH transcoder, since it * drives the transcoder clock. */ static void ironlake_enable_pch_pll(struct intel_crtc *intel_crtc) { struct drm_i915_private *dev_priv = intel_crtc->base.dev->dev_private; struct intel_pch_pll *pll; int reg; u32 val; /* PCH PLLs only available on ILK, SNB and IVB */ BUG_ON(dev_priv->info->gen < 5); pll = intel_crtc->pch_pll; if (pll == NULL) return; if (WARN_ON(pll->refcount == 0)) return; DRM_DEBUG_KMS("enable PCH PLL %x (active %d, on? %d)for crtc %d\n", pll->pll_reg, pll->active, pll->on, intel_crtc->base.base.id); /* PCH refclock must be enabled first */ assert_pch_refclk_enabled(dev_priv); if (pll->active++ && pll->on) { assert_pch_pll_enabled(dev_priv, pll, NULL); return; } DRM_DEBUG_KMS("enabling PCH PLL %x\n", pll->pll_reg); reg = pll->pll_reg; val = I915_READ(reg); val |= DPLL_VCO_ENABLE; I915_WRITE(reg, val); POSTING_READ(reg); udelay(200); pll->on = true; } static void intel_disable_pch_pll(struct intel_crtc *intel_crtc) { struct drm_i915_private *dev_priv = intel_crtc->base.dev->dev_private; struct intel_pch_pll *pll = intel_crtc->pch_pll; int reg; u32 val; /* PCH only available on ILK+ */ BUG_ON(dev_priv->info->gen < 5); if (pll == NULL) return; if (WARN_ON(pll->refcount == 0)) return; DRM_DEBUG_KMS("disable PCH PLL %x (active %d, on? 
%d) for crtc %d\n", pll->pll_reg, pll->active, pll->on, intel_crtc->base.base.id); if (WARN_ON(pll->active == 0)) { assert_pch_pll_disabled(dev_priv, pll, NULL); return; } if (--pll->active) { assert_pch_pll_enabled(dev_priv, pll, NULL); return; } DRM_DEBUG_KMS("disabling PCH PLL %x\n", pll->pll_reg); /* Make sure transcoder isn't still depending on us */ assert_transcoder_disabled(dev_priv, intel_crtc->pipe); reg = pll->pll_reg; val = I915_READ(reg); val &= ~DPLL_VCO_ENABLE; I915_WRITE(reg, val); POSTING_READ(reg); udelay(200); pll->on = false; } static void ironlake_enable_pch_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { struct drm_device *dev = dev_priv->dev; struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; uint32_t reg, val, pipeconf_val; /* PCH only available on ILK+ */ BUG_ON(dev_priv->info->gen < 5); /* Make sure PCH DPLL is enabled */ assert_pch_pll_enabled(dev_priv, to_intel_crtc(crtc)->pch_pll, to_intel_crtc(crtc)); /* FDI must be feeding us bits for PCH ports */ assert_fdi_tx_enabled(dev_priv, pipe); assert_fdi_rx_enabled(dev_priv, pipe); if (HAS_PCH_CPT(dev)) { /* Workaround: Set the timing override bit before enabling the * pch transcoder. */ reg = TRANS_CHICKEN2(pipe); val = I915_READ(reg); val |= TRANS_CHICKEN2_TIMING_OVERRIDE; I915_WRITE(reg, val); } reg = TRANSCONF(pipe); val = I915_READ(reg); pipeconf_val = I915_READ(PIPECONF(pipe)); if (HAS_PCH_IBX(dev_priv->dev)) { /* * make the BPC in transcoder be consistent with * that in pipeconf reg. */ val &= ~PIPE_BPC_MASK; val |= pipeconf_val & PIPE_BPC_MASK; } val &= ~TRANS_INTERLACE_MASK; if ((pipeconf_val & PIPECONF_INTERLACE_MASK) == PIPECONF_INTERLACED_ILK) if (HAS_PCH_IBX(dev_priv->dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO)) val |= TRANS_LEGACY_INTERLACED_ILK; else val |= TRANS_INTERLACED; else val |= TRANS_PROGRESSIVE; I915_WRITE(reg, val | TRANS_ENABLE); if (wait_for(I915_READ(reg) & TRANS_STATE_ENABLE, 100)) DRM_ERROR("failed to enable transcoder %d\n", pipe); } static void lpt_enable_pch_transcoder(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder) { u32 val, pipeconf_val; /* PCH only available on ILK+ */ BUG_ON(dev_priv->info->gen < 5); /* FDI must be feeding us bits for PCH ports */ assert_fdi_tx_enabled(dev_priv, (enum pipe)cpu_transcoder); assert_fdi_rx_enabled(dev_priv, (enum pipe)TRANSCODER_A); /* Workaround: set timing override bit. 
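* The same TRANS_CHICKEN2_TIMING_OVERRIDE bit is cleared again by lpt_disable_pch_transcoder() once the transcoder has been shut down.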
*/ val = I915_READ(_TRANSA_CHICKEN2); val |= TRANS_CHICKEN2_TIMING_OVERRIDE; I915_WRITE(_TRANSA_CHICKEN2, val); val = TRANS_ENABLE; pipeconf_val = I915_READ(PIPECONF(cpu_transcoder)); if ((pipeconf_val & PIPECONF_INTERLACE_MASK_HSW) == PIPECONF_INTERLACED_ILK) val |= TRANS_INTERLACED; else val |= TRANS_PROGRESSIVE; I915_WRITE(TRANSCONF(TRANSCODER_A), val); if (wait_for(I915_READ(_TRANSACONF) & TRANS_STATE_ENABLE, 100)) DRM_ERROR("Failed to enable PCH transcoder\n"); } static void ironlake_disable_pch_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { struct drm_device *dev = dev_priv->dev; uint32_t reg, val; /* FDI relies on the transcoder */ assert_fdi_tx_disabled(dev_priv, pipe); assert_fdi_rx_disabled(dev_priv, pipe); /* Ports must be off as well */ assert_pch_ports_disabled(dev_priv, pipe); reg = TRANSCONF(pipe); val = I915_READ(reg); val &= ~TRANS_ENABLE; I915_WRITE(reg, val); /* wait for PCH transcoder off, transcoder state */ if (wait_for((I915_READ(reg) & TRANS_STATE_ENABLE) == 0, 50)) DRM_ERROR("failed to disable transcoder %d\n", pipe); if (!HAS_PCH_IBX(dev)) { /* Workaround: Clear the timing override chicken bit again. */ reg = TRANS_CHICKEN2(pipe); val = I915_READ(reg); val &= ~TRANS_CHICKEN2_TIMING_OVERRIDE; I915_WRITE(reg, val); } } static void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv) { u32 val; val = I915_READ(_TRANSACONF); val &= ~TRANS_ENABLE; I915_WRITE(_TRANSACONF, val); /* wait for PCH transcoder off, transcoder state */ if (wait_for((I915_READ(_TRANSACONF) & TRANS_STATE_ENABLE) == 0, 50)) DRM_ERROR("Failed to disable PCH transcoder\n"); /* Workaround: clear timing override bit. */ val = I915_READ(_TRANSA_CHICKEN2); val &= ~TRANS_CHICKEN2_TIMING_OVERRIDE; I915_WRITE(_TRANSA_CHICKEN2, val); } /** * intel_enable_pipe - enable a pipe, asserting requirements * @dev_priv: i915 private structure * @pipe: pipe to enable * @pch_port: on ILK+, is this pipe driving a PCH port or not * * Enable @pipe, making sure that various hardware specific requirements * are met, if applicable, e.g. PLL enabled, LVDS pairs enabled, etc. * * @pipe should be %PIPE_A or %PIPE_B. * * Will wait until the pipe is actually running (i.e. first vblank) before * returning. */ static void intel_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool pch_port) { enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); enum transcoder pch_transcoder; int reg; u32 val; if (IS_HASWELL(dev_priv->dev)) pch_transcoder = TRANSCODER_A; else pch_transcoder = (enum transcoder)pipe; /* * A pipe without a PLL won't actually be able to drive bits from * a plane. On ILK+ the pipe PLLs are integrated, so we don't * need the check. */ if (!HAS_PCH_SPLIT(dev_priv->dev)) assert_pll_enabled(dev_priv, pipe); else { if (pch_port) { /* if driving the PCH, we need FDI enabled */ assert_fdi_rx_pll_enabled(dev_priv, (enum pipe)pch_transcoder); assert_fdi_tx_pll_enabled(dev_priv, (enum pipe)cpu_transcoder); } /* FIXME: assert CPU port conditions for SNB+ */ } reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); if (val & PIPECONF_ENABLE) return; I915_WRITE(reg, val | PIPECONF_ENABLE); intel_wait_for_vblank(dev_priv->dev, pipe); } /** * intel_disable_pipe - disable a pipe, asserting requirements * @dev_priv: i915 private structure * @pipe: pipe to disable * * Disable @pipe, making sure that various hardware specific requirements * are met, if applicable, e.g. plane disabled, panel fitter off, etc. * * @pipe should be %PIPE_A or %PIPE_B. 
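* (Ordering sketch, read from the body below: assert_planes_disabled() verifies no plane is still feeding the pipe, the pipe A force-on quirk is honoured, and only then is PIPECONF_ENABLE cleared.)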
* * Will wait until the pipe has shut down before returning. */ static void intel_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); int reg; u32 val; /* * Make sure planes won't keep trying to pump pixels to us, * or we might hang the display. */ assert_planes_disabled(dev_priv, pipe); /* Don't disable pipe A or pipe A PLLs if needed */ if (pipe == PIPE_A && (dev_priv->quirks & QUIRK_PIPEA_FORCE)) return; reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); if ((val & PIPECONF_ENABLE) == 0) return; I915_WRITE(reg, val & ~PIPECONF_ENABLE); intel_wait_for_pipe_off(dev_priv->dev, pipe); } /* * Plane regs are double buffered, going from enabled->disabled needs a * trigger in order to latch. The display address reg provides this. */ void intel_flush_display_plane(struct drm_i915_private *dev_priv, enum plane plane) { if (dev_priv->info->gen >= 4) I915_WRITE(DSPSURF(plane), I915_READ(DSPSURF(plane))); else I915_WRITE(DSPADDR(plane), I915_READ(DSPADDR(plane))); } /** * intel_enable_plane - enable a display plane on a given pipe * @dev_priv: i915 private structure * @plane: plane to enable * @pipe: pipe being fed * * Enable @plane on @pipe, making sure that @pipe is running first. */ static void intel_enable_plane(struct drm_i915_private *dev_priv, enum plane plane, enum pipe pipe) { int reg; u32 val; /* If the pipe isn't enabled, we can't pump pixels and may hang */ assert_pipe_enabled(dev_priv, pipe); reg = DSPCNTR(plane); val = I915_READ(reg); if (val & DISPLAY_PLANE_ENABLE) return; I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); intel_flush_display_plane(dev_priv, plane); intel_wait_for_vblank(dev_priv->dev, pipe); } /** * intel_disable_plane - disable a display plane * @dev_priv: i915 private structure * @plane: plane to disable * @pipe: pipe consuming the data * * Disable @plane; should be an independent operation. */ static void intel_disable_plane(struct drm_i915_private *dev_priv, enum plane plane, enum pipe pipe) { int reg; u32 val; reg = DSPCNTR(plane); val = I915_READ(reg); if ((val & DISPLAY_PLANE_ENABLE) == 0) return; I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE); intel_flush_display_plane(dev_priv, plane); intel_wait_for_vblank(dev_priv->dev, pipe); } int intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_i915_gem_object *obj, struct intel_ring_buffer *pipelined) { struct drm_i915_private *dev_priv = dev->dev_private; u32 alignment; int ret; switch (obj->tiling_mode) { case I915_TILING_NONE: if (IS_BROADWATER(dev) || IS_CRESTLINE(dev)) alignment = 128 * 1024; else if (INTEL_INFO(dev)->gen >= 4) alignment = 4 * 1024; else alignment = 64 * 1024; break; case I915_TILING_X: /* pin() will align the object as required by fence */ alignment = 0; break; case I915_TILING_Y: /* FIXME: Is this true? */ DRM_ERROR("Y tiled not allowed for scan out buffers\n"); return -EINVAL; default: BUG(); } dev_priv->mm.interruptible = false; ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined); if (ret) goto err_interruptible; /* Install a fence for tiled scan-out. Pre-i965 always needs a * fence, whereas 965+ only requires a fence if using * framebuffer compression. For simplicity, we always install * a fence as the cost is not that onerous. 
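* (For reference, the untiled scan-out alignment chosen above is 128 KiB on Broadwater/Crestline, 4 KiB on gen4 and later, and 64 KiB otherwise, while X-tiled objects pass 0 and let pin() apply the fence constraints. On failure, the code below unwinds in reverse order through the err_unpin and err_interruptible labels.)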
*/ ret = i915_gem_object_get_fence(obj); if (ret) goto err_unpin; i915_gem_object_pin_fence(obj); dev_priv->mm.interruptible = true; return 0; err_unpin: i915_gem_object_unpin(obj); err_interruptible: dev_priv->mm.interruptible = true; return ret; } void intel_unpin_fb_obj(struct drm_i915_gem_object *obj) { i915_gem_object_unpin_fence(obj); i915_gem_object_unpin(obj); } /* Computes the linear offset to the base tile and adjusts x, y. bytes per pixel * is assumed to be a power-of-two. */ unsigned long intel_gen4_compute_page_offset(int *x, int *y, unsigned int tiling_mode, unsigned int cpp, unsigned int pitch) { if (tiling_mode != I915_TILING_NONE) { unsigned int tile_rows, tiles; tile_rows = *y / 8; *y %= 8; tiles = *x / (512/cpp); *x %= 512/cpp; return tile_rows * pitch * 8 + tiles * 4096; } else { unsigned int offset; offset = *y * pitch + *x * cpp; *y = 0; *x = (offset & 4095) / cpp; return offset & -4096; } } static int i9xx_update_plane(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_framebuffer *intel_fb; struct drm_i915_gem_object *obj; int plane = intel_crtc->plane; unsigned long linear_offset; u32 dspcntr; u32 reg; switch (plane) { case 0: case 1: break; default: DRM_ERROR("Can't update plane %d in SAREA\n", plane); return -EINVAL; } intel_fb = to_intel_framebuffer(fb); obj = intel_fb->obj; reg = DSPCNTR(plane); dspcntr = I915_READ(reg); /* Mask out pixel format bits in case we change it */ dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (fb->pixel_format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; break; case DRM_FORMAT_XRGB1555: case DRM_FORMAT_ARGB1555: dspcntr |= DISPPLANE_BGRX555; break; case DRM_FORMAT_RGB565: dspcntr |= DISPPLANE_BGRX565; break; case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: dspcntr |= DISPPLANE_BGRX888; break; case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: dspcntr |= DISPPLANE_RGBX888; break; case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_ARGB2101010: dspcntr |= DISPPLANE_BGRX101010; break; case DRM_FORMAT_XBGR2101010: case DRM_FORMAT_ABGR2101010: dspcntr |= DISPPLANE_RGBX101010; break; default: DRM_ERROR("Unknown pixel format 0x%08x\n", fb->pixel_format); return -EINVAL; } if (INTEL_INFO(dev)->gen >= 4) { if (obj->tiling_mode != I915_TILING_NONE) dspcntr |= DISPPLANE_TILED; else dspcntr &= ~DISPPLANE_TILED; } I915_WRITE(reg, dspcntr); linear_offset = y * fb->pitches[0] + x * (fb->bits_per_pixel / 8); if (INTEL_INFO(dev)->gen >= 4) { intel_crtc->dspaddr_offset = intel_gen4_compute_page_offset(&x, &y, obj->tiling_mode, fb->bits_per_pixel / 8, fb->pitches[0]); linear_offset -= intel_crtc->dspaddr_offset; } else { intel_crtc->dspaddr_offset = linear_offset; } DRM_DEBUG_KMS("Writing base %08X %08lX %d %d %d\n", obj->gtt_offset, linear_offset, x, y, fb->pitches[0]); I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); if (INTEL_INFO(dev)->gen >= 4) { I915_MODIFY_DISPBASE(DSPSURF(plane), obj->gtt_offset + intel_crtc->dspaddr_offset); I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE(DSPLINOFF(plane), linear_offset); } else I915_WRITE(DSPADDR(plane), obj->gtt_offset + linear_offset); POSTING_READ(reg); return 0; } static int ironlake_update_plane(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_framebuffer 
*intel_fb; struct drm_i915_gem_object *obj; int plane = intel_crtc->plane; unsigned long linear_offset; u32 dspcntr; u32 reg; switch (plane) { case 0: case 1: case 2: break; default: DRM_ERROR("Can't update plane %d in SAREA\n", plane); return -EINVAL; } intel_fb = to_intel_framebuffer(fb); obj = intel_fb->obj; reg = DSPCNTR(plane); dspcntr = I915_READ(reg); /* Mask out pixel format bits in case we change it */ dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (fb->pixel_format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; break; case DRM_FORMAT_RGB565: dspcntr |= DISPPLANE_BGRX565; break; case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: dspcntr |= DISPPLANE_BGRX888; break; case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: dspcntr |= DISPPLANE_RGBX888; break; case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_ARGB2101010: dspcntr |= DISPPLANE_BGRX101010; break; case DRM_FORMAT_XBGR2101010: case DRM_FORMAT_ABGR2101010: dspcntr |= DISPPLANE_RGBX101010; break; default: DRM_ERROR("Unknown pixel format 0x%08x\n", fb->pixel_format); return -EINVAL; } if (obj->tiling_mode != I915_TILING_NONE) dspcntr |= DISPPLANE_TILED; else dspcntr &= ~DISPPLANE_TILED; /* must disable */ dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; I915_WRITE(reg, dspcntr); linear_offset = y * fb->pitches[0] + x * (fb->bits_per_pixel / 8); intel_crtc->dspaddr_offset = intel_gen4_compute_page_offset(&x, &y, obj->tiling_mode, fb->bits_per_pixel / 8, fb->pitches[0]); linear_offset -= intel_crtc->dspaddr_offset; DRM_DEBUG_KMS("Writing base %08X %08lX %d %d %d\n", obj->gtt_offset, linear_offset, x, y, fb->pitches[0]); I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); I915_MODIFY_DISPBASE(DSPSURF(plane), obj->gtt_offset + intel_crtc->dspaddr_offset); if (IS_HASWELL(dev)) { I915_WRITE(DSPOFFSET(plane), (y << 16) | x); } else { I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE(DSPLINOFF(plane), linear_offset); } POSTING_READ(reg); return 0; } /* Assume fb object is pinned & idle & fenced and just update base pointers */ static int intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, enum mode_set_atomic state) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; if (dev_priv->display.disable_fbc) dev_priv->display.disable_fbc(dev); intel_increase_pllclock(crtc); return dev_priv->display.update_plane(crtc, fb, x, y); } static int intel_finish_fb(struct drm_framebuffer *old_fb) { struct drm_i915_gem_object *obj = to_intel_framebuffer(old_fb)->obj; struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = obj->base.dev->dev_private; bool was_interruptible = dev_priv->mm.interruptible; int ret; mtx_lock(&dev->event_lock); while (!(atomic_read(&dev_priv->mm.wedged) || atomic_read(&obj->pending_flip) == 0)) { msleep(&dev_priv->pending_flip_queue, &dev->event_lock, 0, "915flp", 0); } mtx_unlock(&dev->event_lock); /* Big Hammer, we also need to ensure that any pending * MI_WAIT_FOR_EVENT inside a user batch buffer on the * current scanout is retired before unpinning the old * framebuffer. * * This should only fail upon a hung GPU, in which case we * can safely continue. 
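* (The loop above sleeps on pending_flip_queue under the event lock until pending_flip drops to zero, giving up early only when the GPU has been marked wedged.)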
*/ dev_priv->mm.interruptible = false; ret = i915_gem_object_finish_gpu(obj); dev_priv->mm.interruptible = was_interruptible; return ret; } static void intel_crtc_update_sarea_pos(struct drm_crtc *crtc, int x, int y) { struct drm_device *dev = crtc->dev; struct drm_i915_master_private *master_priv; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); if (!dev->primary->master) return; master_priv = dev->primary->master->driver_priv; if (!master_priv->sarea_priv) return; switch (intel_crtc->pipe) { case 0: master_priv->sarea_priv->pipeA_x = x; master_priv->sarea_priv->pipeA_y = y; break; case 1: master_priv->sarea_priv->pipeB_x = x; master_priv->sarea_priv->pipeB_y = y; break; default: break; } } static int intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *fb) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_framebuffer *old_fb; int ret; /* no fb bound */ if (!fb) { DRM_ERROR("No FB bound\n"); return 0; } if(intel_crtc->plane > dev_priv->num_pipe) { DRM_ERROR("no plane for crtc: plane %d, num_pipes %d\n", intel_crtc->plane, dev_priv->num_pipe); return -EINVAL; } DRM_LOCK(dev); ret = intel_pin_and_fence_fb_obj(dev, to_intel_framebuffer(fb)->obj, NULL); if (ret != 0) { DRM_UNLOCK(dev); DRM_ERROR("pin & fence failed\n"); return ret; } if (crtc->fb) intel_finish_fb(crtc->fb); ret = dev_priv->display.update_plane(crtc, fb, x, y); if (ret) { intel_unpin_fb_obj(to_intel_framebuffer(fb)->obj); DRM_UNLOCK(dev); DRM_ERROR("failed to update base address\n"); return ret; } old_fb = crtc->fb; crtc->fb = fb; crtc->x = x; crtc->y = y; if (old_fb) { intel_wait_for_vblank(dev, intel_crtc->pipe); intel_unpin_fb_obj(to_intel_framebuffer(old_fb)->obj); } intel_update_fbc(dev); DRM_UNLOCK(dev); intel_crtc_update_sarea_pos(crtc, x, y); return 0; } static void ironlake_set_pll_edp(struct drm_crtc *crtc, int clock) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 dpa_ctl; DRM_DEBUG_KMS("eDP PLL enable for clock %d\n", clock); dpa_ctl = I915_READ(DP_A); dpa_ctl &= ~DP_PLL_FREQ_MASK; if (clock < 200000) { u32 temp; dpa_ctl |= DP_PLL_FREQ_160MHZ; /* workaround for 160Mhz: 1) program 0x4600c bits 15:0 = 0x8124 2) program 0x46010 bit 0 = 1 3) program 0x46034 bit 24 = 1 4) program 0x64000 bit 14 = 1 */ temp = I915_READ(0x4600c); temp &= 0xffff0000; I915_WRITE(0x4600c, temp | 0x8124); temp = I915_READ(0x46010); I915_WRITE(0x46010, temp | 1); temp = I915_READ(0x46034); I915_WRITE(0x46034, temp | (1 << 24)); } else { dpa_ctl |= DP_PLL_FREQ_270MHZ; } I915_WRITE(DP_A, dpa_ctl); POSTING_READ(DP_A); udelay(500); } static void intel_fdi_normal_train(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 reg, temp; /* enable normal train */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); if (IS_IVYBRIDGE(dev)) { temp &= ~FDI_LINK_TRAIN_NONE_IVB; temp |= FDI_LINK_TRAIN_NONE_IVB | FDI_TX_ENHANCE_FRAME_ENABLE; } else { temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_NONE | FDI_TX_ENHANCE_FRAME_ENABLE; } I915_WRITE(reg, temp); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); if (HAS_PCH_CPT(dev)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_NORMAL_CPT; } else { temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_NONE; } I915_WRITE(reg, temp | FDI_RX_ENHANCE_FRAME_ENABLE); /* wait one 
idle pattern time */ POSTING_READ(reg); udelay(1000); /* IVB wants error correction enabled */ if (IS_IVYBRIDGE(dev)) I915_WRITE(reg, I915_READ(reg) | FDI_FS_ERRC_ENABLE | FDI_FE_ERRC_ENABLE); } static void ivb_modeset_global_resources(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *pipe_B_crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_B]); struct intel_crtc *pipe_C_crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_C]); uint32_t temp; /* When everything is off disable fdi C so that we can enable fdi B * with all lanes. XXX: This misses the case where a pipe is not using * any pch resources and so doesn't need any fdi lanes. */ if (!pipe_B_crtc->base.enabled && !pipe_C_crtc->base.enabled) { WARN_ON(I915_READ(FDI_RX_CTL(PIPE_B)) & FDI_RX_ENABLE); WARN_ON(I915_READ(FDI_RX_CTL(PIPE_C)) & FDI_RX_ENABLE); temp = I915_READ(SOUTH_CHICKEN1); temp &= ~FDI_BC_BIFURCATION_SELECT; DRM_DEBUG_KMS("disabling fdi C rx\n"); I915_WRITE(SOUTH_CHICKEN1, temp); } } /* The FDI link training functions for ILK/Ibexpeak. */ static void ironlake_fdi_link_train(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; u32 reg, temp, tries; /* FDI needs bits from pipe & plane first */ assert_pipe_enabled(dev_priv, pipe); assert_plane_enabled(dev_priv, plane); /* Train 1: unmask FDI RX Interrupt symbol_lock and bit_lock bit for train result */ reg = FDI_RX_IMR(pipe); temp = I915_READ(reg); temp &= ~FDI_RX_SYMBOL_LOCK; temp &= ~FDI_RX_BIT_LOCK; I915_WRITE(reg, temp); I915_READ(reg); udelay(150); /* enable CPU FDI TX and PCH FDI RX */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~(7 << 19); temp |= (intel_crtc->fdi_lanes - 1) << 19; temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp | FDI_TX_ENABLE); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp | FDI_RX_ENABLE); POSTING_READ(reg); udelay(150); /* Ironlake workaround, enable clock pointer after FDI enable */ I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR); I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR | FDI_RX_PHASE_SYNC_POINTER_EN); reg = FDI_RX_IIR(pipe); for (tries = 0; tries < 5; tries++) { temp = I915_READ(reg); DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); if ((temp & FDI_RX_BIT_LOCK)) { DRM_DEBUG_KMS("FDI train 1 done.\n"); I915_WRITE(reg, temp | FDI_RX_BIT_LOCK); break; } } if (tries == 5) DRM_ERROR("FDI train 1 fail!\n"); /* Train 2 */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_2; I915_WRITE(reg, temp); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_2; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(150); reg = FDI_RX_IIR(pipe); for (tries = 0; tries < 5; tries++) { temp = I915_READ(reg); DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); if (temp & FDI_RX_SYMBOL_LOCK) { I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK); DRM_DEBUG_KMS("FDI train 2 done.\n"); break; } } if (tries == 5) DRM_ERROR("FDI train 2 fail!\n"); DRM_DEBUG_KMS("FDI train done\n"); } static const int snb_b_fdi_train_param[] = { FDI_LINK_TRAIN_400MV_0DB_SNB_B, FDI_LINK_TRAIN_400MV_6DB_SNB_B, FDI_LINK_TRAIN_600MV_3_5DB_SNB_B, FDI_LINK_TRAIN_800MV_0DB_SNB_B, }; /* The FDI link training functions
 for SNB/Cougarpoint. */ static void gen6_fdi_link_train(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 reg, temp, i, retry; /* Train 1: unmask FDI RX Interrupt symbol_lock and bit_lock bit for train result */ reg = FDI_RX_IMR(pipe); temp = I915_READ(reg); temp &= ~FDI_RX_SYMBOL_LOCK; temp &= ~FDI_RX_BIT_LOCK; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(150); /* enable CPU FDI TX and PCH FDI RX */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~(7 << 19); temp |= (intel_crtc->fdi_lanes - 1) << 19; temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; /* SNB-B */ temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B; I915_WRITE(reg, temp | FDI_TX_ENABLE); I915_WRITE(FDI_RX_MISC(pipe), FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); if (HAS_PCH_CPT(dev)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; } else { temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; } I915_WRITE(reg, temp | FDI_RX_ENABLE); POSTING_READ(reg); udelay(150); for (i = 0; i < 4; i++) { reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[i]; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(500); for (retry = 0; retry < 5; retry++) { reg = FDI_RX_IIR(pipe); temp = I915_READ(reg); DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); if (temp & FDI_RX_BIT_LOCK) { I915_WRITE(reg, temp | FDI_RX_BIT_LOCK); DRM_DEBUG_KMS("FDI train 1 done.\n"); break; } udelay(50); } if (retry < 5) break; } if (i == 4) DRM_ERROR("FDI train 1 fail!\n"); /* Train 2 */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_2; if (IS_GEN6(dev)) { temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; /* SNB-B */ temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B; } I915_WRITE(reg, temp); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); if (HAS_PCH_CPT(dev)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_2_CPT; } else { temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_2; } I915_WRITE(reg, temp); POSTING_READ(reg); udelay(150); for (i = 0; i < 4; i++) { reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[i]; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(500); for (retry = 0; retry < 5; retry++) { reg = FDI_RX_IIR(pipe); temp = I915_READ(reg); DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); if (temp & FDI_RX_SYMBOL_LOCK) { I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK); DRM_DEBUG_KMS("FDI train 2 done.\n"); break; } udelay(50); } if (retry < 5) break; } if (i == 4) DRM_ERROR("FDI train 2 fail!\n"); DRM_DEBUG_KMS("FDI train done.\n"); } /* Manual link training for Ivy Bridge A0 parts */ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 reg, temp, i; /* Train 1: unmask FDI RX Interrupt symbol_lock and bit_lock bit for train result */ reg = FDI_RX_IMR(pipe); temp = I915_READ(reg); temp &= ~FDI_RX_SYMBOL_LOCK; temp &= ~FDI_RX_BIT_LOCK; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(150); DRM_DEBUG_KMS("FDI_RX_IIR before link train 0x%x\n", I915_READ(FDI_RX_IIR(pipe))); /* enable CPU FDI TX and PCH FDI RX */ reg =
FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~(7 << 19); temp |= (intel_crtc->fdi_lanes - 1) << 19; temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB); temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B; temp |= FDI_COMPOSITE_SYNC; I915_WRITE(reg, temp | FDI_TX_ENABLE); I915_WRITE(FDI_RX_MISC(pipe), FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_AUTO; temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; temp |= FDI_COMPOSITE_SYNC; I915_WRITE(reg, temp | FDI_RX_ENABLE); POSTING_READ(reg); udelay(150); for (i = 0; i < 4; i++) { reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[i]; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(500); reg = FDI_RX_IIR(pipe); temp = I915_READ(reg); DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); if (temp & FDI_RX_BIT_LOCK || (I915_READ(reg) & FDI_RX_BIT_LOCK)) { I915_WRITE(reg, temp | FDI_RX_BIT_LOCK); DRM_DEBUG_KMS("FDI train 1 done, level %i.\n", i); break; } } if (i == 4) DRM_ERROR("FDI train 1 fail!\n"); /* Train 2 */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_NONE_IVB; temp |= FDI_LINK_TRAIN_PATTERN_2_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B; I915_WRITE(reg, temp); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_2_CPT; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(150); for (i = 0; i < 4; i++) { reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[i]; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(500); reg = FDI_RX_IIR(pipe); temp = I915_READ(reg); DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp); if (temp & FDI_RX_SYMBOL_LOCK) { I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK); DRM_DEBUG_KMS("FDI train 2 done, level %i.\n", i); break; } } if (i == 4) DRM_ERROR("FDI train 2 fail!\n"); DRM_DEBUG_KMS("FDI train done.\n"); } static void ironlake_fdi_pll_enable(struct intel_crtc *intel_crtc) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; int pipe = intel_crtc->pipe; u32 reg, temp; /* enable PCH FDI RX PLL, wait warmup plus DMI latency */ reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); temp &= ~((0x7 << 19) | (0x7 << 16)); temp |= (intel_crtc->fdi_lanes - 1) << 19; temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11; I915_WRITE(reg, temp | FDI_RX_PLL_ENABLE); POSTING_READ(reg); udelay(200); /* Switch from Rawclk to PCDclk */ temp = I915_READ(reg); I915_WRITE(reg, temp | FDI_PCDCLK); POSTING_READ(reg); udelay(200); /* On Haswell, the PLL configuration for ports and pipes is handled * separately, as part of DDI setup */ if (!IS_HASWELL(dev)) { /* Enable CPU FDI TX PLL, always on for Ironlake */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); if ((temp & FDI_TX_PLL_ENABLE) == 0) { I915_WRITE(reg, temp | FDI_TX_PLL_ENABLE); POSTING_READ(reg); udelay(100); } } } static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; int pipe = intel_crtc->pipe; u32 reg, temp; /* Switch from PCDclk to Rawclk */ reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); I915_WRITE(reg, temp & ~FDI_PCDCLK); /* Disable CPU FDI TX PLL */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); I915_WRITE(reg, temp & 
~FDI_TX_PLL_ENABLE); POSTING_READ(reg); udelay(100); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); I915_WRITE(reg, temp & ~FDI_RX_PLL_ENABLE); /* Wait for the clocks to turn off. */ POSTING_READ(reg); udelay(100); } static void ironlake_fdi_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 reg, temp; /* disable CPU FDI tx and PCH FDI rx */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); I915_WRITE(reg, temp & ~FDI_TX_ENABLE); POSTING_READ(reg); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); temp &= ~(0x7 << 16); temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11; I915_WRITE(reg, temp & ~FDI_RX_ENABLE); POSTING_READ(reg); udelay(100); /* Ironlake workaround, disable clock pointer after downing FDI */ if (HAS_PCH_IBX(dev)) { I915_WRITE(FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR); } /* still set train pattern 1 */ reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp); reg = FDI_RX_CTL(pipe); temp = I915_READ(reg); if (HAS_PCH_CPT(dev)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; } else { temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; } /* BPC in FDI rx is consistent with that in PIPECONF */ temp &= ~(0x07 << 16); temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11; I915_WRITE(reg, temp); POSTING_READ(reg); udelay(100); } static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; bool pending; if (atomic_read(&dev_priv->mm.wedged)) return false; /* * NOTE Linux<->FreeBSD dev->event_lock is already locked in * intel_crtc_wait_for_pending_flips(). */ pending = to_intel_crtc(crtc)->unpin_work != NULL; return pending; } static void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; if (crtc->fb == NULL) return; mtx_lock(&dev->event_lock); while (intel_crtc_has_pending_flip(crtc)) { msleep(&dev_priv->pending_flip_queue, &dev->event_lock, 0, "915flp", 0); } mtx_unlock(&dev->event_lock); DRM_LOCK(dev); intel_finish_fb(crtc->fb); DRM_UNLOCK(dev); } static bool ironlake_crtc_driving_pch(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct intel_encoder *intel_encoder; /* * If there's a non-PCH eDP on this crtc, it must be DP_A, and that * must be driven by its own crtc; no sharing is possible. */ for_each_encoder_on_crtc(dev, crtc, intel_encoder) { switch (intel_encoder->type) { case INTEL_OUTPUT_EDP: if (!intel_encoder_is_pch_edp(&intel_encoder->base)) return false; continue; } } return true; } static bool haswell_crtc_driving_pch(struct drm_crtc *crtc) { return intel_pipe_has_type(crtc, INTEL_OUTPUT_ANALOG); } /* Program iCLKIP clock to the desired frequency */ static void lpt_program_iclkip(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 divsel, phaseinc, auxdiv, phasedir = 0; u32 temp; /* It is necessary to ungate the pixclk gate prior to programming * the divisors, and gate it back when it is done. 
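 *
 * Worked example (illustrative only, not from the original source):
 * for a 108000 KHz mode clock, desired_divisor = 172800000 / 108000
 * = 1600, giving auxdiv = 0, divsel = 1600 / 64 - 2 = 23 and
 * phaseinc = 1600 % 64 = 0.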
*/ I915_WRITE(PIXCLK_GATE, PIXCLK_GATE_GATE); /* Disable SSCCTL */ intel_sbi_write(dev_priv, SBI_SSCCTL6, intel_sbi_read(dev_priv, SBI_SSCCTL6, SBI_ICLK) | SBI_SSCCTL_DISABLE, SBI_ICLK); /* 20MHz is a corner case which is out of range for the 7-bit divisor */ if (crtc->mode.clock == 20000) { auxdiv = 1; divsel = 0x41; phaseinc = 0x20; } else { /* The iCLK virtual clock root frequency is in MHz, * but the crtc->mode.clock is in KHz. To get the divisors, * it is necessary to divide one by another, so we * convert the virtual clock precision to KHz here for higher * precision. */ u32 iclk_virtual_root_freq = 172800 * 1000; u32 iclk_pi_range = 64; u32 desired_divisor, msb_divisor_value, pi_value; desired_divisor = (iclk_virtual_root_freq / crtc->mode.clock); msb_divisor_value = desired_divisor / iclk_pi_range; pi_value = desired_divisor % iclk_pi_range; auxdiv = 0; divsel = msb_divisor_value - 2; phaseinc = pi_value; } /* This should not happen with any sane values */ WARN_ON(SBI_SSCDIVINTPHASE_DIVSEL(divsel) & ~SBI_SSCDIVINTPHASE_DIVSEL_MASK); WARN_ON(SBI_SSCDIVINTPHASE_DIR(phasedir) & ~SBI_SSCDIVINTPHASE_INCVAL_MASK); DRM_DEBUG_KMS("iCLKIP clock: found settings for %dKHz refresh rate: auxdiv=%x, divsel=%x, phasedir=%x, phaseinc=%x\n", crtc->mode.clock, auxdiv, divsel, phasedir, phaseinc); /* Program SSCDIVINTPHASE6 */ temp = intel_sbi_read(dev_priv, SBI_SSCDIVINTPHASE6, SBI_ICLK); temp &= ~SBI_SSCDIVINTPHASE_DIVSEL_MASK; temp |= SBI_SSCDIVINTPHASE_DIVSEL(divsel); temp &= ~SBI_SSCDIVINTPHASE_INCVAL_MASK; temp |= SBI_SSCDIVINTPHASE_INCVAL(phaseinc); temp |= SBI_SSCDIVINTPHASE_DIR(phasedir); temp |= SBI_SSCDIVINTPHASE_PROPAGATE; intel_sbi_write(dev_priv, SBI_SSCDIVINTPHASE6, temp, SBI_ICLK); /* Program SSCAUXDIV */ temp = intel_sbi_read(dev_priv, SBI_SSCAUXDIV6, SBI_ICLK); temp &= ~SBI_SSCAUXDIV_FINALDIV2SEL(1); temp |= SBI_SSCAUXDIV_FINALDIV2SEL(auxdiv); intel_sbi_write(dev_priv, SBI_SSCAUXDIV6, temp, SBI_ICLK); /* Enable modulator and associated divider */ temp = intel_sbi_read(dev_priv, SBI_SSCCTL6, SBI_ICLK); temp &= ~SBI_SSCCTL_DISABLE; intel_sbi_write(dev_priv, SBI_SSCCTL6, temp, SBI_ICLK); /* Wait for initialization time */ udelay(24); I915_WRITE(PIXCLK_GATE, PIXCLK_GATE_UNGATE); } /* * Enable PCH resources required for PCH ports: * - PCH PLLs * - FDI training & RX/TX * - update transcoder timings * - DP transcoding bits * - transcoder */ static void ironlake_pch_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 reg, temp; assert_transcoder_disabled(dev_priv, pipe); /* Write the TU size bits before fdi link training, so that error * detection works. */ I915_WRITE(FDI_RX_TUSIZE1(pipe), I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK); /* For PCH output, training FDI link */ dev_priv->display.fdi_link_train(crtc); /* XXX: pch pll's can be enabled any time before we enable the PCH * transcoder, and we actually should do this to not upset any PCH * transcoder that already uses the clock when we share it. * * Note that enable_pch_pll tries to do the right thing, but get_pch_pll * unconditionally resets the pll - we need that to have the right LVDS * enable sequence.
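 *
 * On CPT the PCH_DPLL_SEL write below does two things at once: it
 * enables the DPLL for this transcoder and selects whether DPLL A or
 * DPLL B drives it (the TRANSx_DPLLB_SEL bit).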
*/ ironlake_enable_pch_pll(intel_crtc); if (HAS_PCH_CPT(dev)) { u32 sel; temp = I915_READ(PCH_DPLL_SEL); switch (pipe) { default: case 0: temp |= TRANSA_DPLL_ENABLE; sel = TRANSA_DPLLB_SEL; break; case 1: temp |= TRANSB_DPLL_ENABLE; sel = TRANSB_DPLLB_SEL; break; case 2: temp |= TRANSC_DPLL_ENABLE; sel = TRANSC_DPLLB_SEL; break; } if (intel_crtc->pch_pll->pll_reg == _PCH_DPLL_B) temp |= sel; else temp &= ~sel; I915_WRITE(PCH_DPLL_SEL, temp); } /* set transcoder timing, panel must allow it */ assert_panel_unlocked(dev_priv, pipe); I915_WRITE(TRANS_HTOTAL(pipe), I915_READ(HTOTAL(pipe))); I915_WRITE(TRANS_HBLANK(pipe), I915_READ(HBLANK(pipe))); I915_WRITE(TRANS_HSYNC(pipe), I915_READ(HSYNC(pipe))); I915_WRITE(TRANS_VTOTAL(pipe), I915_READ(VTOTAL(pipe))); I915_WRITE(TRANS_VBLANK(pipe), I915_READ(VBLANK(pipe))); I915_WRITE(TRANS_VSYNC(pipe), I915_READ(VSYNC(pipe))); I915_WRITE(TRANS_VSYNCSHIFT(pipe), I915_READ(VSYNCSHIFT(pipe))); intel_fdi_normal_train(crtc); /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev) && (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) || intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))) { u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) >> 5; reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); temp &= ~(TRANS_DP_PORT_SEL_MASK | TRANS_DP_SYNC_MASK | TRANS_DP_BPC_MASK); temp |= (TRANS_DP_OUTPUT_ENABLE | TRANS_DP_ENH_FRAMING); temp |= bpc << 9; /* same format but at 11:9 */ if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DP_HSYNC_ACTIVE_HIGH; if (crtc->mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= TRANS_DP_VSYNC_ACTIVE_HIGH; switch (intel_trans_dp_port_sel(crtc)) { case PCH_DP_B: temp |= TRANS_DP_PORT_SEL_B; break; case PCH_DP_C: temp |= TRANS_DP_PORT_SEL_C; break; case PCH_DP_D: temp |= TRANS_DP_PORT_SEL_D; break; default: BUG(); } I915_WRITE(reg, temp); } ironlake_enable_pch_transcoder(dev_priv, pipe); } static void lpt_pch_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder; assert_transcoder_disabled(dev_priv, (enum pipe)TRANSCODER_A); lpt_program_iclkip(crtc); /* Set transcoder timing. */ I915_WRITE(_TRANS_HTOTAL_A, I915_READ(HTOTAL(cpu_transcoder))); I915_WRITE(_TRANS_HBLANK_A, I915_READ(HBLANK(cpu_transcoder))); I915_WRITE(_TRANS_HSYNC_A, I915_READ(HSYNC(cpu_transcoder))); I915_WRITE(_TRANS_VTOTAL_A, I915_READ(VTOTAL(cpu_transcoder))); I915_WRITE(_TRANS_VBLANK_A, I915_READ(VBLANK(cpu_transcoder))); I915_WRITE(_TRANS_VSYNC_A, I915_READ(VSYNC(cpu_transcoder))); I915_WRITE(_TRANS_VSYNCSHIFT_A, I915_READ(VSYNCSHIFT(cpu_transcoder))); lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } static void intel_put_pch_pll(struct intel_crtc *intel_crtc) { struct intel_pch_pll *pll = intel_crtc->pch_pll; if (pll == NULL) return; if (pll->refcount == 0) { WARN(1, "bad PCH PLL refcount\n"); return; } --pll->refcount; intel_crtc->pch_pll = NULL; } static struct intel_pch_pll *intel_get_pch_pll(struct intel_crtc *intel_crtc, u32 dpll, u32 fp) { struct drm_i915_private *dev_priv = intel_crtc->base.dev->dev_private; struct intel_pch_pll *pll; int i; pll = intel_crtc->pch_pll; if (pll) { DRM_DEBUG_KMS("CRTC:%d reusing existing PCH PLL %x\n", intel_crtc->base.base.id, pll->pll_reg); goto prepare; } if (HAS_PCH_IBX(dev_priv->dev)) { /* Ironlake PCH has a fixed PLL->PCH pipe mapping. 
*/ i = intel_crtc->pipe; pll = &dev_priv->pch_plls[i]; DRM_DEBUG_KMS("CRTC:%d using pre-allocated PCH PLL %x\n", intel_crtc->base.base.id, pll->pll_reg); goto found; } for (i = 0; i < dev_priv->num_pch_pll; i++) { pll = &dev_priv->pch_plls[i]; /* Only want to check enabled timings first */ if (pll->refcount == 0) continue; if (dpll == (I915_READ(pll->pll_reg) & 0x7fffffff) && fp == I915_READ(pll->fp0_reg)) { DRM_DEBUG_KMS("CRTC:%d sharing existing PCH PLL %x (refcount %d, active %d)\n", intel_crtc->base.base.id, pll->pll_reg, pll->refcount, pll->active); goto found; } } /* OK, no matching timings, maybe there's a free one? */ for (i = 0; i < dev_priv->num_pch_pll; i++) { pll = &dev_priv->pch_plls[i]; if (pll->refcount == 0) { DRM_DEBUG_KMS("CRTC:%d allocated PCH PLL %x\n", intel_crtc->base.base.id, pll->pll_reg); goto found; } } return NULL; found: intel_crtc->pch_pll = pll; pll->refcount++; DRM_DEBUG_DRIVER("using pll %d for pipe %d\n", i, intel_crtc->pipe); prepare: /* separate function? */ DRM_DEBUG_DRIVER("switching PLL %x off\n", pll->pll_reg); /* Wait for the clocks to stabilize before rewriting the regs */ I915_WRITE(pll->pll_reg, dpll & ~DPLL_VCO_ENABLE); POSTING_READ(pll->pll_reg); udelay(150); I915_WRITE(pll->fp0_reg, fp); I915_WRITE(pll->pll_reg, dpll & ~DPLL_VCO_ENABLE); pll->on = false; return pll; } void intel_cpt_verify_modeset(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = dev->dev_private; int dslreg = PIPEDSL(pipe); u32 temp; temp = I915_READ(dslreg); udelay(500); if (wait_for(I915_READ(dslreg) != temp, 5)) { if (wait_for(I915_READ(dslreg) != temp, 5)) DRM_ERROR("mode set failed: pipe %d stuck\n", pipe); } } static void ironlake_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; u32 temp; bool is_pch_port; WARN_ON(!crtc->enabled); if (intel_crtc->active) return; intel_crtc->active = true; intel_update_watermarks(dev); if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { temp = I915_READ(PCH_LVDS); if ((temp & LVDS_PORT_EN) == 0) I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN); } is_pch_port = ironlake_crtc_driving_pch(crtc); if (is_pch_port) { /* Note: FDI PLL enabling _must_ be done before we enable the * cpu pipes, hence this is separate from all the other fdi/pch * enabling. */ ironlake_fdi_pll_enable(intel_crtc); } else { assert_fdi_tx_disabled(dev_priv, pipe); assert_fdi_rx_disabled(dev_priv, pipe); } for_each_encoder_on_crtc(dev, crtc, encoder) if (encoder->pre_enable) encoder->pre_enable(encoder); /* Enable panel fitting for LVDS */ if (dev_priv->pch_pf_size && (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) || intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))) { /* Force use of hard-coded filter coefficients * as some pre-programmed values are broken, * e.g. x201.
*/ if (IS_IVYBRIDGE(dev)) I915_WRITE(PF_CTL(pipe), PF_ENABLE | PF_FILTER_MED_3x3 | PF_PIPE_SEL_IVB(pipe)); else I915_WRITE(PF_CTL(pipe), PF_ENABLE | PF_FILTER_MED_3x3); I915_WRITE(PF_WIN_POS(pipe), dev_priv->pch_pf_pos); I915_WRITE(PF_WIN_SZ(pipe), dev_priv->pch_pf_size); } /* * On ILK+ LUT must be loaded before the pipe is running but with * clocks enabled */ intel_crtc_load_lut(crtc); intel_enable_pipe(dev_priv, pipe, is_pch_port); intel_enable_plane(dev_priv, plane, pipe); if (is_pch_port) ironlake_pch_enable(crtc); DRM_LOCK(dev); intel_update_fbc(dev); DRM_UNLOCK(dev); intel_crtc_update_cursor(crtc, true); for_each_encoder_on_crtc(dev, crtc, encoder) encoder->enable(encoder); if (HAS_PCH_CPT(dev)) intel_cpt_verify_modeset(dev, intel_crtc->pipe); /* * There seems to be a race in PCH platform hw (at least on some * outputs) where an enabled pipe still completes any pageflip right * away (as if the pipe is off) instead of waiting for vblank. As soon * as the first vblank happened, everything works as expected. Hence just * wait for one vblank before returning to avoid strange things * happening. */ intel_wait_for_vblank(dev, intel_crtc->pipe); } static void haswell_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; bool is_pch_port; WARN_ON(!crtc->enabled); if (intel_crtc->active) return; intel_crtc->active = true; intel_update_watermarks(dev); is_pch_port = haswell_crtc_driving_pch(crtc); if (is_pch_port) dev_priv->display.fdi_link_train(crtc); for_each_encoder_on_crtc(dev, crtc, encoder) if (encoder->pre_enable) encoder->pre_enable(encoder); intel_ddi_enable_pipe_clock(intel_crtc); /* Enable panel fitting for eDP */ if (dev_priv->pch_pf_size && intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) { /* Force use of hard-coded filter coefficients * as some pre-programmed values are broken, * e.g. x201. */ I915_WRITE(PF_CTL(pipe), PF_ENABLE | PF_FILTER_MED_3x3 | PF_PIPE_SEL_IVB(pipe)); I915_WRITE(PF_WIN_POS(pipe), dev_priv->pch_pf_pos); I915_WRITE(PF_WIN_SZ(pipe), dev_priv->pch_pf_size); } /* * On ILK+ LUT must be loaded before the pipe is running but with * clocks enabled */ intel_crtc_load_lut(crtc); intel_ddi_set_pipe_settings(crtc); intel_ddi_enable_pipe_func(crtc); intel_enable_pipe(dev_priv, pipe, is_pch_port); intel_enable_plane(dev_priv, plane, pipe); if (is_pch_port) lpt_pch_enable(crtc); DRM_LOCK(dev); intel_update_fbc(dev); DRM_UNLOCK(dev); intel_crtc_update_cursor(crtc, true); for_each_encoder_on_crtc(dev, crtc, encoder) encoder->enable(encoder); /* * There seems to be a race in PCH platform hw (at least on some * outputs) where an enabled pipe still completes any pageflip right * away (as if the pipe is off) instead of waiting for vblank. As soon * as the first vblank happened, everything works as expected. Hence just * wait for one vblank before returning to avoid strange things * happening. 
*/ intel_wait_for_vblank(dev, intel_crtc->pipe); } static void ironlake_crtc_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; u32 reg, temp; if (!intel_crtc->active) return; for_each_encoder_on_crtc(dev, crtc, encoder) encoder->disable(encoder); intel_crtc_wait_for_pending_flips(crtc); drm_vblank_off(dev, pipe); intel_crtc_update_cursor(crtc, false); intel_disable_plane(dev_priv, plane, pipe); if (dev_priv->cfb_plane == plane) intel_disable_fbc(dev); intel_disable_pipe(dev_priv, pipe); /* Disable PF */ I915_WRITE(PF_CTL(pipe), 0); I915_WRITE(PF_WIN_SZ(pipe), 0); for_each_encoder_on_crtc(dev, crtc, encoder) if (encoder->post_disable) encoder->post_disable(encoder); ironlake_fdi_disable(crtc); ironlake_disable_pch_transcoder(dev_priv, pipe); if (HAS_PCH_CPT(dev)) { /* disable TRANS_DP_CTL */ reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); temp &= ~(TRANS_DP_OUTPUT_ENABLE | TRANS_DP_PORT_SEL_MASK); temp |= TRANS_DP_PORT_SEL_NONE; I915_WRITE(reg, temp); /* disable DPLL_SEL */ temp = I915_READ(PCH_DPLL_SEL); switch (pipe) { case 0: temp &= ~(TRANSA_DPLL_ENABLE | TRANSA_DPLLB_SEL); break; case 1: temp &= ~(TRANSB_DPLL_ENABLE | TRANSB_DPLLB_SEL); break; case 2: /* C shares PLL A or B */ temp &= ~(TRANSC_DPLL_ENABLE | TRANSC_DPLLB_SEL); break; default: BUG(); /* wtf */ } I915_WRITE(PCH_DPLL_SEL, temp); } /* disable PCH DPLL */ intel_disable_pch_pll(intel_crtc); ironlake_fdi_pll_disable(intel_crtc); intel_crtc->active = false; intel_update_watermarks(dev); DRM_LOCK(dev); intel_update_fbc(dev); DRM_UNLOCK(dev); } static void haswell_crtc_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder; bool is_pch_port; if (!intel_crtc->active) return; is_pch_port = haswell_crtc_driving_pch(crtc); for_each_encoder_on_crtc(dev, crtc, encoder) encoder->disable(encoder); intel_crtc_wait_for_pending_flips(crtc); drm_vblank_off(dev, pipe); intel_crtc_update_cursor(crtc, false); intel_disable_plane(dev_priv, plane, pipe); if (dev_priv->cfb_plane == plane) intel_disable_fbc(dev); intel_disable_pipe(dev_priv, pipe); intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); /* Disable PF */ I915_WRITE(PF_CTL(pipe), 0); I915_WRITE(PF_WIN_SZ(pipe), 0); intel_ddi_disable_pipe_clock(intel_crtc); for_each_encoder_on_crtc(dev, crtc, encoder) if (encoder->post_disable) encoder->post_disable(encoder); if (is_pch_port) { lpt_disable_pch_transcoder(dev_priv); intel_ddi_fdi_disable(crtc); } intel_crtc->active = false; intel_update_watermarks(dev); DRM_LOCK(dev); intel_update_fbc(dev); DRM_UNLOCK(dev); } static void ironlake_crtc_off(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); intel_put_pch_pll(intel_crtc); } static void haswell_crtc_off(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); /* Stop saying we're using TRANSCODER_EDP because some other CRTC might * start using it. 
*/ intel_crtc->cpu_transcoder = (enum transcoder)intel_crtc->pipe; intel_ddi_put_crtc_pll(crtc); } static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable) { if (!enable && intel_crtc->overlay) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; DRM_LOCK(dev); dev_priv->mm.interruptible = false; (void) intel_overlay_switch_off(intel_crtc->overlay); dev_priv->mm.interruptible = true; DRM_UNLOCK(dev); } /* Let userspace switch the overlay on again. In most cases userspace * has to recompute where to put it anyway. */ } static void i9xx_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; WARN_ON(!crtc->enabled); if (intel_crtc->active) return; intel_crtc->active = true; intel_update_watermarks(dev); intel_enable_pll(dev_priv, pipe); intel_enable_pipe(dev_priv, pipe, false); intel_enable_plane(dev_priv, plane, pipe); intel_crtc_load_lut(crtc); intel_update_fbc(dev); /* Give the overlay scaler a chance to enable if it's on this pipe */ intel_crtc_dpms_overlay(intel_crtc, true); intel_crtc_update_cursor(crtc, true); for_each_encoder_on_crtc(dev, crtc, encoder) encoder->enable(encoder); } static void i9xx_crtc_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; u32 pctl; if (!intel_crtc->active) return; for_each_encoder_on_crtc(dev, crtc, encoder) encoder->disable(encoder); /* Give the overlay scaler a chance to disable if it's on this pipe */ intel_crtc_wait_for_pending_flips(crtc); drm_vblank_off(dev, pipe); intel_crtc_dpms_overlay(intel_crtc, false); intel_crtc_update_cursor(crtc, false); if (dev_priv->cfb_plane == plane) intel_disable_fbc(dev); intel_disable_plane(dev_priv, plane, pipe); intel_disable_pipe(dev_priv, pipe); /* Disable panel fitter if it is on this pipe. */ pctl = I915_READ(PFIT_CONTROL); if ((pctl & PFIT_ENABLE) && ((pctl & PFIT_PIPE_MASK) >> PFIT_PIPE_SHIFT) == pipe) I915_WRITE(PFIT_CONTROL, 0); intel_disable_pll(dev_priv, pipe); intel_crtc->active = false; intel_update_fbc(dev); intel_update_watermarks(dev); } static void i9xx_crtc_off(struct drm_crtc *crtc) { } static void intel_crtc_update_sarea(struct drm_crtc *crtc, bool enabled) { struct drm_device *dev = crtc->dev; struct drm_i915_master_private *master_priv; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; if (!dev->primary->master) return; master_priv = dev->primary->master->driver_priv; if (!master_priv->sarea_priv) return; switch (pipe) { case 0: master_priv->sarea_priv->pipeA_w = enabled ? crtc->mode.hdisplay : 0; master_priv->sarea_priv->pipeA_h = enabled ? crtc->mode.vdisplay : 0; break; case 1: master_priv->sarea_priv->pipeB_w = enabled ? crtc->mode.hdisplay : 0; master_priv->sarea_priv->pipeB_h = enabled ? crtc->mode.vdisplay : 0; break; default: DRM_ERROR("Can't update pipe %c in SAREA\n", pipe_name(pipe)); break; } } /** * Sets the power management mode of the pipe and plane.
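 *
 * The CRTC is kept enabled as long as any encoder on it still reports
 * active connectors; once none do, the full crtc_disable path runs and
 * the sarea state is updated to match.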
*/ void intel_crtc_update_dpms(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_encoder *intel_encoder; bool enable = false; for_each_encoder_on_crtc(dev, crtc, intel_encoder) enable |= intel_encoder->connectors_active; if (enable) dev_priv->display.crtc_enable(crtc); else dev_priv->display.crtc_disable(crtc); intel_crtc_update_sarea(crtc, enable); } static void intel_crtc_noop(struct drm_crtc *crtc) { } static void intel_crtc_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_connector *connector; struct drm_i915_private *dev_priv = dev->dev_private; /* crtc should still be enabled when we disable it. */ WARN_ON(!crtc->enabled); dev_priv->display.crtc_disable(crtc); intel_crtc_update_sarea(crtc, false); dev_priv->display.off(crtc); assert_plane_disabled(dev->dev_private, to_intel_crtc(crtc)->plane); assert_pipe_disabled(dev->dev_private, to_intel_crtc(crtc)->pipe); if (crtc->fb) { DRM_LOCK(dev); intel_unpin_fb_obj(to_intel_framebuffer(crtc->fb)->obj); DRM_UNLOCK(dev); crtc->fb = NULL; } /* Update computed state. */ list_for_each_entry(connector, &dev->mode_config.connector_list, head) { if (!connector->encoder || !connector->encoder->crtc) continue; if (connector->encoder->crtc != crtc) continue; connector->dpms = DRM_MODE_DPMS_OFF; to_intel_encoder(connector->encoder)->connectors_active = false; } } void intel_modeset_disable(struct drm_device *dev) { struct drm_crtc *crtc; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { if (crtc->enabled) intel_crtc_disable(crtc); } } void intel_encoder_noop(struct drm_encoder *encoder) { } void intel_encoder_destroy(struct drm_encoder *encoder) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); drm_encoder_cleanup(encoder); free(intel_encoder, DRM_MEM_KMS); } /* Simple dpms helper for encoders with just one connector, no cloning and only * one kind of off state. It clamps all !ON modes to fully OFF and changes the * state of the entire output pipe. */ void intel_encoder_dpms(struct intel_encoder *encoder, int mode) { if (mode == DRM_MODE_DPMS_ON) { encoder->connectors_active = true; intel_crtc_update_dpms(encoder->base.crtc); } else { encoder->connectors_active = false; intel_crtc_update_dpms(encoder->base.crtc); } } /* Cross check the actual hw state with our own modeset state tracking (and its * internal consistency). */ static void intel_connector_check_state(struct intel_connector *connector) { if (connector->get_hw_state(connector)) { struct intel_encoder *encoder = connector->encoder; struct drm_crtc *crtc; bool encoder_enabled; enum pipe pipe; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.base.id, drm_get_connector_name(&connector->base)); WARN(connector->base.dpms == DRM_MODE_DPMS_OFF, "wrong connector dpms state\n"); WARN(connector->base.encoder != &encoder->base, "active connector not linked to encoder\n"); WARN(!encoder->connectors_active, "encoder->connectors_active not set\n"); encoder_enabled = encoder->get_hw_state(encoder, &pipe); WARN(!encoder_enabled, "encoder not enabled\n"); if (WARN_ON(!encoder->base.crtc)) return; crtc = encoder->base.crtc; WARN(!crtc->enabled, "crtc not enabled\n"); WARN(!to_intel_crtc(crtc)->active, "crtc not active\n"); WARN(pipe != to_intel_crtc(crtc)->pipe, "encoder active on the wrong pipe\n"); } } /* Even simpler default implementation, if there's really no special case to * consider.
*/ void intel_connector_dpms(struct drm_connector *connector, int mode) { struct intel_encoder *encoder = intel_attached_encoder(connector); /* All the simple cases only support two dpms states. */ if (mode != DRM_MODE_DPMS_ON) mode = DRM_MODE_DPMS_OFF; if (mode == connector->dpms) return; connector->dpms = mode; /* Only need to change hw state when actually enabled */ if (encoder->base.crtc) intel_encoder_dpms(encoder, mode); else WARN_ON(encoder->connectors_active != false); intel_modeset_check_state(connector->dev); } /* Simple connector->get_hw_state implementation for encoders that support only * one connector and no cloning and hence the encoder state determines the state * of the connector. */ bool intel_connector_get_hw_state(struct intel_connector *connector) { enum pipe pipe = 0; struct intel_encoder *encoder = connector->encoder; return encoder->get_hw_state(encoder, &pipe); } static bool intel_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; if (HAS_PCH_SPLIT(dev)) { /* FDI link clock is fixed at 2.7G */ if (mode->clock * 3 > IRONLAKE_FDI_FREQ * 4) return false; } /* All interlaced capable intel hw wants timings in frames. Note though * that intel_lvds_mode_fixup does some funny tricks with the crtc * timings, so we need to be careful not to clobber these.*/ if (!(adjusted_mode->private_flags & INTEL_MODE_CRTC_TIMINGS_SET)) drm_mode_set_crtcinfo(adjusted_mode, 0); /* WaPruneModeWithIncorrectHsyncOffset: Cantiga+ cannot handle modes * with a hsync front porch of 0. */ if ((INTEL_INFO(dev)->gen > 4 || IS_G4X(dev)) && adjusted_mode->hsync_start == adjusted_mode->hdisplay) return false; return true; } static int valleyview_get_display_clock_speed(struct drm_device *dev) { return 400000; /* FIXME */ } static int i945_get_display_clock_speed(struct drm_device *dev) { return 400000; } static int i915_get_display_clock_speed(struct drm_device *dev) { return 333000; } static int i9xx_misc_get_display_clock_speed(struct drm_device *dev) { return 200000; } static int i915gm_get_display_clock_speed(struct drm_device *dev) { u16 gcfgc = 0; pci_read_config_word(dev->dev, GCFGC, &gcfgc); if (gcfgc & GC_LOW_FREQUENCY_ENABLE) return 133000; else { switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { case GC_DISPLAY_CLOCK_333_MHZ: return 333000; default: case GC_DISPLAY_CLOCK_190_200_MHZ: return 190000; } } } static int i865_get_display_clock_speed(struct drm_device *dev) { return 266000; } static int i855_get_display_clock_speed(struct drm_device *dev) { u16 hpllcc = 0; /* Assume that the hardware is in the high speed state. This * should be the default. 
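 * (Note that hpllcc is left zero-initialized below rather than read
 * back from the chipset, so the switch simply decodes that assumed
 * value.)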
*/ switch (hpllcc & GC_CLOCK_CONTROL_MASK) { case GC_CLOCK_133_200: case GC_CLOCK_100_200: return 200000; case GC_CLOCK_166_250: return 250000; case GC_CLOCK_100_133: return 133000; } /* Shouldn't happen */ return 0; } static int i830_get_display_clock_speed(struct drm_device *dev) { return 133000; } struct fdi_m_n { u32 tu; u32 gmch_m; u32 gmch_n; u32 link_m; u32 link_n; }; static void fdi_reduce_ratio(u32 *num, u32 *den) { while (*num > 0xffffff || *den > 0xffffff) { *num >>= 1; *den >>= 1; } } static void ironlake_compute_m_n(int bits_per_pixel, int nlanes, int pixel_clock, int link_clock, struct fdi_m_n *m_n) { m_n->tu = 64; /* default size */ /* BUG_ON(pixel_clock > INT_MAX / 36); */ m_n->gmch_m = bits_per_pixel * pixel_clock; m_n->gmch_n = link_clock * nlanes * 8; fdi_reduce_ratio(&m_n->gmch_m, &m_n->gmch_n); m_n->link_m = pixel_clock; m_n->link_n = link_clock; fdi_reduce_ratio(&m_n->link_m, &m_n->link_n); } static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) { if (i915_panel_use_ssc >= 0) return i915_panel_use_ssc != 0; return dev_priv->lvds_use_ssc && !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE); } /** * intel_choose_pipe_bpp_dither - figure out what color depth the pipe should send * @crtc: CRTC structure * @mode: requested mode * * A pipe may be connected to one or more outputs. Based on the depth of the * attached framebuffer, choose a good color depth to use on the pipe. * * If possible, match the pipe depth to the fb depth. In some cases, this * isn't ideal, because the connected output supports a lesser or restricted * set of depths. Resolve that here: * LVDS typically supports only 6bpc, so clamp down in that case * HDMI supports only 8bpc or 12bpc, so clamp to 8bpc with dither for 10bpc * Displays may support a restricted set as well, check EDID and clamp as * appropriate. * DP may want to dither down to 6bpc to fit larger modes * * RETURNS: * Dithering requirement (i.e. false if display bpc and pipe bpc match, * true if they don't match). 
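 *
 * Worked example (illustrative only, not from the original source): a
 * 24 bpp framebuffer (8 bpc) on an LVDS panel clamped to 6 bpc yields
 * *pipe_bpp = 18 and a return value of true, i.e. dithering is needed.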
*/ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc, struct drm_framebuffer *fb, unsigned int *pipe_bpp, struct drm_display_mode *mode) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_connector *connector; struct intel_encoder *intel_encoder; unsigned int display_bpc = UINT_MAX, bpc; /* Walk the encoders & connectors on this crtc, get min bpc */ for_each_encoder_on_crtc(dev, crtc, intel_encoder) { if (intel_encoder->type == INTEL_OUTPUT_LVDS) { unsigned int lvds_bpc; if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP) lvds_bpc = 8; else lvds_bpc = 6; if (lvds_bpc < display_bpc) { DRM_DEBUG_KMS("clamping display bpc (was %d) to LVDS (%d)\n", display_bpc, lvds_bpc); display_bpc = lvds_bpc; } continue; } /* Not one of the known troublemakers, check the EDID */ list_for_each_entry(connector, &dev->mode_config.connector_list, head) { if (connector->encoder != &intel_encoder->base) continue; /* Don't use an invalid EDID bpc value */ if (connector->display_info.bpc && connector->display_info.bpc < display_bpc) { DRM_DEBUG_KMS("clamping display bpc (was %d) to EDID reported max of %d\n", display_bpc, connector->display_info.bpc); display_bpc = connector->display_info.bpc; } } if (intel_encoder->type == INTEL_OUTPUT_EDP) { /* Use VBT settings if we have an eDP panel */ unsigned int edp_bpc = dev_priv->edp.bpp / 3; if (edp_bpc && edp_bpc < display_bpc) { DRM_DEBUG_KMS("clamping display bpc (was %d) to eDP (%d)\n", display_bpc, edp_bpc); display_bpc = edp_bpc; } continue; } /* * HDMI is either 12 or 8, so if the display lets 10bpc sneak * through, clamp it down. (Note: >12bpc will be caught below.) */ if (intel_encoder->type == INTEL_OUTPUT_HDMI) { if (display_bpc > 8 && display_bpc < 12) { DRM_DEBUG_KMS("forcing bpc to 12 for HDMI\n"); display_bpc = 12; } else { DRM_DEBUG_KMS("forcing bpc to 8 for HDMI\n"); display_bpc = 8; } } } if (mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) { DRM_DEBUG_KMS("Dithering DP to 6bpc\n"); display_bpc = 6; } /* * We could just drive the pipe at the highest bpc all the time and * enable dithering as needed, but that costs bandwidth. So choose * the minimum value that expresses the full color range of the fb but * also stays within the max display bpc discovered above. 
switch (fb->depth) { case 8: bpc = 8; /* since we go through a colormap */ break; case 15: case 16: bpc = 6; /* min is 18bpp */ break; case 24: bpc = 8; break; case 30: bpc = 10; break; case 48: bpc = 12; break; default: DRM_DEBUG("unsupported depth, assuming 24 bits\n"); bpc = min((unsigned int)8, display_bpc); break; } display_bpc = min(display_bpc, bpc); DRM_DEBUG_KMS("setting pipe bpc to %d (max display bpc %d)\n", bpc, display_bpc); *pipe_bpp = display_bpc * 3; return display_bpc != bpc; } static int vlv_get_refclk(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; int refclk = 27000; /* for DP & HDMI */ return 100000; /* only one validated so far */ if (intel_pipe_has_type(crtc, INTEL_OUTPUT_ANALOG)) { refclk = 96000; } else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { if (intel_panel_use_ssc(dev_priv)) refclk = 100000; else refclk = 96000; } else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) { refclk = 100000; } return refclk; } static int i9xx_get_refclk(struct drm_crtc *crtc, int num_connectors) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; int refclk; if (IS_VALLEYVIEW(dev)) { refclk = vlv_get_refclk(crtc); } else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) && intel_panel_use_ssc(dev_priv) && num_connectors < 2) { refclk = dev_priv->lvds_ssc_freq * 1000; DRM_DEBUG_KMS("using SSC reference clock of %d MHz\n", refclk / 1000); } else if (!IS_GEN2(dev)) { refclk = 96000; } else { refclk = 48000; } return refclk; } static void i9xx_adjust_sdvo_tv_clock(struct drm_display_mode *adjusted_mode, intel_clock_t *clock) { /* SDVO TV has fixed PLL values that depend on its clock range; this mirrors the vbios setting. */ if (adjusted_mode->clock >= 100000 && adjusted_mode->clock < 140500) { clock->p1 = 2; clock->p2 = 10; clock->n = 3; clock->m1 = 16; clock->m2 = 8; } else if (adjusted_mode->clock >= 140500 && adjusted_mode->clock <= 200000) { clock->p1 = 1; clock->p2 = 10; clock->n = 6; clock->m1 = 12; clock->m2 = 8; } } static void i9xx_update_pll_dividers(struct drm_crtc *crtc, intel_clock_t *clock, intel_clock_t *reduced_clock) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 fp, fp2 = 0; if (IS_PINEVIEW(dev)) { fp = (1 << clock->n) << 16 | clock->m1 << 8 | clock->m2; if (reduced_clock) fp2 = (1 << reduced_clock->n) << 16 | reduced_clock->m1 << 8 | reduced_clock->m2; } else { fp = clock->n << 16 | clock->m1 << 8 | clock->m2; if (reduced_clock) fp2 = reduced_clock->n << 16 | reduced_clock->m1 << 8 | reduced_clock->m2; } I915_WRITE(FP0(pipe), fp); intel_crtc->lowfreq_avail = false; if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) && reduced_clock && i915_powersave) { I915_WRITE(FP1(pipe), fp2); intel_crtc->lowfreq_avail = true; } else { I915_WRITE(FP1(pipe), fp); } } static void intel_update_lvds(struct drm_crtc *crtc, intel_clock_t *clock, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 temp; temp = I915_READ(LVDS); temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; if (pipe == 1) { temp |= LVDS_PIPEB_SELECT; } else { temp &= ~LVDS_PIPEB_SELECT; } /* set the corresponding LVDS_BORDER bit */ temp |= dev_priv->lvds_border_bits; /* Set the B0-B3 data pairs corresponding to whether we're
going to * set the DPLLs for dual-channel mode or not. */ if (clock->p2 == 7) temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP; else temp &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP); /* It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP) * appropriately here, but we need to look more thoroughly into how * panels behave in the two modes. */ /* set the dithering flag on LVDS as needed */ if (INTEL_INFO(dev)->gen >= 4) { if (dev_priv->lvds_dither) temp |= LVDS_ENABLE_DITHER; else temp &= ~LVDS_ENABLE_DITHER; } temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY); if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC) temp |= LVDS_HSYNC_POLARITY; if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC) temp |= LVDS_VSYNC_POLARITY; I915_WRITE(LVDS, temp); } static void vlv_update_pll(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, intel_clock_t *clock, intel_clock_t *reduced_clock, int num_connectors) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 dpll, mdiv, pdiv; u32 bestn, bestm1, bestm2, bestp1, bestp2; bool is_sdvo; u32 temp; is_sdvo = intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO) || intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI); dpll = DPLL_VGA_MODE_DIS; dpll |= DPLL_EXT_BUFFER_ENABLE_VLV; dpll |= DPLL_REFA_CLK_ENABLE_VLV; dpll |= DPLL_INTEGRATED_CLOCK_VLV; I915_WRITE(DPLL(pipe), dpll); POSTING_READ(DPLL(pipe)); bestn = clock->n; bestm1 = clock->m1; bestm2 = clock->m2; bestp1 = clock->p1; bestp2 = clock->p2; /* * In Valleyview PLL and program lane counter registers are exposed * through DPIO interface */ mdiv = ((bestm1 << DPIO_M1DIV_SHIFT) | (bestm2 & DPIO_M2DIV_MASK)); mdiv |= ((bestp1 << DPIO_P1_SHIFT) | (bestp2 << DPIO_P2_SHIFT)); mdiv |= ((bestn << DPIO_N_SHIFT)); mdiv |= (1 << DPIO_POST_DIV_SHIFT); mdiv |= (1 << DPIO_K_SHIFT); mdiv |= DPIO_ENABLE_CALIBRATION; intel_dpio_write(dev_priv, DPIO_DIV(pipe), mdiv); intel_dpio_write(dev_priv, DPIO_CORE_CLK(pipe), 0x01000000); pdiv = (1 << DPIO_REFSEL_OVERRIDE) | (5 << DPIO_PLL_MODESEL_SHIFT) | (3 << DPIO_BIAS_CURRENT_CTL_SHIFT) | (1<<20) | (7 << DPIO_PLL_REFCLK_SEL_SHIFT) | (8 << DPIO_DRIVER_CTL_SHIFT) | (5 << DPIO_CLK_BIAS_CTL_SHIFT); intel_dpio_write(dev_priv, DPIO_REFSFR(pipe), pdiv); intel_dpio_write(dev_priv, DPIO_LFP_COEFF(pipe), 0x005f003b); dpll |= DPLL_VCO_ENABLE; I915_WRITE(DPLL(pipe), dpll); POSTING_READ(DPLL(pipe)); if (wait_for(((I915_READ(DPLL(pipe)) & DPLL_LOCK_VLV) == DPLL_LOCK_VLV), 1)) DRM_ERROR("DPLL %d failed to lock\n", pipe); intel_dpio_write(dev_priv, DPIO_FASTCLK_DISABLE, 0x620); if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) intel_dp_set_m_n(crtc, mode, adjusted_mode); I915_WRITE(DPLL(pipe), dpll); /* Wait for the clocks to stabilize. 
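 * (The POSTING_READ below flushes the posted MMIO write, so the 150 us
 * delay is measured from the point the hardware actually saw it.)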
*/ POSTING_READ(DPLL(pipe)); udelay(150); temp = 0; if (is_sdvo) { temp = intel_mode_get_pixel_multiplier(adjusted_mode); if (temp > 1) temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT; else temp = 0; } I915_WRITE(DPLL_MD(pipe), temp); POSTING_READ(DPLL_MD(pipe)); /* Now program lane control registers */ if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) || intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI)) { temp = 0x1000C4; if (pipe == 1) temp |= (1 << 21); intel_dpio_write(dev_priv, DPIO_DATA_CHANNEL1, temp); } if (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) { temp = 0x1000C4; if (pipe == 1) temp |= (1 << 21); intel_dpio_write(dev_priv, DPIO_DATA_CHANNEL2, temp); } } static void i9xx_update_pll(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, intel_clock_t *clock, intel_clock_t *reduced_clock, int num_connectors) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 dpll; bool is_sdvo; i9xx_update_pll_dividers(crtc, clock, reduced_clock); is_sdvo = intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO) || intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI); dpll = DPLL_VGA_MODE_DIS; if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) dpll |= DPLLB_MODE_LVDS; else dpll |= DPLLB_MODE_DAC_SERIAL; if (is_sdvo) { int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode); if (pixel_multiplier > 1) { if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) dpll |= (pixel_multiplier - 1) << SDVO_MULTIPLIER_SHIFT_HIRES; } dpll |= DPLL_DVO_HIGH_SPEED; } if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) dpll |= DPLL_DVO_HIGH_SPEED; /* compute bitmask from p1 value */ if (IS_PINEVIEW(dev)) dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW; else { dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; if (IS_G4X(dev) && reduced_clock) dpll |= (1 << (reduced_clock->p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT; } switch (clock->p2) { case 5: dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5; break; case 7: dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7; break; case 10: dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10; break; case 14: dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14; break; } if (INTEL_INFO(dev)->gen >= 4) dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT); if (is_sdvo && intel_pipe_has_type(crtc, INTEL_OUTPUT_TVOUT)) dpll |= PLL_REF_INPUT_TVCLKINBC; else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_TVOUT)) /* XXX: just matching BIOS for now */ /* dpll |= PLL_REF_INPUT_TVCLKINBC; */ dpll |= 3; else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) && intel_panel_use_ssc(dev_priv) && num_connectors < 2) dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN; else dpll |= PLL_REF_INPUT_DREFCLK; dpll |= DPLL_VCO_ENABLE; I915_WRITE(DPLL(pipe), dpll & ~DPLL_VCO_ENABLE); POSTING_READ(DPLL(pipe)); udelay(150); /* The LVDS pin pair needs to be on before the DPLLs are enabled. * This is an exception to the general rule that mode_set doesn't turn * things on. */ if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) intel_update_lvds(crtc, clock, adjusted_mode); if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) intel_dp_set_m_n(crtc, mode, adjusted_mode); I915_WRITE(DPLL(pipe), dpll); /* Wait for the clocks to stabilize.
*/ POSTING_READ(DPLL(pipe)); udelay(150); if (INTEL_INFO(dev)->gen >= 4) { u32 temp = 0; if (is_sdvo) { temp = intel_mode_get_pixel_multiplier(adjusted_mode); if (temp > 1) temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT; else temp = 0; } I915_WRITE(DPLL_MD(pipe), temp); } else { /* The pixel multiplier can only be updated once the * DPLL is enabled and the clocks are stable. * * So write it again. */ I915_WRITE(DPLL(pipe), dpll); } } static void i8xx_update_pll(struct drm_crtc *crtc, struct drm_display_mode *adjusted_mode, intel_clock_t *clock, intel_clock_t *reduced_clock, int num_connectors) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 dpll; i9xx_update_pll_dividers(crtc, clock, reduced_clock); dpll = DPLL_VGA_MODE_DIS; if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; } else { if (clock->p1 == 2) dpll |= PLL_P1_DIVIDE_BY_TWO; else dpll |= (clock->p1 - 2) << DPLL_FPA01_P1_POST_DIV_SHIFT; if (clock->p2 == 4) dpll |= PLL_P2_DIVIDE_BY_4; } if (intel_pipe_has_type(crtc, INTEL_OUTPUT_TVOUT)) /* XXX: just matching BIOS for now */ /* dpll |= PLL_REF_INPUT_TVCLKINBC; */ dpll |= 3; else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) && intel_panel_use_ssc(dev_priv) && num_connectors < 2) dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN; else dpll |= PLL_REF_INPUT_DREFCLK; dpll |= DPLL_VCO_ENABLE; I915_WRITE(DPLL(pipe), dpll & ~DPLL_VCO_ENABLE); POSTING_READ(DPLL(pipe)); udelay(150); /* The LVDS pin pair needs to be on before the DPLLs are enabled. * This is an exception to the general rule that mode_set doesn't turn * things on. */ if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) intel_update_lvds(crtc, clock, adjusted_mode); I915_WRITE(DPLL(pipe), dpll); /* Wait for the clocks to stabilize. */ POSTING_READ(DPLL(pipe)); udelay(150); /* The pixel multiplier can only be updated once the * DPLL is enabled and the clocks are stable. * * So write it again. 
*/ I915_WRITE(DPLL(pipe), dpll); } static void intel_set_pipe_timings(struct intel_crtc *intel_crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe = intel_crtc->pipe; enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder; uint32_t vsyncshift; if (!IS_GEN2(dev) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { /* the chip adds 2 halflines automatically */ adjusted_mode->crtc_vtotal -= 1; adjusted_mode->crtc_vblank_end -= 1; vsyncshift = adjusted_mode->crtc_hsync_start - adjusted_mode->crtc_htotal / 2; } else { vsyncshift = 0; } if (INTEL_INFO(dev)->gen > 3) I915_WRITE(VSYNCSHIFT(cpu_transcoder), vsyncshift); I915_WRITE(HTOTAL(cpu_transcoder), (adjusted_mode->crtc_hdisplay - 1) | ((adjusted_mode->crtc_htotal - 1) << 16)); I915_WRITE(HBLANK(cpu_transcoder), (adjusted_mode->crtc_hblank_start - 1) | ((adjusted_mode->crtc_hblank_end - 1) << 16)); I915_WRITE(HSYNC(cpu_transcoder), (adjusted_mode->crtc_hsync_start - 1) | ((adjusted_mode->crtc_hsync_end - 1) << 16)); I915_WRITE(VTOTAL(cpu_transcoder), (adjusted_mode->crtc_vdisplay - 1) | ((adjusted_mode->crtc_vtotal - 1) << 16)); I915_WRITE(VBLANK(cpu_transcoder), (adjusted_mode->crtc_vblank_start - 1) | ((adjusted_mode->crtc_vblank_end - 1) << 16)); I915_WRITE(VSYNC(cpu_transcoder), (adjusted_mode->crtc_vsync_start - 1) | ((adjusted_mode->crtc_vsync_end - 1) << 16)); /* Workaround: when the EDP input selection is B, the VTOTAL_B must be * programmed with the VTOTAL_EDP value. Same for VTOTAL_C. This is * documented on the DDI_FUNC_CTL register description, EDP Input Select * bits. */ if (IS_HASWELL(dev) && cpu_transcoder == TRANSCODER_EDP && (pipe == PIPE_B || pipe == PIPE_C)) I915_WRITE(VTOTAL(pipe), I915_READ(VTOTAL(cpu_transcoder))); /* pipesrc controls the size that is scaled from, which should * always be the user's requested size. */ I915_WRITE(PIPESRC(pipe), ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1)); } static int i9xx_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, int x, int y, struct drm_framebuffer *fb) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; int refclk, num_connectors = 0; intel_clock_t clock, reduced_clock; u32 dspcntr, pipeconf; bool ok, has_reduced_clock = false, is_sdvo = false; bool is_lvds = false, is_tv = false, is_dp = false; struct intel_encoder *encoder; const intel_limit_t *limit; int ret; for_each_encoder_on_crtc(dev, crtc, encoder) { switch (encoder->type) { case INTEL_OUTPUT_LVDS: is_lvds = true; break; case INTEL_OUTPUT_SDVO: case INTEL_OUTPUT_HDMI: is_sdvo = true; if (encoder->needs_tv_clock) is_tv = true; break; case INTEL_OUTPUT_TVOUT: is_tv = true; break; case INTEL_OUTPUT_DISPLAYPORT: is_dp = true; break; } num_connectors++; } refclk = i9xx_get_refclk(crtc, num_connectors); /* * Returns a set of divisors for the desired target clock with the given * refclk, or FALSE. The returned values represent the clock equation: * refclk * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
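 *
 * Illustrative numbers (not from the original source): with refclk =
 * 96000 KHz, m1 = 10, m2 = 10, n = 6, p1 = 2 and p2 = 10 this gives
 * 96000 * (5 * 12 + 12) / 8 / 2 / 10 = 43200 KHz.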
*/ limit = intel_limit(crtc, refclk); ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk, NULL, &clock); if (!ok) { DRM_ERROR("Couldn't find PLL settings for mode!\n"); return -EINVAL; } /* Ensure that the cursor is valid for the new mode before changing... */ intel_crtc_update_cursor(crtc, true); if (is_lvds && dev_priv->lvds_downclock_avail) { /* * Ensure we match the reduced clock's P to the target clock. * If the clocks don't match, we can't switch the display clock * by using the FP0/FP1. In such case we will disable the LVDS * downclock feature. */ has_reduced_clock = limit->find_pll(limit, crtc, dev_priv->lvds_downclock, refclk, &clock, &reduced_clock); } if (is_sdvo && is_tv) i9xx_adjust_sdvo_tv_clock(adjusted_mode, &clock); if (IS_GEN2(dev)) i8xx_update_pll(crtc, adjusted_mode, &clock, has_reduced_clock ? &reduced_clock : NULL, num_connectors); else if (IS_VALLEYVIEW(dev)) vlv_update_pll(crtc, mode, adjusted_mode, &clock, has_reduced_clock ? &reduced_clock : NULL, num_connectors); else i9xx_update_pll(crtc, mode, adjusted_mode, &clock, has_reduced_clock ? &reduced_clock : NULL, num_connectors); /* setup pipeconf */ pipeconf = I915_READ(PIPECONF(pipe)); /* Set up the display plane register */ dspcntr = DISPPLANE_GAMMA_ENABLE; if (pipe == 0) dspcntr &= ~DISPPLANE_SEL_PIPE_MASK; else dspcntr |= DISPPLANE_SEL_PIPE_B; if (pipe == 0 && INTEL_INFO(dev)->gen < 4) { /* Enable pixel doubling when the dot clock is > 90% of the (display) * core speed. * * XXX: No double-wide on 915GM pipe B. Is that the only reason for the * pipe == 0 check? */ if (mode->clock > dev_priv->display.get_display_clock_speed(dev) * 9 / 10) pipeconf |= PIPECONF_DOUBLE_WIDE; else pipeconf &= ~PIPECONF_DOUBLE_WIDE; } /* default to 8bpc */ pipeconf &= ~(PIPECONF_BPP_MASK | PIPECONF_DITHER_EN); if (is_dp) { if (adjusted_mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) { pipeconf |= PIPECONF_BPP_6 | PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP; } } if (IS_VALLEYVIEW(dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) { if (adjusted_mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) { pipeconf |= PIPECONF_BPP_6 | PIPECONF_ENABLE | I965_PIPECONF_ACTIVE; } } DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B'); drm_mode_debug_printmodeline(mode); if (HAS_PIPE_CXSR(dev)) { if (intel_crtc->lowfreq_avail) { DRM_DEBUG_KMS("enabling CxSR downclocking\n"); pipeconf |= PIPECONF_CXSR_DOWNCLOCK; } else { DRM_DEBUG_KMS("disabling CxSR downclocking\n"); pipeconf &= ~PIPECONF_CXSR_DOWNCLOCK; } } pipeconf &= ~PIPECONF_INTERLACE_MASK; if (!IS_GEN2(dev) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) pipeconf |= PIPECONF_INTERLACE_W_FIELD_INDICATION; else pipeconf |= PIPECONF_PROGRESSIVE; intel_set_pipe_timings(intel_crtc, mode, adjusted_mode); /* pipesrc and dspsize control the size that is scaled from, * which should always be the user's requested size. 
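 *
 * (Note the swapped packing: DSPSIZE below takes (height - 1) in the
 * high word, while PIPESRC, programmed in intel_set_pipe_timings(),
 * takes (width - 1) there.)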
*/ I915_WRITE(DSPSIZE(plane), ((mode->vdisplay - 1) << 16) | (mode->hdisplay - 1)); I915_WRITE(DSPPOS(plane), 0); I915_WRITE(PIPECONF(pipe), pipeconf); POSTING_READ(PIPECONF(pipe)); intel_enable_pipe(dev_priv, pipe, false); intel_wait_for_vblank(dev, pipe); I915_WRITE(DSPCNTR(plane), dspcntr); POSTING_READ(DSPCNTR(plane)); ret = intel_pipe_set_base(crtc, x, y, fb); intel_update_watermarks(dev); return ret; } static void ironlake_init_pch_refclk(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *encoder; u32 temp; bool has_lvds = false; bool has_cpu_edp = false; bool has_pch_edp = false; bool has_panel = false; bool has_ck505 = false; bool can_ssc = false; /* We need to take the global config into account */ list_for_each_entry(encoder, &mode_config->encoder_list, base.head) { switch (encoder->type) { case INTEL_OUTPUT_LVDS: has_panel = true; has_lvds = true; break; case INTEL_OUTPUT_EDP: has_panel = true; if (intel_encoder_is_pch_edp(&encoder->base)) has_pch_edp = true; else has_cpu_edp = true; break; } } if (HAS_PCH_IBX(dev)) { has_ck505 = dev_priv->display_clock_mode; can_ssc = has_ck505; } else { has_ck505 = false; can_ssc = true; } DRM_DEBUG_KMS("has_panel %d has_lvds %d has_pch_edp %d has_cpu_edp %d has_ck505 %d\n", has_panel, has_lvds, has_pch_edp, has_cpu_edp, has_ck505); /* Ironlake: try to setup display ref clock before DPLL * enabling. This is only under driver's control after * PCH B stepping, previous chipset stepping should be * ignoring this setting. */ temp = I915_READ(PCH_DREF_CONTROL); /* Always enable nonspread source */ temp &= ~DREF_NONSPREAD_SOURCE_MASK; if (has_ck505) temp |= DREF_NONSPREAD_CK505_ENABLE; else temp |= DREF_NONSPREAD_SOURCE_ENABLE; if (has_panel) { temp &= ~DREF_SSC_SOURCE_MASK; temp |= DREF_SSC_SOURCE_ENABLE; /* SSC must be turned on before enabling the CPU output */ if (intel_panel_use_ssc(dev_priv) && can_ssc) { DRM_DEBUG_KMS("Using SSC on panel\n"); temp |= DREF_SSC1_ENABLE; } else temp &= ~DREF_SSC1_ENABLE; /* Get SSC going before enabling the outputs */ I915_WRITE(PCH_DREF_CONTROL, temp); POSTING_READ(PCH_DREF_CONTROL); udelay(200); temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK; /* Enable CPU source on CPU attached eDP */ if (has_cpu_edp) { if (intel_panel_use_ssc(dev_priv) && can_ssc) { DRM_DEBUG_KMS("Using SSC on eDP\n"); temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD; } else temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD; } else temp |= DREF_CPU_SOURCE_OUTPUT_DISABLE; I915_WRITE(PCH_DREF_CONTROL, temp); POSTING_READ(PCH_DREF_CONTROL); udelay(200); } else { DRM_DEBUG_KMS("Disabling SSC entirely\n"); temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK; /* Turn off CPU output */ temp |= DREF_CPU_SOURCE_OUTPUT_DISABLE; I915_WRITE(PCH_DREF_CONTROL, temp); POSTING_READ(PCH_DREF_CONTROL); udelay(200); /* Turn off the SSC source */ temp &= ~DREF_SSC_SOURCE_MASK; temp |= DREF_SSC_SOURCE_DISABLE; /* Turn off SSC1 */ temp &= ~ DREF_SSC1_ENABLE; I915_WRITE(PCH_DREF_CONTROL, temp); POSTING_READ(PCH_DREF_CONTROL); udelay(200); } } /* Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O. 
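 *
 * Everything below is programmed through the sideband interface (SBI):
 * SBI_ICLK for the SSC/CLKOUT_DP controls and SBI_MPHY for the FDI
 * mPHY tuning registers.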
*/ static void lpt_init_pch_refclk(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *encoder; bool has_vga = false; bool is_sdv = false; u32 tmp; list_for_each_entry(encoder, &mode_config->encoder_list, base.head) { switch (encoder->type) { case INTEL_OUTPUT_ANALOG: has_vga = true; break; } } if (!has_vga) return; /* XXX: Rip out SDV support once Haswell ships for real. */ if (IS_HASWELL(dev) && (dev->pci_device & 0xFF00) == 0x0C00) is_sdv = true; tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK); tmp &= ~SBI_SSCCTL_DISABLE; tmp |= SBI_SSCCTL_PATHALT; intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK); udelay(24); tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK); tmp &= ~SBI_SSCCTL_PATHALT; intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK); if (!is_sdv) { tmp = I915_READ(SOUTH_CHICKEN2); tmp |= FDI_MPHY_IOSFSB_RESET_CTL; I915_WRITE(SOUTH_CHICKEN2, tmp); if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) & FDI_MPHY_IOSFSB_RESET_STATUS, 100)) DRM_ERROR("FDI mPHY reset assert timeout\n"); tmp = I915_READ(SOUTH_CHICKEN2); tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL; I915_WRITE(SOUTH_CHICKEN2, tmp); if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) & FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) DRM_ERROR("FDI mPHY reset de-assert timeout\n"); } tmp = intel_sbi_read(dev_priv, 0x8008, SBI_MPHY); tmp &= ~(0xFF << 24); tmp |= (0x12 << 24); intel_sbi_write(dev_priv, 0x8008, tmp, SBI_MPHY); if (!is_sdv) { tmp = intel_sbi_read(dev_priv, 0x808C, SBI_MPHY); tmp &= ~(0x3 << 6); tmp |= (1 << 6) | (1 << 0); intel_sbi_write(dev_priv, 0x808C, tmp, SBI_MPHY); } if (is_sdv) { tmp = intel_sbi_read(dev_priv, 0x800C, SBI_MPHY); tmp |= 0x7FFF; intel_sbi_write(dev_priv, 0x800C, tmp, SBI_MPHY); } tmp = intel_sbi_read(dev_priv, 0x2008, SBI_MPHY); tmp |= (1 << 11); intel_sbi_write(dev_priv, 0x2008, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x2108, SBI_MPHY); tmp |= (1 << 11); intel_sbi_write(dev_priv, 0x2108, tmp, SBI_MPHY); if (is_sdv) { tmp = intel_sbi_read(dev_priv, 0x2038, SBI_MPHY); tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16); intel_sbi_write(dev_priv, 0x2038, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x2138, SBI_MPHY); tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16); intel_sbi_write(dev_priv, 0x2138, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x203C, SBI_MPHY); tmp |= (0x3F << 8); intel_sbi_write(dev_priv, 0x203C, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x213C, SBI_MPHY); tmp |= (0x3F << 8); intel_sbi_write(dev_priv, 0x213C, tmp, SBI_MPHY); } tmp = intel_sbi_read(dev_priv, 0x206C, SBI_MPHY); tmp |= (1 << 24) | (1 << 21) | (1 << 18); intel_sbi_write(dev_priv, 0x206C, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x216C, SBI_MPHY); tmp |= (1 << 24) | (1 << 21) | (1 << 18); intel_sbi_write(dev_priv, 0x216C, tmp, SBI_MPHY); if (!is_sdv) { tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY); tmp &= ~(7 << 13); tmp |= (5 << 13); intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY); tmp &= ~(7 << 13); tmp |= (5 << 13); intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY); } tmp = intel_sbi_read(dev_priv, 0x208C, SBI_MPHY); tmp &= ~0xFF; tmp |= 0x1C; intel_sbi_write(dev_priv, 0x208C, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x218C, SBI_MPHY); tmp &= ~0xFF; tmp |= 0x1C; intel_sbi_write(dev_priv, 0x218C, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x2098, SBI_MPHY); tmp &= ~(0xFF << 16); tmp |= (0x1C << 16); intel_sbi_write(dev_priv, 0x2098, 
tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x2198, SBI_MPHY); tmp &= ~(0xFF << 16); tmp |= (0x1C << 16); intel_sbi_write(dev_priv, 0x2198, tmp, SBI_MPHY); if (!is_sdv) { tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY); tmp |= (1 << 27); intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY); tmp |= (1 << 27); intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY); tmp &= ~(0xF << 28); tmp |= (4 << 28); intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY); tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY); tmp &= ~(0xF << 28); tmp |= (4 << 28); intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY); } /* ULT uses SBI_GEN0, but ULT doesn't have VGA, so we don't care. */ tmp = intel_sbi_read(dev_priv, SBI_DBUFF0, SBI_ICLK); tmp |= SBI_DBUFF0_ENABLE; intel_sbi_write(dev_priv, SBI_DBUFF0, tmp, SBI_ICLK); } /* * Initialize reference clocks when the driver loads */ void intel_init_pch_refclk(struct drm_device *dev) { if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) ironlake_init_pch_refclk(dev); else if (HAS_PCH_LPT(dev)) lpt_init_pch_refclk(dev); } static int ironlake_get_refclk(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_encoder *encoder; struct intel_encoder *edp_encoder = NULL; int num_connectors = 0; bool is_lvds = false; for_each_encoder_on_crtc(dev, crtc, encoder) { switch (encoder->type) { case INTEL_OUTPUT_LVDS: is_lvds = true; break; case INTEL_OUTPUT_EDP: edp_encoder = encoder; break; } num_connectors++; } if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2) { DRM_DEBUG_KMS("using SSC reference clock of %d MHz\n", dev_priv->lvds_ssc_freq); return dev_priv->lvds_ssc_freq * 1000; } return 120000; } static void ironlake_set_pipeconf(struct drm_crtc *crtc, struct drm_display_mode *adjusted_mode, bool dither) { struct drm_i915_private *dev_priv = crtc->dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; uint32_t val; val = I915_READ(PIPECONF(pipe)); val &= ~PIPE_BPC_MASK; switch (intel_crtc->bpp) { case 18: val |= PIPE_6BPC; break; case 24: val |= PIPE_8BPC; break; case 30: val |= PIPE_10BPC; break; case 36: val |= PIPE_12BPC; break; default: /* Case prevented by intel_choose_pipe_bpp_dither. 
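 * Only 18, 24, 30 and 36 bpp (6/8/10/12 bpc) have encodings in
 * PIPE_BPC_MASK, so any other value is a driver bug.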
 */
        BUG();
    }

    val &= ~(PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_MASK);
    if (dither)
        val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP);

    val &= ~PIPECONF_INTERLACE_MASK;
    if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
        val |= PIPECONF_INTERLACED_ILK;
    else
        val |= PIPECONF_PROGRESSIVE;

    I915_WRITE(PIPECONF(pipe), val);
    POSTING_READ(PIPECONF(pipe));
}

static void haswell_set_pipeconf(struct drm_crtc *crtc,
                 struct drm_display_mode *adjusted_mode,
                 bool dither)
{
    struct drm_i915_private *dev_priv = crtc->dev->dev_private;
    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
    enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
    uint32_t val;

    val = I915_READ(PIPECONF(cpu_transcoder));

    val &= ~(PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_MASK);
    if (dither)
        val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP);

    val &= ~PIPECONF_INTERLACE_MASK_HSW;
    if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
        val |= PIPECONF_INTERLACED_ILK;
    else
        val |= PIPECONF_PROGRESSIVE;

    I915_WRITE(PIPECONF(cpu_transcoder), val);
    POSTING_READ(PIPECONF(cpu_transcoder));
}

static bool ironlake_compute_clocks(struct drm_crtc *crtc,
                    struct drm_display_mode *adjusted_mode,
                    intel_clock_t *clock,
                    bool *has_reduced_clock,
                    intel_clock_t *reduced_clock)
{
    struct drm_device *dev = crtc->dev;
    struct drm_i915_private *dev_priv = dev->dev_private;
    struct intel_encoder *intel_encoder;
    int refclk;
    const intel_limit_t *limit;
    bool ret, is_sdvo = false, is_tv = false, is_lvds = false;

    for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
        switch (intel_encoder->type) {
        case INTEL_OUTPUT_LVDS:
            is_lvds = true;
            break;
        case INTEL_OUTPUT_SDVO:
        case INTEL_OUTPUT_HDMI:
            is_sdvo = true;
            if (intel_encoder->needs_tv_clock)
                is_tv = true;
            break;
        case INTEL_OUTPUT_TVOUT:
            is_tv = true;
            break;
        }
    }

    refclk = ironlake_get_refclk(crtc);

    /*
     * Returns a set of divisors for the desired target clock with the
     * given refclk, or FALSE.  The returned values represent the clock
     * equation: refclk * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
     */
    limit = intel_limit(crtc, refclk);
    ret = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk, NULL,
                  clock);
    if (!ret)
        return false;

    if (is_lvds && dev_priv->lvds_downclock_avail) {
        /*
         * Ensure we match the reduced clock's P to the target clock.
         * If the clocks don't match, we can't switch the display
         * clock by using the FP0/FP1.  In that case we disable the
         * LVDS downclock feature.
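         *
         * (The already-computed fast clock is handed to find_pll()
         * here so that reduced-clock candidates with a different P
         * divider can be rejected.)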
*/ *has_reduced_clock = limit->find_pll(limit, crtc, dev_priv->lvds_downclock, refclk, clock, reduced_clock); } if (is_sdvo && is_tv) i9xx_adjust_sdvo_tv_clock(adjusted_mode, clock); return true; } static void cpt_enable_fdi_bc_bifurcation(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; uint32_t temp; temp = I915_READ(SOUTH_CHICKEN1); if (temp & FDI_BC_BIFURCATION_SELECT) return; WARN_ON(I915_READ(FDI_RX_CTL(PIPE_B)) & FDI_RX_ENABLE); WARN_ON(I915_READ(FDI_RX_CTL(PIPE_C)) & FDI_RX_ENABLE); temp |= FDI_BC_BIFURCATION_SELECT; DRM_DEBUG_KMS("enabling fdi C rx\n"); I915_WRITE(SOUTH_CHICKEN1, temp); POSTING_READ(SOUTH_CHICKEN1); } static bool ironlake_check_fdi_lanes(struct intel_crtc *intel_crtc) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *pipe_B_crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_B]); DRM_DEBUG_KMS("checking fdi config on pipe %i, lanes %i\n", intel_crtc->pipe, intel_crtc->fdi_lanes); if (intel_crtc->fdi_lanes > 4) { DRM_DEBUG_KMS("invalid fdi lane config on pipe %i: %i lanes\n", intel_crtc->pipe, intel_crtc->fdi_lanes); /* Clamp lanes to avoid programming the hw with bogus values. */ intel_crtc->fdi_lanes = 4; return false; } if (dev_priv->num_pipe == 2) return true; switch (intel_crtc->pipe) { case PIPE_A: return true; case PIPE_B: if (dev_priv->pipe_to_crtc_mapping[PIPE_C]->enabled && intel_crtc->fdi_lanes > 2) { DRM_DEBUG_KMS("invalid shared fdi lane config on pipe %i: %i lanes\n", intel_crtc->pipe, intel_crtc->fdi_lanes); /* Clamp lanes to avoid programming the hw with bogus values. */ intel_crtc->fdi_lanes = 2; return false; } if (intel_crtc->fdi_lanes > 2) WARN_ON(I915_READ(SOUTH_CHICKEN1) & FDI_BC_BIFURCATION_SELECT); else cpt_enable_fdi_bc_bifurcation(dev); return true; case PIPE_C: if (!pipe_B_crtc->base.enabled || pipe_B_crtc->fdi_lanes <= 2) { if (intel_crtc->fdi_lanes > 2) { DRM_DEBUG_KMS("invalid shared fdi lane config on pipe %i: %i lanes\n", intel_crtc->pipe, intel_crtc->fdi_lanes); /* Clamp lanes to avoid programming the hw with bogus values. */ intel_crtc->fdi_lanes = 2; return false; } } else { DRM_DEBUG_KMS("fdi link B uses too many lanes to enable link C\n"); return false; } cpt_enable_fdi_bc_bifurcation(dev); return true; default: BUG(); } } int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp) { /* * Account for spread spectrum to avoid * oversubscribing the link. Max center spread * is 2.5%; use 5% for safety's sake. 
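 *
 * For example (illustrative numbers): a 148500 kHz mode at 24 bpp
 * gives bps = 148500 * 24 * 21 / 20 = 3742200, and on a 270000 kHz
 * FDI link that is 3742200 / (270000 * 8) + 1 = 2 lanes.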
*/ u32 bps = target_clock * bpp * 21 / 20; return bps / (link_bw * 8) + 1; } static void ironlake_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder; struct intel_encoder *intel_encoder, *edp_encoder = NULL; struct fdi_m_n m_n = {0}; int target_clock, pixel_multiplier, lane, link_bw; bool is_dp = false, is_cpu_edp = false; for_each_encoder_on_crtc(dev, crtc, intel_encoder) { switch (intel_encoder->type) { case INTEL_OUTPUT_DISPLAYPORT: is_dp = true; break; case INTEL_OUTPUT_EDP: is_dp = true; if (!intel_encoder_is_pch_edp(&intel_encoder->base)) is_cpu_edp = true; edp_encoder = intel_encoder; break; } } /* FDI link */ pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode); lane = 0; /* CPU eDP doesn't require FDI link, so just set DP M/N according to current link config */ if (is_cpu_edp) { intel_edp_link_config(edp_encoder, &lane, &link_bw); } else { /* FDI is a binary signal running at ~2.7GHz, encoding * each output octet as 10 bits. The actual frequency * is stored as a divider into a 100MHz clock, and the * mode pixel clock is stored in units of 1KHz. * Hence the bw of each lane in terms of the mode signal * is: */ link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10; } /* [e]DP over FDI requires target mode clock instead of link clock. */ if (edp_encoder) target_clock = intel_edp_target_clock(edp_encoder, mode); else if (is_dp) target_clock = mode->clock; else target_clock = adjusted_mode->clock; if (!lane) lane = ironlake_get_lanes_required(target_clock, link_bw, intel_crtc->bpp); intel_crtc->fdi_lanes = lane; if (pixel_multiplier > 1) link_bw *= pixel_multiplier; ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw, &m_n); I915_WRITE(PIPE_DATA_M1(cpu_transcoder), TU_SIZE(m_n.tu) | m_n.gmch_m); I915_WRITE(PIPE_DATA_N1(cpu_transcoder), m_n.gmch_n); I915_WRITE(PIPE_LINK_M1(cpu_transcoder), m_n.link_m); I915_WRITE(PIPE_LINK_N1(cpu_transcoder), m_n.link_n); } static uint32_t ironlake_compute_dpll(struct intel_crtc *intel_crtc, struct drm_display_mode *adjusted_mode, intel_clock_t *clock, u32 fp) { struct drm_crtc *crtc = &intel_crtc->base; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_encoder *intel_encoder; uint32_t dpll; int factor, pixel_multiplier, num_connectors = 0; bool is_lvds = false, is_sdvo = false, is_tv = false; bool is_dp = false, is_cpu_edp = false; for_each_encoder_on_crtc(dev, crtc, intel_encoder) { switch (intel_encoder->type) { case INTEL_OUTPUT_LVDS: is_lvds = true; break; case INTEL_OUTPUT_SDVO: case INTEL_OUTPUT_HDMI: is_sdvo = true; if (intel_encoder->needs_tv_clock) is_tv = true; break; case INTEL_OUTPUT_TVOUT: is_tv = true; break; case INTEL_OUTPUT_DISPLAYPORT: is_dp = true; break; case INTEL_OUTPUT_EDP: is_dp = true; if (!intel_encoder_is_pch_edp(&intel_encoder->base)) is_cpu_edp = true; break; } num_connectors++; } /* Enable autotuning of the PLL clock (if permissible) */ factor = 21; if (is_lvds) { if ((intel_panel_use_ssc(dev_priv) && dev_priv->lvds_ssc_freq == 100) || (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) factor = 25; } else if (is_sdvo && is_tv) factor = 20; if (clock->m < factor * clock->n) fp |= FP_CB_TUNE; dpll = 0; if (is_lvds) dpll |= DPLLB_MODE_LVDS; else dpll |= DPLLB_MODE_DAC_SERIAL; if 
(is_sdvo) { pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode); if (pixel_multiplier > 1) { dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT; } dpll |= DPLL_DVO_HIGH_SPEED; } if (is_dp && !is_cpu_edp) dpll |= DPLL_DVO_HIGH_SPEED; /* compute bitmask from p1 value */ dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; /* also FPA1 */ dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT; switch (clock->p2) { case 5: dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5; break; case 7: dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7; break; case 10: dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10; break; case 14: dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14; break; } if (is_sdvo && is_tv) dpll |= PLL_REF_INPUT_TVCLKINBC; else if (is_tv) /* XXX: just matching BIOS for now */ /* dpll |= PLL_REF_INPUT_TVCLKINBC; */ dpll |= 3; else if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2) dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN; else dpll |= PLL_REF_INPUT_DREFCLK; return dpll; } static int ironlake_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, int x, int y, struct drm_framebuffer *fb) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; int num_connectors = 0; intel_clock_t clock, reduced_clock; u32 dpll, fp = 0, fp2 = 0; bool ok, has_reduced_clock = false; bool is_lvds = false, is_dp = false, is_cpu_edp = false; struct intel_encoder *encoder; u32 temp; int ret; bool dither, fdi_config_ok; for_each_encoder_on_crtc(dev, crtc, encoder) { switch (encoder->type) { case INTEL_OUTPUT_LVDS: is_lvds = true; break; case INTEL_OUTPUT_DISPLAYPORT: is_dp = true; break; case INTEL_OUTPUT_EDP: is_dp = true; if (!intel_encoder_is_pch_edp(&encoder->base)) is_cpu_edp = true; break; } num_connectors++; } WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)), "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev)); ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock, &has_reduced_clock, &reduced_clock); if (!ok) { DRM_ERROR("Couldn't find PLL settings for mode!\n"); return -EINVAL; } /* Ensure that the cursor is valid for the new mode before changing... */ intel_crtc_update_cursor(crtc, true); /* determine panel color depth */ dither = intel_choose_pipe_bpp_dither(crtc, fb, &intel_crtc->bpp, adjusted_mode); if (is_lvds && dev_priv->lvds_dither) dither = true; fp = clock.n << 16 | clock.m1 << 8 | clock.m2; if (has_reduced_clock) fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 | reduced_clock.m2; dpll = ironlake_compute_dpll(intel_crtc, adjusted_mode, &clock, fp); DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe); drm_mode_debug_printmodeline(mode); /* CPU eDP is the only output that doesn't need a PCH PLL of its own. */ if (!is_cpu_edp) { struct intel_pch_pll *pll; pll = intel_get_pch_pll(intel_crtc, dpll, fp); if (pll == NULL) { DRM_DEBUG_DRIVER("failed to find PLL for pipe %d\n", pipe); return -EINVAL; } } else intel_put_pch_pll(intel_crtc); /* The LVDS pin pair needs to be on before the DPLLs are enabled. * This is an exception to the general rule that mode_set doesn't turn * things on. 
 */
    if (is_lvds) {
        temp = I915_READ(PCH_LVDS);
        temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
        if (HAS_PCH_CPT(dev)) {
            temp &= ~PORT_TRANS_SEL_MASK;
            temp |= PORT_TRANS_SEL_CPT(pipe);
        } else {
            if (pipe == 1)
                temp |= LVDS_PIPEB_SELECT;
            else
                temp &= ~LVDS_PIPEB_SELECT;
        }

        /* set the corresponding LVDS_BORDER bit */
        temp |= dev_priv->lvds_border_bits;
        /* Set the B0-B3 data pairs corresponding to whether we're going to
         * set the DPLLs for dual-channel mode or not.
         */
        if (clock.p2 == 7)
            temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP;
        else
            temp &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP);

        /* It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP)
         * appropriately here, but we need to look more thoroughly into how
         * panels behave in the two modes.
         */
        temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY);
        if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
            temp |= LVDS_HSYNC_POLARITY;
        if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
            temp |= LVDS_VSYNC_POLARITY;
        I915_WRITE(PCH_LVDS, temp);
    }

    if (is_dp && !is_cpu_edp) {
        intel_dp_set_m_n(crtc, mode, adjusted_mode);
    } else {
        /* For non-DP output, clear any trans DP clock recovery setting. */
        I915_WRITE(TRANSDATA_M1(pipe), 0);
        I915_WRITE(TRANSDATA_N1(pipe), 0);
        I915_WRITE(TRANSDPLINK_M1(pipe), 0);
        I915_WRITE(TRANSDPLINK_N1(pipe), 0);
    }

    if (intel_crtc->pch_pll) {
        I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll);

        /* Wait for the clocks to stabilize. */
        POSTING_READ(intel_crtc->pch_pll->pll_reg);
        udelay(150);

        /* The pixel multiplier can only be updated once the
         * DPLL is enabled and the clocks are stable.
         *
         * So write it again.
         */
        I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll);
    }

    intel_crtc->lowfreq_avail = false;
    if (intel_crtc->pch_pll) {
        if (is_lvds && has_reduced_clock && i915_powersave) {
            I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp2);
            intel_crtc->lowfreq_avail = true;
        } else {
            I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp);
        }
    }

    intel_set_pipe_timings(intel_crtc, mode, adjusted_mode);

    /* Note, this also computes intel_crtc->fdi_lanes which is used below in
     * ironlake_check_fdi_lanes. */
    ironlake_set_m_n(crtc, mode, adjusted_mode);

    fdi_config_ok = ironlake_check_fdi_lanes(intel_crtc);

    if (is_cpu_edp)
        ironlake_set_pll_edp(crtc, adjusted_mode->clock);

    ironlake_set_pipeconf(crtc, adjusted_mode, dither);

    intel_wait_for_vblank(dev, pipe);

    /* Set up the display plane register */
    I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE);
    POSTING_READ(DSPCNTR(plane));

    ret = intel_pipe_set_base(crtc, x, y, fb);

    intel_update_watermarks(dev);

    intel_update_linetime_watermarks(dev, pipe, adjusted_mode);

    return fdi_config_ok ?
ret : -EINVAL; } static int haswell_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, int x, int y, struct drm_framebuffer *fb) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; int num_connectors = 0; intel_clock_t clock, reduced_clock; u32 dpll = 0, fp = 0, fp2 = 0; bool ok, has_reduced_clock = false; bool is_lvds = false, is_dp = false, is_cpu_edp = false; struct intel_encoder *encoder; u32 temp; int ret; bool dither; for_each_encoder_on_crtc(dev, crtc, encoder) { switch (encoder->type) { case INTEL_OUTPUT_LVDS: is_lvds = true; break; case INTEL_OUTPUT_DISPLAYPORT: is_dp = true; break; case INTEL_OUTPUT_EDP: is_dp = true; if (!intel_encoder_is_pch_edp(&encoder->base)) is_cpu_edp = true; break; } num_connectors++; } if (is_cpu_edp) intel_crtc->cpu_transcoder = TRANSCODER_EDP; else intel_crtc->cpu_transcoder = pipe; /* We are not sure yet this won't happen. */ WARN(!HAS_PCH_LPT(dev), "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev)); WARN(num_connectors != 1, "%d connectors attached to pipe %c\n", num_connectors, pipe_name(pipe)); WARN_ON(I915_READ(PIPECONF(intel_crtc->cpu_transcoder)) & (PIPECONF_ENABLE | I965_PIPECONF_ACTIVE)); WARN_ON(I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE); if (!intel_ddi_pll_mode_set(crtc, adjusted_mode->clock)) return -EINVAL; if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock, &has_reduced_clock, &reduced_clock); if (!ok) { DRM_ERROR("Couldn't find PLL settings for mode!\n"); return -EINVAL; } } /* Ensure that the cursor is valid for the new mode before changing... */ intel_crtc_update_cursor(crtc, true); /* determine panel color depth */ dither = intel_choose_pipe_bpp_dither(crtc, fb, &intel_crtc->bpp, adjusted_mode); if (is_lvds && dev_priv->lvds_dither) dither = true; DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe); drm_mode_debug_printmodeline(mode); if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { fp = clock.n << 16 | clock.m1 << 8 | clock.m2; if (has_reduced_clock) fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 | reduced_clock.m2; dpll = ironlake_compute_dpll(intel_crtc, adjusted_mode, &clock, fp); /* CPU eDP is the only output that doesn't need a PCH PLL of its * own on pre-Haswell/LPT generation */ if (!is_cpu_edp) { struct intel_pch_pll *pll; pll = intel_get_pch_pll(intel_crtc, dpll, fp); if (pll == NULL) { DRM_DEBUG_DRIVER("failed to find PLL for pipe %d\n", pipe); return -EINVAL; } } else intel_put_pch_pll(intel_crtc); /* The LVDS pin pair needs to be on before the DPLLs are * enabled. This is an exception to the general rule that * mode_set doesn't turn things on. */ if (is_lvds) { temp = I915_READ(PCH_LVDS); temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; if (HAS_PCH_CPT(dev)) { temp &= ~PORT_TRANS_SEL_MASK; temp |= PORT_TRANS_SEL_CPT(pipe); } else { if (pipe == 1) temp |= LVDS_PIPEB_SELECT; else temp &= ~LVDS_PIPEB_SELECT; } /* set the corresponsding LVDS_BORDER bit */ temp |= dev_priv->lvds_border_bits; /* Set the B0-B3 data pairs corresponding to whether * we're going to set the DPLLs for dual-channel mode or * not. 
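 *
 * (Dual channel is inferred from the P2 divider: the LVDS limits use
 * p2 == 7 for dual channel and p2 == 14 for single channel, which is
 * why clock.p2 == 7 is tested below.)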
*/ if (clock.p2 == 7) temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP; else temp &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP); /* It would be nice to set 24 vs 18-bit mode * (LVDS_A3_POWER_UP) appropriately here, but we need to * look more thoroughly into how panels behave in the * two modes. */ temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY); if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC) temp |= LVDS_HSYNC_POLARITY; if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC) temp |= LVDS_VSYNC_POLARITY; I915_WRITE(PCH_LVDS, temp); } } if (is_dp && !is_cpu_edp) { intel_dp_set_m_n(crtc, mode, adjusted_mode); } else { if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { /* For non-DP output, clear any trans DP clock recovery * setting.*/ I915_WRITE(TRANSDATA_M1(pipe), 0); I915_WRITE(TRANSDATA_N1(pipe), 0); I915_WRITE(TRANSDPLINK_M1(pipe), 0); I915_WRITE(TRANSDPLINK_N1(pipe), 0); } } intel_crtc->lowfreq_avail = false; if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { if (intel_crtc->pch_pll) { I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll); /* Wait for the clocks to stabilize. */ POSTING_READ(intel_crtc->pch_pll->pll_reg); udelay(150); /* The pixel multiplier can only be updated once the * DPLL is enabled and the clocks are stable. * * So write it again. */ I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll); } if (intel_crtc->pch_pll) { if (is_lvds && has_reduced_clock && i915_powersave) { I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp2); intel_crtc->lowfreq_avail = true; } else { I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp); } } } intel_set_pipe_timings(intel_crtc, mode, adjusted_mode); if (!is_dp || is_cpu_edp) ironlake_set_m_n(crtc, mode, adjusted_mode); if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) if (is_cpu_edp) ironlake_set_pll_edp(crtc, adjusted_mode->clock); haswell_set_pipeconf(crtc, adjusted_mode, dither); /* Set up the display plane register */ I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE); POSTING_READ(DSPCNTR(plane)); ret = intel_pipe_set_base(crtc, x, y, fb); intel_update_watermarks(dev); intel_update_linetime_watermarks(dev, pipe, adjusted_mode); return ret; } static int intel_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, int x, int y, struct drm_framebuffer *fb) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_encoder_helper_funcs *encoder_funcs; struct intel_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; int ret; drm_vblank_pre_modeset(dev, pipe); ret = dev_priv->display.crtc_mode_set(crtc, mode, adjusted_mode, x, y, fb); drm_vblank_post_modeset(dev, pipe); if (ret != 0) return ret; for_each_encoder_on_crtc(dev, crtc, encoder) { DRM_DEBUG_KMS("[ENCODER:%d:%s] set [MODE:%d:%s]\n", encoder->base.base.id, drm_get_encoder_name(&encoder->base), mode->base.id, mode->name); encoder_funcs = encoder->base.helper_private; encoder_funcs->mode_set(&encoder->base, mode, adjusted_mode); } return 0; } static bool intel_eld_uptodate(struct drm_connector *connector, int reg_eldv, uint32_t bits_eldv, int reg_elda, uint32_t bits_elda, int reg_edid) { struct drm_i915_private *dev_priv = connector->dev->dev_private; uint8_t *eld = connector->eld; uint32_t i; i = I915_READ(reg_eldv); i &= bits_eldv; if (!eld[0]) return !i; if (!i) return false; i = I915_READ(reg_elda); i &= ~bits_elda; I915_WRITE(reg_elda, i); for (i = 0; i < eld[2]; i++) if (I915_READ(reg_edid) != *((uint32_t *)eld + i)) return false; return true; } static void g4x_write_eld(struct 
drm_connector *connector, struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = connector->dev->dev_private; uint8_t *eld = connector->eld; uint32_t eldv; uint32_t len; uint32_t i; i = I915_READ(G4X_AUD_VID_DID); if (i == INTEL_AUDIO_DEVBLC || i == INTEL_AUDIO_DEVCL) eldv = G4X_ELDV_DEVCL_DEVBLC; else eldv = G4X_ELDV_DEVCTG; if (intel_eld_uptodate(connector, G4X_AUD_CNTL_ST, eldv, G4X_AUD_CNTL_ST, G4X_ELD_ADDR, G4X_HDMIW_HDMIEDID)) return; i = I915_READ(G4X_AUD_CNTL_ST); i &= ~(eldv | G4X_ELD_ADDR); len = (i >> 9) & 0x1f; /* ELD buffer size */ I915_WRITE(G4X_AUD_CNTL_ST, i); if (!eld[0]) return; len = min_t(uint8_t, eld[2], len); DRM_DEBUG_DRIVER("ELD size %d\n", len); for (i = 0; i < len; i++) I915_WRITE(G4X_HDMIW_HDMIEDID, *((uint32_t *)eld + i)); i = I915_READ(G4X_AUD_CNTL_ST); i |= eldv; I915_WRITE(G4X_AUD_CNTL_ST, i); } static void haswell_write_eld(struct drm_connector *connector, struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = connector->dev->dev_private; uint8_t *eld = connector->eld; struct drm_device *dev = crtc->dev; uint32_t eldv; uint32_t i; int len; int pipe = to_intel_crtc(crtc)->pipe; int tmp; int hdmiw_hdmiedid = HSW_AUD_EDID_DATA(pipe); int aud_cntl_st = HSW_AUD_DIP_ELD_CTRL(pipe); int aud_config = HSW_AUD_CFG(pipe); int aud_cntrl_st2 = HSW_AUD_PIN_ELD_CP_VLD; DRM_DEBUG_DRIVER("HDMI: Haswell Audio initialize....\n"); /* Audio output enable */ DRM_DEBUG_DRIVER("HDMI audio: enable codec\n"); tmp = I915_READ(aud_cntrl_st2); tmp |= (AUDIO_OUTPUT_ENABLE_A << (pipe * 4)); I915_WRITE(aud_cntrl_st2, tmp); /* Wait for 1 vertical blank */ intel_wait_for_vblank(dev, pipe); /* Set ELD valid state */ tmp = I915_READ(aud_cntrl_st2); DRM_DEBUG_DRIVER("HDMI audio: pin eld vld status=0x%8x\n", tmp); tmp |= (AUDIO_ELD_VALID_A << (pipe * 4)); I915_WRITE(aud_cntrl_st2, tmp); tmp = I915_READ(aud_cntrl_st2); DRM_DEBUG_DRIVER("HDMI audio: eld vld status=0x%8x\n", tmp); /* Enable HDMI mode */ tmp = I915_READ(aud_config); DRM_DEBUG_DRIVER("HDMI audio: audio conf: 0x%8x\n", tmp); /* clear N_programing_enable and N_value_index */ tmp &= ~(AUD_CONFIG_N_VALUE_INDEX | AUD_CONFIG_N_PROG_ENABLE); I915_WRITE(aud_config, tmp); DRM_DEBUG_DRIVER("ELD on pipe %c\n", pipe_name(pipe)); eldv = AUDIO_ELD_VALID_A << (pipe * 4); if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) { DRM_DEBUG_DRIVER("ELD: DisplayPort detected\n"); eld[5] |= (1 << 2); /* Conn_Type, 0x1 = DisplayPort */ I915_WRITE(aud_config, AUD_CONFIG_N_VALUE_INDEX); /* 0x1 = DP */ } else I915_WRITE(aud_config, 0); if (intel_eld_uptodate(connector, aud_cntrl_st2, eldv, aud_cntl_st, IBX_ELD_ADDRESS, hdmiw_hdmiedid)) return; i = I915_READ(aud_cntrl_st2); i &= ~eldv; I915_WRITE(aud_cntrl_st2, i); if (!eld[0]) return; i = I915_READ(aud_cntl_st); i &= ~IBX_ELD_ADDRESS; I915_WRITE(aud_cntl_st, i); i = (i >> 29) & DIP_PORT_SEL_MASK; /* DIP_Port_Select, 0x1 = PortB */ DRM_DEBUG_DRIVER("port num:%d\n", i); len = min_t(uint8_t, eld[2], 21); /* 84 bytes of hw ELD buffer */ DRM_DEBUG_DRIVER("ELD size %d\n", len); for (i = 0; i < len; i++) I915_WRITE(hdmiw_hdmiedid, *((uint32_t *)eld + i)); i = I915_READ(aud_cntrl_st2); i |= eldv; I915_WRITE(aud_cntrl_st2, i); } static void ironlake_write_eld(struct drm_connector *connector, struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = connector->dev->dev_private; uint8_t *eld = connector->eld; uint32_t eldv; uint32_t i; int len; int hdmiw_hdmiedid; int aud_config; int aud_cntl_st; int aud_cntrl_st2; int pipe = to_intel_crtc(crtc)->pipe; if (HAS_PCH_IBX(connector->dev)) { hdmiw_hdmiedid 
= IBX_HDMIW_HDMIEDID(pipe); aud_config = IBX_AUD_CFG(pipe); aud_cntl_st = IBX_AUD_CNTL_ST(pipe); aud_cntrl_st2 = IBX_AUD_CNTL_ST2; } else { hdmiw_hdmiedid = CPT_HDMIW_HDMIEDID(pipe); aud_config = CPT_AUD_CFG(pipe); aud_cntl_st = CPT_AUD_CNTL_ST(pipe); aud_cntrl_st2 = CPT_AUD_CNTRL_ST2; } DRM_DEBUG_DRIVER("ELD on pipe %c\n", pipe_name(pipe)); i = I915_READ(aud_cntl_st); i = (i >> 29) & DIP_PORT_SEL_MASK; /* DIP_Port_Select, 0x1 = PortB */ if (!i) { DRM_DEBUG_DRIVER("Audio directed to unknown port\n"); /* operate blindly on all ports */ eldv = IBX_ELD_VALIDB; eldv |= IBX_ELD_VALIDB << 4; eldv |= IBX_ELD_VALIDB << 8; } else { DRM_DEBUG_DRIVER("ELD on port %c\n", 'A' + i); eldv = IBX_ELD_VALIDB << ((i - 1) * 4); } if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) { DRM_DEBUG_DRIVER("ELD: DisplayPort detected\n"); eld[5] |= (1 << 2); /* Conn_Type, 0x1 = DisplayPort */ I915_WRITE(aud_config, AUD_CONFIG_N_VALUE_INDEX); /* 0x1 = DP */ } else I915_WRITE(aud_config, 0); if (intel_eld_uptodate(connector, aud_cntrl_st2, eldv, aud_cntl_st, IBX_ELD_ADDRESS, hdmiw_hdmiedid)) return; i = I915_READ(aud_cntrl_st2); i &= ~eldv; I915_WRITE(aud_cntrl_st2, i); if (!eld[0]) return; i = I915_READ(aud_cntl_st); i &= ~IBX_ELD_ADDRESS; I915_WRITE(aud_cntl_st, i); len = min_t(uint8_t, eld[2], 21); /* 84 bytes of hw ELD buffer */ DRM_DEBUG_DRIVER("ELD size %d\n", len); for (i = 0; i < len; i++) I915_WRITE(hdmiw_hdmiedid, *((uint32_t *)eld + i)); i = I915_READ(aud_cntrl_st2); i |= eldv; I915_WRITE(aud_cntrl_st2, i); } void intel_write_eld(struct drm_encoder *encoder, struct drm_display_mode *mode) { struct drm_crtc *crtc = encoder->crtc; struct drm_connector *connector; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = dev->dev_private; connector = drm_select_eld(encoder, mode); if (!connector) return; DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, drm_get_connector_name(connector), connector->encoder->base.id, drm_get_encoder_name(connector->encoder)); connector->eld[6] = drm_av_sync_delay(connector, mode) / 2; if (dev_priv->display.write_eld) dev_priv->display.write_eld(connector, crtc); } /** Loads the palette/gamma unit for the CRTC with the prepared values */ void intel_crtc_load_lut(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int palreg = PALETTE(intel_crtc->pipe); int i; /* The clocks have to be on to load the palette. */ if (!crtc->enabled || !intel_crtc->active) return; /* use legacy palette for Ironlake */ if (HAS_PCH_SPLIT(dev)) palreg = LGC_PALETTE(intel_crtc->pipe); for (i = 0; i < 256; i++) { I915_WRITE(palreg + 4 * i, (intel_crtc->lut_r[i] << 16) | (intel_crtc->lut_g[i] << 8) | intel_crtc->lut_b[i]); } } static void i845_update_cursor(struct drm_crtc *crtc, u32 base) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); bool visible = base != 0; u32 cntl; if (intel_crtc->cursor_visible == visible) return; cntl = I915_READ(_CURACNTR); if (visible) { /* On these chipsets we can only modify the base whilst * the cursor is disabled. 
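 * (Hence _CURABASE is written below before CURSOR_ENABLE is set in
 * _CURACNTR; the i9xx/ivb variants instead latch the base on the next
 * vblank.)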
 */
        I915_WRITE(_CURABASE, base);

        cntl &= ~(CURSOR_FORMAT_MASK);
        /* XXX width must be 64, stride 256 => 0x00 << 28 */
        cntl |= CURSOR_ENABLE |
            CURSOR_GAMMA_ENABLE |
            CURSOR_FORMAT_ARGB;
    } else
        cntl &= ~(CURSOR_ENABLE | CURSOR_GAMMA_ENABLE);
    I915_WRITE(_CURACNTR, cntl);

    intel_crtc->cursor_visible = visible;
}

static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base)
{
    struct drm_device *dev = crtc->dev;
    struct drm_i915_private *dev_priv = dev->dev_private;
    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
    int pipe = intel_crtc->pipe;
    bool visible = base != 0;

    if (intel_crtc->cursor_visible != visible) {
        uint32_t cntl = I915_READ(CURCNTR(pipe));
        if (base) {
            cntl &= ~(CURSOR_MODE | MCURSOR_PIPE_SELECT);
            cntl |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
            cntl |= pipe << 28; /* Connect to correct pipe */
        } else {
            cntl &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE);
            cntl |= CURSOR_MODE_DISABLE;
        }
        I915_WRITE(CURCNTR(pipe), cntl);

        intel_crtc->cursor_visible = visible;
    }
    /* and commit changes on next vblank */
    I915_WRITE(CURBASE(pipe), base);
}

static void ivb_update_cursor(struct drm_crtc *crtc, u32 base)
{
    struct drm_device *dev = crtc->dev;
    struct drm_i915_private *dev_priv = dev->dev_private;
    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
    int pipe = intel_crtc->pipe;
    bool visible = base != 0;

    if (intel_crtc->cursor_visible != visible) {
        uint32_t cntl = I915_READ(CURCNTR_IVB(pipe));
        if (base) {
            cntl &= ~CURSOR_MODE;
            cntl |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
        } else {
            cntl &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE);
            cntl |= CURSOR_MODE_DISABLE;
        }
        I915_WRITE(CURCNTR_IVB(pipe), cntl);

        intel_crtc->cursor_visible = visible;
    }
    /* and commit changes on next vblank */
    I915_WRITE(CURBASE_IVB(pipe), base);
}

/* If no part of the cursor is visible on the framebuffer, then the GPU
 * may hang...
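 * so intel_crtc_update_cursor() below validates the position against
 * the framebuffer bounds and forces base to 0 (cursor off) whenever
 * the cursor would be entirely off screen.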
 */
static void intel_crtc_update_cursor(struct drm_crtc *crtc,
                     bool on)
{
    struct drm_device *dev = crtc->dev;
    struct drm_i915_private *dev_priv = dev->dev_private;
    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
    int pipe = intel_crtc->pipe;
    int x = intel_crtc->cursor_x;
    int y = intel_crtc->cursor_y;
    u32 base, pos;
    bool visible;

    pos = 0;

    if (on && crtc->enabled && crtc->fb) {
        base = intel_crtc->cursor_addr;
        if (x > (int) crtc->fb->width)
            base = 0;

        if (y > (int) crtc->fb->height)
            base = 0;
    } else
        base = 0;

    if (x < 0) {
        if (x + intel_crtc->cursor_width < 0)
            base = 0;

        pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
        x = -x;
    }
    pos |= x << CURSOR_X_SHIFT;

    if (y < 0) {
        if (y + intel_crtc->cursor_height < 0)
            base = 0;

        pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
        y = -y;
    }
    pos |= y << CURSOR_Y_SHIFT;

    visible = base != 0;
    if (!visible && !intel_crtc->cursor_visible)
        return;

    if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
        I915_WRITE(CURPOS_IVB(pipe), pos);
        ivb_update_cursor(crtc, base);
    } else {
        I915_WRITE(CURPOS(pipe), pos);
        if (IS_845G(dev) || IS_I865G(dev))
            i845_update_cursor(crtc, base);
        else
            i9xx_update_cursor(crtc, base);
    }
}

static int intel_crtc_cursor_set(struct drm_crtc *crtc,
                 struct drm_file *file,
                 uint32_t handle,
                 uint32_t width, uint32_t height)
{
    struct drm_device *dev = crtc->dev;
    struct drm_i915_private *dev_priv = dev->dev_private;
    struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
    struct drm_i915_gem_object *obj;
    uint32_t addr;
    int ret;

    /* if we want to turn off the cursor ignore width and height */
    if (!handle) {
        DRM_DEBUG_KMS("cursor off\n");
        addr = 0;
        obj = NULL;
        DRM_LOCK(dev);
        goto finish;
    }

    /* Currently we only support 64x64 cursors */
    if (width != 64 || height != 64) {
        DRM_ERROR("we currently only support 64x64 cursors\n");
        return -EINVAL;
    }

    obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
    if (&obj->base == NULL)
        return -ENOENT;

    if (obj->base.size < width * height * 4) {
        DRM_ERROR("buffer is too small\n");
        ret = -ENOMEM;
        goto fail;
    }

    /* we only need to pin inside GTT if cursor is non-phy */
    DRM_LOCK(dev);
    if (!dev_priv->info->cursor_needs_physical) {
        if (obj->tiling_mode) {
            DRM_ERROR("cursor cannot be tiled\n");
            ret = -EINVAL;
            goto fail_locked;
        }

        ret = i915_gem_object_pin_to_display_plane(obj, 0, NULL);
        if (ret) {
            DRM_ERROR("failed to move cursor bo into the GTT\n");
            goto fail_locked;
        }

        ret = i915_gem_object_put_fence(obj);
        if (ret) {
            DRM_ERROR("failed to release fence for cursor");
            goto fail_unpin;
        }

        addr = obj->gtt_offset;
    } else {
        int align = IS_I830(dev) ? 16 * 1024 : 256;
        ret = i915_gem_attach_phys_object(dev, obj,
                          (intel_crtc->pipe == 0) ?
I915_GEM_PHYS_CURSOR_0 : I915_GEM_PHYS_CURSOR_1, align); if (ret) { DRM_ERROR("failed to attach phys object\n"); goto fail_locked; } addr = obj->phys_obj->handle->busaddr; } if (IS_GEN2(dev)) I915_WRITE(CURSIZE, (height << 12) | width); finish: if (intel_crtc->cursor_bo) { if (dev_priv->info->cursor_needs_physical) { if (intel_crtc->cursor_bo != obj) i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo); } else i915_gem_object_unpin(intel_crtc->cursor_bo); drm_gem_object_unreference(&intel_crtc->cursor_bo->base); } DRM_UNLOCK(dev); intel_crtc->cursor_addr = addr; intel_crtc->cursor_bo = obj; intel_crtc->cursor_width = width; intel_crtc->cursor_height = height; intel_crtc_update_cursor(crtc, true); return 0; fail_unpin: i915_gem_object_unpin(obj); fail_locked: DRM_UNLOCK(dev); fail: drm_gem_object_unreference_unlocked(&obj->base); return ret; } static int intel_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); intel_crtc->cursor_x = x; intel_crtc->cursor_y = y; intel_crtc_update_cursor(crtc, true); return 0; } /** Sets the color ramps on behalf of RandR */ void intel_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, u16 blue, int regno) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); intel_crtc->lut_r[regno] = red >> 8; intel_crtc->lut_g[regno] = green >> 8; intel_crtc->lut_b[regno] = blue >> 8; } void intel_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, int regno) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); *red = intel_crtc->lut_r[regno] << 8; *green = intel_crtc->lut_g[regno] << 8; *blue = intel_crtc->lut_b[regno] << 8; } static void intel_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t start, uint32_t size) { int end = (start + size > 256) ? 256 : start + size, i; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); for (i = start; i < end; i++) { intel_crtc->lut_r[i] = red[i] >> 8; intel_crtc->lut_g[i] = green[i] >> 8; intel_crtc->lut_b[i] = blue[i] >> 8; } intel_crtc_load_lut(crtc); } /** * Get a pipe with a simple mode set on it for doing load-based monitor * detection. * * It will be up to the load-detect code to adjust the pipe as appropriate for * its requirements. The pipe will be connected to no other encoders. * * Currently this code will only succeed if there is a pipe with no encoders * configured for it. In the future, it could choose to temporarily disable * some outputs to free up a pipe for its use. * * \return crtc, or NULL if no pipes are available. 
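 *
 * (In this implementation the result is returned as a bool; the chosen
 * crtc is recorded via intel_encoder->new_crtc and the previous state
 * is saved in *old for intel_release_load_detect_pipe().)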
*/ /* VESA 640x480x72Hz mode to set on the pipe */ static struct drm_display_mode load_detect_mode = { DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 31500, 640, 664, 704, 832, 0, 480, 489, 491, 520, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC), }; static int intel_framebuffer_create(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_i915_gem_object *obj, struct drm_framebuffer **res) { struct intel_framebuffer *intel_fb; int ret; intel_fb = malloc(sizeof(*intel_fb), DRM_MEM_KMS, M_WAITOK | M_ZERO); if (!intel_fb) { drm_gem_object_unreference_unlocked(&obj->base); return -ENOMEM; } ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj); if (ret) { drm_gem_object_unreference_unlocked(&obj->base); free(intel_fb, DRM_MEM_KMS); return ret; } *res = &intel_fb->base; return 0; } static u32 intel_framebuffer_pitch_for_width(int width, int bpp) { u32 pitch = DIV_ROUND_UP(width * bpp, 8); return roundup2(pitch, 64); } static u32 intel_framebuffer_size_for_mode(struct drm_display_mode *mode, int bpp) { u32 pitch = intel_framebuffer_pitch_for_width(mode->hdisplay, bpp); return roundup2(pitch * mode->vdisplay, PAGE_SIZE); } static int intel_framebuffer_create_for_mode(struct drm_device *dev, struct drm_display_mode *mode, int depth, int bpp, struct drm_framebuffer **res) { struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = { 0 }; obj = i915_gem_alloc_object(dev, intel_framebuffer_size_for_mode(mode, bpp)); if (obj == NULL) return -ENOMEM; mode_cmd.width = mode->hdisplay; mode_cmd.height = mode->vdisplay; mode_cmd.pitches[0] = intel_framebuffer_pitch_for_width(mode_cmd.width, bpp); mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); return intel_framebuffer_create(dev, &mode_cmd, obj, res); } static struct drm_framebuffer * mode_fits_in_fbdev(struct drm_device *dev, struct drm_display_mode *mode) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct drm_framebuffer *fb; if (dev_priv->fbdev == NULL) return NULL; obj = dev_priv->fbdev->ifb.obj; if (obj == NULL) return NULL; fb = &dev_priv->fbdev->ifb.base; if (fb->pitches[0] < intel_framebuffer_pitch_for_width(mode->hdisplay, fb->bits_per_pixel)) return NULL; if (obj->base.size < mode->vdisplay * fb->pitches[0]) return NULL; return fb; } bool intel_get_load_detect_pipe(struct drm_connector *connector, struct drm_display_mode *mode, struct intel_load_detect_pipe *old) { struct intel_crtc *intel_crtc; struct intel_encoder *intel_encoder = intel_attached_encoder(connector); struct drm_crtc *possible_crtc; struct drm_encoder *encoder = &intel_encoder->base; struct drm_crtc *crtc = NULL; struct drm_device *dev = encoder->dev; struct drm_framebuffer *fb; int i = -1; int ret; DRM_DEBUG_KMS("[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, drm_get_connector_name(connector), encoder->base.id, drm_get_encoder_name(encoder)); /* * Algorithm gets a little messy: * * - if the connector already has an assigned crtc, use it (but make * sure it's on first) * * - try to find the first unused crtc that can drive this connector, * and use that if we find one */ /* See if we already have a CRTC for this connector */ if (encoder->crtc) { crtc = encoder->crtc; old->dpms_mode = connector->dpms; old->load_detect_temp = false; /* Make sure the crtc and connector are running */ if (connector->dpms != DRM_MODE_DPMS_ON) connector->funcs->dpms(connector, DRM_MODE_DPMS_ON); return true; } /* Find an unused one (if possible) */ list_for_each_entry(possible_crtc, 
&dev->mode_config.crtc_list, head) { i++; if (!(encoder->possible_crtcs & (1 << i))) continue; if (!possible_crtc->enabled) { crtc = possible_crtc; break; } } /* * If we didn't find an unused CRTC, don't use any. */ if (!crtc) { DRM_DEBUG_KMS("no pipe available for load-detect\n"); return false; } intel_encoder->new_crtc = to_intel_crtc(crtc); to_intel_connector(connector)->new_encoder = intel_encoder; intel_crtc = to_intel_crtc(crtc); old->dpms_mode = connector->dpms; old->load_detect_temp = true; old->release_fb = NULL; if (!mode) mode = &load_detect_mode; /* We need a framebuffer large enough to accommodate all accesses * that the plane may generate whilst we perform load detection. * We can not rely on the fbcon either being present (we get called * during its initialisation to detect all boot displays, or it may * not even exist) or that it is large enough to satisfy the * requested mode. */ ret = 0; fb = mode_fits_in_fbdev(dev, mode); if (fb == NULL) { DRM_DEBUG_KMS("creating tmp fb for load-detection\n"); ret = intel_framebuffer_create_for_mode(dev, mode, 24, 32, &fb); old->release_fb = fb; } else DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n"); if (ret) { DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n"); return false; } if (!intel_set_mode(crtc, mode, 0, 0, fb)) { DRM_DEBUG_KMS("failed to set mode on load-detect pipe\n"); if (old->release_fb) old->release_fb->funcs->destroy(old->release_fb); return false; } /* let the connector get through one full cycle before testing */ intel_wait_for_vblank(dev, intel_crtc->pipe); return true; } void intel_release_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old) { struct intel_encoder *intel_encoder = intel_attached_encoder(connector); struct drm_encoder *encoder = &intel_encoder->base; DRM_DEBUG_KMS("[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, drm_get_connector_name(connector), encoder->base.id, drm_get_encoder_name(encoder)); if (old->load_detect_temp) { struct drm_crtc *crtc = encoder->crtc; to_intel_connector(connector)->new_encoder = NULL; intel_encoder->new_crtc = NULL; intel_set_mode(crtc, NULL, 0, 0, NULL); if (old->release_fb) old->release_fb->funcs->destroy(old->release_fb); return; } /* Switch crtc and encoder back off if necessary */ if (old->dpms_mode != DRM_MODE_DPMS_ON) connector->funcs->dpms(connector, old->dpms_mode); } /* Returns the clock of the currently programmed mode of the given pipe. */ static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; u32 dpll = I915_READ(DPLL(pipe)); u32 fp; intel_clock_t clock; if ((dpll & DISPLAY_RATE_SELECT_FPA1) == 0) fp = I915_READ(FP0(pipe)); else fp = I915_READ(FP1(pipe)); clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT; if (IS_PINEVIEW(dev)) { clock.n = ffs((fp & FP_N_PINEVIEW_DIV_MASK) >> FP_N_DIV_SHIFT) - 1; clock.m2 = (fp & FP_M2_PINEVIEW_DIV_MASK) >> FP_M2_DIV_SHIFT; } else { clock.n = (fp & FP_N_DIV_MASK) >> FP_N_DIV_SHIFT; clock.m2 = (fp & FP_M2_DIV_MASK) >> FP_M2_DIV_SHIFT; } if (!IS_GEN2(dev)) { if (IS_PINEVIEW(dev)) clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_PINEVIEW) >> DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW); else clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK) >> DPLL_FPA01_P1_POST_DIV_SHIFT); switch (dpll & DPLL_MODE_MASK) { case DPLLB_MODE_DAC_SERIAL: clock.p2 = dpll & DPLL_DAC_SERIAL_P2_CLOCK_DIV_5 ? 
5 : 10; break; case DPLLB_MODE_LVDS: clock.p2 = dpll & DPLLB_LVDS_P2_CLOCK_DIV_7 ? 7 : 14; break; default: DRM_DEBUG_KMS("Unknown DPLL mode %08x in programmed " "mode\n", (int)(dpll & DPLL_MODE_MASK)); return 0; } /* XXX: Handle the 100Mhz refclk */ intel_clock(dev, 96000, &clock); } else { bool is_lvds = (pipe == 1) && (I915_READ(LVDS) & LVDS_PORT_EN); if (is_lvds) { clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830_LVDS) >> DPLL_FPA01_P1_POST_DIV_SHIFT); clock.p2 = 14; if ((dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN) { /* XXX: might not be 66MHz */ intel_clock(dev, 66000, &clock); } else intel_clock(dev, 48000, &clock); } else { if (dpll & PLL_P1_DIVIDE_BY_TWO) clock.p1 = 2; else { clock.p1 = ((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830) >> DPLL_FPA01_P1_POST_DIV_SHIFT) + 2; } if (dpll & PLL_P2_DIVIDE_BY_4) clock.p2 = 4; else clock.p2 = 2; intel_clock(dev, 48000, &clock); } } /* XXX: It would be nice to validate the clocks, but we can't reuse * i830PllIsValid() because it relies on the xf86_config connector * configuration being accurate, which it isn't necessarily. */ return clock.dot; } /** Returns the currently programmed mode of the given pipe. */ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder; struct drm_display_mode *mode; int htot = I915_READ(HTOTAL(cpu_transcoder)); int hsync = I915_READ(HSYNC(cpu_transcoder)); int vtot = I915_READ(VTOTAL(cpu_transcoder)); int vsync = I915_READ(VSYNC(cpu_transcoder)); mode = malloc(sizeof(*mode), DRM_MEM_KMS, M_WAITOK | M_ZERO); if (!mode) return NULL; mode->clock = intel_crtc_clock_get(dev, crtc); mode->hdisplay = (htot & 0xffff) + 1; mode->htotal = ((htot & 0xffff0000) >> 16) + 1; mode->hsync_start = (hsync & 0xffff) + 1; mode->hsync_end = ((hsync & 0xffff0000) >> 16) + 1; mode->vdisplay = (vtot & 0xffff) + 1; mode->vtotal = ((vtot & 0xffff0000) >> 16) + 1; mode->vsync_start = (vsync & 0xffff) + 1; mode->vsync_end = ((vsync & 0xffff0000) >> 16) + 1; drm_mode_set_name(mode); return mode; } static void intel_increase_pllclock(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; drm_i915_private_t *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; int dpll_reg = DPLL(pipe); int dpll; if (HAS_PCH_SPLIT(dev)) return; if (!dev_priv->lvds_downclock_avail) return; dpll = I915_READ(dpll_reg); if (!HAS_PIPE_CXSR(dev) && (dpll & DISPLAY_RATE_SELECT_FPA1)) { DRM_DEBUG_DRIVER("upclocking LVDS\n"); assert_panel_unlocked(dev_priv, pipe); dpll &= ~DISPLAY_RATE_SELECT_FPA1; I915_WRITE(dpll_reg, dpll); intel_wait_for_vblank(dev, pipe); dpll = I915_READ(dpll_reg); if (dpll & DISPLAY_RATE_SELECT_FPA1) DRM_DEBUG_DRIVER("failed to upclock LVDS!\n"); } } static void intel_decrease_pllclock(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; drm_i915_private_t *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); if (HAS_PCH_SPLIT(dev)) return; if (!dev_priv->lvds_downclock_avail) return; /* * Since this is called by a timer, we should never get here in * the manual case. 
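 * (See intel_mark_idle() below, which invokes this for every crtc that
 * still has a framebuffer attached.)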
*/ if (!HAS_PIPE_CXSR(dev) && intel_crtc->lowfreq_avail) { int pipe = intel_crtc->pipe; int dpll_reg = DPLL(pipe); int dpll; DRM_DEBUG_DRIVER("downclocking LVDS\n"); assert_panel_unlocked(dev_priv, pipe); dpll = I915_READ(dpll_reg); dpll |= DISPLAY_RATE_SELECT_FPA1; I915_WRITE(dpll_reg, dpll); intel_wait_for_vblank(dev, pipe); dpll = I915_READ(dpll_reg); if (!(dpll & DISPLAY_RATE_SELECT_FPA1)) DRM_DEBUG_DRIVER("failed to downclock LVDS!\n"); } } void intel_mark_busy(struct drm_device *dev) { i915_update_gfx_val(dev->dev_private); } void intel_mark_idle(struct drm_device *dev) { struct drm_crtc *crtc; if (!i915_powersave) return; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { if (!crtc->fb) continue; intel_decrease_pllclock(crtc); } } void intel_mark_fb_busy(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_crtc *crtc; if (!i915_powersave) return; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { if (!crtc->fb) continue; if (to_intel_framebuffer(crtc->fb)->obj == obj) intel_increase_pllclock(crtc); } } static void intel_crtc_destroy(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_unpin_work *work; mtx_lock(&dev->event_lock); work = intel_crtc->unpin_work; intel_crtc->unpin_work = NULL; mtx_unlock(&dev->event_lock); if (work) { taskqueue_cancel(dev_priv->wq, &work->work, NULL); taskqueue_drain(dev_priv->wq, &work->work); free(work, DRM_MEM_KMS); } drm_crtc_cleanup(crtc); free(intel_crtc, DRM_MEM_KMS); } static void intel_unpin_work_fn(void *arg, int pending) { struct intel_unpin_work *work = arg; struct drm_device *dev = work->crtc->dev; DRM_LOCK(dev); intel_unpin_fb_obj(work->old_fb_obj); drm_gem_object_unreference(&work->pending_flip_obj->base); drm_gem_object_unreference(&work->old_fb_obj->base); intel_update_fbc(dev); DRM_UNLOCK(dev); BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0); atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count); free(work, DRM_MEM_KMS); } static void do_intel_finish_page_flip(struct drm_device *dev, struct drm_crtc *crtc) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_unpin_work *work; struct drm_i915_gem_object *obj; /* Ignore early vblank irqs */ if (intel_crtc == NULL) return; mtx_lock(&dev->event_lock); work = intel_crtc->unpin_work; /* Ensure we don't miss a work->pending update ... */ smp_rmb(); if (work == NULL || atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) { mtx_unlock(&dev->event_lock); return; } /* and that the unpin work is consistent wrt ->pending. 
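 * A sketch of the ordering protocol used here (names as in this file):
 *
 *   queue side, intel_mark_page_flip_active():
 *       initialise *work;  smp_wmb();  pending = INTEL_FLIP_PENDING;
 *   irq side, this function:
 *       read pending;      smp_rmb();  read *work;
 *
 * so once we observe pending >= INTEL_FLIP_COMPLETE we are guaranteed
 * to also observe a fully initialised work item.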
*/ smp_rmb(); intel_crtc->unpin_work = NULL; if (work->event) drm_send_vblank_event(dev, intel_crtc->pipe, work->event); drm_vblank_put(dev, intel_crtc->pipe); mtx_unlock(&dev->event_lock); obj = work->old_fb_obj; atomic_clear_mask(1 << intel_crtc->plane, &obj->pending_flip); wake_up(&dev_priv->pending_flip_queue); taskqueue_enqueue(dev_priv->wq, &work->work); CTR2(KTR_DRM, "i915_flip_complete %d %p", intel_crtc->plane, work->pending_flip_obj); } void intel_finish_page_flip(struct drm_device *dev, int pipe) { drm_i915_private_t *dev_priv = dev->dev_private; struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; do_intel_finish_page_flip(dev, crtc); } void intel_finish_page_flip_plane(struct drm_device *dev, int plane) { drm_i915_private_t *dev_priv = dev->dev_private; struct drm_crtc *crtc = dev_priv->plane_to_crtc_mapping[plane]; do_intel_finish_page_flip(dev, crtc); } void intel_prepare_page_flip(struct drm_device *dev, int plane) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]); /* NB: An MMIO update of the plane base pointer will also * generate a page-flip completion irq, i.e. every modeset * is also accompanied by a spurious intel_prepare_page_flip(). */ mtx_lock(&dev->event_lock); if (intel_crtc->unpin_work) atomic_inc_not_zero(&intel_crtc->unpin_work->pending); mtx_unlock(&dev->event_lock); } inline static void intel_mark_page_flip_active(struct intel_crtc *intel_crtc) { /* Ensure that the work item is consistent when activating it ... */ smp_wmb(); atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); /* and that it is marked active as soon as the irq could fire. */ smp_wmb(); } static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; int ret; ret = intel_pin_and_fence_fb_obj(dev, obj, ring); if (ret) goto err; ret = intel_ring_begin(ring, 6); if (ret) goto err_unpin; /* Can't queue multiple flips, so wait for the previous * one to finish before executing the next. 
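 * The 6-dword sequence emitted below is roughly:
 *
 *   MI_WAIT_FOR_EVENT | flip_mask    stall the ring until any flip
 *                                    still pending on this plane is
 *                                    done
 *   MI_NOOP
 *   MI_DISPLAY_FLIP | plane          then queue the new scanout:
 *   pitch, base address, 0           (the last dword is the unused
 *                                    aux display base)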
*/ if (intel_crtc->plane) flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); intel_ring_emit(ring, fb->pitches[0]); intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); intel_ring_emit(ring, 0); /* aux display base address, unused */ intel_mark_page_flip_active(intel_crtc); intel_ring_advance(ring); return 0; err_unpin: intel_unpin_fb_obj(obj); err: return ret; } static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; int ret; ret = intel_pin_and_fence_fb_obj(dev, obj, ring); if (ret) goto err; ret = intel_ring_begin(ring, 6); if (ret) goto err_unpin; if (intel_crtc->plane) flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); intel_ring_emit(ring, fb->pitches[0]); intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); intel_ring_emit(ring, MI_NOOP); intel_mark_page_flip_active(intel_crtc); intel_ring_advance(ring); return 0; err_unpin: intel_unpin_fb_obj(obj); err: return ret; } static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; int ret; ret = intel_pin_and_fence_fb_obj(dev, obj, ring); if (ret) goto err; ret = intel_ring_begin(ring, 4); if (ret) goto err_unpin; /* i965+ uses the linear or tiled offsets from the * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); intel_ring_emit(ring, fb->pitches[0]); intel_ring_emit(ring, (obj->gtt_offset + intel_crtc->dspaddr_offset) | obj->tiling_mode); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. * pf = I915_READ(pipe == 0 ? 
PFA_CTL_1 : PFB_CTL_1) & PF_ENABLE; */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; intel_ring_emit(ring, pf | pipesrc); intel_mark_page_flip_active(intel_crtc); intel_ring_advance(ring); return 0; err_unpin: intel_unpin_fb_obj(obj); err: return ret; } static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; uint32_t pf, pipesrc; int ret; ret = intel_pin_and_fence_fb_obj(dev, obj, ring); if (ret) goto err; ret = intel_ring_begin(ring, 4); if (ret) goto err_unpin; intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); intel_ring_emit(ring, fb->pitches[0] | obj->tiling_mode); intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page * flipping with a non-native mode, and worse causes a normal * modeset to fail. * pf = I915_READ(PF_CTL(intel_crtc->pipe)) & PF_ENABLE; */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; intel_ring_emit(ring, pf | pipesrc); intel_mark_page_flip_active(intel_crtc); intel_ring_advance(ring); return 0; err_unpin: intel_unpin_fb_obj(obj); err: return ret; } /* * On gen7 we currently use the blit ring because (in early silicon at least) * the render ring doesn't give us interrupts for page flip completion, which * means clients will hang after the first flip is queued. Fortunately the * blit ring generates interrupts properly, so use it instead. */ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_ring_buffer *ring = &dev_priv->ring[BCS]; uint32_t plane_bit = 0; int ret; ret = intel_pin_and_fence_fb_obj(dev, obj, ring); if (ret) goto err; switch(intel_crtc->plane) { case PLANE_A: plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A; break; case PLANE_B: plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_B; break; case PLANE_C: plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_C; break; default: WARN_ONCE(1, "unknown plane in flip command\n"); ret = -ENODEV; goto err_unpin; } ret = intel_ring_begin(ring, 4); if (ret) goto err_unpin; intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode)); intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); intel_ring_emit(ring, (MI_NOOP)); intel_mark_page_flip_active(intel_crtc); intel_ring_advance(ring); return 0; err_unpin: intel_unpin_fb_obj(obj); err: return ret; } static int intel_default_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj) { return -ENODEV; } static int intel_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_framebuffer *old_fb = crtc->fb; struct drm_i915_gem_object *obj = to_intel_framebuffer(fb)->obj; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_unpin_work *work; int ret; /* Can't change pixel format via MI display flips.
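 * (The MI display flip only updates the scanout base address, plus
 * pitch and tiling bits on some generations; it cannot change the
 * pixel format or the TILEOFF/LINOFF offsets, hence the rejection
 * checks here.)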
*/ if (fb->pixel_format != crtc->fb->pixel_format) return -EINVAL; /* * TILEOFF/LINOFF registers can't be changed via MI display flips. * Note that pitch changes could also affect these registers. */ if (INTEL_INFO(dev)->gen > 3 && (fb->offsets[0] != crtc->fb->offsets[0] || fb->pitches[0] != crtc->fb->pitches[0])) return -EINVAL; work = malloc(sizeof *work, DRM_MEM_KMS, M_WAITOK | M_ZERO); if (work == NULL) return -ENOMEM; work->event = event; work->crtc = crtc; work->old_fb_obj = to_intel_framebuffer(old_fb)->obj; TASK_INIT(&work->work, 0, intel_unpin_work_fn, work); ret = drm_vblank_get(dev, intel_crtc->pipe); if (ret) goto free_work; /* We borrow the event spin lock for protecting unpin_work */ mtx_lock(&dev->event_lock); if (intel_crtc->unpin_work) { mtx_unlock(&dev->event_lock); free(work, DRM_MEM_KMS); drm_vblank_put(dev, intel_crtc->pipe); DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); return -EBUSY; } intel_crtc->unpin_work = work; mtx_unlock(&dev->event_lock); if (atomic_read(&intel_crtc->unpin_work_count) >= 2) taskqueue_drain_all(dev_priv->wq); ret = i915_mutex_lock_interruptible(dev); if (ret) goto cleanup; /* Reference the objects for the scheduled work. */ drm_gem_object_reference(&work->old_fb_obj->base); drm_gem_object_reference(&obj->base); crtc->fb = fb; work->pending_flip_obj = obj; work->enable_stall_check = true; /* Block clients from rendering to the new back buffer until * the flip occurs and the object is no longer visible. */ atomic_add(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip); atomic_inc(&intel_crtc->unpin_work_count); ret = dev_priv->display.queue_flip(dev, crtc, fb, obj); if (ret) goto cleanup_pending; intel_disable_fbc(dev); intel_mark_fb_busy(obj); DRM_UNLOCK(dev); CTR2(KTR_DRM, "i915_flip_request %d %p", intel_crtc->plane, obj); return 0; cleanup_pending: atomic_dec(&intel_crtc->unpin_work_count); crtc->fb = old_fb; atomic_sub(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip); drm_gem_object_unreference(&work->old_fb_obj->base); drm_gem_object_unreference(&obj->base); DRM_UNLOCK(dev); cleanup: mtx_lock(&dev->event_lock); intel_crtc->unpin_work = NULL; mtx_unlock(&dev->event_lock); drm_vblank_put(dev, intel_crtc->pipe); free_work: free(work, DRM_MEM_KMS); return ret; } static struct drm_crtc_helper_funcs intel_helper_funcs = { .mode_set_base_atomic = intel_pipe_set_base_atomic, .load_lut = intel_crtc_load_lut, .disable = intel_crtc_noop, }; bool intel_encoder_check_is_cloned(struct intel_encoder *encoder) { struct intel_encoder *other_encoder; struct drm_crtc *crtc = &encoder->new_crtc->base; if (WARN_ON(!crtc)) return false; list_for_each_entry(other_encoder, &crtc->dev->mode_config.encoder_list, base.head) { if (&other_encoder->new_crtc->base != crtc || encoder == other_encoder) continue; else return true; } return false; } static bool intel_encoder_crtc_ok(struct drm_encoder *encoder, struct drm_crtc *crtc) { struct drm_device *dev; struct drm_crtc *tmp; int crtc_mask = 1; WARN(!crtc, "checking null crtc?\n"); dev = crtc->dev; list_for_each_entry(tmp, &dev->mode_config.crtc_list, head) { if (tmp == crtc) break; crtc_mask <<= 1; } if (encoder->possible_crtcs & crtc_mask) return true; return false; } /** * intel_modeset_update_staged_output_state * * Updates the staged output configuration state, e.g. after we've read out the * current hw state.
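 *
 * A sketch of the staging convention used throughout this file:
 *
 *   connector->new_encoder  staged; committed to connector->base.encoder
 *   encoder->new_crtc       staged; committed to encoder->base.crtc
 *
 * intel_modeset_commit_output_state() below copies the staged pointers
 * into the committed ones once the affected pipes are shut down.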
*/ static void intel_modeset_update_staged_output_state(struct drm_device *dev) { struct intel_encoder *encoder; struct intel_connector *connector; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { connector->new_encoder = to_intel_encoder(connector->base.encoder); } list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { encoder->new_crtc = to_intel_crtc(encoder->base.crtc); } } /** * intel_modeset_commit_output_state * * This function copies the staged display pipe configuration to the real one. */ static void intel_modeset_commit_output_state(struct drm_device *dev) { struct intel_encoder *encoder; struct intel_connector *connector; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { connector->base.encoder = &connector->new_encoder->base; } list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { encoder->base.crtc = &encoder->new_crtc->base; } } static int intel_modeset_adjusted_mode(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode **res) { struct drm_device *dev = crtc->dev; struct drm_display_mode *adjusted_mode; struct drm_encoder_helper_funcs *encoder_funcs; struct intel_encoder *encoder; adjusted_mode = drm_mode_duplicate(dev, mode); if (!adjusted_mode) return -ENOMEM; /* Pass our mode to the connectors and the CRTC to give them a chance to * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. */ list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { if (&encoder->new_crtc->base != crtc) continue; encoder_funcs = encoder->base.helper_private; if (!(encoder_funcs->mode_fixup(&encoder->base, mode, adjusted_mode))) { DRM_DEBUG_KMS("Encoder fixup failed\n"); goto fail; } } if (!(intel_crtc_mode_fixup(crtc, mode, adjusted_mode))) { DRM_DEBUG_KMS("CRTC fixup failed\n"); goto fail; } DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id); *res = adjusted_mode; return 0; fail: drm_mode_destroy(dev, adjusted_mode); return -EINVAL; } /* Computes which crtcs are affected and sets the relevant bits in the mask. For * simplicity we use the crtc's pipe number (because it's easier to obtain). */ static void intel_modeset_affected_pipes(struct drm_crtc *crtc, unsigned *modeset_pipes, unsigned *prepare_pipes, unsigned *disable_pipes) { struct intel_crtc *intel_crtc; struct drm_device *dev = crtc->dev; struct intel_encoder *encoder; struct intel_connector *connector; struct drm_crtc *tmp_crtc; *disable_pipes = *modeset_pipes = *prepare_pipes = 0; /* Check which crtcs have changed outputs connected to them, these need * to be part of the prepare_pipes mask. We don't (yet) support global * modeset across multiple crtcs, so modeset_pipes will only have one * bit set at most. */ list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (connector->base.encoder == &connector->new_encoder->base) continue; if (connector->base.encoder) { tmp_crtc = connector->base.encoder->crtc; *prepare_pipes |= 1 << to_intel_crtc(tmp_crtc)->pipe; } if (connector->new_encoder) *prepare_pipes |= 1 << connector->new_encoder->new_crtc->pipe; } list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { if (encoder->base.crtc == &encoder->new_crtc->base) continue; if (encoder->base.crtc) { tmp_crtc = encoder->base.crtc; *prepare_pipes |= 1 << to_intel_crtc(tmp_crtc)->pipe; } if (encoder->new_crtc) *prepare_pipes |= 1 << encoder->new_crtc->pipe; } /* Check for any pipes that will be fully disabled ...
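 * A worked example (hypothetical, two pipes): moving the only encoder
 * from pipe A to pipe B puts both pipes into prepare_pipes (bits 0 and
 * 1); pipe A then has no encoder pointing at it any more, so it lands
 * in disable_pipes and is masked back out of modeset_pipes and
 * prepare_pipes further down.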
*/ list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) { bool used = false; /* Don't try to disable disabled crtcs. */ if (!intel_crtc->base.enabled) continue; list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { if (encoder->new_crtc == intel_crtc) used = true; } if (!used) *disable_pipes |= 1 << intel_crtc->pipe; } /* set_mode is also used to update properties on live display pipes. */ intel_crtc = to_intel_crtc(crtc); if (crtc->enabled) *prepare_pipes |= 1 << intel_crtc->pipe; /* * For simplicity do a full modeset on any pipe where the output routing * changed. We could be more clever, but that would require us to be * more careful with calling the relevant encoder->mode_set functions. */ if (*prepare_pipes) *modeset_pipes = *prepare_pipes; /* ... and mask these out. */ *modeset_pipes &= ~(*disable_pipes); *prepare_pipes &= ~(*disable_pipes); /* * HACK: We don't (yet) fully support global modesets. intel_set_config * obeys this rule, but the modeset restore mode of * intel_modeset_setup_hw_state does not. */ *modeset_pipes &= 1 << intel_crtc->pipe; *prepare_pipes &= 1 << intel_crtc->pipe; } static bool intel_crtc_in_use(struct drm_crtc *crtc) { struct drm_encoder *encoder; struct drm_device *dev = crtc->dev; list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) if (encoder->crtc == crtc) return true; return false; } static void intel_modeset_update_state(struct drm_device *dev, unsigned prepare_pipes) { struct intel_encoder *intel_encoder; struct intel_crtc *intel_crtc; struct drm_connector *connector; list_for_each_entry(intel_encoder, &dev->mode_config.encoder_list, base.head) { if (!intel_encoder->base.crtc) continue; intel_crtc = to_intel_crtc(intel_encoder->base.crtc); if (prepare_pipes & (1 << intel_crtc->pipe)) intel_encoder->connectors_active = false; } intel_modeset_commit_output_state(dev); /* Update computed state. */ list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) { intel_crtc->base.enabled = intel_crtc_in_use(&intel_crtc->base); } list_for_each_entry(connector, &dev->mode_config.connector_list, head) { if (!connector->encoder || !connector->encoder->crtc) continue; intel_crtc = to_intel_crtc(connector->encoder->crtc); if (prepare_pipes & (1 << intel_crtc->pipe)) { struct drm_property *dpms_property = dev->mode_config.dpms_property; connector->dpms = DRM_MODE_DPMS_ON; drm_object_property_set_value(&connector->base, dpms_property, DRM_MODE_DPMS_ON); intel_encoder = to_intel_encoder(connector->encoder); intel_encoder->connectors_active = true; } } } #define for_each_intel_crtc_masked(dev, mask, intel_crtc) \ list_for_each_entry((intel_crtc), \ &(dev)->mode_config.crtc_list, \ base.head) \ if (mask & (1 <<(intel_crtc)->pipe)) \ void intel_modeset_check_state(struct drm_device *dev) { struct intel_crtc *crtc; struct intel_encoder *encoder; struct intel_connector *connector; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { /* This also checks the encoder/connector hw state with the * ->get_hw_state callbacks.
*/ intel_connector_check_state(connector); WARN(&connector->new_encoder->base != connector->base.encoder, "connector's staged encoder doesn't match current encoder\n"); } list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { bool enabled = false; bool active = false; enum pipe pipe, tracked_pipe; DRM_DEBUG_KMS("[ENCODER:%d:%s]\n", encoder->base.base.id, drm_get_encoder_name(&encoder->base)); WARN(&encoder->new_crtc->base != encoder->base.crtc, "encoder's staged crtc doesn't match current crtc\n"); WARN(encoder->connectors_active && !encoder->base.crtc, "encoder's active_connectors set, but no crtc\n"); list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (connector->base.encoder != &encoder->base) continue; enabled = true; if (connector->base.dpms != DRM_MODE_DPMS_OFF) active = true; } WARN(!!encoder->base.crtc != enabled, "encoder's enabled state mismatch " "(expected %i, found %i)\n", !!encoder->base.crtc, enabled); WARN(active && !encoder->base.crtc, "active encoder with no crtc\n"); WARN(encoder->connectors_active != active, "encoder's computed active state doesn't match tracked active state " "(expected %i, found %i)\n", active, encoder->connectors_active); active = encoder->get_hw_state(encoder, &pipe); WARN(active != encoder->connectors_active, "encoder's hw state doesn't match sw tracking " "(expected %i, found %i)\n", encoder->connectors_active, active); if (!encoder->base.crtc) continue; tracked_pipe = to_intel_crtc(encoder->base.crtc)->pipe; WARN(active && pipe != tracked_pipe, "active encoder's pipe doesn't match " "(expected %i, found %i)\n", tracked_pipe, pipe); } list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { bool enabled = false; bool active = false; DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.base.id); WARN(crtc->active && !crtc->base.enabled, "active crtc, but not enabled in sw tracking\n"); list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { if (encoder->base.crtc != &crtc->base) continue; enabled = true; if (encoder->connectors_active) active = true; } WARN(active != crtc->active, "crtc's computed active state doesn't match tracked active state " "(expected %i, found %i)\n", active, crtc->active); WARN(enabled != crtc->base.enabled, "crtc's computed enabled state doesn't match tracked enabled state " "(expected %i, found %i)\n", enabled, crtc->base.enabled); assert_pipe(dev->dev_private, crtc->pipe, crtc->active); } } bool intel_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode, int x, int y, struct drm_framebuffer *fb) { struct drm_device *dev = crtc->dev; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_display_mode *adjusted_mode, saved_mode, saved_hwmode; struct intel_crtc *intel_crtc; unsigned disable_pipes, prepare_pipes, modeset_pipes; bool ret = true; intel_modeset_affected_pipes(crtc, &modeset_pipes, &prepare_pipes, &disable_pipes); DRM_DEBUG_KMS("set mode pipe masks: modeset: %x, prepare: %x, disable: %x\n", modeset_pipes, prepare_pipes, disable_pipes); for_each_intel_crtc_masked(dev, disable_pipes, intel_crtc) intel_crtc_disable(&intel_crtc->base); saved_hwmode = crtc->hwmode; saved_mode = crtc->mode; /* Hack: Because we don't (yet) support global modeset on multiple * crtcs, we don't keep track of the new mode for more than one crtc. * Hence simply check whether any bit is set in modeset_pipes in all the * pieces of code that are not yet converted to deal with multiple crtcs * changing their mode at the same time.
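 * In outline the sequence below is: compute the adjusted mode, disable
 * every pipe in prepare_pipes, commit the staged output state, call
 * ->crtc_mode_set() for modeset_pipes, re-enable the pipes in
 * prepare_pipes, and finally recompute the vblank timestamping
 * constants from the real hwmode.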
*/ adjusted_mode = NULL; if (modeset_pipes) { int err = intel_modeset_adjusted_mode(crtc, mode, &adjusted_mode); if (err) { return false; } } for_each_intel_crtc_masked(dev, prepare_pipes, intel_crtc) { if (intel_crtc->base.enabled) dev_priv->display.crtc_disable(&intel_crtc->base); } /* crtc->mode is already used by the ->mode_set callbacks, hence we need * to set it here already despite that we pass it down the callchain. */ if (modeset_pipes) crtc->mode = *mode; /* Only after disabling all output pipelines that will be changed can we * update the output configuration. */ intel_modeset_update_state(dev, prepare_pipes); if (dev_priv->display.modeset_global_resources) dev_priv->display.modeset_global_resources(dev); /* Set up the DPLL and any encoder state that needs to adjust or depend * on the DPLL. */ for_each_intel_crtc_masked(dev, modeset_pipes, intel_crtc) { ret = !intel_crtc_mode_set(&intel_crtc->base, mode, adjusted_mode, x, y, fb); if (!ret) goto done; } /* Now enable the clocks, plane, pipe, and connectors that we set up. */ for_each_intel_crtc_masked(dev, prepare_pipes, intel_crtc) dev_priv->display.crtc_enable(&intel_crtc->base); if (modeset_pipes) { /* Store real post-adjustment hardware mode. */ crtc->hwmode = *adjusted_mode; /* Calculate and store various constants which * are later needed by vblank and swap-completion * timestamping. They are derived from true hwmode. */ drm_calc_timestamping_constants(crtc); } /* FIXME: add subpixel order */ done: drm_mode_destroy(dev, adjusted_mode); if (!ret && crtc->enabled) { crtc->hwmode = saved_hwmode; crtc->mode = saved_mode; } else { intel_modeset_check_state(dev); } return ret; } #undef for_each_intel_crtc_masked static void intel_set_config_free(struct intel_set_config *config) { if (!config) return; free(config->save_connector_encoders, DRM_MEM_KMS); free(config->save_encoder_crtcs, DRM_MEM_KMS); free(config, DRM_MEM_KMS); } static int intel_set_config_save_state(struct drm_device *dev, struct intel_set_config *config) { struct drm_encoder *encoder; struct drm_connector *connector; int count; config->save_encoder_crtcs = malloc(dev->mode_config.num_encoder * sizeof(struct drm_crtc *), DRM_MEM_KMS, M_NOWAIT | M_ZERO); if (!config->save_encoder_crtcs) return -ENOMEM; config->save_connector_encoders = malloc(dev->mode_config.num_connector * sizeof(struct drm_encoder *), DRM_MEM_KMS, M_NOWAIT | M_ZERO); if (!config->save_connector_encoders) return -ENOMEM; /* Copy data. Note that driver private data is not affected. * Should anything bad happen only the expected state is * restored, not the driver's personal bookkeeping.
*/ count = 0; list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { config->save_encoder_crtcs[count++] = encoder->crtc; } count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { config->save_connector_encoders[count++] = connector->encoder; } return 0; } static void intel_set_config_restore_state(struct drm_device *dev, struct intel_set_config *config) { struct intel_encoder *encoder; struct intel_connector *connector; int count; count = 0; list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { encoder->new_crtc = to_intel_crtc(config->save_encoder_crtcs[count++]); } count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { connector->new_encoder = to_intel_encoder(config->save_connector_encoders[count++]); } } static void intel_set_config_compute_mode_changes(struct drm_mode_set *set, struct intel_set_config *config) { /* We should be able to check here if the fb has the same properties * and then just flip_or_move it */ if (set->crtc->fb != set->fb) { /* If we have no fb then treat it as a full mode set */ if (set->crtc->fb == NULL) { DRM_DEBUG_KMS("crtc has no fb, full mode set\n"); config->mode_changed = true; } else if (set->fb == NULL) { config->mode_changed = true; } else if (set->fb->depth != set->crtc->fb->depth) { config->mode_changed = true; } else if (set->fb->bits_per_pixel != set->crtc->fb->bits_per_pixel) { config->mode_changed = true; } else config->fb_changed = true; } if (set->fb && (set->x != set->crtc->x || set->y != set->crtc->y)) config->fb_changed = true; if (set->mode && !drm_mode_equal(set->mode, &set->crtc->mode)) { DRM_DEBUG_KMS("modes are different, full mode set\n"); drm_mode_debug_printmodeline(&set->crtc->mode); drm_mode_debug_printmodeline(set->mode); config->mode_changed = true; } } static int intel_modeset_stage_output_state(struct drm_device *dev, struct drm_mode_set *set, struct intel_set_config *config) { struct drm_crtc *new_crtc; struct intel_connector *connector; struct intel_encoder *encoder; int count, ro; /* The upper layers ensure that we either disable a crtc or have a list * of connectors. For paranoia, double-check this. */ WARN_ON(!set->fb && (set->num_connectors != 0)); WARN_ON(set->fb && (set->num_connectors == 0)); count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { /* Otherwise traverse passed in connector list and get encoders * for them. */ for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro] == &connector->base) { connector->new_encoder = connector->encoder; break; } } /* If we disable the crtc, disable all its connectors. Also, if * the connector is on the changing crtc but not on the new * connector list, disable it. */ if ((!set->fb || ro == set->num_connectors) && connector->base.encoder && connector->base.encoder->crtc == set->crtc) { connector->new_encoder = NULL; DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [NOCRTC]\n", connector->base.base.id, drm_get_connector_name(&connector->base)); } if (&connector->new_encoder->base != connector->base.encoder) { DRM_DEBUG_KMS("encoder changed, full mode switch\n"); config->mode_changed = true; } } /* connector->new_encoder is now updated for all connectors. */ /* Update crtc of enabled connectors.
*/ count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (!connector->new_encoder) continue; new_crtc = connector->new_encoder->base.crtc; for (ro = 0; ro < set->num_connectors; ro++) { if (set->connectors[ro] == &connector->base) new_crtc = set->crtc; } /* Make sure the new CRTC will work with the encoder */ if (!intel_encoder_crtc_ok(&connector->new_encoder->base, new_crtc)) { return -EINVAL; } connector->encoder->new_crtc = to_intel_crtc(new_crtc); DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [CRTC:%d]\n", connector->base.base.id, drm_get_connector_name(&connector->base), new_crtc->base.id); } /* Check for any encoders that need to be disabled. */ list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (connector->new_encoder == encoder) { WARN_ON(!connector->new_encoder->new_crtc); goto next_encoder; } } encoder->new_crtc = NULL; next_encoder: /* Only now check for crtc changes so we don't miss encoders * that will be disabled. */ if (&encoder->new_crtc->base != encoder->base.crtc) { DRM_DEBUG_KMS("crtc changed, full mode switch\n"); config->mode_changed = true; } } /* Now we've also updated encoder->new_crtc for all encoders. */ return 0; } static int intel_crtc_set_config(struct drm_mode_set *set) { struct drm_device *dev; struct drm_mode_set save_set; struct intel_set_config *config; int ret; BUG_ON(!set); BUG_ON(!set->crtc); BUG_ON(!set->crtc->helper_private); if (!set->mode) set->fb = NULL; /* The fb helper likes to play gross jokes with ->mode_set_config. * Unfortunately the crtc helper doesn't do much at all for this case, * so we have to cope with this madness until the fb helper is fixed up. */ if (set->fb && set->num_connectors == 0) return 0; if (set->fb) { DRM_DEBUG_KMS("[CRTC:%d] [FB:%d] #connectors=%d (x y) (%i %i)\n", set->crtc->base.id, set->fb->base.id, (int)set->num_connectors, set->x, set->y); } else { DRM_DEBUG_KMS("[CRTC:%d] [NOFB]\n", set->crtc->base.id); } dev = set->crtc->dev; ret = -ENOMEM; config = malloc(sizeof(*config), DRM_MEM_KMS, M_NOWAIT | M_ZERO); if (!config) goto out_config; ret = intel_set_config_save_state(dev, config); if (ret) goto out_config; save_set.crtc = set->crtc; save_set.mode = &set->crtc->mode; save_set.x = set->crtc->x; save_set.y = set->crtc->y; save_set.fb = set->crtc->fb; /* Compute whether we need a full modeset, only an fb base update or no * change at all. In the future we might also check whether only the * mode changed, e.g. for LVDS where we only change the panel fitter in * such cases.
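 * A summary of what compute_mode_changes() decides, in order (a sketch
 * of the checks below, not an exhaustive list):
 *
 *   old fb NULL, or new fb NULL         -> mode_changed
 *   depth or bits_per_pixel differ      -> mode_changed
 *   same fb properties, new x/y offset  -> fb_changed
 *   !drm_mode_equal(new, current mode)  -> mode_changed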
*/ intel_set_config_compute_mode_changes(set, config); ret = intel_modeset_stage_output_state(dev, set, config); if (ret) goto fail; if (config->mode_changed) { if (set->mode) { DRM_DEBUG_KMS("attempting to set mode from" " userspace\n"); drm_mode_debug_printmodeline(set->mode); } if (!intel_set_mode(set->crtc, set->mode, set->x, set->y, set->fb)) { DRM_ERROR("failed to set mode on [CRTC:%d]\n", set->crtc->base.id); ret = -EINVAL; goto fail; } } else if (config->fb_changed) { ret = intel_pipe_set_base(set->crtc, set->x, set->y, set->fb); } intel_set_config_free(config); return 0; fail: intel_set_config_restore_state(dev, config); /* Try to restore the config */ if (config->mode_changed && !intel_set_mode(save_set.crtc, save_set.mode, save_set.x, save_set.y, save_set.fb)) DRM_ERROR("failed to restore config after modeset failure\n"); out_config: intel_set_config_free(config); return ret; } static const struct drm_crtc_funcs intel_crtc_funcs = { .cursor_set = intel_crtc_cursor_set, .cursor_move = intel_crtc_cursor_move, .gamma_set = intel_crtc_gamma_set, .set_config = intel_crtc_set_config, .destroy = intel_crtc_destroy, .page_flip = intel_crtc_page_flip, }; static void intel_cpu_pll_init(struct drm_device *dev) { if (IS_HASWELL(dev)) intel_ddi_pll_init(dev); } static void intel_pch_pll_init(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; int i; if (dev_priv->num_pch_pll == 0) { DRM_DEBUG_KMS("No PCH PLLs on this hardware, skipping initialisation\n"); return; } for (i = 0; i < dev_priv->num_pch_pll; i++) { dev_priv->pch_plls[i].pll_reg = _PCH_DPLL(i); dev_priv->pch_plls[i].fp0_reg = _PCH_FP0(i); dev_priv->pch_plls[i].fp1_reg = _PCH_FP1(i); } } static void intel_crtc_init(struct drm_device *dev, int pipe) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc; int i; intel_crtc = malloc(sizeof(struct intel_crtc) + (INTELFB_CONN_LIMIT * sizeof(struct drm_connector *)), DRM_MEM_KMS, M_WAITOK | M_ZERO); if (intel_crtc == NULL) return; drm_crtc_init(dev, &intel_crtc->base, &intel_crtc_funcs); drm_mode_crtc_set_gamma_size(&intel_crtc->base, 256); for (i = 0; i < 256; i++) { intel_crtc->lut_r[i] = i; intel_crtc->lut_g[i] = i; intel_crtc->lut_b[i] = i; } /* Swap pipes & planes for FBC on pre-965 */ intel_crtc->pipe = pipe; intel_crtc->plane = pipe; intel_crtc->cpu_transcoder = pipe; if (IS_MOBILE(dev) && IS_GEN3(dev)) { DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); intel_crtc->plane = !pipe; } BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base; dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base; intel_crtc->bpp = 24; /* default for pre-Ironlake */ drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); } int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_get_pipe_from_crtc_id *pipe_from_crtc_id = data; struct drm_mode_object *drmmode_obj; struct intel_crtc *crtc; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -ENODEV; drmmode_obj = drm_mode_object_find(dev, pipe_from_crtc_id->crtc_id, DRM_MODE_OBJECT_CRTC); if (!drmmode_obj) { DRM_ERROR("no such CRTC id\n"); return -EINVAL; } crtc = to_intel_crtc(obj_to_crtc(drmmode_obj)); pipe_from_crtc_id->pipe = crtc->pipe; return 0; } static int intel_encoder_clones(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; struct intel_encoder 
*source_encoder; int index_mask = 0; int entry = 0; list_for_each_entry(source_encoder, &dev->mode_config.encoder_list, base.head) { if (encoder == source_encoder) index_mask |= (1 << entry); /* Intel hw has only one MUX where encoders could be cloned. */ if (encoder->cloneable && source_encoder->cloneable) index_mask |= (1 << entry); entry++; } return index_mask; } static bool has_edp_a(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; if (!IS_MOBILE(dev)) return false; if ((I915_READ(DP_A) & DP_DETECTED) == 0) return false; if (IS_GEN5(dev) && (I915_READ(ILK_DISPLAY_CHICKEN_FUSES) & ILK_eDP_A_DISABLE)) return false; return true; } static void intel_setup_outputs(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_encoder *encoder; bool dpd_is_edp = false; bool has_lvds; has_lvds = intel_lvds_init(dev); if (!has_lvds && !HAS_PCH_SPLIT(dev)) { /* disable the panel fitter on everything but LVDS */ I915_WRITE(PFIT_CONTROL, 0); } if (!(IS_HASWELL(dev) && (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES))) intel_crt_init(dev); if (IS_HASWELL(dev)) { int found; /* Haswell uses DDI functions to detect digital outputs */ found = I915_READ(DDI_BUF_CTL_A) & DDI_INIT_DISPLAY_DETECTED; /* DDI A only supports eDP */ if (found) intel_ddi_init(dev, PORT_A); /* DDI B, C and D detection is indicated by the SFUSE_STRAP * register */ found = I915_READ(SFUSE_STRAP); if (found & SFUSE_STRAP_DDIB_DETECTED) intel_ddi_init(dev, PORT_B); if (found & SFUSE_STRAP_DDIC_DETECTED) intel_ddi_init(dev, PORT_C); if (found & SFUSE_STRAP_DDID_DETECTED) intel_ddi_init(dev, PORT_D); } else if (HAS_PCH_SPLIT(dev)) { int found; dpd_is_edp = intel_dpd_is_edp(dev); if (has_edp_a(dev)) intel_dp_init(dev, DP_A, PORT_A); if (I915_READ(HDMIB) & PORT_DETECTED) { /* PCH SDVOB multiplex with HDMIB */ found = intel_sdvo_init(dev, PCH_SDVOB, true); if (!found) intel_hdmi_init(dev, HDMIB, PORT_B); if (!found && (I915_READ(PCH_DP_B) & DP_DETECTED)) intel_dp_init(dev, PCH_DP_B, PORT_B); } if (I915_READ(HDMIC) & PORT_DETECTED) intel_hdmi_init(dev, HDMIC, PORT_C); if (!dpd_is_edp && I915_READ(HDMID) & PORT_DETECTED) intel_hdmi_init(dev, HDMID, PORT_D); if (I915_READ(PCH_DP_C) & DP_DETECTED) intel_dp_init(dev, PCH_DP_C, PORT_C); if (I915_READ(PCH_DP_D) & DP_DETECTED) intel_dp_init(dev, PCH_DP_D, PORT_D); } else if (IS_VALLEYVIEW(dev)) { int found; /* Check for built-in panel first.
Shares lanes with HDMI on SDVOC */ if (I915_READ(DP_C) & DP_DETECTED) intel_dp_init(dev, DP_C, PORT_C); if (I915_READ(SDVOB) & PORT_DETECTED) { /* SDVOB multiplex with HDMIB */ found = intel_sdvo_init(dev, SDVOB, true); if (!found) intel_hdmi_init(dev, SDVOB, PORT_B); if (!found && (I915_READ(DP_B) & DP_DETECTED)) intel_dp_init(dev, DP_B, PORT_B); } if (I915_READ(SDVOC) & PORT_DETECTED) intel_hdmi_init(dev, SDVOC, PORT_C); } else if (SUPPORTS_DIGITAL_OUTPUTS(dev)) { bool found = false; if (I915_READ(SDVOB) & SDVO_DETECTED) { DRM_DEBUG_KMS("probing SDVOB\n"); found = intel_sdvo_init(dev, SDVOB, true); if (!found && SUPPORTS_INTEGRATED_HDMI(dev)) { DRM_DEBUG_KMS("probing HDMI on SDVOB\n"); intel_hdmi_init(dev, SDVOB, PORT_B); } if (!found && SUPPORTS_INTEGRATED_DP(dev)) { DRM_DEBUG_KMS("probing DP_B\n"); intel_dp_init(dev, DP_B, PORT_B); } } /* Before G4X SDVOC doesn't have its own detect register */ if (I915_READ(SDVOB) & SDVO_DETECTED) { DRM_DEBUG_KMS("probing SDVOC\n"); found = intel_sdvo_init(dev, SDVOC, false); } if (!found && (I915_READ(SDVOC) & SDVO_DETECTED)) { if (SUPPORTS_INTEGRATED_HDMI(dev)) { DRM_DEBUG_KMS("probing HDMI on SDVOC\n"); intel_hdmi_init(dev, SDVOC, PORT_C); } if (SUPPORTS_INTEGRATED_DP(dev)) { DRM_DEBUG_KMS("probing DP_C\n"); intel_dp_init(dev, DP_C, PORT_C); } } if (SUPPORTS_INTEGRATED_DP(dev) && (I915_READ(DP_D) & DP_DETECTED)) { DRM_DEBUG_KMS("probing DP_D\n"); intel_dp_init(dev, DP_D, PORT_D); } } else if (IS_GEN2(dev)) intel_dvo_init(dev); if (SUPPORTS_TV(dev)) intel_tv_init(dev); list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { encoder->base.possible_crtcs = encoder->crtc_mask; encoder->base.possible_clones = intel_encoder_clones(encoder); } intel_init_pch_refclk(dev); drm_helper_move_panel_connectors_to_head(dev); } static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); drm_framebuffer_cleanup(fb); drm_gem_object_unreference_unlocked(&intel_fb->obj->base); free(intel_fb, DRM_MEM_KMS); } static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb, struct drm_file *file, unsigned int *handle) { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); struct drm_i915_gem_object *obj = intel_fb->obj; return drm_gem_handle_create(file, &obj->base, handle); } static const struct drm_framebuffer_funcs intel_fb_funcs = { .destroy = intel_user_framebuffer_destroy, .create_handle = intel_user_framebuffer_create_handle, }; int intel_framebuffer_init(struct drm_device *dev, struct intel_framebuffer *intel_fb, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_i915_gem_object *obj) { int ret; if (obj->tiling_mode == I915_TILING_Y) { DRM_DEBUG("hardware does not support tiling Y\n"); return -EINVAL; } if (mode_cmd->pitches[0] & 63) { DRM_DEBUG("pitch (%d) must be at least 64 byte aligned\n", mode_cmd->pitches[0]); return -EINVAL; } /* FIXME <= Gen4 stride limits are a bit unclear */ if (mode_cmd->pitches[0] > 32768) { DRM_DEBUG("pitch (%d) must be less than 32768\n", mode_cmd->pitches[0]); return -EINVAL; } if (obj->tiling_mode != I915_TILING_NONE && mode_cmd->pitches[0] != obj->stride) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", mode_cmd->pitches[0], obj->stride); return -EINVAL; } /* Reject formats not supported by any plane early.
*/ switch (mode_cmd->pixel_format) { case DRM_FORMAT_C8: case DRM_FORMAT_RGB565: case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: break; case DRM_FORMAT_XRGB1555: case DRM_FORMAT_ARGB1555: if (INTEL_INFO(dev)->gen > 3) { DRM_DEBUG("invalid format: 0x%08x\n", mode_cmd->pixel_format); return -EINVAL; } break; case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_ARGB2101010: case DRM_FORMAT_XBGR2101010: case DRM_FORMAT_ABGR2101010: if (INTEL_INFO(dev)->gen < 4) { DRM_DEBUG("invalid format: 0x%08x\n", mode_cmd->pixel_format); return -EINVAL; } break; case DRM_FORMAT_YUYV: case DRM_FORMAT_UYVY: case DRM_FORMAT_YVYU: case DRM_FORMAT_VYUY: if (INTEL_INFO(dev)->gen < 5) { DRM_DEBUG("invalid format: 0x%08x\n", mode_cmd->pixel_format); return -EINVAL; } break; default: DRM_DEBUG("unsupported pixel format 0x%08x\n", mode_cmd->pixel_format); return -EINVAL; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ if (mode_cmd->offsets[0] != 0) return -EINVAL; ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); if (ret) { DRM_ERROR("framebuffer init failed %d\n", ret); return ret; } drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd); intel_fb->obj = obj; return 0; } static int intel_user_framebuffer_create(struct drm_device *dev, struct drm_file *filp, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_framebuffer **res) { struct drm_i915_gem_object *obj; obj = to_intel_bo(drm_gem_object_lookup(dev, filp, mode_cmd->handles[0])); if (&obj->base == NULL) return -ENOENT; return intel_framebuffer_create(dev, mode_cmd, obj, res); } static const struct drm_mode_config_funcs intel_mode_funcs = { .fb_create = intel_user_framebuffer_create, .output_poll_changed = intel_fb_output_poll_changed, }; /* Set up chip specific display functions */ static void intel_init_display(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; /* We always want a DPMS function */ if (IS_HASWELL(dev)) { dev_priv->display.crtc_mode_set = haswell_crtc_mode_set; dev_priv->display.crtc_enable = haswell_crtc_enable; dev_priv->display.crtc_disable = haswell_crtc_disable; dev_priv->display.off = haswell_crtc_off; dev_priv->display.update_plane = ironlake_update_plane; } else if (HAS_PCH_SPLIT(dev)) { dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set; dev_priv->display.crtc_enable = ironlake_crtc_enable; dev_priv->display.crtc_disable = ironlake_crtc_disable; dev_priv->display.off = ironlake_crtc_off; dev_priv->display.update_plane = ironlake_update_plane; } else { dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set; dev_priv->display.crtc_enable = i9xx_crtc_enable; dev_priv->display.crtc_disable = i9xx_crtc_disable; dev_priv->display.off = i9xx_crtc_off; dev_priv->display.update_plane = i9xx_update_plane; } /* Returns the core display clock speed */ if (IS_VALLEYVIEW(dev)) dev_priv->display.get_display_clock_speed = valleyview_get_display_clock_speed; else if (IS_I945G(dev) || (IS_G33(dev) && !IS_PINEVIEW_M(dev))) dev_priv->display.get_display_clock_speed = i945_get_display_clock_speed; else if (IS_I915G(dev)) dev_priv->display.get_display_clock_speed = i915_get_display_clock_speed; else if (IS_I945GM(dev) || IS_845G(dev) || IS_PINEVIEW_M(dev)) dev_priv->display.get_display_clock_speed = i9xx_misc_get_display_clock_speed; else if (IS_I915GM(dev)) dev_priv->display.get_display_clock_speed = i915gm_get_display_clock_speed; else if (IS_I865G(dev)) dev_priv->display.get_display_clock_speed = i865_get_display_clock_speed; else if (IS_I85X(dev)) 
dev_priv->display.get_display_clock_speed = i855_get_display_clock_speed; else /* 852, 830 */ dev_priv->display.get_display_clock_speed = i830_get_display_clock_speed; if (HAS_PCH_SPLIT(dev)) { if (IS_GEN5(dev)) { dev_priv->display.fdi_link_train = ironlake_fdi_link_train; dev_priv->display.write_eld = ironlake_write_eld; } else if (IS_GEN6(dev)) { dev_priv->display.fdi_link_train = gen6_fdi_link_train; dev_priv->display.write_eld = ironlake_write_eld; } else if (IS_IVYBRIDGE(dev)) { /* FIXME: detect B0+ stepping and use auto training */ dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train; dev_priv->display.write_eld = ironlake_write_eld; dev_priv->display.modeset_global_resources = ivb_modeset_global_resources; } else if (IS_HASWELL(dev)) { dev_priv->display.fdi_link_train = hsw_fdi_link_train; dev_priv->display.write_eld = haswell_write_eld; } else dev_priv->display.update_wm = NULL; } else if (IS_G4X(dev)) { dev_priv->display.write_eld = g4x_write_eld; } /* Default just returns -ENODEV to indicate unsupported */ dev_priv->display.queue_flip = intel_default_queue_flip; switch (INTEL_INFO(dev)->gen) { case 2: dev_priv->display.queue_flip = intel_gen2_queue_flip; break; case 3: dev_priv->display.queue_flip = intel_gen3_queue_flip; break; case 4: case 5: dev_priv->display.queue_flip = intel_gen4_queue_flip; break; case 6: dev_priv->display.queue_flip = intel_gen6_queue_flip; break; case 7: dev_priv->display.queue_flip = intel_gen7_queue_flip; break; } } /* * Some BIOSes insist on assuming the GPU's pipe A is enabled at suspend, * resume, or other times. This quirk makes sure that's the case for * affected systems. */ static void quirk_pipea_force(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; dev_priv->quirks |= QUIRK_PIPEA_FORCE; DRM_INFO("applying pipe a force quirk\n"); } /* * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason */ static void quirk_ssc_force_disable(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; dev_priv->quirks |= QUIRK_LVDS_SSC_DISABLE; DRM_INFO("applying lvds SSC disable quirk\n"); } /* * A machine (e.g. Acer Aspire 5734Z) may need to invert the panel backlight * brightness value */ static void quirk_invert_brightness(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; dev_priv->quirks |= QUIRK_INVERT_BRIGHTNESS; DRM_INFO("applying inverted panel brightness quirk\n"); } struct intel_quirk { int device; int subsystem_vendor; int subsystem_device; void (*hook)(struct drm_device *dev); }; /* For systems that don't have a meaningful PCI subdevice/subvendor ID */ struct intel_dmi_quirk { void (*hook)(struct drm_device *dev); const struct dmi_system_id (*dmi_id_list)[]; }; static int intel_dmi_reverse_brightness(const struct dmi_system_id *id) { DRM_INFO("Backlight polarity reversed on %s\n", id->ident); return 1; } static const struct intel_dmi_quirk intel_dmi_quirks[] = { { - .dmi_id_list = &(const struct dmi_system_id[]) { + .dmi_id_list = +#if !defined(__clang__) && !__GNUC_PREREQ__(4, 3) + /* gcc 4.2 needs an additional cast, to avoid a bogus + * "initialization from incompatible pointer type" warning. 
+ * see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36432 + */ + (const struct dmi_system_id (*)[]) +#endif + &(const struct dmi_system_id[]) { { .callback = intel_dmi_reverse_brightness, .ident = "NCR Corporation", .matches = {DMI_MATCH(DMI_SYS_VENDOR, "NCR Corporation"), DMI_MATCH(DMI_PRODUCT_NAME, ""), }, }, { } /* terminating entry */ }, .hook = quirk_invert_brightness, }, }; #define PCI_ANY_ID (~0u) static struct intel_quirk intel_quirks[] = { /* HP Mini needs pipe A force quirk (LP: #322104) */ { 0x27ae, 0x103c, 0x361a, quirk_pipea_force }, /* Toshiba Protege R-205, S-209 needs pipe A force quirk */ { 0x2592, 0x1179, 0x0001, quirk_pipea_force }, /* ThinkPad T60 needs pipe A force quirk (bug #16494) */ { 0x2782, 0x17aa, 0x201a, quirk_pipea_force }, /* 830/845 need to leave pipe A & dpll A up */ { 0x2562, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force }, { 0x3577, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force }, /* Lenovo U160 cannot use SSC on LVDS */ { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, /* Sony Vaio Y cannot use SSC on LVDS */ { 0x0046, 0x104d, 0x9076, quirk_ssc_force_disable }, /* Acer Aspire 5734Z must invert backlight brightness */ { 0x2a42, 0x1025, 0x0459, quirk_invert_brightness }, /* Acer Aspire 4736Z */ { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, /* Acer/eMachines G725 */ { 0x2a42, 0x1025, 0x0210, quirk_invert_brightness }, /* Acer/eMachines e725 */ { 0x2a42, 0x1025, 0x0212, quirk_invert_brightness }, /* Acer/Packard Bell NCL20 */ { 0x2a42, 0x1025, 0x034b, quirk_invert_brightness }, }; static void intel_init_quirks(struct drm_device *dev) { int i; for (i = 0; i < ARRAY_SIZE(intel_quirks); i++) { struct intel_quirk *q = &intel_quirks[i]; if (pci_get_device(dev->dev) == q->device && (pci_get_subvendor(dev->dev) == q->subsystem_vendor || q->subsystem_vendor == PCI_ANY_ID) && (pci_get_subdevice(dev->dev) == q->subsystem_device || q->subsystem_device == PCI_ANY_ID)) q->hook(dev); } for (i = 0; i < ARRAY_SIZE(intel_dmi_quirks); i++) { if (dmi_check_system(*intel_dmi_quirks[i].dmi_id_list) != 0) intel_dmi_quirks[i].hook(dev); } } /* Disable the VGA plane that we never use */ static void i915_disable_vga(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u8 sr1; u32 vga_reg; if (HAS_PCH_SPLIT(dev)) vga_reg = CPU_VGACNTRL; else vga_reg = VGACNTRL; #ifdef FREEBSD_WIP vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); #endif /* FREEBSD_WIP */ outb(VGA_SR_INDEX, SR01); sr1 = inb(VGA_SR_DATA); outb(VGA_SR_DATA, sr1 | 1<<5); #ifdef FREEBSD_WIP vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); #endif /* FREEBSD_WIP */ udelay(300); I915_WRITE(vga_reg, VGA_DISP_DISABLE); POSTING_READ(vga_reg); } void intel_modeset_init_hw(struct drm_device *dev) { /* We attempt to init the necessary power wells early in the initialization * time, so the subsystems that expect power to be enabled can work. 
*/ intel_init_power_wells(dev); intel_prepare_ddi(dev); intel_init_clock_gating(dev); DRM_LOCK(dev); intel_enable_gt_powersave(dev); DRM_UNLOCK(dev); } void intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int i, ret; drm_mode_config_init(dev); dev->mode_config.min_width = 0; dev->mode_config.min_height = 0; dev->mode_config.preferred_depth = 24; dev->mode_config.prefer_shadow = 1; dev->mode_config.funcs = &intel_mode_funcs; intel_init_quirks(dev); intel_init_pm(dev); intel_init_display(dev); if (IS_GEN2(dev)) { dev->mode_config.max_width = 2048; dev->mode_config.max_height = 2048; } else if (IS_GEN3(dev)) { dev->mode_config.max_width = 4096; dev->mode_config.max_height = 4096; } else { dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; } dev->mode_config.fb_base = dev_priv->mm.gtt_base_addr; DRM_DEBUG_KMS("%d display pipe%s available.\n", dev_priv->num_pipe, dev_priv->num_pipe > 1 ? "s" : ""); for (i = 0; i < dev_priv->num_pipe; i++) { intel_crtc_init(dev, i); ret = intel_plane_init(dev, i); if (ret) DRM_DEBUG_KMS("plane %d init failed: %d\n", i, ret); } intel_cpu_pll_init(dev); intel_pch_pll_init(dev); /* Just disable it once at startup */ i915_disable_vga(dev); intel_setup_outputs(dev); } static void intel_connector_break_all_links(struct intel_connector *connector) { connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; connector->encoder->connectors_active = false; connector->encoder->base.crtc = NULL; } static void intel_enable_pipe_a(struct drm_device *dev) { struct intel_connector *connector; struct drm_connector *crt = NULL; struct intel_load_detect_pipe load_detect_temp; /* We can't just switch on the pipe A, we need to set things up with a * proper mode and output configuration. As a gross hack, enable pipe A * by enabling the load detect pipe once. */ list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (connector->encoder->type == INTEL_OUTPUT_ANALOG) { crt = &connector->base; break; } } if (!crt) return; if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp)) intel_release_load_detect_pipe(crt, &load_detect_temp); } static bool intel_check_plane_mapping(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; u32 reg, val; if (dev_priv->num_pipe == 1) return true; reg = DSPCNTR(!crtc->plane); val = I915_READ(reg); if ((val & DISPLAY_PLANE_ENABLE) && (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe)) return false; return true; } static void intel_sanitize_crtc(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 reg; /* Clear any frame start delays used for debugging left by the BIOS */ reg = PIPECONF(crtc->cpu_transcoder); I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK); /* We need to sanitize the plane -> pipe mapping first because this will * disable the crtc (and hence change the state) if it is wrong. Note * that gen4+ has a fixed plane -> pipe mapping. */ if (INTEL_INFO(dev)->gen < 4 && !intel_check_plane_mapping(crtc)) { struct intel_connector *connector; bool plane; DRM_DEBUG_KMS("[CRTC:%d] wrong plane connection detected!\n", crtc->base.base.id); /* Pipe has the wrong plane attached and the plane is active. * Temporarily change the plane mapping and disable everything * ... */ plane = crtc->plane; crtc->plane = !plane; dev_priv->display.crtc_disable(&crtc->base); crtc->plane = plane; /* ... and break all links. 
*/ list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (connector->encoder->base.crtc != &crtc->base) continue; intel_connector_break_all_links(connector); } WARN_ON(crtc->active); crtc->base.enabled = false; } if (dev_priv->quirks & QUIRK_PIPEA_FORCE && crtc->pipe == PIPE_A && !crtc->active) { /* BIOS forgot to enable pipe A, this mostly happens after * resume. Force-enable the pipe to fix this, the update_dpms * call below will restore the pipe to the right state, but leave * the required bits on. */ intel_enable_pipe_a(dev); } /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ intel_crtc_update_dpms(&crtc->base); if (crtc->active != crtc->base.enabled) { struct intel_encoder *encoder; /* This can happen either due to bugs in the get_hw_state * functions or because the pipe is force-enabled due to the * pipe A quirk. */ DRM_DEBUG_KMS("[CRTC:%d] hw state adjusted, was %s, now %s\n", crtc->base.base.id, crtc->base.enabled ? "enabled" : "disabled", crtc->active ? "enabled" : "disabled"); crtc->base.enabled = crtc->active; /* Because we only establish the connector -> encoder -> * crtc links if something is active, this means the * crtc is now deactivated. Break the links. connector * -> encoder links are only established when things are * actually up, hence no need to break them. */ WARN_ON(crtc->active); for_each_encoder_on_crtc(dev, &crtc->base, encoder) { WARN_ON(encoder->connectors_active); encoder->base.crtc = NULL; } } } static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct intel_connector *connector; struct drm_device *dev = encoder->base.dev; /* We need to check both for a crtc link (meaning that the * encoder is active and trying to read from a pipe) and the * pipe itself being active. */ bool has_active_crtc = encoder->base.crtc && to_intel_crtc(encoder->base.crtc)->active; if (encoder->connectors_active && !has_active_crtc) { DRM_DEBUG_KMS("[ENCODER:%d:%s] has active connectors but no active pipe!\n", encoder->base.base.id, drm_get_encoder_name(&encoder->base)); /* Connector is active, but has no active pipe. This is * fallout from our resume register restoring. Disable * the encoder manually again. */ if (encoder->base.crtc) { DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, drm_get_encoder_name(&encoder->base)); encoder->disable(encoder); } /* Inconsistent output/port/pipe state happens presumably due to * a bug in one of the get_hw_state functions. Or someplace else * in our code, like the register restore mess on resume. Clamp * things to off as a safer default. */ list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (connector->encoder != encoder) continue; intel_connector_break_all_links(connector); } } /* Enabled encoders without active connectors will be fixed in * the crtc fixup. */ } static void i915_redisable_vga(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u32 vga_reg; if (HAS_PCH_SPLIT(dev)) vga_reg = CPU_VGACNTRL; else vga_reg = VGACNTRL; if (I915_READ(vga_reg) != VGA_DISP_DISABLE) { DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); I915_WRITE(vga_reg, VGA_DISP_DISABLE); POSTING_READ(vga_reg); } } /* Scan out the current hw modeset state, sanitize it and map it into the drm * and i915 state tracking structures.
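 * The flow is roughly three phases:
 *
 *   1. readout:  crtc/encoder/connector state is read back from
 *      PIPECONF and the ->get_hw_state() hooks;
 *   2. sanitize: intel_sanitize_encoder() and intel_sanitize_crtc()
 *      clamp any inconsistent combination to off;
 *   3. restore:  with force_restore, intel_set_mode() reprograms every
 *      pipe from the (now trusted) software state, otherwise the
 *      staged output state is simply refreshed.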
*/ void intel_modeset_setup_hw_state(struct drm_device *dev, bool force_restore) { struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe; u32 tmp; struct intel_crtc *crtc; struct intel_encoder *encoder; struct intel_connector *connector; if (IS_HASWELL(dev)) { tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)); if (tmp & TRANS_DDI_FUNC_ENABLE) { switch (tmp & TRANS_DDI_EDP_INPUT_MASK) { case TRANS_DDI_EDP_INPUT_A_ON: case TRANS_DDI_EDP_INPUT_A_ONOFF: pipe = PIPE_A; break; case TRANS_DDI_EDP_INPUT_B_ONOFF: pipe = PIPE_B; break; case TRANS_DDI_EDP_INPUT_C_ONOFF: pipe = PIPE_C; break; } crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); crtc->cpu_transcoder = TRANSCODER_EDP; DRM_DEBUG_KMS("Pipe %c using transcoder EDP\n", pipe_name(pipe)); } } for_each_pipe(pipe) { crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); tmp = I915_READ(PIPECONF(crtc->cpu_transcoder)); if (tmp & PIPECONF_ENABLE) crtc->active = true; else crtc->active = false; crtc->base.enabled = crtc->active; DRM_DEBUG_KMS("[CRTC:%d] hw state readout: %s\n", crtc->base.base.id, crtc->active ? "enabled" : "disabled"); } if (IS_HASWELL(dev)) intel_ddi_setup_hw_pll_state(dev); list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { pipe = 0; if (encoder->get_hw_state(encoder, &pipe)) { encoder->base.crtc = dev_priv->pipe_to_crtc_mapping[pipe]; } else { encoder->base.crtc = NULL; } encoder->connectors_active = false; DRM_DEBUG_KMS("[ENCODER:%d:%s] hw state readout: %s, pipe=%i\n", encoder->base.base.id, drm_get_encoder_name(&encoder->base), encoder->base.crtc ? "enabled" : "disabled", pipe); } list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) { if (connector->get_hw_state(connector)) { connector->base.dpms = DRM_MODE_DPMS_ON; connector->encoder->connectors_active = true; connector->base.encoder = &connector->encoder->base; } else { connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; } DRM_DEBUG_KMS("[CONNECTOR:%d:%s] hw state readout: %s\n", connector->base.base.id, drm_get_connector_name(&connector->base), connector->base.encoder ? "enabled" : "disabled"); } /* HW state is read out, now we need to sanitize this mess. 
*/ list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { intel_sanitize_encoder(encoder); } for_each_pipe(pipe) { crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); intel_sanitize_crtc(crtc); } if (force_restore) { for_each_pipe(pipe) { crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); intel_set_mode(&crtc->base, &crtc->base.mode, crtc->base.x, crtc->base.y, crtc->base.fb); } i915_redisable_vga(dev); } else { intel_modeset_update_staged_output_state(dev); } intel_modeset_check_state(dev); drm_mode_config_reset(dev); } void intel_modeset_gem_init(struct drm_device *dev) { intel_modeset_init_hw(dev); intel_setup_overlay(dev); intel_modeset_setup_hw_state(dev, false); } void intel_modeset_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; struct intel_crtc *intel_crtc; drm_kms_helper_poll_fini(dev); DRM_LOCK(dev); intel_unregister_dsm_handler(); list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { /* Skip inactive CRTCs */ if (!crtc->fb) continue; intel_crtc = to_intel_crtc(crtc); intel_increase_pllclock(crtc); } intel_disable_fbc(dev); intel_disable_gt_powersave(dev); ironlake_teardown_rc6(dev); if (IS_VALLEYVIEW(dev)) vlv_init_dpio(dev); DRM_UNLOCK(dev); /* Disable the irq before mode object teardown, for the irq might * enqueue unpin/hotplug work. */ drm_irq_uninstall(dev); if (taskqueue_cancel(dev_priv->wq, &dev_priv->hotplug_work, NULL)) taskqueue_drain(dev_priv->wq, &dev_priv->hotplug_work); if (taskqueue_cancel(dev_priv->wq, &dev_priv->rps.work, NULL)) taskqueue_drain(dev_priv->wq, &dev_priv->rps.work); /* flush any delayed tasks or pending work */ taskqueue_drain_all(dev_priv->wq); /* destroy backlight, if any, before the connectors */ intel_panel_destroy_backlight(dev); drm_mode_config_cleanup(dev); } /* * Return which encoder is currently attached for connector. */ struct drm_encoder *intel_best_encoder(struct drm_connector *connector) { return &intel_attached_encoder(connector)->base; } void intel_connector_attach_encoder(struct intel_connector *connector, struct intel_encoder *encoder) { connector->encoder = encoder; drm_mode_connector_attach_encoder(&connector->base, &encoder->base); } /* * set vga decode state - true == enable VGA decode */ int intel_modeset_vga_set_state(struct drm_device *dev, bool state) { struct drm_i915_private *dev_priv = dev->dev_private; u16 gmch_ctrl; pci_read_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, &gmch_ctrl); if (state) gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE; else gmch_ctrl |= INTEL_GMCH_VGA_DISABLE; pci_write_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, gmch_ctrl); return 0; } //#ifdef CONFIG_DEBUG_FS #define seq_printf(m, fmt, ...) 
sbuf_printf((m), (fmt), ##__VA_ARGS__) struct intel_display_error_state { struct intel_cursor_error_state { u32 control; u32 position; u32 base; u32 size; } cursor[I915_MAX_PIPES]; struct intel_pipe_error_state { u32 conf; u32 source; u32 htotal; u32 hblank; u32 hsync; u32 vtotal; u32 vblank; u32 vsync; } pipe[I915_MAX_PIPES]; struct intel_plane_error_state { u32 control; u32 stride; u32 size; u32 pos; u32 addr; u32 surface; u32 tile_offset; } plane[I915_MAX_PIPES]; }; struct intel_display_error_state * intel_display_capture_error_state(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_display_error_state *error; enum transcoder cpu_transcoder; int i; error = malloc(sizeof(*error), DRM_MEM_KMS, M_NOWAIT); if (error == NULL) return NULL; for_each_pipe(i) { cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, i); error->cursor[i].control = I915_READ(CURCNTR(i)); error->cursor[i].position = I915_READ(CURPOS(i)); error->cursor[i].base = I915_READ(CURBASE(i)); error->plane[i].control = I915_READ(DSPCNTR(i)); error->plane[i].stride = I915_READ(DSPSTRIDE(i)); error->plane[i].size = I915_READ(DSPSIZE(i)); error->plane[i].pos = I915_READ(DSPPOS(i)); error->plane[i].addr = I915_READ(DSPADDR(i)); if (INTEL_INFO(dev)->gen >= 4) { error->plane[i].surface = I915_READ(DSPSURF(i)); error->plane[i].tile_offset = I915_READ(DSPTILEOFF(i)); } error->pipe[i].conf = I915_READ(PIPECONF(cpu_transcoder)); error->pipe[i].source = I915_READ(PIPESRC(i)); error->pipe[i].htotal = I915_READ(HTOTAL(cpu_transcoder)); error->pipe[i].hblank = I915_READ(HBLANK(cpu_transcoder)); error->pipe[i].hsync = I915_READ(HSYNC(cpu_transcoder)); error->pipe[i].vtotal = I915_READ(VTOTAL(cpu_transcoder)); error->pipe[i].vblank = I915_READ(VBLANK(cpu_transcoder)); error->pipe[i].vsync = I915_READ(VSYNC(cpu_transcoder)); } return error; } void intel_display_print_error_state(struct sbuf *m, struct drm_device *dev, struct intel_display_error_state *error) { drm_i915_private_t *dev_priv = dev->dev_private; int i; seq_printf(m, "Num Pipes: %d\n", dev_priv->num_pipe); for_each_pipe(i) { seq_printf(m, "Pipe [%d]:\n", i); seq_printf(m, " CONF: %08x\n", error->pipe[i].conf); seq_printf(m, " SRC: %08x\n", error->pipe[i].source); seq_printf(m, " HTOTAL: %08x\n", error->pipe[i].htotal); seq_printf(m, " HBLANK: %08x\n", error->pipe[i].hblank); seq_printf(m, " HSYNC: %08x\n", error->pipe[i].hsync); seq_printf(m, " VTOTAL: %08x\n", error->pipe[i].vtotal); seq_printf(m, " VBLANK: %08x\n", error->pipe[i].vblank); seq_printf(m, " VSYNC: %08x\n", error->pipe[i].vsync); seq_printf(m, "Plane [%d]:\n", i); seq_printf(m, " CNTR: %08x\n", error->plane[i].control); seq_printf(m, " STRIDE: %08x\n", error->plane[i].stride); seq_printf(m, " SIZE: %08x\n", error->plane[i].size); seq_printf(m, " POS: %08x\n", error->plane[i].pos); seq_printf(m, " ADDR: %08x\n", error->plane[i].addr); if (INTEL_INFO(dev)->gen >= 4) { seq_printf(m, " SURF: %08x\n", error->plane[i].surface); seq_printf(m, " TILEOFF: %08x\n", error->plane[i].tile_offset); } seq_printf(m, "Cursor [%d]:\n", i); seq_printf(m, " CNTR: %08x\n", error->cursor[i].control); seq_printf(m, " POS: %08x\n", error->cursor[i].position); seq_printf(m, " BASE: %08x\n", error->cursor[i].base); } } //#endif Index: projects/pnfs-planb-server/sys/dev/hwpmc/hwpmc_mod.c =================================================================== --- projects/pnfs-planb-server/sys/dev/hwpmc/hwpmc_mod.c (revision 334966) +++ projects/pnfs-planb-server/sys/dev/hwpmc/hwpmc_mod.c (revision 334967) @@ 
-1,5979 +1,5979 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * Copyright (c) 2018 Matthew Macy * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needs to be after */ #include #include #include #include #include #include #include #include "hwpmc_soft.h" #ifdef NUMA #define NDOMAINS vm_ndomains #else #define NDOMAINS 1 #define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags)) #define free_domain(addr, type) free(addr, type) #endif /* * Types */ enum pmc_flags { PMC_FLAG_NONE = 0x00, /* do nothing */ PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */ PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */ PMC_FLAG_NOWAIT = 0x04, /* do not wait for mallocs */ }; /* * The offset in sysent where the syscall is allocated. 
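*
* (Illustrative note on the userland side, an assumption rather than
* code from this file: consumers such as the libpmc library discover
* this dynamically allocated syscall number at runtime, e.g. by
* locating the "pmc" module via modfind(2)/modstat(2), and then enter
* the driver through syscall(2).)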
*/ static int pmc_syscall_num = NO_SYSCALL; struct pmc_cpu **pmc_pcpu; /* per-cpu state */ pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */ #define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)] struct mtx_pool *pmc_mtxpool; static int *pmc_pmcdisp; /* PMC row dispositions */ #define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0) #define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0) #define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0) #define PMC_MARK_ROW_FREE(R) do { \ pmc_pmcdisp[(R)] = 0; \ } while (0) #define PMC_MARK_ROW_STANDALONE(R) do { \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \ ("[pmc,%d] row disposition error", __LINE__)); \ } while (0) #define PMC_UNMARK_ROW_STANDALONE(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) #define PMC_MARK_ROW_THREAD(R) do { \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ } while (0) #define PMC_UNMARK_ROW_THREAD(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) /* various event handlers */ static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag, pmc_kld_unload_tag; /* Module statistics */ struct pmc_driverstats pmc_stats; /* Machine/processor dependent operations */ static struct pmc_mdep *md; /* * Hash tables mapping owner processes and target threads to PMCs. */ struct mtx pmc_processhash_mtx; /* spin mutex */ static u_long pmc_processhashmask; static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash; /* * Hash table of PMC owner descriptors. This table is protected by * the shared PMC "sx" lock. */ static u_long pmc_ownerhashmask; static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash; /* * List of PMC owners with system-wide sampling PMCs. */ static CK_LIST_HEAD(, pmc_owner) pmc_ss_owners; /* * List of free thread entries. This is protected by the spin * mutex. */ static struct mtx pmc_threadfreelist_mtx; /* spin mutex */ static LIST_HEAD(, pmc_thread) pmc_threadfreelist; static int pmc_threadfreelist_entries=0; #define THREADENTRY_SIZE \ (sizeof(struct pmc_thread) + (md->pmd_npmc * sizeof(struct pmc_threadpmcstate))) /* * Task to free thread descriptors */ static struct grouptask free_gtask; /* * A map of row indices to classdep structures. 
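*
* (Illustrative layout, assuming a hypothetical configuration: with a
* soft class exporting rows 0..15 and a hardware class exporting rows
* 16..19, entries 0..15 of this array would point at the soft classdep
* and entries 16..19 at the hardware classdep.)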
*/ static struct pmc_classdep **pmc_rowindex_to_classdep; /* * Prototypes */ #ifdef HWPMC_DEBUG static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); static int pmc_debugflags_parse(char *newstr, char *fence); #endif static int load(struct module *module, int cmd, void *arg); static int pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf); static void pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp); static int pmc_attach_process(struct proc *p, struct pmc *pm); static struct pmc *pmc_allocate_pmc_descriptor(void); static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p); static int pmc_attach_one_process(struct proc *p, struct pmc *pm); static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu); static int pmc_can_attach(struct pmc *pm, struct proc *p); static void pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf); static void pmc_cleanup(void); static int pmc_detach_process(struct proc *p, struct pmc *pm); static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags); static void pmc_destroy_owner_descriptor(struct pmc_owner *po); static void pmc_destroy_pmc_descriptor(struct pmc *pm); static void pmc_destroy_process_descriptor(struct pmc_process *pp); static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p); static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm); static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmc); static struct pmc_process *pmc_find_process_descriptor(struct proc *p, uint32_t mode); static struct pmc_thread *pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td, uint32_t mode); static void pmc_force_context_switch(void); static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp); static void pmc_log_all_process_mappings(struct pmc_owner *po); static void pmc_log_kernel_mappings(struct pmc *pm); static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); static void pmc_maybe_remove_owner(struct pmc_owner *po); static void pmc_process_csw_in(struct thread *td); static void pmc_process_csw_out(struct thread *td); static void pmc_process_exit(void *arg, struct proc *p); static void pmc_process_fork(void *arg, struct proc *p1, struct proc *p2, int n); static void pmc_process_samples(int cpu, int soft); static void pmc_release_pmc_descriptor(struct pmc *pmc); static void pmc_process_thread_add(struct thread *td); static void pmc_process_thread_delete(struct thread *td); static void pmc_process_thread_userret(struct thread *td); static void pmc_remove_owner(struct pmc_owner *po); static void pmc_remove_process_descriptor(struct pmc_process *pp); static void pmc_restore_cpu_binding(struct pmc_binding *pb); static void pmc_save_cpu_binding(struct pmc_binding *pb); static void pmc_select_cpu(int cpu); static int pmc_start(struct pmc *pm); static int pmc_stop(struct pmc *pm); static int pmc_syscall_handler(struct thread *td, void *syscall_args); static struct pmc_thread *pmc_thread_descriptor_pool_alloc(void); static void pmc_thread_descriptor_pool_drain(void); static void pmc_thread_descriptor_pool_free(struct pmc_thread *pt); static void pmc_unlink_target_process(struct pmc *pmc, struct pmc_process *pp); static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp); static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp); static struct pmc_mdep *pmc_generic_cpu_initialize(void); static void pmc_generic_cpu_finalize(struct pmc_mdep *md); 
static void pmc_post_callchain_callback(void); static void pmc_process_threadcreate(struct thread *td); static void pmc_process_threadexit(struct thread *td); static void pmc_process_proccreate(struct proc *p); static void pmc_process_allproc(struct pmc *pm); /* * Kernel tunables and sysctl(8) interface. */ SYSCTL_DECL(_kern_hwpmc); SYSCTL_NODE(_kern_hwpmc, OID_AUTO, stats, CTLFLAG_RW, 0, "HWPMC stats"); /* Stats. */ SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_ignored, CTLFLAG_RW, &pmc_stats.pm_intr_ignored, "# of interrupts ignored"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_processed, CTLFLAG_RW, &pmc_stats.pm_intr_processed, "# of interrupts processed"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_bufferfull, CTLFLAG_RW, &pmc_stats.pm_intr_bufferfull, "# of interrupts where buffer was full"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscalls, CTLFLAG_RW, &pmc_stats.pm_syscalls, "# of syscalls"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscall_errors, CTLFLAG_RW, &pmc_stats.pm_syscall_errors, "# of syscall errors"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests, CTLFLAG_RW, &pmc_stats.pm_buffer_requests, "# of buffer requests"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_RW, &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW, &pmc_stats.pm_log_sweeps, "# of ?"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, merges, CTLFLAG_RW, &pmc_stats.pm_merges, "# of times kernel stack was found for user trace"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, overwrites, CTLFLAG_RW, &pmc_stats.pm_overwrites, "# of times a sample was overwritten before being logged"); static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN, &pmc_callchaindepth, 0, "depth of call chain records"); char pmc_cpuid[64]; SYSCTL_STRING(_kern_hwpmc, OID_AUTO, cpuid, CTLFLAG_RD, pmc_cpuid, 0, "cpu version string"); #ifdef HWPMC_DEBUG struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS; char pmc_debugstr[PMC_DEBUG_STRSIZE]; TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr)); SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH, 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags"); #endif /* * kern.hwpmc.hashsize -- determines the number of rows in the * hash table used to look up threads */ static int pmc_hashsize = PMC_HASH_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_RDTUN, &pmc_hashsize, 0, "rows in hash tables"); /* * kern.hwpmc.nsamples -- number of PC samples/callchain stacks per CPU */ static int pmc_nsamples = PMC_NSAMPLES; SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN, &pmc_nsamples, 0, "number of PC samples per CPU"); /* * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
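*
* (Illustrative: the CTLFLAG_RDTUN knobs here are boot-time tunables
* and can be set from loader.conf(5); made-up example values:
*
*	kern.hwpmc.hashsize=4096
*	kern.hwpmc.nsamples=2048
*	kern.hwpmc.mtxpoolsize=4096
* )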
*/ static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_RDTUN, &pmc_mtxpool_size, 0, "size of spin mutex pool"); /* * kern.hwpmc.threadfreelist_entries -- number of free entries */ SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_entries, CTLFLAG_RD, &pmc_threadfreelist_entries, 0, "number of available thread entries"); /* * kern.hwpmc.threadfreelist_max -- maximum number of free entries */ static int pmc_threadfreelist_max = PMC_THREADLIST_MAX; SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_max, CTLFLAG_RW, &pmc_threadfreelist_max, 0, "maximum number of available thread entries before freeing some"); /* * security.bsd.unprivileged_syspmcs -- allow non-root processes to * allocate system-wide PMCs. * * Allowing unprivileged processes to allocate system PMCs is convenient * if system-wide measurements need to be taken concurrently with other * per-process measurements. This feature is turned off by default. */ static int pmc_unprivileged_syspmcs = 0; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RWTUN, &pmc_unprivileged_syspmcs, 0, "allow unprivileged processes to allocate system PMCs"); /* * Hash function. Discard the lower 2 bits of the pointer since * these are always zero for our uses. The hash multiplier is * round((2^LONG_BIT) * ((sqrt(5)-1)/2)). */ #if LONG_BIT == 64 #define _PMC_HM 11400714819323198486u #elif LONG_BIT == 32 #define _PMC_HM 2654435769u #else #error Must know the size of 'long' to compile #endif #define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M)) /* * Syscall structures */ /* The `sysent' for the new syscall */ static struct sysent pmc_sysent = { .sy_narg = 2, .sy_call = pmc_syscall_handler, }; static struct syscall_module_data pmc_syscall_mod = { .chainevh = load, .chainarg = NULL, .offset = &pmc_syscall_num, .new_sysent = &pmc_sysent, .old_sysent = { .sy_narg = 0, .sy_call = NULL }, .flags = SY_THR_STATIC_KLD, }; static moduledata_t pmc_mod = { .name = PMC_MODULE_NAME, .evhand = syscall_module_handler, .priv = &pmc_syscall_mod, }; #ifdef EARLY_AP_STARTUP DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SYSCALLS, SI_ORDER_ANY); #else DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY); #endif MODULE_VERSION(pmc, PMC_VERSION); #ifdef HWPMC_DEBUG enum pmc_dbgparse_state { PMCDS_WS, /* in whitespace */ PMCDS_MAJOR, /* seen a major keyword */ PMCDS_MINOR }; static int pmc_debugflags_parse(char *newstr, char *fence) { char c, *p, *q; struct pmc_debugflags *tmpflags; int error, found, *newbits, tmp; size_t kwlen; tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO); p = newstr; error = 0; for (; p < fence && (c = *p); p++) { /* skip white space */ if (c == ' ' || c == '\t') continue; /* look for a keyword followed by "=" */ for (q = p; p < fence && (c = *p) && c != '='; p++) ; if (c != '=') { error = EINVAL; goto done; } kwlen = p - q; newbits = NULL; /* lookup flag group name */ #define DBG_SET_FLAG_MAJ(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ newbits = &tmpflags->pdb_ ## F; DBG_SET_FLAG_MAJ("cpu", CPU); DBG_SET_FLAG_MAJ("csw", CSW); DBG_SET_FLAG_MAJ("logging", LOG); DBG_SET_FLAG_MAJ("module", MOD); DBG_SET_FLAG_MAJ("md", MDP); DBG_SET_FLAG_MAJ("owner", OWN); DBG_SET_FLAG_MAJ("pmc", PMC); DBG_SET_FLAG_MAJ("process", PRC); DBG_SET_FLAG_MAJ("sampling", SAM); if (newbits == NULL) { error = EINVAL; goto done; } p++; /* skip the '=' */ /* Now parse the individual flags */ tmp = 0; newflag: for (q = p; p < fence && (c = *p); p++) if (c == ' ' || c == '\t' || c == ',')
break; /* p == fence or c == ws or c == "," or c == 0 */ if ((kwlen = p - q) == 0) { *newbits = tmp; continue; } found = 0; #define DBG_SET_FLAG_MIN(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ tmp |= found = (1 << PMC_DEBUG_MIN_ ## F) /* a '*' denotes all possible flags in the group */ if (kwlen == 1 && *q == '*') tmp = found = ~0; /* look for individual flag names */ DBG_SET_FLAG_MIN("allocaterow", ALR); DBG_SET_FLAG_MIN("allocate", ALL); DBG_SET_FLAG_MIN("attach", ATT); DBG_SET_FLAG_MIN("bind", BND); DBG_SET_FLAG_MIN("config", CFG); DBG_SET_FLAG_MIN("exec", EXC); DBG_SET_FLAG_MIN("exit", EXT); DBG_SET_FLAG_MIN("find", FND); DBG_SET_FLAG_MIN("flush", FLS); DBG_SET_FLAG_MIN("fork", FRK); DBG_SET_FLAG_MIN("getbuf", GTB); DBG_SET_FLAG_MIN("hook", PMH); DBG_SET_FLAG_MIN("init", INI); DBG_SET_FLAG_MIN("intr", INT); DBG_SET_FLAG_MIN("linktarget", TLK); DBG_SET_FLAG_MIN("mayberemove", OMR); DBG_SET_FLAG_MIN("ops", OPS); DBG_SET_FLAG_MIN("read", REA); DBG_SET_FLAG_MIN("register", REG); DBG_SET_FLAG_MIN("release", REL); DBG_SET_FLAG_MIN("remove", ORM); DBG_SET_FLAG_MIN("sample", SAM); DBG_SET_FLAG_MIN("scheduleio", SIO); DBG_SET_FLAG_MIN("select", SEL); DBG_SET_FLAG_MIN("signal", SIG); DBG_SET_FLAG_MIN("swi", SWI); DBG_SET_FLAG_MIN("swo", SWO); DBG_SET_FLAG_MIN("start", STA); DBG_SET_FLAG_MIN("stop", STO); DBG_SET_FLAG_MIN("syscall", PMS); DBG_SET_FLAG_MIN("unlinktarget", TUL); DBG_SET_FLAG_MIN("write", WRI); if (found == 0) { /* unrecognized flag name */ error = EINVAL; goto done; } if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */ *newbits = tmp; continue; } p++; goto newflag; } /* save the new flag set */ bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags)); done: free(tmpflags, M_PMC); return error; } static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS) { char *fence, *newstr; int error; unsigned int n; (void) arg1; (void) arg2; /* unused parameters */ n = sizeof(pmc_debugstr); newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO); (void) strlcpy(newstr, pmc_debugstr, n); error = sysctl_handle_string(oidp, newstr, n, req); /* if there is a new string, parse and copy it */ if (error == 0 && req->newptr != NULL) { fence = newstr + (n < req->newlen ? n : req->newlen + 1); if ((error = pmc_debugflags_parse(newstr, fence)) == 0) (void) strlcpy(pmc_debugstr, newstr, sizeof(pmc_debugstr)); } free(newstr, M_PMC); return error; } #endif /* * Map a row index to a classdep structure and return the adjusted row * index for the PMC class index. 
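*
* (Worked example under a hypothetical layout: if a soft class owns
* rows 0..15 and a hardware class owns rows 16..19, then ri = 18
* resolves to the hardware classdep, whose pcd_ri is 16, giving
* *adjri = 18 - 16 = 2.)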
*/ static struct pmc_classdep * pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri) { struct pmc_classdep *pcd; (void) md; KASSERT(ri >= 0 && ri < md->pmd_npmc, ("[pmc,%d] illegal row-index %d", __LINE__, ri)); pcd = pmc_rowindex_to_classdep[ri]; KASSERT(pcd != NULL, ("[pmc,%d] ri %d null pcd", __LINE__, ri)); *adjri = ri - pcd->pcd_ri; KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num, ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri)); return (pcd); } /* * Concurrency Control * * The driver manages the following data structures: * * - target process descriptors, one per target process * - owner process descriptors (and attached lists), one per owner process * - lookup hash tables for owner and target processes * - PMC descriptors (and attached lists) * - per-cpu hardware state * - the 'hook' variable through which the kernel calls into * this module * - the machine hardware state (managed by the MD layer) * * These data structures are accessed from: * * - thread context-switch code * - interrupt handlers (possibly on multiple cpus) * - kernel threads on multiple cpus running on behalf of user * processes doing system calls * - this driver's private kernel threads * * = Locks and Locking strategy = * * The driver uses four locking strategies for its operation: * * - The global SX lock "pmc_sx" is used to protect internal * data structures. * * Calls into the module by syscall() start with this lock being * held in exclusive mode. Depending on the requested operation, * the lock may be downgraded to 'shared' mode to allow more * concurrent readers into the module. Calls into the module from * other parts of the kernel acquire the lock in shared mode. * * This SX lock is held in exclusive mode for any operations that * modify the linkages between the driver's internal data structures. * * The 'pmc_hook' function pointer is also protected by this lock. * It is only examined with the sx lock held in exclusive mode. The * kernel module is allowed to be unloaded only with the sx lock held * in exclusive mode. In normal syscall handling, after acquiring the * pmc_sx lock we first check that 'pmc_hook' is non-null before * proceeding. This prevents races between the thread unloading the module * and other threads seeking to use the module. * * - Lookups of target process structures and owner process structures * cannot use the global "pmc_sx" SX lock because these lookups need * to happen during context switches and in other critical sections * where sleeping is not allowed. We protect these lookup tables * with their own private spin-mutexes, "pmc_processhash_mtx" and * "pmc_ownerhash_mtx". * * - Interrupt handlers work in a lock free manner. At interrupt * time, handlers look at the PMC pointer (phw->phw_pmc) configured * when the PMC was started. If this pointer is NULL, the interrupt * is ignored after updating driver statistics. We ensure that this * pointer is set (using an atomic operation if necessary) before the * PMC hardware is started. Conversely, this pointer is unset atomically * only after the PMC hardware is stopped. * * We ensure that everything needed for the operation of an * interrupt handler is available without it needing to acquire any * locks. We also ensure that a PMC's software state is destroyed only * after the PMC is taken off hardware (on all CPUs). * * - Context-switch handling with process-private PMCs needs more * care. * * A given process may be the target of multiple PMCs. 
For example, * PMCATTACH and PMCDETACH may be requested by a process on one CPU * while the target process is running on another. A PMC could also * be getting released because its owner is exiting. We tackle * these situations in the following manner: * * - each target process structure 'pmc_process' has an array * of 'struct pmc *' pointers, one for each hardware PMC. * * - At context switch IN time, each "target" PMC in RUNNING state * gets started on hardware and a pointer to each PMC is copied into * the per-cpu phw array. The 'runcount' for the PMC is * incremented. * * - At context switch OUT time, all process-virtual PMCs are stopped * on hardware. The saved value is added to the PMC's value field * only if the PMC is in a non-deleted state (the PMC's state could * have changed during the current time slice). * * Note that in between a switch IN on a processor and a switch * OUT, the PMC could have been released on another CPU. Therefore, * context switch OUT always looks at the hardware state to turn * OFF PMCs and will update a PMC's saved value only if reachable * from the target process record. * * - OP PMCRELEASE could be called on a PMC at any time (the PMC could * be attached to many processes at the time of the call and could * be active on multiple CPUs). * * We prevent further scheduling of the PMC by marking it as in * state 'DELETED'. If the runcount of the PMC is non-zero then * this PMC is currently running on a CPU somewhere. The thread * doing the PMCRELEASE operation waits by repeatedly doing a * pause() until the runcount comes to zero. * * The contents of a PMC descriptor (struct pmc) are protected using * a spin-mutex. In order to save space, we use a mutex pool. * * In terms of lock types used by witness(4), we use: * - Type "pmc-sx", used by the global SX lock. * - Type "pmc-sleep", for sleep mutexes used by logger threads. * - Type "pmc-per-proc", for protecting PMC owner descriptors. * - Type "pmc-leaf", used for all other spin mutexes. */ /* * save the cpu binding of the current kthread */ static void pmc_save_cpu_binding(struct pmc_binding *pb) { PMCDBG0(CPU,BND,2, "save-cpu"); thread_lock(curthread); pb->pb_bound = sched_is_bound(curthread); pb->pb_cpu = curthread->td_oncpu; thread_unlock(curthread); PMCDBG1(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); } /* * restore the cpu binding of the current thread */ static void pmc_restore_cpu_binding(struct pmc_binding *pb) { PMCDBG2(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", curthread->td_oncpu, pb->pb_cpu); thread_lock(curthread); if (pb->pb_bound) sched_bind(curthread, pb->pb_cpu); else sched_unbind(curthread); thread_unlock(curthread); PMCDBG0(CPU,BND,2, "restore-cpu done"); } /* * move execution over to the specified cpu and bind it there. */ static void pmc_select_cpu(int cpu) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] bad cpu number %d", __LINE__, cpu)); /* Never move to an inactive CPU. */ KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive " "CPU %d", __LINE__, cpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d", cpu); thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); KASSERT(curthread->td_oncpu == cpu, ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, cpu, curthread->td_oncpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d ok", cpu); } /* * Force a context switch. * * We do this by pause'ing for 1 tick -- invoking mi_switch() is not * guaranteed to force a context switch.
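*
* (Illustrative sketch, not driver code: the PMCRELEASE wait described
* in the locking notes above amounts to
*
*	pm->pm_state = PMC_STATE_DELETED;
*	while (counter_u64_fetch(pm->pm_runcount) > 0)
*		pmc_force_context_switch();
*
* i.e. yield the CPU repeatedly until no CPU still has the PMC
* scheduled on its hardware.)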
*/ static void pmc_force_context_switch(void) { pause("pmcctx", 1); } uint64_t pmc_rdtsc(void) { #if defined(__i386__) || defined(__amd64__) if (__predict_true(amd_feature & AMDID_RDTSCP)) return rdtscp(); else return rdtsc(); #else return get_cyclecount(); #endif } /* * Get the file name for an executable. This is a simple wrapper * around vn_fullpath(9). */ static void pmc_getfilename(struct vnode *v, char **fullpath, char **freepath) { *fullpath = "unknown"; *freepath = NULL; vn_fullpath(curthread, v, fullpath, freepath); } /* * remove a process owning PMCs */ void pmc_remove_owner(struct pmc_owner *po) { struct pmc *pm, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG1(OWN,ORM,1, "remove-owner po=%p", po); /* Remove descriptor from the owner hash table */ LIST_REMOVE(po, po_next); /* release all owned PMC descriptors */ LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) { PMCDBG1(OWN,ORM,2, "pmc=%p", pm); KASSERT(pm->pm_owner == po, ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po)); pmc_release_pmc_descriptor(pm); /* will unlink from the list */ pmc_destroy_pmc_descriptor(pm); } KASSERT(po->po_sscount == 0, ("[pmc,%d] SS count not zero", __LINE__)); KASSERT(LIST_EMPTY(&po->po_pmcs), ("[pmc,%d] PMC list not empty", __LINE__)); /* de-configure the log file if present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_deconfigure_log(po); } /* * remove an owner process record if all conditions are met. */ static void pmc_maybe_remove_owner(struct pmc_owner *po) { PMCDBG1(OWN,OMR,1, "maybe-remove-owner po=%p", po); /* * Remove owner record if * - this process does not own any PMCs * - this process has not allocated a system-wide sampling buffer */ if (LIST_EMPTY(&po->po_pmcs) && ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } } /* * Add an association between a target process and a PMC. */ static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct pmc_target *pt; #ifdef INVARIANTS struct pmc_thread *pt_td; #endif sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d", __LINE__, pm, pp->pp_proc->p_pid)); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1), ("[pmc,%d] Illegal reference count %d for process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); #ifdef HWPMC_DEBUG LIST_FOREACH(pt, &pm->pm_targets, pt_next) if (pt->pt_process == pp) KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets", __LINE__, pp, pm)); #endif pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO); pt->pt_process = pp; LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next); atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc, (uintptr_t)pm); if (pm->pm_owner->po_owner == pp->pp_proc) pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER; /* * Initialize the per-process values at this row index. */ pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ? pm->pm_sc.pm_reloadcount : 0; pp->pp_refcnt++; #ifdef INVARIANTS /* Confirm that the per-thread values at this row index are cleared.
*/ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt_td, &pp->pp_tds, pt_next) { KASSERT(pt_td->pt_pmcs[ri].pt_pmcval == (pmc_value_t) 0, ("[pmc,%d] pt_pmcval not cleared for pid=%d at " "ri=%d", __LINE__, pp->pp_proc->p_pid, ri)); } mtx_unlock_spin(pp->pp_tdslock); } #endif } /* * Removes the association between a target process and a PMC. */ static void pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct proc *p; struct pmc_target *ptgt; struct pmc_thread *pt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); KASSERT(pp->pp_pmcs[ri].pp_pmc == pm, ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__, ri, pm, pp->pp_pmcs[ri].pp_pmc)); pp->pp_pmcs[ri].pp_pmc = NULL; pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0; /* Clear the per-thread values at this row index. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt, &pp->pp_tds, pt_next) pt->pt_pmcs[ri].pt_pmcval = (pmc_value_t) 0; mtx_unlock_spin(pp->pp_tdslock); } /* Remove owner-specific flags */ if (pm->pm_owner->po_owner == pp->pp_proc) { pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS; pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER; } pp->pp_refcnt--; /* Remove the target process from the PMC structure */ LIST_FOREACH(ptgt, &pm->pm_targets, pt_next) if (ptgt->pt_process == pp) break; KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found " "in pmc %p", __LINE__, pp->pp_proc, pp, pm)); LIST_REMOVE(ptgt, pt_next); free(ptgt, M_PMC); /* if the PMC now lacks targets, send the owner a SIGIO */ if (LIST_EMPTY(&pm->pm_targets)) { p = pm->pm_owner->po_owner; PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); PMCDBG2(PRC,SIG,2, "signalling proc=%p signal=%d", p, SIGIO); } } /* * Check if PMC 'pm' may be attached to target process 't'. */ static int pmc_can_attach(struct pmc *pm, struct proc *t) { struct proc *o; /* pmc owner */ struct ucred *oc, *tc; /* owner, target credentials */ int decline_attach, i; /* * A PMC's owner can always attach that PMC to itself. */ if ((o = pm->pm_owner->po_owner) == t) return 0; PROC_LOCK(o); oc = o->p_ucred; crhold(oc); PROC_UNLOCK(o); PROC_LOCK(t); tc = t->p_ucred; crhold(tc); PROC_UNLOCK(t); /* * The effective uid of the PMC owner should match at least one * of the {effective,real,saved} uids of the target process. */ decline_attach = oc->cr_uid != tc->cr_uid && oc->cr_uid != tc->cr_svuid && oc->cr_uid != tc->cr_ruid; /* * Every one of the target's group ids must be in the owner's * group list. */ for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) decline_attach = !groupmember(tc->cr_groups[i], oc); /* check the real and saved gids too */ if (decline_attach == 0) decline_attach = !groupmember(tc->cr_rgid, oc) || !groupmember(tc->cr_svgid, oc); crfree(tc); crfree(oc); return !decline_attach; } /* * Attach a process to a PMC.
*/ static int pmc_attach_one_process(struct proc *p, struct pmc *pm) { int ri, error; char *fullpath, *freepath; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * Locate the process descriptor corresponding to process 'p', * allocating space as needed. * * Verify that rowindex 'pm_rowindex' is free in the process * descriptor. * * If not, allocate space for a descriptor and link the * process descriptor and PMC. */ ri = PMC_TO_ROWINDEX(pm); /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) { error = ENOMEM; goto fail; } if (pp->pp_pmcs[ri].pp_pmc == pm) {/* already present at slot [ri] */ error = EEXIST; goto fail; } if (pp->pp_pmcs[ri].pp_pmc != NULL) { error = EBUSY; goto fail; } pmc_link_target_process(pm, pp); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0) pm->pm_flags |= PMC_F_NEEDS_LOGFILE; pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */ /* issue an attach event to a configured log file */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) { if (p->p_flag & P_KPROC) { fullpath = kernelname; freepath = NULL; } else { pmc_getfilename(p->p_textvp, &fullpath, &freepath); pmclog_process_pmcattach(pm, p->p_pid, fullpath); } free(freepath, M_TEMP); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_process_mappings(pm->pm_owner, p); } return (0); fail: PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return (error); } /* * Attach a process and optionally its children */ static int pmc_attach_process(struct proc *p, struct pmc *pm) { int error; struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * If this PMC successfully allowed a GETMSR operation * in the past, disallow further ATTACHes. */ if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0) return EPERM; if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_attach_one_process(p, pm); /* * Traverse all child processes, attaching them to * this PMC. */ sx_slock(&proctree_lock); top = p; for (;;) { if ((error = pmc_attach_one_process(p, pm)) != 0) break; if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } if (error) (void) pmc_detach_process(top, pm); done: sx_sunlock(&proctree_lock); return error; } /* * Detach a process from a PMC. If there are no other PMCs tracking * this process, remove the process structure from its hash table. If * 'flags' contains PMC_FLAG_REMOVE, then free the process structure. 
*/ static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags) { int ri; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL, ("[pmc,%d] null pm pointer", __LINE__)); ri = PMC_TO_ROWINDEX(pm); PMCDBG6(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x", pm, ri, p, p->p_pid, p->p_comm, flags); if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) return ESRCH; if (pp->pp_pmcs[ri].pp_pmc != pm) return EINVAL; pmc_unlink_target_process(pm, pp); /* Issue a detach entry if a log file is configured */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcdetach(pm, p->p_pid); /* * If there are no PMCs targeting this process, we remove its * descriptor from the target hash table and unset the P_HWPMC * flag in the struct proc. */ KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal refcnt %d for process struct %p", __LINE__, pp->pp_refcnt, pp)); if (pp->pp_refcnt != 0) /* still a target of some PMC */ return 0; pmc_remove_process_descriptor(pp); if (flags & PMC_FLAG_REMOVE) pmc_destroy_process_descriptor(pp); PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return 0; } /* * Detach a process and optionally its descendants from a PMC. */ static int pmc_detach_process(struct proc *p, struct pmc *pm) { struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); /* * Traverse all children, detaching them from this PMC. We * ignore errors since we could be detaching a PMC from a * partially attached proc tree. */ sx_slock(&proctree_lock); top = p; for (;;) { (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); if (LIST_EMPTY(&pm->pm_targets)) pm->pm_flags &= ~PMC_F_ATTACH_DONE; return 0; } /* * Thread context switch IN */ static void pmc_process_csw_in(struct thread *td) { int cpu; unsigned int adjri, ri; struct pmc *pm; struct proc *p; struct pmc_cpu *pc; struct pmc_hw *phw; pmc_value_t newvalue; struct pmc_process *pp; struct pmc_thread *pt; struct pmc_classdep *pcd; p = td->td_proc; pt = NULL; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL) return; KASSERT(pp->pp_proc == td->td_proc, ("[pmc,%d] not my thread state", __LINE__)); critical_enter(); /* no preemption from this point */ cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) continue; KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Target PMC in non-virtual mode (%d)", __LINE__, PMC_TO_MODE(pm))); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] Row index mismatch pmc %d != ri %d", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Only PMCs that are marked as 'RUNNING' need * be placed on hardware. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; /* increment PMC runcount */ counter_u64_add(pm->pm_runcount, 1); /* configure the HWPMC we are going to use. 
*/ pcd = pmc_ri_to_classdep(md, ri, &adjri); pcd->pcd_config_pmc(cpu, adjri, pm); phw = pc->pc_hwpmcs[ri]; KASSERT(phw != NULL, ("[pmc,%d] null hw pointer", __LINE__)); KASSERT(phw->phw_pmc == pm, ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__, phw->phw_pmc, pm)); /* * Write out saved value and start the PMC. * * Sampling PMCs use a per-thread value, while * counting mode PMCs use a per-pmc value that is * inherited across descendants. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { if (pt == NULL) pt = pmc_find_thread_descriptor(pp, td, PMC_FLAG_NONE); KASSERT(pt != NULL, ("[pmc,%d] No thread found for td=%p", __LINE__, td)); mtx_pool_lock_spin(pmc_mtxpool, pm); /* * If we have a thread descriptor, use the per-thread * counter in the descriptor. If not, we will use * a per-process counter. * * TODO: Remove the per-process "safety net" once * we have thoroughly tested that we don't hit the * above assert. */ if (pt != NULL) { if (pt->pt_pmcs[ri].pt_pmcval > 0) newvalue = pt->pt_pmcs[ri].pt_pmcval; else newvalue = pm->pm_sc.pm_reloadcount; } else { /* * Use the saved value calculated after the most * recent time a thread using the shared counter * switched out. Reset the saved count in case * another thread from this process switches in * before any threads switch out. */ newvalue = pp->pp_pmcs[ri].pp_pmcval; pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount; } mtx_pool_unlock_spin(pmc_mtxpool, pm); KASSERT(newvalue > 0 && newvalue <= pm->pm_sc.pm_reloadcount, ("[pmc,%d] pmcval outside of expected range cpu=%d " "ri=%d pmcval=%jx pm_reloadcount=%jx", __LINE__, cpu, ri, newvalue, pm->pm_sc.pm_reloadcount)); } else { KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, ("[pmc,%d] illegal mode=%d", __LINE__, PMC_TO_MODE(pm))); mtx_pool_lock_spin(pmc_mtxpool, pm); newvalue = PMC_PCPU_SAVED(cpu, ri) = pm->pm_gv.pm_savedvalue; mtx_pool_unlock_spin(pmc_mtxpool, pm); } PMCDBG3(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue); pcd->pcd_write_pmc(cpu, adjri, newvalue); /* If a sampling mode PMC, reset stalled state. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) pm->pm_pcpu_state[cpu].pps_stalled = 0; /* Indicate that we desire this to run. */ pm->pm_pcpu_state[cpu].pps_cpustate = 1; /* Start the PMC. */ pcd->pcd_start_pmc(cpu, adjri); } /* * perform any other architecture/cpu dependent thread * switch-in actions. */ (void) (*md->pmd_switch_in)(pc, pp); critical_exit(); } /* * Thread context switch OUT. */ static void pmc_process_csw_out(struct thread *td) { int cpu; int64_t tmp; struct pmc *pm; struct proc *p; enum pmc_mode mode; struct pmc_cpu *pc; pmc_value_t newvalue; unsigned int adjri, ri; struct pmc_process *pp; struct pmc_thread *pt = NULL; struct pmc_classdep *pcd; /* * Locate our process descriptor; this may be NULL if * this process is exiting and we have already removed * the process from the target process table. * * Note that due to kernel preemption, multiple * context switches may happen while the process is * exiting. * * Note also that if the target process cannot be * found we still need to deconfigure any PMCs that * are currently running on hardware. */ p = td->td_proc; pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE); /* * save PMCs */ critical_enter(); cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; /* * When a PMC gets unlinked from a target PMC, it will * be removed from the target's pp_pmc[] array. 
* * However, on a MP system, the target could have been * executing on another CPU at the time of the unlink. * So, at context switch OUT time, we need to look at * the hardware to determine if a PMC is scheduled on * it. */ for (ri = 0; ri < md->pmd_npmc; ri++) { pcd = pmc_ri_to_classdep(md, ri, &adjri); pm = NULL; (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); if (pm == NULL) /* nothing at this row index */ continue; mode = PMC_TO_MODE(pm); if (!PMC_IS_VIRTUAL_MODE(mode)) continue; /* not a process virtual PMC */ KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Change desired state, and then stop if not stalled. * This two-step dance should avoid race conditions where * an interrupt re-enables the PMC after this code has * already checked the pm_stalled flag. */ pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (pm->pm_pcpu_state[cpu].pps_stalled == 0) pcd->pcd_stop_pmc(cpu, adjri); /* reduce this PMC's runcount */ counter_u64_add(pm->pm_runcount, -1); /* * If this PMC is associated with this process, * save the reading. */ if (pm->pm_state != PMC_STATE_DELETED && pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) { KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(pp->pp_refcnt > 0, ("[pmc,%d] pp refcnt = %d", __LINE__, pp->pp_refcnt)); pcd->pcd_read_pmc(cpu, adjri, &newvalue); if (mode == PMC_MODE_TS) { PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d val=%jd (samp)", cpu, ri, newvalue); if (pt == NULL) pt = pmc_find_thread_descriptor(pp, td, PMC_FLAG_NONE); KASSERT(pt != NULL, ("[pmc,%d] No thread found for td=%p", __LINE__, td)); mtx_pool_lock_spin(pmc_mtxpool, pm); /* * If we have a thread descriptor, save the * per-thread counter in the descriptor. If not, * we will update the per-process counter. * * TODO: Remove the per-process "safety net" * once we have thoroughly tested that we * don't hit the above assert. */ if (pt != NULL) pt->pt_pmcs[ri].pt_pmcval = newvalue; else { /* * For sampling process-virtual PMCs, * newvalue is the number of events to * be seen until the next sampling * interrupt. We can just add the events * left from this invocation to the * counter, then adjust in case we * overflow our range. * * (Recall that we reload the counter * every time we use it.) */ pp->pp_pmcs[ri].pp_pmcval += newvalue; if (pp->pp_pmcs[ri].pp_pmcval > pm->pm_sc.pm_reloadcount) pp->pp_pmcs[ri].pp_pmcval -= pm->pm_sc.pm_reloadcount; } mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (count)", cpu, ri, tmp); /* * For counting process-virtual PMCs, * we expect the count to be * increasing monotonically, modulo a 64 * bit wraparound. */ KASSERT(tmp >= 0, ("[pmc,%d] negative increment cpu=%d " "ri=%d newvalue=%jx saved=%jx " "incr=%jx", __LINE__, cpu, ri, newvalue, PMC_PCPU_SAVED(cpu,ri), tmp)); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); if (pm->pm_flags & PMC_F_LOG_PROCCSW) pmclog_process_proccsw(pm, pp, tmp, td); } } /* mark hardware as free */ pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * perform any other architecture/cpu dependent thread * switch out functions. */ (void) (*md->pmd_switch_out)(pc, pp); critical_exit(); } /* * A new thread for a process. 
*/ static void pmc_process_thread_add(struct thread *td) { struct pmc_process *pmc; pmc = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE); if (pmc != NULL) pmc_find_thread_descriptor(pmc, td, PMC_FLAG_ALLOCATE); } /* * A thread delete for a process. */ static void pmc_process_thread_delete(struct thread *td) { struct pmc_process *pmc; pmc = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE); if (pmc != NULL) pmc_thread_descriptor_pool_free(pmc_find_thread_descriptor(pmc, td, PMC_FLAG_REMOVE)); } /* * A userret() call for a thread. */ static void pmc_process_thread_userret(struct thread *td) { sched_pin(); pmc_capture_user_callchain(curcpu, PMC_UR, td->td_frame); sched_unpin(); } /* * A mapping change for a process. */ static void pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm) { int ri; pid_t pid; char *fullpath, *freepath; const struct pmc *pm; struct pmc_owner *po; const struct pmc_process *pp; freepath = fullpath = NULL; MPASS(!in_epoch()); pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); pid = td->td_proc->p_pid; epoch_enter_preempt(global_epoch_preempt); /* Inform owners of all system-wide sampling PMCs. */ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) goto done; /* * Inform sampling PMC owners tracking this process. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_in(pm->pm_owner, pid, pkm->pm_address, fullpath); done: if (freepath) free(freepath, M_TEMP); epoch_exit_preempt(global_epoch_preempt); } /* * Log an munmap request. */ static void pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) { int ri; pid_t pid; struct pmc_owner *po; const struct pmc *pm; const struct pmc_process *pp; pid = td->td_proc->p_pid; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); epoch_exit_preempt(global_epoch_preempt); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_out(pm->pm_owner, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); } /* * Log mapping information about the kernel. */ static void pmc_log_kernel_mappings(struct pmc *pm) { struct pmc_owner *po; struct pmckern_map_in *km, *kmbase; MPASS(in_epoch() || sx_xlocked(&pmc_sx)); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); po = pm->pm_owner; if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) return; if (PMC_TO_MODE(pm) == PMC_MODE_SS) pmc_process_allproc(pm); /* * Log the current set of kernel modules. */ kmbase = linker_hwpmc_list_objects(); for (km = kmbase; km->pm_file != NULL; km++) { PMCDBG2(LOG,REG,1,"%s %p", (char *) km->pm_file, (void *) km->pm_address); pmclog_process_map_in(po, (pid_t) -1, km->pm_address, km->pm_file); } free(kmbase, M_LINKER); po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; } /* * Log the mappings for a single process. 
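*
* (Descriptive note: each executable, vnode-backed map entry found
* below is emitted as a MAP-IN record -- target pid, the mapping's
* start address and the vnode's resolved path -- via
* pmclog_process_map_in().)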
*/ static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p) { vm_map_t map; struct vnode *vp; struct vmspace *vm; vm_map_entry_t entry; vm_offset_t last_end; u_int last_timestamp; struct vnode *last_vp; vm_offset_t start_addr; vm_object_t obj, lobj, tobj; char *fullpath, *freepath; last_vp = NULL; last_end = (vm_offset_t) 0; fullpath = freepath = NULL; if ((vm = vmspace_acquire_ref(p)) == NULL) return; map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry == NULL) { PMCDBG2(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly " "NULL! pid=%d vm_map=%p\n", p->p_pid, map); break; } /* * We only care about executable map entries. */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || !(entry->protection & VM_PROT_EXECUTE) || (entry->object.vm_object == NULL)) { continue; } obj = entry->object.vm_object; VM_OBJECT_RLOCK(obj); /* * Walk the backing_object list to find the base * (non-shadowed) vm_object. */ for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } /* * At this point lobj is the base vm_object and it is locked. */ if (lobj == NULL) { PMCDBG3(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! pid=%d " "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj); VM_OBJECT_RUNLOCK(obj); continue; } vp = vm_object_vnode(lobj); if (vp == NULL) { if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * Skip contiguous regions that point to the same * vnode, so we don't emit redundant MAP-IN * directives. */ if (entry->start == last_end && vp == last_vp) { last_end = entry->end; if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * We don't want to keep the proc's vm_map or this * vm_object locked while we walk the pathname, since * vn_fullpath() can sleep. However, if we drop the * lock, it's possible for concurrent activity to * modify the vm_map list. To protect against this, * we save the vm_map timestamp before we release the * lock, and check it after we reacquire the lock * below. */ start_addr = entry->start; last_end = entry->end; last_timestamp = map->timestamp; vm_map_unlock_read(map); vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); freepath = NULL; pmc_getfilename(vp, &fullpath, &freepath); last_vp = vp; vrele(vp); vp = NULL; pmclog_process_map_in(po, p->p_pid, start_addr, fullpath); if (freepath) free(freepath, M_TEMP); vm_map_lock_read(map); /* * If our saved timestamp doesn't match, this means * that the vm_map was modified out from under us and * we can't trust our current "entry" pointer. Do a * new lookup for this entry. If there is no entry * for this address range, vm_map_lookup_entry() will * return the previous one, so we always want to go to * entry->next on the next loop iteration. * * There is an edge condition here that can occur if * there is no entry at or before this address. In * this situation, vm_map_lookup_entry returns * &map->header, which would cause our loop to abort * without processing the rest of the map. However, * in practice this will never happen for process * vm_map. This is because the executable's text * segment is the first mapping in the proc's address * space, and this mapping is never removed until the * process exits, so there will always be a non-header * entry at or before the requested address for * vm_map_lookup_entry to return. 
*/ if (map->timestamp != last_timestamp) vm_map_lookup_entry(map, last_end - 1, &entry); } vm_map_unlock_read(map); vmspace_free(vm); return; } /* * Log mappings for all processes in the system. */ static void pmc_log_all_process_mappings(struct pmc_owner *po) { struct proc *p, *top; sx_assert(&pmc_sx, SX_XLOCKED); if ((p = pfind(1)) == NULL) panic("[pmc,%d] Cannot find init", __LINE__); PROC_UNLOCK(p); sx_slock(&proctree_lock); top = p; for (;;) { pmc_log_process_mappings(po, p); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); } /* * The 'hook' invoked from the kernel proper */ #ifdef HWPMC_DEBUG const char *pmc_hooknames[] = { /* these strings correspond to PMC_FN_* in <sys/pmckern.h> */ "", "EXEC", "CSW-IN", "CSW-OUT", "SAMPLE", "UNUSED1", "UNUSED2", "MMAP", "MUNMAP", "CALLCHAIN-NMI", "CALLCHAIN-SOFT", "SOFTSAMPLING", "THR-CREATE", "THR-EXIT", "THR-USERRET", "THR-CREATE-LOG", "THR-EXIT-LOG", "PROC-CREATE-LOG" }; #endif static int pmc_hook_handler(struct thread *td, int function, void *arg) { int cpu; PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function, pmc_hooknames[function], arg); switch (function) { /* * Process exec() */ case PMC_FN_PROCESS_EXEC: { char *fullpath, *freepath; unsigned int ri; int is_using_hwpmcs; struct pmc *pm; struct proc *p; struct pmc_owner *po; struct pmc_process *pp; struct pmckern_procexec *pk; sx_assert(&pmc_sx, SX_XLOCKED); p = td->td_proc; pmc_getfilename(p->p_textvp, &fullpath, &freepath); pk = (struct pmckern_procexec *) arg; epoch_enter_preempt(global_epoch_preempt); /* Inform owners of SS mode PMCs of the exec event. */ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, PMC_ID_INVALID, p->p_pid, pk->pm_entryaddr, fullpath); epoch_exit_preempt(global_epoch_preempt); PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); if (!is_using_hwpmcs) { if (freepath) free(freepath, M_TEMP); break; } /* * PMCs are not inherited across an exec(): remove any * PMCs that this process is the owner of. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } /* * If the process being exec'ed is not the target of any * PMC, we are done. */ if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) { if (freepath) free(freepath, M_TEMP); break; } /* * Log the exec event to all monitoring owners. Skip * owners who have already received the event because * they had system sampling PMCs active. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, pm->pm_id, p->p_pid, pk->pm_entryaddr, fullpath); } if (freepath) free(freepath, M_TEMP); PMCDBG4(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d", p, p->p_pid, p->p_comm, pk->pm_credentialschanged); if (pk->pm_credentialschanged == 0) /* no change */ break; /* * If the newly exec()'ed process has a different credential * than before, allow it to be the target of a PMC only if * the PMC's owner has sufficient privilege. 
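 *
 * For example, when an unprivileged owner's target exec()s a setuid
 * binary, the pmc_can_attach() check below fails and the PMC is
 * detached from the process.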
*/ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) if (pmc_can_attach(pm, td->td_proc) != 0) pmc_detach_one_process(td->td_proc, pm, PMC_FLAG_NONE); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__, pp->pp_refcnt, pp)); /* * If this process is no longer the target of any * PMCs, we can remove the process entry and free * up space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); pmc_destroy_process_descriptor(pp); break; } } break; case PMC_FN_CSW_IN: pmc_process_csw_in(td); break; case PMC_FN_CSW_OUT: pmc_process_csw_out(td); break; /* * Process accumulated PC samples. * * This function is expected to be called by hardclock() for * each CPU that has accumulated PC samples. * * This function is to be executed on the CPU whose samples * are being processed. */ case PMC_FN_DO_SAMPLES: /* * Clear the cpu specific bit in the CPU mask before * doing the rest of the processing. If the NMI handler * gets invoked after the "DPCPU_SET()" call * below but before "pmc_process_samples()" gets * around to processing the interrupt, then we will * come back here at the next hardclock() tick (and * may find nothing to do if "pmc_process_samples()" * had already processed the interrupt). We don't * lose the interrupt sample. */ DPCPU_SET(pmc_sampled, 0); cpu = PCPU_GET(cpuid); pmc_process_samples(cpu, PMC_HR); pmc_process_samples(cpu, PMC_SR); pmc_process_samples(cpu, PMC_UR); break; case PMC_FN_MMAP: pmc_process_mmap(td, (struct pmckern_map_in *) arg); break; case PMC_FN_MUNMAP: MPASS(in_epoch() || sx_xlocked(&pmc_sx)); pmc_process_munmap(td, (struct pmckern_map_out *) arg); break; case PMC_FN_PROC_CREATE_LOG: pmc_process_proccreate((struct proc *)arg); break; case PMC_FN_USER_CALLCHAIN: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR, (struct trapframe *) arg); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_USER_CALLCHAIN_SOFT: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); cpu = PCPU_GET(cpuid); pmc_capture_user_callchain(cpu, PMC_SR, (struct trapframe *) arg); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_SOFT_SAMPLING: /* * Call soft PMC sampling intr. */ pmc_soft_intr((struct pmckern_soft *) arg); break; case PMC_FN_THR_CREATE: pmc_process_thread_add(td); pmc_process_threadcreate(td); break; case PMC_FN_THR_CREATE_LOG: pmc_process_threadcreate(td); break; case PMC_FN_THR_EXIT: KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_process_thread_delete(td); pmc_process_threadexit(td); break; case PMC_FN_THR_EXIT_LOG: pmc_process_threadexit(td); break; case PMC_FN_THR_USERRET: KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_process_thread_userret(td); break; default: #ifdef HWPMC_DEBUG KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); #endif break; } return 0; } /* * allocate a 'struct pmc_owner' descriptor in the owner hash table. 
*/ static struct pmc_owner * pmc_allocate_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; /* allocate space for N pointers and one descriptor struct */ po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO); po->po_owner = p; LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */ TAILQ_INIT(&po->po_logbuffers); mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN); PMCDBG4(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p", p, p->p_pid, p->p_comm, po); return po; } static void pmc_destroy_owner_descriptor(struct pmc_owner *po) { PMCDBG4(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)", po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); mtx_destroy(&po->po_mtx); free(po, M_PMC); } /* * Allocate a thread descriptor from the free pool. * * NOTE: This *can* return NULL. */ static struct pmc_thread * pmc_thread_descriptor_pool_alloc(void) { struct pmc_thread *pt; mtx_lock_spin(&pmc_threadfreelist_mtx); if ((pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) { LIST_REMOVE(pt, pt_next); pmc_threadfreelist_entries--; } mtx_unlock_spin(&pmc_threadfreelist_mtx); return (pt); } /* * Add a thread descriptor to the free pool. We use this instead of free() * to maintain a cache of free entries. Additionally, we can safely call * this function when we cannot call free(), such as in a critical section. * */ static void pmc_thread_descriptor_pool_free(struct pmc_thread *pt) { if (pt == NULL) return; memset(pt, 0, THREADENTRY_SIZE); mtx_lock_spin(&pmc_threadfreelist_mtx); LIST_INSERT_HEAD(&pmc_threadfreelist, pt, pt_next); pmc_threadfreelist_entries++; if (pmc_threadfreelist_entries > pmc_threadfreelist_max) GROUPTASK_ENQUEUE(&free_gtask); mtx_unlock_spin(&pmc_threadfreelist_mtx); } /* * A callout to manage the free list. */ static void pmc_thread_descriptor_pool_free_task(void *arg __unused) { struct pmc_thread *pt; LIST_HEAD(, pmc_thread) tmplist; int delta; LIST_INIT(&tmplist); /* Determine what changes, if any, we need to make. */ mtx_lock_spin(&pmc_threadfreelist_mtx); delta = pmc_threadfreelist_entries - pmc_threadfreelist_max; while (delta > 0 && (pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) { delta--; LIST_REMOVE(pt, pt_next); LIST_INSERT_HEAD(&tmplist, pt, pt_next); } mtx_unlock_spin(&pmc_threadfreelist_mtx); /* If there are entries to free, free them. */ while (!LIST_EMPTY(&tmplist)) { pt = LIST_FIRST(&tmplist); LIST_REMOVE(pt, pt_next); free(pt, M_PMC); } } /* * Drain the thread free pool, freeing all allocations. */ static void pmc_thread_descriptor_pool_drain() { struct pmc_thread *pt, *next; LIST_FOREACH_SAFE(pt, &pmc_threadfreelist, pt_next, next) { LIST_REMOVE(pt, pt_next); free(pt, M_PMC); } } /* * find the descriptor corresponding to thread 'td', adding or removing it * as specified by 'mode'. * * Note that this supports additional mode flags in addition to those * supported by pmc_find_process_descriptor(): * PMC_FLAG_NOWAIT: Causes the function to not wait for mallocs. * This makes it safe to call while holding certain other locks. */ static struct pmc_thread * pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td, uint32_t mode) { struct pmc_thread *pt = NULL, *ptnew = NULL; int wait_flag; KASSERT(td != NULL, ("[pmc,%d] called to add NULL td", __LINE__)); /* * Pre-allocate memory in the PMC_FLAG_ALLOCATE case prior to * acquiring the lock. 
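 * (As with pmc_find_process_descriptor(), malloc(9) cannot be called
 * once we hold the spin lock -- pp->pp_tdslock comes from a spin
 * mutex pool -- hence the pre-allocation.)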
*/ if (mode & PMC_FLAG_ALLOCATE) { if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) { wait_flag = M_WAITOK; if ((mode & PMC_FLAG_NOWAIT) || in_epoch()) wait_flag = M_NOWAIT; ptnew = malloc(THREADENTRY_SIZE, M_PMC, wait_flag|M_ZERO); } } mtx_lock_spin(pp->pp_tdslock); LIST_FOREACH(pt, &pp->pp_tds, pt_next) if (pt->pt_td == td) break; if ((mode & PMC_FLAG_REMOVE) && pt != NULL) LIST_REMOVE(pt, pt_next); if ((mode & PMC_FLAG_ALLOCATE) && pt == NULL && ptnew != NULL) { pt = ptnew; ptnew = NULL; pt->pt_td = td; LIST_INSERT_HEAD(&pp->pp_tds, pt, pt_next); } mtx_unlock_spin(pp->pp_tdslock); if (ptnew != NULL) { free(ptnew, M_PMC); } return pt; } /* * Try to add thread descriptors for each thread in a process. */ static void pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp) { struct thread *curtd; struct pmc_thread **tdlist; int i, tdcnt, tdlistsz; KASSERT(!PROC_LOCKED(p), ("[pmc,%d] proc unexpectedly locked", __LINE__)); tdcnt = 32; restart: tdlistsz = roundup2(tdcnt, 32); tdcnt = 0; tdlist = malloc(sizeof(struct pmc_thread*) * tdlistsz, M_TEMP, M_WAITOK); PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, curtd) tdcnt++; if (tdcnt >= tdlistsz) { PROC_UNLOCK(p); free(tdlist, M_TEMP); goto restart; } /* * Try to add each thread to the list without sleeping. If unable, * add to a queue to retry after dropping the process lock. */ tdcnt = 0; FOREACH_THREAD_IN_PROC(p, curtd) { tdlist[tdcnt] = pmc_find_thread_descriptor(pp, curtd, PMC_FLAG_ALLOCATE|PMC_FLAG_NOWAIT); if (tdlist[tdcnt] == NULL) { PROC_UNLOCK(p); for (i = 0; i <= tdcnt; i++) pmc_thread_descriptor_pool_free(tdlist[i]); free(tdlist, M_TEMP); goto restart; } tdcnt++; } PROC_UNLOCK(p); free(tdlist, M_TEMP); } /* * find the descriptor corresponding to process 'p', adding or removing it * as specified by 'mode'. */ static struct pmc_process * pmc_find_process_descriptor(struct proc *p, uint32_t mode) { uint32_t hindex; struct pmc_process *pp, *ppnew; struct pmc_processhash *pph; hindex = PMC_HASH_PTR(p, pmc_processhashmask); pph = &pmc_processhash[hindex]; ppnew = NULL; /* * Pre-allocate memory in the PMC_FLAG_ALLOCATE case since we * cannot call malloc(9) once we hold a spin lock. */ if (mode & PMC_FLAG_ALLOCATE) ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc * sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO); mtx_lock_spin(&pmc_processhash_mtx); LIST_FOREACH(pp, pph, pp_next) if (pp->pp_proc == p) break; if ((mode & PMC_FLAG_REMOVE) && pp != NULL) LIST_REMOVE(pp, pp_next); if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL && ppnew != NULL) { ppnew->pp_proc = p; LIST_INIT(&ppnew->pp_tds); ppnew->pp_tdslock = mtx_pool_find(pmc_mtxpool, ppnew); LIST_INSERT_HEAD(pph, ppnew, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); pp = ppnew; ppnew = NULL; /* Add thread descriptors for this process' current threads. */ pmc_add_thread_descriptors_from_proc(p, pp); } else mtx_unlock_spin(&pmc_processhash_mtx); if (ppnew != NULL) free(ppnew, M_PMC); return pp; } /* * remove a process descriptor from the process hash table. */ static void pmc_remove_process_descriptor(struct pmc_process *pp) { KASSERT(pp->pp_refcnt == 0, ("[pmc,%d] Removing process descriptor %p with count %d", __LINE__, pp, pp->pp_refcnt)); mtx_lock_spin(&pmc_processhash_mtx); LIST_REMOVE(pp, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); } /* * destroy a process descriptor. 
*/ static void pmc_destroy_process_descriptor(struct pmc_process *pp) { struct pmc_thread *pmc_td; while ((pmc_td = LIST_FIRST(&pp->pp_tds)) != NULL) { LIST_REMOVE(pmc_td, pt_next); pmc_thread_descriptor_pool_free(pmc_td); } free(pp, M_PMC); } /* * find an owner descriptor corresponding to proc 'p' */ static struct pmc_owner * pmc_find_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; po = NULL; LIST_FOREACH(po, poh, po_next) if (po->po_owner == p) break; PMCDBG5(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> " "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po); return po; } /* * pmc_allocate_pmc_descriptor * * Allocate a pmc descriptor and initialize its * fields. */ static struct pmc * pmc_allocate_pmc_descriptor(void) { struct pmc *pmc; pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO); pmc->pm_runcount = counter_u64_alloc(M_WAITOK); pmc->pm_pcpu_state = malloc(sizeof(struct pmc_pcpu_state)*mp_ncpus, M_PMC, M_WAITOK|M_ZERO); PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc); return pmc; } /* * Destroy a pmc descriptor. */ static void pmc_destroy_pmc_descriptor(struct pmc *pm) { KASSERT(pm->pm_state == PMC_STATE_DELETED || pm->pm_state == PMC_STATE_FREE, ("[pmc,%d] destroying non-deleted PMC", __LINE__)); KASSERT(LIST_EMPTY(&pm->pm_targets), ("[pmc,%d] destroying pmc with targets", __LINE__)); KASSERT(pm->pm_owner == NULL, ("[pmc,%d] destroying pmc attached to an owner", __LINE__)); KASSERT(counter_u64_fetch(pm->pm_runcount) == 0, ("[pmc,%d] pmc has non-zero run count %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount))); counter_u64_free(pm->pm_runcount); free(pm->pm_pcpu_state, M_PMC); free(pm, M_PMC); } static void pmc_wait_for_pmc_idle(struct pmc *pm) { #ifdef HWPMC_DEBUG volatile int maxloop; maxloop = 100 * pmc_cpu_max(); #endif /* * Loop (with a forced context switch) till the PMC's runcount * comes down to zero. */ pmclog_flush(pm->pm_owner, 1); while (counter_u64_fetch(pm->pm_runcount) > 0) { pmclog_flush(pm->pm_owner, 1); #ifdef HWPMC_DEBUG maxloop--; KASSERT(maxloop > 0, ("[pmc,%d] (ri%d, rc%ld) waiting too long for " "pmc to be free", __LINE__, PMC_TO_ROWINDEX(pm), (unsigned long)counter_u64_fetch(pm->pm_runcount))); #endif pmc_force_context_switch(); } } /* * This function does the following things: * * - detaches the PMC from hardware * - unlinks all target threads that were attached to it * - removes the PMC from its owner's list * - destroys the PMC private mutex * * Once this function completes, the given pmc pointer can be freed by * calling pmc_destroy_pmc_descriptor(). */ static void pmc_release_pmc_descriptor(struct pmc *pm) { enum pmc_mode mode; struct pmc_hw *phw; u_int adjri, ri, cpu; struct pmc_owner *po; struct pmc_binding pb; struct pmc_process *pp; struct pmc_classdep *pcd; struct pmc_target *ptgt, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mode = PMC_TO_MODE(pm); PMCDBG3(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, mode); /* * First, we take the PMC off hardware. */ cpu = 0; if (PMC_IS_SYSTEM_MODE(mode)) { /* * A system mode PMC runs on a specific CPU. Switch * to this CPU and turn hardware off. 
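 *
 * The pmc_save_cpu_binding()/pmc_select_cpu() calls below pin the
 * current thread to the PMC's CPU for the duration of the hardware
 * access; pmc_restore_cpu_binding() undoes the binding once the
 * hardware has been deconfigured.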
*/ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); pmc_select_cpu(cpu); /* switch off non-stalled CPUs */ pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (pm->pm_state == PMC_STATE_RUNNING && pm->pm_pcpu_state[cpu].pps_stalled == 0) { phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; KASSERT(phw->phw_pmc == pm, ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", __LINE__, ri, phw->phw_pmc, pm)); PMCDBG2(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_stop_pmc(cpu, adjri); critical_exit(); } PMCDBG2(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_config_pmc(cpu, adjri, NULL); critical_exit(); /* adjust the global and process count of SS mode PMCs */ if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { po = pm->pm_owner; po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); CK_LIST_REMOVE(po, po_ssnext); epoch_wait_preempt(global_epoch_preempt); } } pm->pm_state = PMC_STATE_DELETED; pmc_restore_cpu_binding(&pb); /* * We could have references to this PMC structure in * the per-cpu sample queues. Wait for the queue to * drain. */ pmc_wait_for_pmc_idle(pm); } else if (PMC_IS_VIRTUAL_MODE(mode)) { /* * A virtual PMC could be running on multiple CPUs at * a given instant. * * By marking its state as DELETED, we ensure that * this PMC is never further scheduled on hardware. * * Then we wait till all CPUs are done with this PMC. */ pm->pm_state = PMC_STATE_DELETED; /* Wait for the PMCs runcount to come to zero. */ pmc_wait_for_pmc_idle(pm); /* * At this point the PMC is off all CPUs and cannot be * freshly scheduled onto a CPU. It is now safe to * unlink all targets from this PMC. If a * process-record's refcount falls to zero, we remove * it from the hash table. The module-wide SX lock * protects us from races. */ LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { pp = ptgt->pt_process; pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ PMCDBG1(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); /* * If the target process record shows that no * PMCs are attached to it, reclaim its space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); pmc_destroy_process_descriptor(pp); } } cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ } /* * Release any MD resources */ (void) pcd->pcd_release_pmc(cpu, adjri, pm); /* * Update row disposition */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) PMC_UNMARK_ROW_STANDALONE(ri); else PMC_UNMARK_ROW_THREAD(ri); /* unlink from the owner's list */ if (pm->pm_owner) { LIST_REMOVE(pm, pm_next); pm->pm_owner = NULL; } } /* * Register an owner and a pmc. */ static int pmc_register_owner(struct proc *p, struct pmc *pmc) { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) return ENOMEM; KASSERT(pmc->pm_owner == NULL, ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); pmc->pm_owner = po; LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcallocate(pmc); PMCDBG2(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", po, pmc); return 0; } /* * Return the current row disposition: * == 0 => FREE * > 0 => PROCESS MODE * < 0 => SYSTEM MODE */ int pmc_getrowdisp(int ri) { return pmc_pmcdisp[ri]; } /* * Check if a PMC at row index 'ri' can be allocated to the current * process. 
* * Allocation can fail if: * - the current process is already being profiled by a PMC at index 'ri', * attached to it via OP_PMCATTACH. * - the current process has already allocated a PMC at index 'ri' * via OP_ALLOCATE. */ static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) { enum pmc_mode mode; struct pmc *pm; struct pmc_owner *po; struct pmc_process *pp; PMCDBG5(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); /* * We shouldn't have already allocated a process-mode PMC at * row index 'ri'. * * We shouldn't have allocated a system-wide PMC on the same * CPU and same RI. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) LIST_FOREACH(pm, &po->po_pmcs, pm_next) { if (PMC_TO_ROWINDEX(pm) == ri) { mode = PMC_TO_MODE(pm); if (PMC_IS_VIRTUAL_MODE(mode)) return EEXIST; if (PMC_IS_SYSTEM_MODE(mode) && (int) PMC_TO_CPU(pm) == cpu) return EEXIST; } } /* * We also shouldn't be the target of any PMC at this index * since otherwise a PMC_ATTACH to ourselves will fail. */ if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) if (pp->pp_pmcs[ri].pp_pmc) return EEXIST; PMCDBG4(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", p, p->p_pid, p->p_comm, ri); return 0; } /* * Check if a given PMC at row index 'ri' can be currently used in * mode 'mode'. */ static int pmc_can_allocate_row(int ri, enum pmc_mode mode) { enum pmc_disp disp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG2(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); if (PMC_IS_SYSTEM_MODE(mode)) disp = PMC_DISP_STANDALONE; else disp = PMC_DISP_THREAD; /* * check disposition for PMC row 'ri': * * Expected disposition Row-disposition Result * * STANDALONE STANDALONE or FREE proceed * STANDALONE THREAD fail * THREAD THREAD or FREE proceed * THREAD STANDALONE fail */ if (!PMC_ROW_DISP_IS_FREE(ri) && !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) return EBUSY; /* * All OK */ PMCDBG2(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); return 0; } /* * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. */ static struct pmc * pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) { struct pmc *pm; KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_id == pmcid) return pm; return NULL; } static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) { struct pmc *pm, *opm; struct pmc_owner *po; struct pmc_process *pp; PMCDBG1(PMC,FND,1, "find-pmc id=%d", pmcid); if (PMC_ID_TO_ROWINDEX(pmcid) >= md->pmd_npmc) return (EINVAL); if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) { /* * In case of PMC_F_DESCENDANTS child processes we will not find * the current process in the owners hash list. Find the owner * process first and from there lookup the po. */ if ((pp = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) { return ESRCH; } else { opm = pp->pp_pmcs[PMC_ID_TO_ROWINDEX(pmcid)].pp_pmc; if (opm == NULL) return ESRCH; if ((opm->pm_flags & (PMC_F_ATTACHED_TO_OWNER| PMC_F_DESCENDANTS)) != (PMC_F_ATTACHED_TO_OWNER| PMC_F_DESCENDANTS)) return ESRCH; po = opm->pm_owner; } } if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) return EINVAL; PMCDBG2(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); *pmc = pm; return 0; } /* * Start a PMC. 
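 *
 * This implements the PMC_OP_PMCSTART operation.  Userland normally
 * reaches it through libpmc; a minimal sketch, assuming the classic
 * pmc(3) wrappers (argument lists have varied across FreeBSD
 * versions) and a hypothetical event name:
 *
 *	pmc_id_t id;
 *	pmc_value_t v;
 *
 *	if (pmc_init() < 0)
 *		err(1, "pmc_init");
 *	if (pmc_allocate("instructions", PMC_MODE_TC, 0,
 *	    PMC_CPU_ANY, &id) < 0)
 *		err(1, "pmc_allocate");
 *	pmc_start(id);		<- PMC_OP_PMCSTART, handled below
 *	... run the workload ...
 *	pmc_stop(id);
 *	pmc_read(id, &v);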
*/ static int pmc_start(struct pmc *pm) { enum pmc_mode mode; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, error, cpu, ri; KASSERT(pm != NULL, ("[pmc,%d] null pm", __LINE__)); mode = PMC_TO_MODE(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); error = 0; PMCDBG3(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); po = pm->pm_owner; /* * Disallow PMCSTART if a logfile is required but has not been * configured yet. */ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EDOOFUS); /* programming error */ /* * If this is a sampling mode PMC, log mapping information for * the kernel modules that are currently loaded. */ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_kernel_mappings(pm); if (PMC_IS_VIRTUAL_MODE(mode)) { /* * If a PMCATTACH has never been done on this PMC, * attach it to its owner process. */ if (LIST_EMPTY(&pm->pm_targets)) error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH : pmc_attach_process(po->po_owner, pm); /* * If the PMC is attached to its owner, then force a context * switch to ensure that the MD state gets set correctly. */ if (error == 0) { pm->pm_state = PMC_STATE_RUNNING; if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) pmc_force_context_switch(); } return (error); } /* * A system-wide PMC. * * Add the owner to the global list if this is a system-wide * sampling PMC. */ if (mode == PMC_MODE_SS) { /* * Log mapping information for all existing processes in the * system. Subsequent mappings are logged as they happen; * see pmc_process_mmap(). */ if (po->po_logprocmaps == 0) { pmc_log_all_process_mappings(po); po->po_logprocmaps = 1; } po->po_sscount++; if (po->po_sscount == 1) { atomic_add_rel_int(&pmc_ss_count, 1); CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); PMCDBG1(PMC,OPS,1, "po=%p in global list", po); } } /* * Move to the CPU associated with this * PMC, and start the hardware. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); if (!pmc_cpu_is_active(cpu)) return (ENXIO); pmc_select_cpu(cpu); /* * global PMCs are configured at allocation time * so write out the initial value and start the PMC. */ pm->pm_state = PMC_STATE_RUNNING; critical_enter(); if ((error = pcd->pcd_write_pmc(cpu, adjri, PMC_IS_SAMPLING_MODE(mode) ? pm->pm_sc.pm_reloadcount : pm->pm_sc.pm_initial)) == 0) { /* If a sampling mode PMC, reset stalled state. */ if (PMC_IS_SAMPLING_MODE(mode)) pm->pm_pcpu_state[cpu].pps_stalled = 0; /* Indicate that we desire this to run. Start it. */ pm->pm_pcpu_state[cpu].pps_cpustate = 1; error = pcd->pcd_start_pmc(cpu, adjri); } critical_exit(); pmc_restore_cpu_binding(&pb); return (error); } /* * Stop a PMC. */ static int pmc_stop(struct pmc *pm) { struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, cpu, error, ri; KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); PMCDBG3(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); pm->pm_state = PMC_STATE_STOPPED; /* * If the PMC is a virtual mode one, changing the state to * non-RUNNING is enough to ensure that the PMC never gets * scheduled. * * If this PMC is currently running on a CPU, then it will be * handled correctly at the time its target process is context * switched out. */ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) return 0; /* * A system-mode PMC. Move to the CPU associated with * this PMC, and stop the hardware. We update the * 'initial count' so that a subsequent PMCSTART will * resume counting from the current hardware count. 
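 *
 * For example, if the hardware counter reads 1000 when PMCSTOP is
 * issued, pm_sc.pm_initial becomes 1000, and a later PMCSTART
 * rewrites 1000 into the counter before restarting it, so counting
 * continues where it left off.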
*/ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); if (!pmc_cpu_is_active(cpu)) return ENXIO; pmc_select_cpu(cpu); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); pm->pm_pcpu_state[cpu].pps_cpustate = 0; critical_enter(); if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0) error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial); critical_exit(); pmc_restore_cpu_binding(&pb); po = pm->pm_owner; /* remove this owner from the global list of SS PMC owners */ if (PMC_TO_MODE(pm) == PMC_MODE_SS) { po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); CK_LIST_REMOVE(po, po_ssnext); epoch_wait_preempt(global_epoch_preempt); PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po); } } return (error); } static struct pmc_classdep * pmc_class_to_classdep(enum pmc_class class) { int n; for (n = 0; n < md->pmd_nclass; n++) if (md->pmd_classdep[n].pcd_class == class) return (&md->pmd_classdep[n]); return (NULL); } #if defined(HWPMC_DEBUG) && defined(KTR) static const char *pmc_op_to_name[] = { #undef __PMC_OP #define __PMC_OP(N, D) #N , __PMC_OPS() NULL }; #endif /* * The syscall interface */ #define PMC_GET_SX_XLOCK(...) do { \ sx_xlock(&pmc_sx); \ if (pmc_hook == NULL) { \ sx_xunlock(&pmc_sx); \ return __VA_ARGS__; \ } \ } while (0) #define PMC_DOWNGRADE_SX() do { \ sx_downgrade(&pmc_sx); \ is_sx_downgraded = 1; \ } while (0) static int pmc_syscall_handler(struct thread *td, void *syscall_args) { int error, is_sx_downgraded, op; struct pmc_syscall_args *c; void *pmclog_proc_handle; void *arg; c = (struct pmc_syscall_args *)syscall_args; op = c->pmop_code; arg = c->pmop_data; /* PMC isn't set up yet */ if (pmc_hook == NULL) return (EINVAL); if (op == PMC_OP_CONFIGURELOG) { /* * We cannot create the logging process inside * pmclog_configure_log() because there is a LOR * between pmc_sx and process structure locks. * Instead, pre-create the process and ignite the loop * if everything is fine, otherwise direct the process * to exit. */ error = pmclog_proc_create(td, &pmclog_proc_handle); if (error != 0) goto done_syscall; } PMC_GET_SX_XLOCK(ENOSYS); is_sx_downgraded = 0; PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, pmc_op_to_name[op], arg); error = 0; counter_u64_add(pmc_stats.pm_syscalls, 1); switch (op) { /* * Configure a log file. * * XXX This OP will be reworked. */ case PMC_OP_CONFIGURELOG: { struct proc *p; struct pmc *pm; struct pmc_owner *po; struct pmc_op_configurelog cl; if ((error = copyin(arg, &cl, sizeof(cl))) != 0) { pmclog_proc_ignite(pmclog_proc_handle, NULL); break; } /* mark this process as owning a log file */ p = td->td_proc; if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = ENOMEM; break; } /* * If a valid fd was passed in, try to configure that, * otherwise if 'fd' was less than zero and there was * a log file configured, flush its buffers and * de-configure it. */ if (cl.pm_logfd >= 0) { error = pmclog_configure_log(md, po, cl.pm_logfd); pmclog_proc_ignite(pmclog_proc_handle, error == 0 ? 
po : NULL); } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = pmclog_close(po); if (error == 0) { LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && pm->pm_state == PMC_STATE_RUNNING) pmc_stop(pm); error = pmclog_deconfigure_log(po); } } else { pmclog_proc_ignite(pmclog_proc_handle, NULL); error = EINVAL; } } break; /* * Flush a log file. */ case PMC_OP_FLUSHLOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_flush(po, 0); } break; /* * Close a log file. */ case PMC_OP_CLOSELOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_close(po); } break; /* * Retrieve hardware configuration. */ case PMC_OP_GETCPUINFO: /* CPU information */ { struct pmc_op_getcpuinfo gci; struct pmc_classinfo *pci; struct pmc_classdep *pcd; int cl; gci.pm_cputype = md->pmd_cputype; gci.pm_ncpu = pmc_cpu_max(); gci.pm_npmc = md->pmd_npmc; gci.pm_nclass = md->pmd_nclass; pci = gci.pm_classes; pcd = md->pmd_classdep; for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) { pci->pm_caps = pcd->pcd_caps; pci->pm_class = pcd->pcd_class; pci->pm_width = pcd->pcd_width; pci->pm_num = pcd->pcd_num; } error = copyout(&gci, arg, sizeof(gci)); } break; /* * Retrieve soft events list. */ case PMC_OP_GETDYNEVENTINFO: { enum pmc_class cl; enum pmc_event ev; struct pmc_op_getdyneventinfo *gei; struct pmc_dyn_event_descr dev; struct pmc_soft *ps; uint32_t nevent; sx_assert(&pmc_sx, SX_LOCKED); gei = (struct pmc_op_getdyneventinfo *) arg; if ((error = copyin(&gei->pm_class, &cl, sizeof(cl))) != 0) break; /* Only SOFT class is dynamic. */ if (cl != PMC_CLASS_SOFT) { error = EINVAL; break; } nevent = 0; for (ev = PMC_EV_SOFT_FIRST; (int)ev <= PMC_EV_SOFT_LAST; ev++) { ps = pmc_soft_ev_acquire(ev); if (ps == NULL) continue; bcopy(&ps->ps_ev, &dev, sizeof(dev)); pmc_soft_ev_release(ps); error = copyout(&dev, &gei->pm_events[nevent], sizeof(struct pmc_dyn_event_descr)); if (error != 0) break; nevent++; } if (error != 0) break; error = copyout(&nevent, &gei->pm_nevent, sizeof(nevent)); } break; /* * Get module statistics */ case PMC_OP_GETDRIVERSTATS: { struct pmc_op_getdriverstats gms; #define CFETCH(a, b, field) a.field = counter_u64_fetch(b.field) CFETCH(gms, pmc_stats, pm_intr_ignored); CFETCH(gms, pmc_stats, pm_intr_processed); CFETCH(gms, pmc_stats, pm_intr_bufferfull); CFETCH(gms, pmc_stats, pm_syscalls); CFETCH(gms, pmc_stats, pm_syscall_errors); CFETCH(gms, pmc_stats, pm_buffer_requests); CFETCH(gms, pmc_stats, pm_buffer_requests_failed); CFETCH(gms, pmc_stats, pm_log_sweeps); #undef CFETCH error = copyout(&gms, arg, sizeof(gms)); } break; /* * Retrieve module version number */ case PMC_OP_GETMODULEVERSION: { uint32_t cv, modv; /* retrieve the client's idea of the ABI version */ if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) break; /* don't service clients newer than our driver */ modv = PMC_VERSION; if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { error = EPROGMISMATCH; break; } error = copyout(&modv, arg, sizeof(int)); } break; /* * Retrieve the state of all the PMCs on a given * CPU. 
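 *
 * Userland normally reaches this operation through the
 * pmc_pmcinfo(3) wrapper in libpmc rather than by building the
 * variable-length request structure by hand.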
*/ case PMC_OP_GETPMCINFO: { int ari; struct pmc *pm; size_t pmcinfo_size; uint32_t cpu, n, npmc; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_info *p, *pmcinfo; struct pmc_op_getpmcinfo *gpi; PMC_DOWNGRADE_SX(); gpi = (struct pmc_op_getpmcinfo *) arg; if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) break; if (cpu >= pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* switch to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); npmc = md->pmd_npmc; pmcinfo_size = npmc * sizeof(struct pmc_info); pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK); p = pmcinfo; for (n = 0; n < md->pmd_npmc; n++, p++) { pcd = pmc_ri_to_classdep(md, n, &ari); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0) break; if (PMC_ROW_DISP_IS_STANDALONE(n)) p->pm_rowdisp = PMC_DISP_STANDALONE; else if (PMC_ROW_DISP_IS_THREAD(n)) p->pm_rowdisp = PMC_DISP_THREAD; else p->pm_rowdisp = PMC_DISP_FREE; p->pm_ownerpid = -1; if (pm == NULL) /* no PMC associated */ continue; po = pm->pm_owner; KASSERT(po->po_owner != NULL, ("[pmc,%d] pmc_owner had a null proc pointer", __LINE__)); p->pm_ownerpid = po->po_owner->p_pid; p->pm_mode = PMC_TO_MODE(pm); p->pm_event = pm->pm_event; p->pm_flags = pm->pm_flags; if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) p->pm_reloadcount = pm->pm_sc.pm_reloadcount; } pmc_restore_cpu_binding(&pb); /* now copy out the PMC info collected */ if (error == 0) error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); free(pmcinfo, M_PMC); } break; /* * Set the administrative state of a PMC. I.e. whether * the PMC is to be used or not. */ case PMC_OP_PMCADMIN: { int cpu, ri; enum pmc_state request; struct pmc_cpu *pc; struct pmc_hw *phw; struct pmc_op_pmcadmin pma; struct pmc_binding pb; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); error = priv_check(td, PRIV_PMC_MANAGE); if (error) break; if ((error = copyin(arg, &pma, sizeof(pma))) != 0) break; cpu = pma.pm_cpu; if (cpu < 0 || cpu >= (int) pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } request = pma.pm_state; if (request != PMC_STATE_DISABLED && request != PMC_STATE_FREE) { error = EINVAL; break; } ri = pma.pm_pmc; /* pmc id == row index */ if (ri < 0 || ri >= (int) md->pmd_npmc) { error = EINVAL; break; } /* * We can't disable a PMC with a row-index allocated * for process virtual PMCs. */ if (PMC_ROW_DISP_IS_THREAD(ri) && request == PMC_STATE_DISABLED) { error = EBUSY; break; } /* * otherwise, this PMC on this CPU is either free or * in system-wide mode. */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); pc = pmc_pcpu[cpu]; phw = pc->pc_hwpmcs[ri]; /* * XXX do we need some kind of 'forced' disable? */ if (phw->phw_pmc == NULL) { if (request == PMC_STATE_DISABLED && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; PMC_MARK_ROW_STANDALONE(ri); } else if (request == PMC_STATE_FREE && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; PMC_UNMARK_ROW_STANDALONE(ri); } /* other cases are a no-op */ } else error = EBUSY; pmc_restore_cpu_binding(&pb); } break; /* * Allocate a PMC. 
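 *
 * The mode/cpu checks below enforce, for example, that a process
 * virtual (TC/TS) allocation passes cpu == PMC_CPU_ANY, while a
 * system-wide (SC/SS) allocation must name a specific, active CPU.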
*/ case PMC_OP_PMCALLOCATE: { int adjri, n; u_int cpu; uint32_t caps; struct pmc *pmc; enum pmc_mode mode; struct pmc_hw *phw; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_op_pmcallocate pa; if ((error = copyin(arg, &pa, sizeof(pa))) != 0) break; caps = pa.pm_caps; mode = pa.pm_mode; cpu = pa.pm_cpu; if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && mode != PMC_MODE_TS && mode != PMC_MODE_TC) || (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) { error = EINVAL; break; } /* * Virtual PMCs should only ask for a default CPU. * System mode PMCs need to specify a non-default CPU. */ if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { error = EINVAL; break; } /* * Check that an inactive CPU is not being asked for. */ if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* * Refuse an allocation for a system-wide PMC if this * process has been jailed, or if this process lacks * super-user credentials and the sysctl tunable * 'security.bsd.unprivileged_syspmcs' is zero. */ if (PMC_IS_SYSTEM_MODE(mode)) { if (jailed(curthread->td_ucred)) { error = EPERM; break; } if (!pmc_unprivileged_syspmcs) { error = priv_check(curthread, PRIV_PMC_SYSTEM); if (error) break; } } /* * Look for valid values for 'pm_flags' */ if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) != 0) { error = EINVAL; break; } /* PMC_F_USERCALLCHAIN is only valid with PMC_F_CALLCHAIN */ if ((pa.pm_flags & (PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) == PMC_F_USERCALLCHAIN) { error = EINVAL; break; } /* PMC_F_USERCALLCHAIN is only valid for sampling mode */ if (pa.pm_flags & PMC_F_USERCALLCHAIN && mode != PMC_MODE_TS && mode != PMC_MODE_SS) { error = EINVAL; break; } /* process logging options are not allowed for system PMCs */ if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags & (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) { error = EINVAL; break; } /* * All sampling mode PMCs need to be able to interrupt the * CPU. */ if (PMC_IS_SAMPLING_MODE(mode)) caps |= PMC_CAP_INTERRUPT; /* A valid class specifier should have been passed in. */ pcd = pmc_class_to_classdep(pa.pm_class); if (pcd == NULL) { error = EINVAL; break; } /* The requested PMC capabilities should be feasible. 
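 *
 * For example, a request for PMC_CAP_INTERRUPT (implied by any
 * sampling mode, see above) fails with EOPNOTSUPP if the selected
 * class cannot interrupt the CPU.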
*/ if ((pcd->pcd_caps & caps) != caps) { error = EOPNOTSUPP; break; } PMCDBG4(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", pa.pm_ev, caps, mode, cpu); pmc = pmc_allocate_pmc_descriptor(); pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, PMC_ID_INVALID); pmc->pm_event = pa.pm_ev; pmc->pm_state = PMC_STATE_FREE; pmc->pm_caps = caps; pmc->pm_flags = pa.pm_flags; /* XXX set lower bound on sampling for process counters */ if (PMC_IS_SAMPLING_MODE(mode)) pmc->pm_sc.pm_reloadcount = pa.pm_count; else pmc->pm_sc.pm_initial = pa.pm_count; /* switch thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); #define PMC_IS_SHAREABLE_PMC(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ PMC_PHW_FLAG_IS_SHAREABLE) #define PMC_IS_UNALLOCATED(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) if (PMC_IS_SYSTEM_MODE(mode)) { pmc_select_cpu(cpu); for (n = pcd->pcd_ri; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, cpu) == 0 && (PMC_IS_UNALLOCATED(cpu, n) || PMC_IS_SHAREABLE_PMC(cpu, n)) && pcd->pcd_allocate_pmc(cpu, adjri, pmc, &pa) == 0) break; } } else { /* Process virtual mode */ for (n = pcd->pcd_ri; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, PMC_CPU_ANY) == 0 && pcd->pcd_allocate_pmc(curthread->td_oncpu, adjri, pmc, &pa) == 0) break; } } #undef PMC_IS_UNALLOCATED #undef PMC_IS_SHAREABLE_PMC pmc_restore_cpu_binding(&pb); if (n == (int) md->pmd_npmc) { pmc_destroy_pmc_descriptor(pmc); pmc = NULL; error = EINVAL; break; } /* Fill in the correct value in the ID field */ pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); PMCDBG5(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); /* Process mode PMCs with logging enabled need log files */ if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* All system mode sampling PMCs require a log file */ if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* * Configure global pmc's immediately */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; pcd = pmc_ri_to_classdep(md, n, &adjri); if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) { (void) pcd->pcd_release_pmc(cpu, adjri, pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; pmc_restore_cpu_binding(&pb); error = EPERM; break; } pmc_restore_cpu_binding(&pb); } pmc->pm_state = PMC_STATE_ALLOCATED; pmc->pm_class = pa.pm_class; /* * mark row disposition */ if (PMC_IS_SYSTEM_MODE(mode)) PMC_MARK_ROW_STANDALONE(n); else PMC_MARK_ROW_THREAD(n); /* * Register this PMC with the current thread as its owner. */ if ((error = pmc_register_owner(curthread->td_proc, pmc)) != 0) { pmc_release_pmc_descriptor(pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; break; } /* * Return the allocated index. */ pa.pm_pmcid = pmc->pm_id; error = copyout(&pa, arg, sizeof(pa)); } break; /* * Attach a PMC to a process. 
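 *
 * A sketch of the userland side, assuming the pmc(3) wrappers: after
 * a successful allocation in a process-virtual mode, the owner may
 * call pmc_attach(id, pid) to target another process it is able to
 * debug, or pass a pid of 0 to attach the PMC to itself.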
*/ case PMC_OP_PMCATTACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { error = EINVAL; break; } /* PMCs may be (re)attached only when allocated or stopped */ if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } else if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED) { error = EINVAL; break; } /* lookup pid */ if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Ignore processes that are working on exiting. */ if (p->p_flag & P_WEXIT) { error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ break; } /* * we are allowed to attach a PMC to a process if * we can debug it. */ error = p_candebug(curthread, p); PROC_UNLOCK(p); if (error == 0) error = pmc_attach_process(p, pm); } break; /* * Detach an attached PMC from a process. */ case PMC_OP_PMCDETACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Treat processes that are in the process of exiting * as if they were not present. */ if (p->p_flag & P_WEXIT) error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ if (error == 0) error = pmc_detach_process(p, pm); } break; /* * Retrieve the MSR number associated with the counter * 'pmc_id'. This allows processes to directly use RDPMC * instructions to read their PMCs, without the overhead of a * system call. */ case PMC_OP_PMCGETMSR: { int adjri, ri; struct pmc *pm; struct pmc_target *pt; struct pmc_op_getmsr gm; struct pmc_classdep *pcd; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &gm, sizeof(gm))) != 0) break; if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) break; /* * The allocated PMC has to be a process virtual PMC, * i.e., of type MODE_T[CS]. Global PMCs can only be * read using the PMCREAD operation since they may be * allocated on a different CPU than the one we could * be running on at the time of the RDPMC instruction. * * The GETMSR operation is not allowed for PMCs that * are inherited across processes. */ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || (pm->pm_flags & PMC_F_DESCENDANTS)) { error = EINVAL; break; } /* * It only makes sense to use a RDPMC (or its * equivalent instruction on non-x86 architectures) on * a process that has allocated and attached a PMC to * itself. Conversely the PMC is only allowed to have * one process attached to it -- its owner. */ if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || LIST_NEXT(pt, pt_next) != NULL || pt->pt_process->pp_proc != pm->pm_owner->po_owner) { error = EINVAL; break; } ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); /* PMC class has no 'GETMSR' support */ if (pcd->pcd_get_msr == NULL) { error = ENOSYS; break; } if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0) break; if ((error = copyout(&gm, arg, sizeof(gm))) < 0) break; /* * Mark our process as using MSRs. Update machine * state using a forced context switch. 
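 *
 * Once this completes, the owner can read the counter directly; an
 * illustrative x86-only sketch using the counter number returned in
 * gm.pm_msr (variable names here are hypothetical):
 *
 *	uint32_t lo, hi;
 *	__asm __volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (msr));
 *	value = ((uint64_t)hi << 32) | lo;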
*/ pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; pmc_force_context_switch(); } break; /* * Release an allocated PMC */ case PMC_OP_PMCRELEASE: { pmc_id_t pmcid; struct pmc *pm; struct pmc_owner *po; struct pmc_op_simple sp; /* * Find PMC pointer for the named PMC. * * Use pmc_release_pmc_descriptor() to switch off the * PMC, remove all its target threads, and remove the * PMC from its owner's list. * * Remove the owner record if this is the last PMC * owned. * * Free up space. */ if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; po = pm->pm_owner; pmc_release_pmc_descriptor(pm); pmc_maybe_remove_owner(po); pmc_destroy_pmc_descriptor(pm); } break; /* * Read and/or write a PMC. */ case PMC_OP_PMCRW: { int adjri; struct pmc *pm; uint32_t cpu, ri; pmc_value_t oldvalue; struct pmc_binding pb; struct pmc_op_pmcrw prw; struct pmc_classdep *pcd; struct pmc_op_pmcrw *pprw; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &prw, sizeof(prw))) != 0) break; ri = 0; PMCDBG2(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid, prw.pm_flags); /* must have at least one flag set */ if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) { error = EINVAL; break; } /* locate pmc descriptor */ if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0) break; /* Can't read a PMC that hasn't been started. */ if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } /* writing a new value is allowed only for 'STOPPED' pmcs */ if (pm->pm_state == PMC_STATE_RUNNING && (prw.pm_flags & PMC_F_NEWVALUE)) { error = EBUSY; break; } if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { /* * If this PMC is attached to its owner (i.e., * the process requesting this operation) and * is running, then attempt to get an * up-to-date reading from hardware for a READ. * Writes are only allowed when the PMC is * stopped, so only update the saved value * field. * * If the PMC is not running, or is not * attached to its owner, read/write to the * savedvalue field. 
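 *
 * Concretely: a self-attached, running PMC answers a READ from the
 * hardware via pcd_read_pmc(); in every other case the READ returns,
 * and a WRITE updates, pm_gv.pm_savedvalue under the pool mutex.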
*/ ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mtx_pool_lock_spin(pmc_mtxpool, pm); cpu = curthread->td_oncpu; if (prw.pm_flags & PMC_F_OLDVALUE) { if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && (pm->pm_state == PMC_STATE_RUNNING)) error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue); else oldvalue = pm->pm_gv.pm_savedvalue; } if (prw.pm_flags & PMC_F_NEWVALUE) pm->pm_gv.pm_savedvalue = prw.pm_value; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { /* System mode PMCs */ cpu = PMC_TO_CPU(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* move this thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); critical_enter(); /* save old value */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue))) goto error; /* write out new value */ if (prw.pm_flags & PMC_F_NEWVALUE) error = (*pcd->pcd_write_pmc)(cpu, adjri, prw.pm_value); error: critical_exit(); pmc_restore_cpu_binding(&pb); if (error) break; } pprw = (struct pmc_op_pmcrw *) arg; #ifdef HWPMC_DEBUG if (prw.pm_flags & PMC_F_NEWVALUE) PMCDBG3(PMC,OPS,2, "rw id=%d new %jx -> old %jx", ri, prw.pm_value, oldvalue); else if (prw.pm_flags & PMC_F_OLDVALUE) PMCDBG2(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue); #endif /* return old value if requested */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = copyout(&oldvalue, &pprw->pm_value, sizeof(prw.pm_value)))) break; } break; /* * Set the sampling rate for a sampling mode PMC and the * initial count for a counting mode PMC. */ case PMC_OP_PMCSETCOUNT: { struct pmc *pm; struct pmc_op_pmcsetcount sc; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sc, sizeof(sc))) != 0) break; if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) break; if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pm->pm_sc.pm_reloadcount = sc.pm_count; else pm->pm_sc.pm_initial = sc.pm_count; } break; /* * Start a PMC. */ case PMC_OP_PMCSTART: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmcid %x != id %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ break; else if (pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_ALLOCATED) { error = EINVAL; break; } error = pmc_start(pm); } break; /* * Stop a PMC. */ case PMC_OP_PMCSTOP: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; /* * Mark the PMC as inactive and invoke the MD stop * routines if needed. */ if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ break; else if (pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } error = pmc_stop(pm); } break; /* * Write a user supplied value to the log file. 
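 *
 * This backs the pmc_writelog(3) library call; the value supplied by
 * the owner is appended to the configured log as a user-data record.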
*/ case PMC_OP_WRITELOG: { struct pmc_op_writelog wl; struct pmc_owner *po; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &wl, sizeof(wl))) != 0) break; if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; break; } error = pmclog_process_userlog(po, &wl); } break; default: error = EINVAL; break; } if (is_sx_downgraded) sx_sunlock(&pmc_sx); else sx_xunlock(&pmc_sx); done_syscall: if (error) counter_u64_add(pmc_stats.pm_syscall_errors, 1); return (error); } /* * Helper functions */ /* * Mark the thread as needing callchain capture and post an AST. The * actual callchain capture will be done in a context where it is safe * to take page faults. */ static void pmc_post_callchain_callback(void) { struct thread *td; td = curthread; /* * If there are multiple PMCs for the same interrupt, ignore the new post */ if (td->td_pflags & TDP_CALLCHAIN) return; /* * Mark this thread as needing callchain capture. * `td->td_pflags' will be safe to touch because this thread * was in user space when it was interrupted. */ td->td_pflags |= TDP_CALLCHAIN; /* * Don't let this thread migrate between CPUs until callchain * capture completes. */ sched_pin(); return; } /* * Find a free slot in the per-cpu array of samples and capture the * current callchain there. If a sample was successfully added, a bit * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook * needs to be invoked from the clock handler. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ static int pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf) { int error, cpu, callchaindepth, inuserspace; struct thread *td; struct pmc_sample *ps; struct pmc_samplebuffer *psb; error = 0; /* * Find the current write slot in this CPU's sample buffer. */ cpu = curcpu; psb = pmc_pcpu[cpu]->pc_sb[ring]; inuserspace = TRAPF_USERMODE(tf); ps = psb->ps_write; if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { counter_u64_add(ps->ps_pmc->pm_runcount, -1); counter_u64_add(pmc_stats.pm_overwrites, 1); ps->ps_nsamples = 0; } else if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ pm->pm_pcpu_state[cpu].pps_stalled = 1; counter_u64_add(pmc_stats.pm_intr_bufferfull, 1); PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); callchaindepth = 1; error = ENOMEM; goto done; } /* Fill in entry. */ PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0, ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm, (unsigned long)counter_u64_fetch(pm->pm_runcount))); counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */ td = curthread; ps->ps_pmc = pm; ps->ps_td = td; ps->ps_pid = td->td_proc->p_pid; ps->ps_tid = td->td_tid; ps->ps_tsc = pmc_rdtsc(); ps->ps_cpu = cpu; ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? pmc_callchaindepth : 1; if (callchaindepth == 1) ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); else { /* * Kernel stack traversals can be done immediately, * while we defer to an AST for user space traversals. 
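 *
 * Deferral is needed because walking the user stack may fault on
 * paged-out stack pages, which is unsafe in NMI context; the AST
 * posted by pmc_post_callchain_callback() re-enters via
 * pmc_capture_user_callchain() once it is safe to take page faults.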
*/ if (!inuserspace) { callchaindepth = pmc_save_kernel_callchain(ps->ps_pc, callchaindepth, tf); } else { pmc_post_callchain_callback(); callchaindepth = PMC_SAMPLE_INUSE; } } ps->ps_nsamples = callchaindepth; /* mark entry as in use */ if (ring == PMC_UR) { ps->ps_nsamples_actual = callchaindepth; /* mark entry as in use */ ps->ps_nsamples = PMC_SAMPLE_INUSE; } else ps->ps_nsamples = callchaindepth; /* mark entry as in use */ /* increment write pointer, modulo ring buffer size */ ps++; if (ps == psb->ps_fence) psb->ps_write = psb->ps_samples; else psb->ps_write = ps; done: /* mark CPU as needing processing */ if (callchaindepth != PMC_SAMPLE_INUSE) DPCPU_SET(pmc_sampled, 1); return (error); } /* * Interrupt processing. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ int pmc_process_interrupt(int ring, struct pmc *pm, struct trapframe *tf) { struct thread *td; td = curthread; if ((pm->pm_flags & PMC_F_USERCALLCHAIN) && - (td->td_proc->p_flag & P_KPROC) == 0 && - !TRAPF_USERMODE(tf)) { - atomic_add_int(&curthread->td_pmcpend, 1); + (td->td_proc->p_flag & P_KPROC) == 0 && + !TRAPF_USERMODE(tf)) { + atomic_add_int(&td->td_pmcpend, 1); return (pmc_add_sample(PMC_UR, pm, tf)); } return (pmc_add_sample(ring, pm, tf)); } /* * Capture a user call chain. This function will be called from ast() * before control returns to userland and before the process gets * rescheduled. */ static void pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) { struct pmc *pm; struct thread *td; struct pmc_sample *ps, *ps_end; struct pmc_samplebuffer *psb; int nsamples, nrecords, pass; #ifdef INVARIANTS int ncallchains; int nfree; #endif psb = pmc_pcpu[cpu]->pc_sb[ring]; td = curthread; KASSERT(td->td_pflags & TDP_CALLCHAIN, ("[pmc,%d] Retrieving callchain for thread that doesn't want it", __LINE__)); #ifdef INVARIANTS ncallchains = 0; nfree = 0; #endif nrecords = INT_MAX; pass = 0; restart: if (ring == PMC_UR) nrecords = atomic_readandclear_32(&td->td_pmcpend); /* * Iterate through all deferred callchain requests. * Walk from the current read pointer to the current * write pointer. */ ps = psb->ps_read; ps_end = psb->ps_write; do { #ifdef INVARIANTS if (ps->ps_nsamples == PMC_SAMPLE_FREE) { nfree++; goto next; } if ((ps->ps_pmc == NULL) || (ps->ps_pmc->pm_state != PMC_STATE_RUNNING)) nfree++; #endif if (ps->ps_nsamples != PMC_SAMPLE_INUSE) goto next; if (ps->ps_td != td) goto next; KASSERT(ps->ps_cpu == cpu, ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__, ps->ps_cpu, PCPU_GET(cpuid))); pm = ps->ps_pmc; KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, ("[pmc,%d] Retrieving callchain for PMC that doesn't " "want it", __LINE__)); KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] runcount %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount))); if (ring == PMC_UR) { nsamples = ps->ps_nsamples_actual; counter_u64_add(pmc_stats.pm_merges, 1); } else nsamples = 0; /* * Retrieve the callchain and mark the sample buffer * as 'processable' by the timer tick sweep code. 
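 *
 * The wmb() below orders the writes: the captured ps_pc[] entries
 * must be globally visible before ps_nsamples stops reading as
 * PMC_SAMPLE_INUSE, since pmc_process_samples() keys off ps_nsamples
 * without taking a lock.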
*/ #ifdef INVARIANTS ncallchains++; #endif if (__predict_true(nsamples < pmc_callchaindepth - 1)) nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples, pmc_callchaindepth - nsamples - 1, tf); wmb(); ps->ps_nsamples = nsamples; if (nrecords-- == 1) break; next: /* increment the pointer, modulo sample ring size */ if (++ps == psb->ps_fence) ps = psb->ps_samples; } while (ps != ps_end); if (__predict_false(ring == PMC_UR && td->td_pmcpend)) { if (pass == 0) { pass = 1; goto restart; } /* only collect samples for this part once */ td->td_pmcpend = 0; } #ifdef INVARIANTS if (ring == PMC_HR) KASSERT(ncallchains > 0 || nfree > 0, ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__, cpu)); #endif /* mark CPU as needing processing */ DPCPU_SET(pmc_sampled, 1); } static void pmc_flush_ring(int cpu, int ring) { struct pmc *pm; struct pmc_sample *ps; struct pmc_samplebuffer *psb; int n; psb = pmc_pcpu[cpu]->pc_sb[ring]; for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ ps = psb->ps_read; if (ps->ps_nsamples == PMC_SAMPLE_FREE) goto next; pm = ps->ps_pmc; counter_u64_add(pm->pm_runcount, -1); ps->ps_nsamples = PMC_SAMPLE_FREE; /* increment read pointer, modulo sample size */ next: if (++ps == psb->ps_fence) psb->ps_read = psb->ps_samples; else psb->ps_read = ps; } } void pmc_flush_samples(int cpu) { int n; for (n = 0; n < PMC_NUM_SR; n++) pmc_flush_ring(cpu, n); } /* * Process saved PC samples. */ static void pmc_process_samples(int cpu, int ring) { struct pmc *pm; int adjri, n; struct thread *td; struct pmc_owner *po; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *psb; KASSERT(PCPU_GET(cpuid) == cpu, ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, PCPU_GET(cpuid), cpu)); psb = pmc_pcpu[cpu]->pc_sb[ring]; for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ ps = psb->ps_read; if (ps->ps_nsamples == PMC_SAMPLE_FREE) break; pm = ps->ps_pmc; KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm, (unsigned long)counter_u64_fetch(pm->pm_runcount))); po = pm->pm_owner; KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, pm, PMC_TO_MODE(pm))); /* Ignore PMCs that have been switched off */ if (pm->pm_state != PMC_STATE_RUNNING) goto entrydone; /* If there is a pending AST wait for completion */ if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. */ DPCPU_SET(pmc_sampled, 1); break; } PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, pm, ps->ps_nsamples, ps->ps_flags, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); /* * If this is a process-mode PMC that is attached to * its owner, and if the PC is in user mode, update * profiling statistics like timer-based profiling * would have done. * * Otherwise, this is either a sampling-mode PMC that * is attached to a different process than its owner, * or a system-wide sampling PMC. Dispatch a log * entry to the PMC's owner process. 
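 *
 * In the attached-to-owner case, the addupc_intr(td, ps->ps_pc[0], 1)
 * call below credits one tick to the profiling buffer bucket covering
 * that PC, much as a statclock-driven profil(2) sample would (a
 * sketch of the intent, not an extra step).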
 */
		if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
			if (ps->ps_flags & PMC_CC_F_USERSPACE) {
				td = FIRST_THREAD_IN_PROC(po->po_owner);
				addupc_intr(td, ps->ps_pc[0], 1);
			}
		} else
			pmclog_process_callchain(pm, ps);

	entrydone:
		ps->ps_nsamples = 0;	/* mark entry as free */
		counter_u64_add(pm->pm_runcount, -1);

		/* increment read pointer, modulo sample size */
		if (++ps == psb->ps_fence)
			psb->ps_read = psb->ps_samples;
		else
			psb->ps_read = ps;
	}

	counter_u64_add(pmc_stats.pm_log_sweeps, 1);

	/* Do not re-enable stalled PMCs if we failed to process any samples */
	if (n == 0)
		return;

	/*
	 * Restart any stalled sampling PMCs on this CPU.
	 *
	 * If the NMI handler sets the pm_stalled field of a PMC after
	 * the check below, we'll end up processing the stalled PMC at
	 * the next hardclock tick.
	 */
	for (n = 0; n < md->pmd_npmc; n++) {
		pcd = pmc_ri_to_classdep(md, n, &adjri);
		KASSERT(pcd != NULL,
		    ("[pmc,%d] null pcd ri=%d", __LINE__, n));
		(void) (*pcd->pcd_get_config)(cpu,adjri,&pm);

		if (pm == NULL ||			 /* !cfg'ed */
		    pm->pm_state != PMC_STATE_RUNNING || /* !active */
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */
		    !pm->pm_pcpu_state[cpu].pps_cpustate || /* !desired */
		    !pm->pm_pcpu_state[cpu].pps_stalled) /* !stalled */
			continue;

		pm->pm_pcpu_state[cpu].pps_stalled = 0;
		(*pcd->pcd_start_pmc)(cpu, adjri);
	}
}

/*
 * Event handlers.
 */

/*
 * Handle a process exit.
 *
 * Remove this process from all hash tables. If this process
 * owned any PMCs, turn off those PMCs and deallocate them,
 * removing any associations with target processes.
 *
 * This function will be called by the last 'thread' of a
 * process.
 *
 * XXX This eventhandler gets called early in the exit process.
 * Consider using a 'hook' invocation from thread_exit() or equivalent
 * spot. Another negative is that kse_exit doesn't seem to call
 * exit1() [??].
 *
 */
static void
pmc_process_exit(void *arg __unused, struct proc *p)
{
	struct pmc *pm;
	int adjri, cpu;
	unsigned int ri;
	int is_using_hwpmcs;
	struct pmc_owner *po;
	struct pmc_process *pp;
	struct pmc_classdep *pcd;
	pmc_value_t newvalue, tmp;

	PROC_LOCK(p);
	is_using_hwpmcs = p->p_flag & P_HWPMC;
	PROC_UNLOCK(p);

	/*
	 * Log a sysexit event to all SS PMC owners.
	 */
	epoch_enter_preempt(global_epoch_preempt);
	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
		    pmclog_process_sysexit(po, p->p_pid);
	epoch_exit_preempt(global_epoch_preempt);

	if (!is_using_hwpmcs)
		return;

	PMC_GET_SX_XLOCK();
	PMCDBG3(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid,
	    p->p_comm);

	/*
	 * Since this code is invoked by the last thread in an exiting
	 * process, we would have context switched IN at some prior
	 * point. However, with PREEMPTION, kernel mode context
	 * switches may happen any time, so we want to disable a
	 * context switch OUT till we get any PMCs targeting this
	 * process off the hardware.
	 *
	 * We also need to atomically remove this process'
	 * entry from our target process hash table, using
	 * PMC_FLAG_REMOVE.
	 */
	PMCDBG3(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid,
	    p->p_comm);

	critical_enter(); /* no preemption */

	cpu = curthread->td_oncpu;

	if ((pp = pmc_find_process_descriptor(p,
		 PMC_FLAG_REMOVE)) != NULL) {

		PMCDBG2(PRC,EXT,2,
		    "process-exit proc=%p pmc-process=%p", p, pp);

		/*
		 * The exiting process could be the target of some
		 * PMCs which will be running on the currently
		 * executing CPU.
		 *
		 * We need to turn these PMCs off like we would do at
		 * context switch OUT time.
		 */
		for (ri = 0; ri < md->pmd_npmc; ri++) {

			/*
			 * Pick up the pmc pointer from hardware
			 * state similar to the CSW_OUT code.
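			 *
			 * For process-virtual counting PMCs, the per-CPU
			 * delta folded in below is (using this function's
			 * own names; illustrative)
			 *
			 *	tmp = newvalue - PMC_PCPU_SAVED(cpu, ri);
			 *
			 * i.e. the count accumulated since this process was
			 * switched in on this CPU; it is added to both the
			 * PMC-wide saved value and the per-target pp_pmcval
			 * under a pooled spin mutex.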
*/ pm = NULL; pcd = pmc_ri_to_classdep(md, ri, &adjri); (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); PMCDBG2(PRC,EXT,2, "ri=%d pm=%p", ri, pm); if (pm == NULL || !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) continue; PMCDBG4(PRC,EXT,2, "ppmcs[%d]=%p pm=%p " "state=%d", ri, pp->pp_pmcs[ri].pp_pmc, pm, pm->pm_state); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] bad runcount ri %d rc %ld", __LINE__, ri, (unsigned long)counter_u64_fetch(pm->pm_runcount))); /* * Change desired state, and then stop if not * stalled. This two-step dance should avoid * race conditions where an interrupt re-enables * the PMC after this code has already checked * the pm_stalled flag. */ if (pm->pm_pcpu_state[cpu].pps_cpustate) { pm->pm_pcpu_state[cpu].pps_cpustate = 0; if (!pm->pm_pcpu_state[cpu].pps_stalled) { (void) pcd->pcd_stop_pmc(cpu, adjri); if (PMC_TO_MODE(pm) == PMC_MODE_TC) { pcd->pcd_read_pmc(cpu, adjri, &newvalue); tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin( pmc_mtxpool, pm); } } } counter_u64_add(pm->pm_runcount, -1); KASSERT((int) counter_u64_fetch(pm->pm_runcount) >= 0, ("[pmc,%d] runcount is %d", __LINE__, ri)); (void) pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * Inform the MD layer of this pseudo "context switch * out" */ (void) md->pmd_switch_out(pmc_pcpu[cpu], pp); critical_exit(); /* ok to be pre-empted now */ /* * Unlink this process from the PMCs that are * targeting it. This will send a signal to * all PMC owner's whose PMCs are orphaned. * * Log PMC value at exit time if requested. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm))) pmclog_process_procexit(pm, pp); pmc_unlink_target_process(pm, pp); } free(pp, M_PMC); } else critical_exit(); /* pp == NULL */ /* * If the process owned PMCs, free them up and free up * memory. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } sx_xunlock(&pmc_sx); } /* * Handle a process fork. * * If the parent process 'p1' is under HWPMC monitoring, then copy * over any attached PMCs that have 'do_descendants' semantics. */ static void pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc, int flags) { int is_using_hwpmcs; unsigned int ri; uint32_t do_descendants; struct pmc *pm; struct pmc_owner *po; struct pmc_process *ppnew, *ppold; (void) flags; /* unused parameter */ PROC_LOCK(p1); is_using_hwpmcs = p1->p_flag & P_HWPMC; PROC_UNLOCK(p1); /* * If there are system-wide sampling PMCs active, we need to * log all fork events to their owner's logs. 
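 *
 * Further down, inheritance hinges on a single flag test (sketch,
 * using the names from this function):
 *
 *	if (pm->pm_flags & PMC_F_DESCENDANTS)
 *		pmc_link_target_process(pm, ppnew);
 *
 * so only PMCs allocated with "track descendants" semantics follow
 * the child; all other PMCs stay with the parent alone.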
*/ epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); pmclog_process_proccreate(po, newproc, 1); } epoch_exit_preempt(global_epoch_preempt); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG4(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, p1->p_pid, p1->p_comm, newproc); /* * If the parent process (curthread->td_proc) is a * target of any PMCs, look for PMCs that are to be * inherited, and link these into the new process * descriptor. */ if ((ppold = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) goto done; /* nothing to do */ do_descendants = 0; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL) do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS; if (do_descendants == 0) /* nothing to do */ goto done; /* * Now mark the new process as being tracked by this driver. */ PROC_LOCK(newproc); newproc->p_flag |= P_HWPMC; PROC_UNLOCK(newproc); /* allocate a descriptor for the new process */ if ((ppnew = pmc_find_process_descriptor(newproc, PMC_FLAG_ALLOCATE)) == NULL) goto done; /* * Run through all PMCs that were targeting the old process * and which specified F_DESCENDANTS and attach them to the * new process. * * Log the fork event to all owners of PMCs attached to this * process, if not already logged. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && (pm->pm_flags & PMC_F_DESCENDANTS)) { pmc_link_target_process(pm, ppnew); po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); } done: sx_xunlock(&pmc_sx); } static void pmc_process_threadcreate(struct thread *td) { struct pmc_owner *po; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_threadcreate(po, td, 1); epoch_exit_preempt(global_epoch_preempt); } static void pmc_process_threadexit(struct thread *td) { struct pmc_owner *po; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_threadexit(po, td); epoch_exit_preempt(global_epoch_preempt); } static void pmc_process_proccreate(struct proc *p) { struct pmc_owner *po; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_proccreate(po, p, 1 /* sync */); epoch_exit_preempt(global_epoch_preempt); } static void pmc_process_allproc(struct pmc *pm) { struct pmc_owner *po; struct thread *td; struct proc *p; po = pm->pm_owner; if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { pmclog_process_proccreate(po, p, 0 /* sync */); PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td) pmclog_process_threadcreate(po, td, 0 /* sync */); PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); pmclog_flush(po, 0); } static void pmc_kld_load(void *arg __unused, linker_file_t lf) { struct pmc_owner *po; /* * Notify owners of system sampling PMCs about KLD operations. */ epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, (pid_t) -1, (uintfptr_t) lf->address, lf->filename); epoch_exit_preempt(global_epoch_preempt); /* * TODO: Notify owners of (all) process-sampling PMCs too. 
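 *
 * The map-in record above uses pid -1 to mean the kernel address
 * space; pmc_kld_unload() below logs the matching map-out, so a
 * post-processor can pair them up roughly as (sketch)
 *
 *	map_in (pid = -1, base = lf->address, path = lf->filename)
 *	map_out(pid = -1, start = address, end = address + size)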
*/ } static void pmc_kld_unload(void *arg __unused, const char *filename __unused, caddr_t address, size_t size) { struct pmc_owner *po; epoch_enter_preempt(global_epoch_preempt); CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, (pid_t) -1, (uintfptr_t) address, (uintfptr_t) address + size); epoch_exit_preempt(global_epoch_preempt); /* * TODO: Notify owners of process-sampling PMCs. */ } /* * initialization */ static const char * pmc_name_of_pmcclass(enum pmc_class class) { switch (class) { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) \ case PMC_CLASS_##S: \ return #S; __PMC_CLASSES(); default: return (""); } } /* * Base class initializer: allocate structure and set default classes. */ struct pmc_mdep * pmc_mdep_alloc(int nclasses) { struct pmc_mdep *md; int n; /* SOFT + md classes */ n = 1 + nclasses; md = malloc(sizeof(struct pmc_mdep) + n * sizeof(struct pmc_classdep), M_PMC, M_WAITOK|M_ZERO); md->pmd_nclass = n; /* Add base class. */ pmc_soft_initialize(md); return md; } void pmc_mdep_free(struct pmc_mdep *md) { pmc_soft_finalize(md); free(md, M_PMC); } static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static struct pmc_mdep * pmc_generic_cpu_initialize(void) { struct pmc_mdep *md; md = pmc_mdep_alloc(0); md->pmd_cputype = PMC_CPU_GENERIC; md->pmd_pcpu_init = NULL; md->pmd_pcpu_fini = NULL; md->pmd_switch_in = generic_switch_in; md->pmd_switch_out = generic_switch_out; return (md); } static void pmc_generic_cpu_finalize(struct pmc_mdep *md) { (void) md; } static int pmc_initialize(void) { int c, cpu, error, n, ri; unsigned int maxcpu, domain; struct pcpu *pc; struct pmc_binding pb; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *sb; md = NULL; error = 0; pmc_stats.pm_intr_ignored = counter_u64_alloc(M_WAITOK); pmc_stats.pm_intr_processed = counter_u64_alloc(M_WAITOK); pmc_stats.pm_intr_bufferfull = counter_u64_alloc(M_WAITOK); pmc_stats.pm_syscalls = counter_u64_alloc(M_WAITOK); pmc_stats.pm_syscall_errors = counter_u64_alloc(M_WAITOK); pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK); pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK); pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK); pmc_stats.pm_merges = counter_u64_alloc(M_WAITOK); pmc_stats.pm_overwrites = counter_u64_alloc(M_WAITOK); #ifdef HWPMC_DEBUG /* parse debug flags first */ if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr))) pmc_debugflags_parse(pmc_debugstr, pmc_debugstr+strlen(pmc_debugstr)); #endif PMCDBG1(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); /* check kernel version */ if (pmc_kernel_version != PMC_VERSION) { if (pmc_kernel_version == 0) printf("hwpmc: this kernel has not been compiled with " "'options HWPMC_HOOKS'.\n"); else printf("hwpmc: kernel version (0x%x) does not match " "module version (0x%x).\n", pmc_kernel_version, PMC_VERSION); return EPROGMISMATCH; } /* * check sysctl parameters */ if (pmc_hashsize <= 0) { (void) printf("hwpmc: tunable \"hashsize\"=%d must be " "greater than zero.\n", pmc_hashsize); pmc_hashsize = PMC_HASH_SIZE; } if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { (void) printf("hwpmc: tunable \"nsamples\"=%d out of " "range.\n", pmc_nsamples); pmc_nsamples = PMC_NSAMPLES; } if (pmc_callchaindepth <= 0 || pmc_callchaindepth > 
PMC_CALLCHAIN_DEPTH_MAX) { (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of " "range - using %d.\n", pmc_callchaindepth, PMC_CALLCHAIN_DEPTH_MAX); pmc_callchaindepth = PMC_CALLCHAIN_DEPTH_MAX; } md = pmc_md_initialize(); if (md == NULL) { /* Default to generic CPU. */ md = pmc_generic_cpu_initialize(); if (md == NULL) return (ENOSYS); } KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1, ("[pmc,%d] no classes or pmcs", __LINE__)); /* Compute the map from row-indices to classdep pointers. */ pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); for (n = 0; n < md->pmd_npmc; n++) pmc_rowindex_to_classdep[n] = NULL; for (ri = c = 0; c < md->pmd_nclass; c++) { pcd = &md->pmd_classdep[c]; for (n = 0; n < pcd->pcd_num; n++, ri++) pmc_rowindex_to_classdep[ri] = pcd; } KASSERT(ri == md->pmd_npmc, ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__, ri, md->pmd_npmc)); maxcpu = pmc_cpu_max(); /* allocate space for the per-cpu array */ pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC, M_WAITOK|M_ZERO); /* per-cpu 'saved values' for managing process-mode PMCs */ pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, M_PMC, M_WAITOK); /* Perform CPU-dependent initialization. */ pmc_save_cpu_binding(&pb); error = 0; for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pmc_select_cpu(cpu); pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) + md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC, M_WAITOK|M_ZERO); if (md->pmd_pcpu_init) error = md->pmd_pcpu_init(md, cpu); for (n = 0; error == 0 && n < md->pmd_nclass; n++) error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu); } pmc_restore_cpu_binding(&pb); if (error) return (error); /* allocate space for the sample array */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pc = pcpu_find(cpu); domain = pc->pc_domain; sb = malloc_domain(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb; sb = malloc_domain(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb; sb = malloc_domain(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < 
pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_UR] = sb; } /* allocate space for the row disposition array */ pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); /* mark all PMCs as available */ for (n = 0; n < (int) md->pmd_npmc; n++) PMC_MARK_ROW_FREE(n); /* allocate thread hash tables */ pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, &pmc_ownerhashmask); pmc_processhash = hashinit(pmc_hashsize, M_PMC, &pmc_processhashmask); mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf", MTX_SPIN); CK_LIST_INIT(&pmc_ss_owners); pmc_ss_count = 0; /* allocate a pool of spin mutexes */ pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size, MTX_SPIN); PMCDBG4(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, pmc_processhash, pmc_processhashmask); /* Initialize a spin mutex for the thread free list. */ mtx_init(&pmc_threadfreelist_mtx, "pmc-threadfreelist", "pmc-leaf", MTX_SPIN); /* * Initialize the callout to monitor the thread free list. * This callout will also handle the initial population of the list. */ taskqgroup_config_gtask_init(NULL, &free_gtask, pmc_thread_descriptor_pool_free_task, "thread descriptor pool free task"); /* register process {exit,fork,exec} handlers */ pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); /* register kld event handlers */ pmc_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, pmc_kld_load, NULL, EVENTHANDLER_PRI_ANY); pmc_kld_unload_tag = EVENTHANDLER_REGISTER(kld_unload, pmc_kld_unload, NULL, EVENTHANDLER_PRI_ANY); /* initialize logging */ pmclog_initialize(); /* set hook functions */ pmc_intr = md->pmd_intr; wmb(); pmc_hook = pmc_hook_handler; if (error == 0) { printf(PMC_MODULE_NAME ":"); for (n = 0; n < (int) md->pmd_nclass; n++) { pcd = &md->pmd_classdep[n]; printf(" %s/%d/%d/0x%b", pmc_name_of_pmcclass(pcd->pcd_class), pcd->pcd_num, pcd->pcd_width, pcd->pcd_caps, "\20" "\1INT\2USR\3SYS\4EDG\5THR" "\6REA\7WRI\10INV\11QUA\12PRC" "\13TAG\14CSC"); } printf("\n"); } return (error); } /* prepare to be unloaded */ static void pmc_cleanup(void) { int c, cpu; unsigned int maxcpu; struct pmc_ownerhash *ph; struct pmc_owner *po, *tmp; struct pmc_binding pb; #ifdef HWPMC_DEBUG struct pmc_processhash *prh; #endif PMCDBG0(MOD,INI,0, "cleanup"); /* switch off sampling */ CPU_FOREACH(cpu) DPCPU_ID_SET(cpu, pmc_sampled, 0); pmc_intr = NULL; sx_xlock(&pmc_sx); if (pmc_hook == NULL) { /* being unloaded already */ sx_xunlock(&pmc_sx); return; } pmc_hook = NULL; /* prevent new threads from entering module */ /* deregister event handlers */ EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); EVENTHANDLER_DEREGISTER(kld_load, pmc_kld_load_tag); EVENTHANDLER_DEREGISTER(kld_unload, pmc_kld_unload_tag); /* send SIGBUS to all owner threads, free up allocations */ if (pmc_ownerhash) for (ph = pmc_ownerhash; ph <= &pmc_ownerhash[pmc_ownerhashmask]; ph++) { LIST_FOREACH_SAFE(po, ph, po_next, tmp) { pmc_remove_owner(po); /* send SIGBUS to owner processes */ PMCDBG3(MOD,INI,2, "cleanup signal proc=%p " "(%d, %s)", po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); PROC_LOCK(po->po_owner); kern_psignal(po->po_owner, SIGBUS); PROC_UNLOCK(po->po_owner); pmc_destroy_owner_descriptor(po); } } /* reclaim allocated data structures */ 
mtx_destroy(&pmc_threadfreelist_mtx); pmc_thread_descriptor_pool_drain(); if (pmc_mtxpool) mtx_pool_destroy(&pmc_mtxpool); mtx_destroy(&pmc_processhash_mtx); taskqgroup_config_gtask_deinit(&free_gtask); if (pmc_processhash) { #ifdef HWPMC_DEBUG struct pmc_process *pp; PMCDBG0(MOD,INI,3, "destroy process hash"); for (prh = pmc_processhash; prh <= &pmc_processhash[pmc_processhashmask]; prh++) LIST_FOREACH(pp, prh, pp_next) PMCDBG1(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); #endif hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); pmc_processhash = NULL; } if (pmc_ownerhash) { PMCDBG0(MOD,INI,3, "destroy owner hash"); hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); pmc_ownerhash = NULL; } KASSERT(CK_LIST_EMPTY(&pmc_ss_owners), ("[pmc,%d] Global SS owner list not empty", __LINE__)); KASSERT(pmc_ss_count == 0, ("[pmc,%d] Global SS count not empty", __LINE__)); /* do processor and pmc-class dependent cleanup */ maxcpu = pmc_cpu_max(); PMCDBG0(MOD,INI,3, "md cleanup"); if (md) { pmc_save_cpu_binding(&pb); for (cpu = 0; cpu < maxcpu; cpu++) { PMCDBG2(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", cpu, pmc_pcpu[cpu]); if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL) continue; pmc_select_cpu(cpu); for (c = 0; c < md->pmd_nclass; c++) md->pmd_classdep[c].pcd_pcpu_fini(md, cpu); if (md->pmd_pcpu_fini) md->pmd_pcpu_fini(md, cpu); } if (md->pmd_cputype == PMC_CPU_GENERIC) pmc_generic_cpu_finalize(md); else pmc_md_finalize(md); pmc_mdep_free(md); md = NULL; pmc_restore_cpu_binding(&pb); } /* Free per-cpu descriptors. */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_HR] != NULL, ("[pmc,%d] Null hw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL, ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_UR] != NULL, ("[pmc,%d] Null userret cpu sample buffer cpu=%d", __LINE__, cpu)); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR]->ps_callchains, M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR], M_PMC); free_domain(pmc_pcpu[cpu], M_PMC); } free(pmc_pcpu, M_PMC); pmc_pcpu = NULL; free(pmc_pcpu_saved, M_PMC); pmc_pcpu_saved = NULL; if (pmc_pmcdisp) { free(pmc_pmcdisp, M_PMC); pmc_pmcdisp = NULL; } if (pmc_rowindex_to_classdep) { free(pmc_rowindex_to_classdep, M_PMC); pmc_rowindex_to_classdep = NULL; } pmclog_shutdown(); counter_u64_free(pmc_stats.pm_intr_ignored); counter_u64_free(pmc_stats.pm_intr_processed); counter_u64_free(pmc_stats.pm_intr_bufferfull); counter_u64_free(pmc_stats.pm_syscalls); counter_u64_free(pmc_stats.pm_syscall_errors); counter_u64_free(pmc_stats.pm_buffer_requests); counter_u64_free(pmc_stats.pm_buffer_requests_failed); counter_u64_free(pmc_stats.pm_log_sweeps); counter_u64_free(pmc_stats.pm_merges); counter_u64_free(pmc_stats.pm_overwrites); sx_xunlock(&pmc_sx); /* we are done */ } /* * The function called at load/unload. 
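 *
 * This handler is wired into the kernel module machinery elsewhere
 * in the file; a minimal sketch of that registration (the exact
 * declaration is outside this hunk, so treat it as an assumption):
 *
 *	static moduledata_t pmc_mod = { PMC_MODULE_NAME, load, NULL };
 *	DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);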
*/ static int load (struct module *module __unused, int cmd, void *arg __unused) { int error; error = 0; switch (cmd) { case MOD_LOAD : /* initialize the subsystem */ error = pmc_initialize(); if (error != 0) break; PMCDBG2(MOD,INI,1, "syscall=%d maxcpu=%d", pmc_syscall_num, pmc_cpu_max()); break; case MOD_UNLOAD : case MOD_SHUTDOWN: pmc_cleanup(); PMCDBG0(MOD,INI,1, "unloaded"); break; default : error = EINVAL; /* XXX should panic(9) */ break; } return error; } Index: projects/pnfs-planb-server/sys/dev/mse/mse.c =================================================================== --- projects/pnfs-planb-server/sys/dev/mse/mse.c (revision 334966) +++ projects/pnfs-planb-server/sys/dev/mse/mse.c (revision 334967) @@ -1,572 +1,566 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 M. Warner Losh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Copyright 1992 by the University of Guelph * * Permission to use, copy and modify this * software and its documentation for any purpose and without * fee is hereby granted, provided that the above copyright * notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting * documentation. * University of Guelph makes no representations about the suitability of * this software for any purpose. It is provided "as is" * without express or implied warranty. */ /* * Driver for the Logitech and ATI Inport Bus mice for use with 386bsd and * the X386 port, courtesy of * Rick Macklem, rick@snowhite.cis.uoguelph.ca * Caveats: The driver currently uses spltty(), but doesn't use any * generic tty code. It could use splmse() (that only masks off the * bus mouse interrupt, but that would require hacking in i386/isa/icu.s. * (This may be worth the effort, since the Logitech generates 30/60 * interrupts/sec continuously while it is open.) * NB: The ATI has NOT been tested yet! */ /* * Modification history: * Sep 6, 1994 -- Lars Fredriksen(fredriks@mcs.com) * improved probe based on input from Logitech. * * Oct 19, 1992 -- E. Stark (stark@cs.sunysb.edu) * fixes to make it work with Microsoft InPort busmouse * * Jan, 1993 -- E. Stark (stark@cs.sunysb.edu) * added patches for new "select" interface * * May 4, 1993 -- E. 
Stark (stark@cs.sunysb.edu) * changed position of some spl()'s in mseread * * October 8, 1993 -- E. Stark (stark@cs.sunysb.edu) * limit maximum negative x/y value to -127 to work around XFree problem * that causes spurious button pushes. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include devclass_t mse_devclass; static d_open_t mseopen; static d_close_t mseclose; static d_read_t mseread; static d_ioctl_t mseioctl; static d_poll_t msepoll; static struct cdevsw mse_cdevsw = { .d_version = D_VERSION, .d_open = mseopen, .d_close = mseclose, .d_read = mseread, .d_ioctl = mseioctl, .d_poll = msepoll, .d_name = "mse", }; static void mseintr(void *); static void mseintr_locked(mse_softc_t *sc); static void msetimeout(void *); #define MSE_NBLOCKIO(dev) (dev2unit(dev) != 0) #define MSEPRI (PZERO + 3) int mse_common_attach(device_t dev) { mse_softc_t *sc; int unit, flags, rid; sc = device_get_softc(dev); unit = device_get_unit(dev); mtx_init(&sc->sc_lock, "mse", NULL, MTX_DEF); callout_init_mtx(&sc->sc_callout, &sc->sc_lock, 0); rid = 0; sc->sc_intr = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->sc_intr == NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rid, sc->sc_port); mtx_destroy(&sc->sc_lock); return ENXIO; } if (bus_setup_intr(dev, sc->sc_intr, INTR_TYPE_TTY | INTR_MPSAFE, NULL, mseintr, sc, &sc->sc_ih)) { bus_release_resource(dev, SYS_RES_IOPORT, rid, sc->sc_port); bus_release_resource(dev, SYS_RES_IRQ, rid, sc->sc_intr); mtx_destroy(&sc->sc_lock); return ENXIO; } flags = device_get_flags(dev); sc->mode.accelfactor = (flags & MSE_CONFIG_ACCEL) >> 4; sc->sc_dev = make_dev(&mse_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "mse%d", unit); sc->sc_dev->si_drv1 = sc; sc->sc_ndev = make_dev(&mse_cdevsw, 1, UID_ROOT, GID_WHEEL, 0600, "nmse%d", unit); sc->sc_ndev->si_drv1 = sc; gone_in_dev(dev, 12, "mse(4) driver"); return 0; } int mse_detach(device_t dev) { mse_softc_t *sc; int rid; sc = device_get_softc(dev); MSE_LOCK(sc); if (sc->sc_flags & MSESC_OPEN) { MSE_UNLOCK(sc); return EBUSY; } /* Sabotage subsequent opens. */ sc->sc_mousetype = MSE_NONE; MSE_UNLOCK(sc); destroy_dev(sc->sc_dev); destroy_dev(sc->sc_ndev); rid = 0; bus_teardown_intr(dev, sc->sc_intr, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, rid, sc->sc_intr); bus_release_resource(dev, SYS_RES_IOPORT, rid, sc->sc_port); callout_drain(&sc->sc_callout); mtx_destroy(&sc->sc_lock); return 0; } /* * Exclusive open the mouse, initialize it and enable interrupts. */ static int mseopen(struct cdev *dev, int flags, int fmt, struct thread *td) { mse_softc_t *sc = dev->si_drv1; MSE_LOCK(sc); if (sc->sc_mousetype == MSE_NONE) { MSE_UNLOCK(sc); return (ENXIO); } if (sc->sc_flags & MSESC_OPEN) { MSE_UNLOCK(sc); return (EBUSY); } sc->sc_flags |= MSESC_OPEN; sc->sc_obuttons = sc->sc_buttons = MOUSE_MSC_BUTTONS; sc->sc_deltax = sc->sc_deltay = 0; sc->sc_bytesread = sc->mode.packetsize = MOUSE_MSC_PACKETSIZE; sc->sc_watchdog = FALSE; callout_reset(&sc->sc_callout, hz * 2, msetimeout, dev); sc->mode.level = 0; sc->status.flags = 0; sc->status.button = sc->status.obutton = 0; sc->status.dx = sc->status.dy = sc->status.dz = 0; /* * Initialize mouse interface and enable interrupts. */ (*sc->sc_enablemouse)(sc->sc_port); MSE_UNLOCK(sc); return (0); } /* * mseclose: just turn off mouse innterrupts. 
*/ static int mseclose(struct cdev *dev, int flags, int fmt, struct thread *td) { mse_softc_t *sc = dev->si_drv1; MSE_LOCK(sc); callout_stop(&sc->sc_callout); (*sc->sc_disablemouse)(sc->sc_port); sc->sc_flags &= ~MSESC_OPEN; MSE_UNLOCK(sc); return(0); } /* * mseread: return mouse info using the MSC serial protocol, but without * using bytes 4 and 5. * (Yes this is cheesy, but it makes the X386 server happy, so...) */ static int mseread(struct cdev *dev, struct uio *uio, int ioflag) { mse_softc_t *sc = dev->si_drv1; int xfer, error; /* * If there are no protocol bytes to be read, set up a new protocol * packet. */ MSE_LOCK(sc); while (sc->sc_flags & MSESC_READING) { if (MSE_NBLOCKIO(dev)) { MSE_UNLOCK(sc); return (0); } sc->sc_flags |= MSESC_WANT; error = mtx_sleep(sc, &sc->sc_lock, MSEPRI | PCATCH, "mseread", 0); if (error) { MSE_UNLOCK(sc); return (error); } } sc->sc_flags |= MSESC_READING; xfer = 0; if (sc->sc_bytesread >= sc->mode.packetsize) { while (sc->sc_deltax == 0 && sc->sc_deltay == 0 && (sc->sc_obuttons ^ sc->sc_buttons) == 0) { if (MSE_NBLOCKIO(dev)) goto out; sc->sc_flags |= MSESC_WANT; error = mtx_sleep(sc, &sc->sc_lock, MSEPRI | PCATCH, "mseread", 0); if (error) goto out; } /* * Generate protocol bytes. * For some reason X386 expects 5 bytes but never uses * the fourth or fifth? */ sc->sc_bytes[0] = sc->mode.syncmask[1] | (sc->sc_buttons & ~sc->mode.syncmask[0]); if (sc->sc_deltax > 127) sc->sc_deltax = 127; if (sc->sc_deltax < -127) sc->sc_deltax = -127; sc->sc_deltay = -sc->sc_deltay; /* Otherwise mousey goes wrong way */ if (sc->sc_deltay > 127) sc->sc_deltay = 127; if (sc->sc_deltay < -127) sc->sc_deltay = -127; sc->sc_bytes[1] = sc->sc_deltax; sc->sc_bytes[2] = sc->sc_deltay; sc->sc_bytes[3] = sc->sc_bytes[4] = 0; sc->sc_bytes[5] = sc->sc_bytes[6] = 0; sc->sc_bytes[7] = MOUSE_SYS_EXTBUTTONS; sc->sc_obuttons = sc->sc_buttons; sc->sc_deltax = sc->sc_deltay = 0; sc->sc_bytesread = 0; } xfer = min(uio->uio_resid, sc->mode.packetsize - sc->sc_bytesread); MSE_UNLOCK(sc); error = uiomove(&sc->sc_bytes[sc->sc_bytesread], xfer, uio); MSE_LOCK(sc); out: sc->sc_flags &= ~MSESC_READING; if (error == 0) sc->sc_bytesread += xfer; if (sc->sc_flags & MSESC_WANT) { sc->sc_flags &= ~MSESC_WANT; MSE_UNLOCK(sc); wakeup(sc); } else MSE_UNLOCK(sc); return (error); } /* * mseioctl: process ioctl commands. 
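 *
 * Typical userland use (an assumed example -- consumers such as
 * moused(8) drive these ioctls; the snippet is not part of this
 * driver):
 *
 *	mousemode_t mode;
 *	int level = 1;
 *
 *	ioctl(fd, MOUSE_SETLEVEL, &level);  -- sysmouse-style packets
 *	ioctl(fd, MOUSE_GETMODE, &mode);    -- mode.packetsize is now
 *					       MOUSE_SYS_PACKETSIZE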
*/ static int mseioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { mse_softc_t *sc = dev->si_drv1; mousestatus_t status; int err = 0; switch (cmd) { case MOUSE_GETHWINFO: MSE_LOCK(sc); *(mousehw_t *)addr = sc->hw; if (sc->mode.level == 0) ((mousehw_t *)addr)->model = MOUSE_MODEL_GENERIC; MSE_UNLOCK(sc); break; case MOUSE_GETMODE: MSE_LOCK(sc); *(mousemode_t *)addr = sc->mode; switch (sc->mode.level) { case 0: break; case 1: ((mousemode_t *)addr)->protocol = MOUSE_PROTO_SYSMOUSE; ((mousemode_t *)addr)->syncmask[0] = MOUSE_SYS_SYNCMASK; ((mousemode_t *)addr)->syncmask[1] = MOUSE_SYS_SYNC; break; } MSE_UNLOCK(sc); break; case MOUSE_SETMODE: switch (((mousemode_t *)addr)->level) { case 0: case 1: break; default: return (EINVAL); } MSE_LOCK(sc); if (((mousemode_t *)addr)->accelfactor < -1) { MSE_UNLOCK(sc); return (EINVAL); } else if (((mousemode_t *)addr)->accelfactor >= 0) sc->mode.accelfactor = ((mousemode_t *)addr)->accelfactor; sc->mode.level = ((mousemode_t *)addr)->level; switch (sc->mode.level) { case 0: sc->sc_bytesread = sc->mode.packetsize = MOUSE_MSC_PACKETSIZE; break; case 1: sc->sc_bytesread = sc->mode.packetsize = MOUSE_SYS_PACKETSIZE; break; } MSE_UNLOCK(sc); break; case MOUSE_GETLEVEL: MSE_LOCK(sc); *(int *)addr = sc->mode.level; MSE_UNLOCK(sc); break; case MOUSE_SETLEVEL: switch (*(int *)addr) { case 0: MSE_LOCK(sc); sc->mode.level = *(int *)addr; sc->sc_bytesread = sc->mode.packetsize = MOUSE_MSC_PACKETSIZE; MSE_UNLOCK(sc); break; case 1: MSE_LOCK(sc); sc->mode.level = *(int *)addr; sc->sc_bytesread = sc->mode.packetsize = MOUSE_SYS_PACKETSIZE; MSE_UNLOCK(sc); break; default: return (EINVAL); } break; case MOUSE_GETSTATUS: MSE_LOCK(sc); status = sc->status; sc->status.flags = 0; sc->status.obutton = sc->status.button; sc->status.button = 0; sc->status.dx = 0; sc->status.dy = 0; sc->status.dz = 0; MSE_UNLOCK(sc); *(mousestatus_t *)addr = status; break; case MOUSE_READSTATE: case MOUSE_READDATA: return (ENODEV); -#if (defined(MOUSE_GETVARS)) - case MOUSE_GETVARS: - case MOUSE_SETVARS: - return (ENODEV); -#endif - default: return (ENOTTY); } return (err); } /* * msepoll: check for mouse input to be processed. */ static int msepoll(struct cdev *dev, int events, struct thread *td) { mse_softc_t *sc = dev->si_drv1; int revents = 0; MSE_LOCK(sc); if (events & (POLLIN | POLLRDNORM)) { if (sc->sc_bytesread != sc->mode.packetsize || sc->sc_deltax != 0 || sc->sc_deltay != 0 || (sc->sc_obuttons ^ sc->sc_buttons) != 0) revents |= events & (POLLIN | POLLRDNORM); else selrecord(td, &sc->sc_selp); } MSE_UNLOCK(sc); return (revents); } /* * msetimeout: watchdog timer routine. */ static void msetimeout(void *arg) { struct cdev *dev; mse_softc_t *sc; dev = (struct cdev *)arg; sc = dev->si_drv1; MSE_ASSERT_LOCKED(sc); if (sc->sc_watchdog) { if (bootverbose) printf("%s: lost interrupt?\n", devtoname(dev)); mseintr_locked(sc); } sc->sc_watchdog = TRUE; callout_schedule(&sc->sc_callout, hz); } /* * mseintr: update mouse status. sc_deltax and sc_deltay are accumulative. */ static void mseintr(void *arg) { mse_softc_t *sc = arg; MSE_LOCK(sc); mseintr_locked(sc); MSE_UNLOCK(sc); } static void mseintr_locked(mse_softc_t *sc) { /* * the table to turn MouseSystem button bits (MOUSE_MSC_BUTTON?UP) * into `mousestatus' button bits (MOUSE_BUTTON?DOWN). 
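 *
 * Worked example: the raw MSC bits are active-low (1 == up), so with
 * only the middle button held, but = 0x5, and the lookup performed in
 * mseintr_locked() (with MOUSE_MSC_BUTTONS == 0x07) is
 *
 *	butmap[~0x5 & MOUSE_MSC_BUTTONS] == butmap[2]
 *					 == MOUSE_BUTTON2DOWN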
*/ static int butmap[8] = { 0, MOUSE_BUTTON3DOWN, MOUSE_BUTTON2DOWN, MOUSE_BUTTON2DOWN | MOUSE_BUTTON3DOWN, MOUSE_BUTTON1DOWN, MOUSE_BUTTON1DOWN | MOUSE_BUTTON3DOWN, MOUSE_BUTTON1DOWN | MOUSE_BUTTON2DOWN, MOUSE_BUTTON1DOWN | MOUSE_BUTTON2DOWN | MOUSE_BUTTON3DOWN }; int dx, dy, but; int sign; #ifdef DEBUG static int mse_intrcnt = 0; if((mse_intrcnt++ % 10000) == 0) printf("mseintr\n"); #endif /* DEBUG */ if ((sc->sc_flags & MSESC_OPEN) == 0) return; (*sc->sc_getmouse)(sc->sc_port, &dx, &dy, &but); if (sc->mode.accelfactor > 0) { sign = (dx < 0); dx = dx * dx / sc->mode.accelfactor; if (dx == 0) dx = 1; if (sign) dx = -dx; sign = (dy < 0); dy = dy * dy / sc->mode.accelfactor; if (dy == 0) dy = 1; if (sign) dy = -dy; } sc->sc_deltax += dx; sc->sc_deltay += dy; sc->sc_buttons = but; but = butmap[~but & MOUSE_MSC_BUTTONS]; sc->status.dx += dx; sc->status.dy += dy; sc->status.flags |= ((dx || dy) ? MOUSE_POSCHANGED : 0) | (sc->status.button ^ but); sc->status.button = but; sc->sc_watchdog = FALSE; /* * If mouse state has changed, wake up anyone wanting to know. */ if (sc->sc_deltax != 0 || sc->sc_deltay != 0 || (sc->sc_obuttons ^ sc->sc_buttons) != 0) { if (sc->sc_flags & MSESC_WANT) { sc->sc_flags &= ~MSESC_WANT; wakeup(sc); } selwakeuppri(&sc->sc_selp, MSEPRI); } } Index: projects/pnfs-planb-server/sys/dev/ocs_fc/ocs_ioctl.c =================================================================== --- projects/pnfs-planb-server/sys/dev/ocs_fc/ocs_ioctl.c (revision 334966) +++ projects/pnfs-planb-server/sys/dev/ocs_fc/ocs_ioctl.c (revision 334967) @@ -1,1253 +1,1253 @@ /*- * Copyright (c) 2017 Broadcom. All rights reserved. * The term "Broadcom" refers to Broadcom Limited and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #include "ocs.h" #include "ocs_utils.h" #include #include #include #include #include #include #include static d_open_t ocs_open; static d_close_t ocs_close; static d_ioctl_t ocs_ioctl; static struct cdevsw ocs_cdevsw = { .d_version = D_VERSION, .d_open = ocs_open, .d_close = ocs_close, .d_ioctl = ocs_ioctl, .d_name = "ocs_fc" }; int ocs_firmware_write(ocs_t *ocs, const uint8_t *buf, size_t buf_len, uint8_t *change_status); static int ocs_open(struct cdev *cdev, int flags, int fmt, struct thread *td) { #if 0 struct ocs_softc *ocs = cdev->si_drv1; device_printf(ocs->dev, "%s\n", __func__); #endif return 0; } static int ocs_close(struct cdev *cdev, int flag, int fmt, struct thread *td) { #if 0 struct ocs_softc *ocs = cdev->si_drv1; device_printf(ocs->dev, "%s\n", __func__); #endif return 0; } static int32_t __ocs_ioctl_mbox_cb(ocs_hw_t *hw, int32_t status, uint8_t *mqe, void *arg) { struct ocs_softc *ocs = arg; /* wait for the ioctl to sleep before calling wakeup */ mtx_lock(&ocs->dbg_lock); mtx_unlock(&ocs->dbg_lock); wakeup(arg); return 0; } static int ocs_process_sli_config (ocs_t *ocs, ocs_ioctl_elxu_mbox_t *mcmd, ocs_dma_t *dma){ sli4_cmd_sli_config_t *sli_config = (sli4_cmd_sli_config_t *)mcmd->payload; if (sli_config->emb) { sli4_req_hdr_t *req = (sli4_req_hdr_t *)sli_config->payload.embed; switch (req->opcode) { case SLI4_OPC_COMMON_READ_OBJECT: if (mcmd->out_bytes) { sli4_req_common_read_object_t *rdobj = (sli4_req_common_read_object_t *)sli_config->payload.embed; if (ocs_dma_alloc(ocs, dma, mcmd->out_bytes, 4096)) { device_printf(ocs->dev, "%s: COMMON_READ_OBJECT - %lld allocation failed\n", __func__, (unsigned long long)mcmd->out_bytes); return ENXIO; } memset(dma->virt, 0, mcmd->out_bytes); rdobj->host_buffer_descriptor[0].bde_type = SLI4_BDE_TYPE_BDE_64; rdobj->host_buffer_descriptor[0].buffer_length = mcmd->out_bytes; rdobj->host_buffer_descriptor[0].u.data.buffer_address_low = ocs_addr32_lo(dma->phys); rdobj->host_buffer_descriptor[0].u.data.buffer_address_high = ocs_addr32_hi(dma->phys); } break; case SLI4_OPC_COMMON_WRITE_OBJECT: { sli4_req_common_write_object_t *wrobj = (sli4_req_common_write_object_t *)sli_config->payload.embed; if (ocs_dma_alloc(ocs, dma, wrobj->desired_write_length, 4096)) { device_printf(ocs->dev, "%s: COMMON_WRITE_OBJECT - %d allocation failed\n", __func__, wrobj->desired_write_length); return ENXIO; } /* setup the descriptor */ wrobj->host_buffer_descriptor[0].bde_type = SLI4_BDE_TYPE_BDE_64; wrobj->host_buffer_descriptor[0].buffer_length = wrobj->desired_write_length; wrobj->host_buffer_descriptor[0].u.data.buffer_address_low = ocs_addr32_lo(dma->phys); wrobj->host_buffer_descriptor[0].u.data.buffer_address_high = ocs_addr32_hi(dma->phys); /* copy the data into the DMA buffer */ - copyin((void *)mcmd->in_addr, dma->virt, mcmd->in_bytes); + copyin((void *)(uintptr_t)mcmd->in_addr, dma->virt, mcmd->in_bytes); } break; case SLI4_OPC_COMMON_DELETE_OBJECT: break; case SLI4_OPC_COMMON_READ_OBJECT_LIST: if (mcmd->out_bytes) { sli4_req_common_read_object_list_t *rdobj = (sli4_req_common_read_object_list_t *)sli_config->payload.embed; if (ocs_dma_alloc(ocs, dma, mcmd->out_bytes, 4096)) { device_printf(ocs->dev, "%s: COMMON_READ_OBJECT_LIST - %lld allocation failed\n", __func__,(unsigned long long) mcmd->out_bytes); return ENXIO; } memset(dma->virt, 0, mcmd->out_bytes); rdobj->host_buffer_descriptor[0].bde_type = SLI4_BDE_TYPE_BDE_64; rdobj->host_buffer_descriptor[0].buffer_length = mcmd->out_bytes; 
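				/*
				 * The BDE_64 descriptor takes the 64-bit DMA
				 * physical address split in half;
				 * ocs_addr32_lo/hi are, in effect (a sketch
				 * of the intent, not their literal text):
				 *
				 *	lo = (uint32_t)(phys & 0xffffffff);
				 *	hi = (uint32_t)(phys >> 32);
				 */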
			rdobj->host_buffer_descriptor[0].u.data.buffer_address_low =
			    ocs_addr32_lo(dma->phys);
			rdobj->host_buffer_descriptor[0].u.data.buffer_address_high =
			    ocs_addr32_hi(dma->phys);
		}
		break;
	case SLI4_OPC_COMMON_READ_TRANSCEIVER_DATA:
		break;
	default:
		device_printf(ocs->dev, "%s: in=%p (%lld) out=%p (%lld)\n", __func__,
-			(void *)mcmd->in_addr, (unsigned long long)mcmd->in_bytes,
-			(void *)mcmd->out_addr, (unsigned long long)mcmd->out_bytes);
+			(void *)(uintptr_t)mcmd->in_addr, (unsigned long long)mcmd->in_bytes,
+			(void *)(uintptr_t)mcmd->out_addr, (unsigned long long)mcmd->out_bytes);
		device_printf(ocs->dev, "%s: unknown (opc=%#x)\n", __func__,
		    req->opcode);
		hexdump(mcmd, mcmd->size, NULL, 0);
		break;
	}
	} else {
		uint32_t max_bytes = max(mcmd->in_bytes, mcmd->out_bytes);
		if (ocs_dma_alloc(ocs, dma, max_bytes, 4096)) {
			device_printf(ocs->dev,
			    "%s: non-embedded - %u allocation failed\n",
			    __func__, max_bytes);
			return ENXIO;
		}

-		copyin((void *)mcmd->in_addr, dma->virt, mcmd->in_bytes);
+		copyin((void *)(uintptr_t)mcmd->in_addr, dma->virt, mcmd->in_bytes);

		sli_config->payload.mem.address_low = ocs_addr32_lo(dma->phys);
		sli_config->payload.mem.address_high = ocs_addr32_hi(dma->phys);
		sli_config->payload.mem.length = max_bytes;
	}

	return 0;
}

static int
ocs_process_mbx_ioctl(ocs_t *ocs, ocs_ioctl_elxu_mbox_t *mcmd)
{
	ocs_dma_t dma = { 0 };

	if ((ELXU_BSD_MAGIC != mcmd->magic) ||
	    (sizeof(ocs_ioctl_elxu_mbox_t) != mcmd->size)) {
		device_printf(ocs->dev, "%s: malformed command m=%08x s=%08x\n",
		    __func__, mcmd->magic, mcmd->size);
		return EINVAL;
	}

	switch(((sli4_mbox_command_header_t *)mcmd->payload)->command) {
	case SLI4_MBOX_COMMAND_SLI_CONFIG:
		if (ENXIO == ocs_process_sli_config(ocs, mcmd, &dma))
			return ENXIO;
		break;

	case SLI4_MBOX_COMMAND_READ_REV:
	case SLI4_MBOX_COMMAND_READ_STATUS:
	case SLI4_MBOX_COMMAND_READ_LNK_STAT:
		break;

	default:
		device_printf(ocs->dev, "command %d\n",
		    ((sli4_mbox_command_header_t *)mcmd->payload)->command);
		device_printf(ocs->dev, "%s, command not supported\n", __func__);
		goto no_support;
		break;
	}

	/*
	 * The dbg_lock usage here ensures the command completion code
	 * (__ocs_ioctl_mbox_cb), which calls wakeup(), does not run until
	 * after the ioctl path has first called msleep():
	 *
	 * 1. ioctl grabs dbg_lock
	 * 2. ioctl issues command
	 *    if the command completes before msleep(), the
	 *    command completion code (__ocs_ioctl_mbox_cb) will spin
	 *    on dbg_lock before calling wakeup()
	 * 3. ioctl calls msleep which releases dbg_lock before sleeping
	 *    and reacquires it before waking
	 * 4. command completion handler acquires the dbg_lock, immediately
	 *    releases it, and calls wakeup
	 * 5. msleep returns, re-acquiring the lock
	 * 6. ioctl code releases the lock
	 */
	mtx_lock(&ocs->dbg_lock);
	ocs_hw_command(&ocs->hw, mcmd->payload, OCS_CMD_NOWAIT,
	    __ocs_ioctl_mbox_cb, ocs);
	msleep(ocs, &ocs->dbg_lock, 0, "ocsmbx", 0);
	mtx_unlock(&ocs->dbg_lock);

	if (SLI4_MBOX_COMMAND_SLI_CONFIG ==
	    ((sli4_mbox_command_header_t *)mcmd->payload)->command
	    && mcmd->out_bytes && dma.virt) {
-		copyout(dma.virt, (void *)mcmd->out_addr, mcmd->out_bytes);
+		copyout(dma.virt, (void *)(uintptr_t)mcmd->out_addr, mcmd->out_bytes);
	}

no_support:
	ocs_dma_free(ocs, &dma);

	return 0;
}

/**
 * @brief perform requested Elx CoreDump helper function
 *
 * The Elx CoreDump facility used for BE3 diagnostics uses the
 * OCS_IOCTL_CMD_ECD_HELPER ioctl function to execute the requested
 * "help" functions.
 *
 * @param ocs pointer to ocs structure
 * @param req pointer to helper function request
 *
 * @return returns 0 for success, a negative error code value for failure.
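 *
 * Illustrative call from userland (an assumed example; the request
 * layout comes from the ocs ioctl headers, not from this function):
 *
 *	ocs_ioctl_ecd_helper_t req = {
 *		.cmd = OCS_ECD_HELPER_BAR_READ32,
 *		.bar = 0,
 *		.offset = 0x58,
 *	};
 *	ioctl(fd, OCS_IOCTL_CMD_ECD_HELPER, &req);
 *	-- on success, req.data holds the 32-bit register value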
 */
static int
ocs_process_ecd_helper(ocs_t *ocs, ocs_ioctl_ecd_helper_t *req)
{
	int32_t rc = 0;
	uint8_t v8;
	uint16_t v16;
	uint32_t v32;

	/* Check the BAR read/write commands for a valid bar */
	switch(req->cmd) {
	case OCS_ECD_HELPER_BAR_READ8:
	case OCS_ECD_HELPER_BAR_READ16:
	case OCS_ECD_HELPER_BAR_READ32:
	case OCS_ECD_HELPER_BAR_WRITE8:
	case OCS_ECD_HELPER_BAR_WRITE16:
	case OCS_ECD_HELPER_BAR_WRITE32:
		if (req->bar >= PCI_MAX_BAR) {
			device_printf(ocs->dev, "Error: bar %d out of range\n",
			    req->bar);
			return -EFAULT;
		}
		if (ocs->reg[req->bar].res == NULL) {
			device_printf(ocs->dev, "Error: bar %d not defined\n",
			    req->bar);
			return -EFAULT;
		}
		break;
	default:
		break;
	}

	switch(req->cmd) {
	case OCS_ECD_HELPER_CFG_READ8:
		v8 = ocs_config_read8(ocs, req->offset);
		req->data = v8;
		break;
	case OCS_ECD_HELPER_CFG_READ16:
		v16 = ocs_config_read16(ocs, req->offset);
		req->data = v16;
		break;
	case OCS_ECD_HELPER_CFG_READ32:
		v32 = ocs_config_read32(ocs, req->offset);
		req->data = v32;
		break;
	case OCS_ECD_HELPER_CFG_WRITE8:
		ocs_config_write8(ocs, req->offset, req->data);
		break;
	case OCS_ECD_HELPER_CFG_WRITE16:
		ocs_config_write16(ocs, req->offset, req->data);
		break;
	case OCS_ECD_HELPER_CFG_WRITE32:
		ocs_config_write32(ocs, req->offset, req->data);
		break;
	case OCS_ECD_HELPER_BAR_READ8:
		req->data = ocs_reg_read8(ocs, req->bar, req->offset);
		break;
	case OCS_ECD_HELPER_BAR_READ16:
		req->data = ocs_reg_read16(ocs, req->bar, req->offset);
		break;
	case OCS_ECD_HELPER_BAR_READ32:
		req->data = ocs_reg_read32(ocs, req->bar, req->offset);
		break;
	case OCS_ECD_HELPER_BAR_WRITE8:
		ocs_reg_write8(ocs, req->bar, req->offset, req->data);
		break;
	case OCS_ECD_HELPER_BAR_WRITE16:
		ocs_reg_write16(ocs, req->bar, req->offset, req->data);
		break;
	case OCS_ECD_HELPER_BAR_WRITE32:
		ocs_reg_write32(ocs, req->bar, req->offset, req->data);
		break;
	default:
		device_printf(ocs->dev, "Invalid helper command=%d\n", req->cmd);
		break;
	}

	return rc;
}

static int
ocs_ioctl(struct cdev *cdev, u_long cmd, caddr_t addr, int flag,
    struct thread *td)
{
	int status = 0;
	struct ocs_softc *ocs = cdev->si_drv1;
	device_t dev = ocs->dev;

	switch (cmd) {
	case OCS_IOCTL_CMD_ELXU_MBOX: {
		/* "copyin" done by kernel; thus, just dereference addr */
		ocs_ioctl_elxu_mbox_t *mcmd = (void *)addr;
		status = ocs_process_mbx_ioctl(ocs, mcmd);
		break;
	}
	case OCS_IOCTL_CMD_ECD_HELPER: {
		/* "copyin" done by kernel; thus, just dereference addr */
		ocs_ioctl_ecd_helper_t *req = (void *)addr;
		status = ocs_process_ecd_helper(ocs, req);
		break;
	}
	case OCS_IOCTL_CMD_VPORT: {
		int32_t rc = 0;
		ocs_ioctl_vport_t *req = (ocs_ioctl_vport_t*) addr;
		ocs_domain_t *domain;

		domain = ocs_domain_get_instance(ocs, req->domain_index);
		if (domain == NULL) {
			device_printf(ocs->dev, "domain [%d] not found\n",
			    req->domain_index);
			return -EFAULT;
		}

		if (req->req_create) {
			rc = ocs_sport_vport_new(domain, req->wwpn, req->wwnn,
			    UINT32_MAX, req->enable_ini, req->enable_tgt,
			    NULL, NULL, TRUE);
		} else {
			rc = ocs_sport_vport_del(ocs, domain, req->wwpn,
			    req->wwnn);
		}

		return rc;
	}
	case OCS_IOCTL_CMD_GET_DDUMP: {
		ocs_ioctl_ddump_t *req = (ocs_ioctl_ddump_t*) addr;
		ocs_textbuf_t textbuf;
		int x;

		/* Build a text buffer */
		if (ocs_textbuf_alloc(ocs, &textbuf, req->user_buffer_len)) {
			device_printf(ocs->dev,
			    "Error: ocs_textbuf_alloc failed\n");
			return -EFAULT;
		}

		switch (req->args.action) {
		case OCS_IOCTL_DDUMP_GET:
		case OCS_IOCTL_DDUMP_GET_SAVED: {
			uint32_t remaining;
			uint32_t written;
			uint32_t idx;
			int32_t n;
			ocs_textbuf_t *ptbuf = NULL;
			uint32_t flags = 0;

			if (req->args.action == OCS_IOCTL_DDUMP_GET_SAVED) {
				if
(ocs_textbuf_initialized(&ocs->ddump_saved)) { ptbuf = &ocs->ddump_saved; } } else { if (ocs_textbuf_alloc(ocs, &textbuf, req->user_buffer_len)) { ocs_log_err(ocs, "Error: ocs_textbuf_alloc failed\n"); return -EFAULT; } /* translate IOCTL ddump flags to ddump flags */ if (req->args.flags & OCS_IOCTL_DDUMP_FLAGS_WQES) { flags |= OCS_DDUMP_FLAGS_WQES; } if (req->args.flags & OCS_IOCTL_DDUMP_FLAGS_CQES) { flags |= OCS_DDUMP_FLAGS_CQES; } if (req->args.flags & OCS_IOCTL_DDUMP_FLAGS_MQES) { flags |= OCS_DDUMP_FLAGS_MQES; } if (req->args.flags & OCS_IOCTL_DDUMP_FLAGS_RQES) { flags |= OCS_DDUMP_FLAGS_RQES; } if (req->args.flags & OCS_IOCTL_DDUMP_FLAGS_EQES) { flags |= OCS_DDUMP_FLAGS_EQES; } /* Try 3 times to get the dump */ for(x=0; x<3; x++) { if (ocs_ddump(ocs, &textbuf, flags, req->args.q_entries) != 0) { ocs_textbuf_reset(&textbuf); } else { /* Success */ x = 0; break; } } if (x != 0 ) { /* Retries failed */ ocs_log_test(ocs, "ocs_ddump failed\n"); } else { ptbuf = &textbuf; } } written = 0; if (ptbuf != NULL) { /* Process each textbuf segment */ remaining = req->user_buffer_len; for (idx = 0; remaining; idx++) { n = ocs_textbuf_ext_get_written(ptbuf, idx); if (n < 0) { break; } if ((uint32_t)n >= remaining) { n = (int32_t)remaining; } if (ocs_copy_to_user(req->user_buffer + written, ocs_textbuf_ext_get_buffer(ptbuf, idx), n)) { ocs_log_test(ocs, "Error: (%d) ocs_copy_to_user failed\n", __LINE__); } written += n; remaining -= (uint32_t)n; } } req->bytes_written = written; if (ptbuf == &textbuf) { ocs_textbuf_free(ocs, &textbuf); } break; } case OCS_IOCTL_DDUMP_CLR_SAVED: ocs_clear_saved_ddump(ocs); break; default: ocs_log_err(ocs, "Error: ocs_textbuf_alloc failed\n"); break; } break; } case OCS_IOCTL_CMD_DRIVER_INFO: { ocs_ioctl_driver_info_t *req = (ocs_ioctl_driver_info_t*)addr; ocs_memset(req, 0, sizeof(*req)); req->pci_vendor = ocs->pci_vendor; req->pci_device = ocs->pci_device; ocs_strncpy(req->businfo, ocs->businfo, sizeof(req->businfo)); req->sli_intf = ocs_config_read32(ocs, SLI4_INTF_REG); ocs_strncpy(req->desc, device_get_desc(dev), sizeof(req->desc)); ocs_strncpy(req->fw_rev, ocs->fwrev, sizeof(req->fw_rev)); if (ocs->domain && ocs->domain->sport) { *((uint64_t*)req->hw_addr.fc.wwnn) = ocs_htobe64(ocs->domain->sport->wwnn); *((uint64_t*)req->hw_addr.fc.wwpn) = ocs_htobe64(ocs->domain->sport->wwpn); } ocs_strncpy(req->serialnum, ocs->serialnum, sizeof(req->serialnum)); break; } case OCS_IOCTL_CMD_MGMT_LIST: { ocs_ioctl_mgmt_buffer_t* req = (ocs_ioctl_mgmt_buffer_t *)addr; ocs_textbuf_t textbuf; /* Build a text buffer */ if (ocs_textbuf_alloc(ocs, &textbuf, req->user_buffer_len)) { ocs_log_err(ocs, "Error: ocs_textbuf_alloc failed\n"); return -EFAULT; } ocs_mgmt_get_list(ocs, &textbuf); if (ocs_textbuf_get_written(&textbuf)) { if (ocs_copy_to_user(req->user_buffer, ocs_textbuf_get_buffer(&textbuf), ocs_textbuf_get_written(&textbuf))) { ocs_log_test(ocs, "Error: (%d) ocs_copy_to_user failed\n", __LINE__); } } req->bytes_written = ocs_textbuf_get_written(&textbuf); ocs_textbuf_free(ocs, &textbuf); break; } case OCS_IOCTL_CMD_MGMT_GET_ALL: { ocs_ioctl_mgmt_buffer_t* req = (ocs_ioctl_mgmt_buffer_t *)addr; ocs_textbuf_t textbuf; int32_t n; uint32_t idx; uint32_t copied = 0; /* Build a text buffer */ if (ocs_textbuf_alloc(ocs, &textbuf, req->user_buffer_len)) { ocs_log_err(ocs, "Error: ocs_textbuf_alloc failed\n"); return -EFAULT; } ocs_mgmt_get_all(ocs, &textbuf); for (idx = 0; (n = ocs_textbuf_ext_get_written(&textbuf, idx)) > 0; idx++) { if(ocs_copy_to_user(req->user_buffer + copied, 
ocs_textbuf_ext_get_buffer(&textbuf, idx), ocs_textbuf_ext_get_written(&textbuf, idx))) { ocs_log_err(ocs, "Error: ocs_textbuf_alloc failed\n"); } copied += n; } req->bytes_written = copied; ocs_textbuf_free(ocs, &textbuf); break; } case OCS_IOCTL_CMD_MGMT_GET: { ocs_ioctl_cmd_get_t* req = (ocs_ioctl_cmd_get_t*)addr; ocs_textbuf_t textbuf; char name[OCS_MGMT_MAX_NAME]; /* Copy the name value in from user space */ if (ocs_copy_from_user(name, req->name, OCS_MGMT_MAX_NAME)) { ocs_log_test(ocs, "ocs_copy_from_user failed\n"); ocs_ioctl_free(ocs, req, sizeof(ocs_ioctl_cmd_get_t)); return -EFAULT; } /* Build a text buffer */ if (ocs_textbuf_alloc(ocs, &textbuf, req->value_length)) { ocs_log_err(ocs, "Error: ocs_textbuf_alloc failed\n"); return -EFAULT; } ocs_mgmt_get(ocs, name, &textbuf); if (ocs_textbuf_get_written(&textbuf)) { if (ocs_copy_to_user(req->value, ocs_textbuf_get_buffer(&textbuf), ocs_textbuf_get_written(&textbuf))) { ocs_log_test(ocs, "Error: (%d) ocs_copy_to_user failed\n", __LINE__); } } req->value_length = ocs_textbuf_get_written(&textbuf); ocs_textbuf_free(ocs, &textbuf); break; } case OCS_IOCTL_CMD_MGMT_SET: { char name[OCS_MGMT_MAX_NAME]; char value[OCS_MGMT_MAX_VALUE]; ocs_ioctl_cmd_set_t* req = (ocs_ioctl_cmd_set_t*)addr; // Copy the name in from user space if (ocs_copy_from_user(name, req->name, OCS_MGMT_MAX_NAME)) { ocs_log_test(ocs, "Error: copy from user failed\n"); ocs_ioctl_free(ocs, req, sizeof(*req)); return -EFAULT; } // Copy the value in from user space if (ocs_copy_from_user(value, req->value, OCS_MGMT_MAX_VALUE)) { ocs_log_test(ocs, "Error: copy from user failed\n"); ocs_ioctl_free(ocs, req, sizeof(*req)); return -EFAULT; } req->result = ocs_mgmt_set(ocs, req->name, req->value); break; } case OCS_IOCTL_CMD_MGMT_EXEC: { ocs_ioctl_action_t* req = (ocs_ioctl_action_t*) addr; char action_name[OCS_MGMT_MAX_NAME]; if (ocs_copy_from_user(action_name, req->name, sizeof(action_name))) { ocs_log_test(ocs, "Error: copy req.name from user failed\n"); ocs_ioctl_free(ocs, req, sizeof(*req)); return -EFAULT; } req->result = ocs_mgmt_exec(ocs, action_name, req->arg_in, req->arg_in_length, req->arg_out, req->arg_out_length); break; } default: ocs_log_test(ocs, "Error: unknown cmd %#lx\n", cmd); status = -ENOTTY; break; } return status; } static void ocs_fw_write_cb(int32_t status, uint32_t actual_write_length, uint32_t change_status, void *arg) { ocs_mgmt_fw_write_result_t *result = arg; result->status = status; result->actual_xfer = actual_write_length; result->change_status = change_status; ocs_sem_v(&(result->semaphore)); } int ocs_firmware_write(ocs_t *ocs, const uint8_t *buf, size_t buf_len, uint8_t *change_status) { int rc = 0; uint32_t bytes_left; uint32_t xfer_size; uint32_t offset; ocs_dma_t dma; int last = 0; ocs_mgmt_fw_write_result_t result; ocs_sem_init(&(result.semaphore), 0, "fw_write"); bytes_left = buf_len; offset = 0; if (ocs_dma_alloc(ocs, &dma, FW_WRITE_BUFSIZE, 4096)) { ocs_log_err(ocs, "ocs_firmware_write: malloc failed\n"); return -ENOMEM; } while (bytes_left > 0) { if (bytes_left > FW_WRITE_BUFSIZE) { xfer_size = FW_WRITE_BUFSIZE; } else { xfer_size = bytes_left; } ocs_memcpy(dma.virt, buf + offset, xfer_size); if (bytes_left == xfer_size) { last = 1; } ocs_hw_firmware_write(&ocs->hw, &dma, xfer_size, offset, last, ocs_fw_write_cb, &result); if (ocs_sem_p(&(result.semaphore), OCS_SEM_FOREVER) != 0) { rc = -ENXIO; break; } if (result.actual_xfer == 0 || result.status != 0) { rc = -EFAULT; break; } if (last) { *change_status = result.change_status; } 
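		/*
		 * Completion is signalled through the semaphore:
		 * ocs_fw_write_cb() fills 'result' and posts ocs_sem_v(),
		 * releasing the ocs_sem_p() wait above.  Loop invariant
		 * (sketch): after each pass
		 *
		 *	offset + bytes_left == buf_len
		 *
		 * with both terms adjusted by result.actual_xfer below.
		 */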
bytes_left -= result.actual_xfer; offset += result.actual_xfer; } ocs_dma_free(ocs, &dma); return rc; } static int ocs_sys_fwupgrade(SYSCTL_HANDLER_ARGS) { char file_name[256] = {0}; uint8_t fw_change_status; int rc = 1; ocs_t *ocs = (ocs_t *)arg1; const struct firmware *fw; const struct ocs_hw_grp_hdr *fw_image; rc = sysctl_handle_string(oidp, file_name, sizeof(file_name), req); if (rc || !req->newptr) return rc; fw = firmware_get(file_name); if (fw == NULL) { device_printf(ocs->dev, "Unable to get Firmware. " "Make sure %s is copied to /boot/modules\n", file_name); return ENOENT; } fw_image = (const struct ocs_hw_grp_hdr *)fw->data; /* Check whether the firmware image provided is compatible with this * particular adapter */ if ((ocs_be32toh(fw_image->magic_number) != OCS_HW_OBJECT_G5) && (ocs_be32toh(fw_image->magic_number) != OCS_HW_OBJECT_G6)) { device_printf(ocs->dev, "Invalid FW image found. Magic: 0x%x Size: %zu\n", ocs_be32toh(fw_image->magic_number), fw->datasize); rc = -1; goto exit; } if (!strncmp(ocs->fw_version, fw_image->revision, strnlen(fw_image->revision, 16))) { device_printf(ocs->dev, "No update required. " "Firmware is already up to date.\n"); rc = 0; goto exit; } device_printf(ocs->dev, "Upgrading Firmware from %s to %s\n", ocs->fw_version, fw_image->revision); rc = ocs_firmware_write(ocs, fw->data, fw->datasize, &fw_change_status); if (rc) { ocs_log_err(ocs, "Firmware update failed with status = %d\n", rc); } else { ocs_log_info(ocs, "Firmware updated successfully\n"); switch (fw_change_status) { case 0x00: device_printf(ocs->dev, "No reset needed, new firmware is active.\n"); break; case 0x01: device_printf(ocs->dev, "A physical device reset (host reboot) is " "needed to activate the new firmware\n"); break; case 0x02: case 0x03: device_printf(ocs->dev, "Firmware is resetting to activate the new " "firmware; a host reboot is needed\n"); break; default: ocs_log_warn(ocs, "Unexpected change_status value: %d\n", fw_change_status); break; } } exit: /* Release firmware */ firmware_put(fw, FIRMWARE_UNLOAD); return rc; } static int ocs_sysctl_wwnn(SYSCTL_HANDLER_ARGS) { uint32_t rc = 1; ocs_t *ocs = oidp->oid_arg1; char old[64]; char new[64]; uint64_t *wwnn = NULL; ocs_xport_t *xport = ocs->xport; if (xport->req_wwnn) { wwnn = &xport->req_wwnn; memset(old, 0, sizeof(old)); snprintf(old, sizeof(old), "0x%llx" , (unsigned long long) *wwnn); } else { wwnn = ocs_hw_get_ptr(&ocs->hw, OCS_HW_WWN_NODE); memset(old, 0, sizeof(old)); snprintf(old, sizeof(old), "0x%llx" , (unsigned long long) ocs_htobe64(*wwnn)); } /* Read wwnn */ if (!req->newptr) { return (sysctl_handle_string(oidp, old, sizeof(old), req)); } /* Configure port wwnn */ rc = sysctl_handle_string(oidp, new, sizeof(new), req); if (rc) return (rc); if (strncmp(old, new, strlen(old)) == 0) { return 0; } return (set_req_wwnn(ocs, NULL, new)); } static int ocs_sysctl_wwpn(SYSCTL_HANDLER_ARGS) { uint32_t rc = 1; ocs_t *ocs = oidp->oid_arg1; char old[64]; char new[64]; uint64_t *wwpn = NULL; ocs_xport_t *xport = ocs->xport; if (xport->req_wwpn) { wwpn = &xport->req_wwpn; memset(old, 0, sizeof(old)); snprintf(old, sizeof(old), "0x%llx",(unsigned long long) *wwpn); } else { wwpn = ocs_hw_get_ptr(&ocs->hw, OCS_HW_WWN_PORT); memset(old, 0, sizeof(old)); snprintf(old, sizeof(old), "0x%llx",(unsigned long long) ocs_htobe64(*wwpn)); } /* Read wwpn */ if (!req->newptr) { return (sysctl_handle_string(oidp, old, sizeof(old), req)); } /* Configure port wwpn */ rc = sysctl_handle_string(oidp, new, sizeof(new), req); if (rc) return (rc); if (strncmp(old, new,
strlen(old)) == 0) { return 0; } return (set_req_wwpn(ocs, NULL, new)); } static int ocs_sysctl_current_topology(SYSCTL_HANDLER_ARGS) { ocs_t *ocs = oidp->oid_arg1; uint32_t value; ocs_hw_get(&ocs->hw, OCS_HW_TOPOLOGY, &value); return (sysctl_handle_int(oidp, &value, 0, req)); } static int ocs_sysctl_current_speed(SYSCTL_HANDLER_ARGS) { ocs_t *ocs = oidp->oid_arg1; uint32_t value; ocs_hw_get(&ocs->hw, OCS_HW_LINK_SPEED, &value); return (sysctl_handle_int(oidp, &value, 0, req)); } static int ocs_sysctl_config_topology(SYSCTL_HANDLER_ARGS) { uint32_t rc = 1; ocs_t *ocs = oidp->oid_arg1; uint32_t old_value; uint32_t new_value; char buf[64]; ocs_hw_get(&ocs->hw, OCS_HW_CONFIG_TOPOLOGY, &old_value); /* Read topology */ if (!req->newptr) { return (sysctl_handle_int(oidp, &old_value, 0, req)); } /* Configure topology */ rc = sysctl_handle_int(oidp, &new_value, 0, req); if (rc) return (rc); if (new_value == old_value) { return 0; } snprintf(buf, sizeof(buf), "%d", new_value); rc = set_configured_topology(ocs, NULL, buf); return rc; } static int ocs_sysctl_config_speed(SYSCTL_HANDLER_ARGS) { uint32_t rc = 1; ocs_t *ocs = oidp->oid_arg1; uint32_t old_value; uint32_t new_value; char buf[64]; ocs_hw_get(&ocs->hw, OCS_HW_LINK_CONFIG_SPEED, &old_value); /* Read speed */ if (!req->newptr) { return (sysctl_handle_int(oidp, &old_value, 0, req)); } /* Configure speed */ rc = sysctl_handle_int(oidp, &new_value, 0, req); if (rc) return (rc); if (new_value == old_value) { return 0; } snprintf(buf, sizeof(buf), "%d", new_value); rc = set_configured_speed(ocs, NULL, buf); return rc; } static int ocs_sysctl_fcid(SYSCTL_HANDLER_ARGS) { ocs_t *ocs = oidp->oid_arg1; char buf[64]; memset(buf, 0, sizeof(buf)); if (ocs->domain && ocs->domain->attached) { snprintf(buf, sizeof(buf), "0x%06x", ocs->domain->sport->fc_id); } return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int ocs_sysctl_port_state(SYSCTL_HANDLER_ARGS) { char new[256] = {0}; uint32_t rc = 1; ocs_xport_stats_t old; ocs_t *ocs = (ocs_t *)arg1; ocs_xport_status(ocs->xport, OCS_XPORT_CONFIG_PORT_STATUS, &old); /* Read port state */ if (!req->newptr) { snprintf(new, sizeof(new), "%s", (old.value == OCS_XPORT_PORT_OFFLINE) ?
"offline" : "online"); return (sysctl_handle_string(oidp, new, sizeof(new), req)); } /*Configure port state*/ rc = sysctl_handle_string(oidp, new, sizeof(new), req); if (rc) return (rc); if (ocs_strcasecmp(new, "offline") == 0) { if (old.value == OCS_XPORT_PORT_OFFLINE) { return (0); } ocs_log_debug(ocs, "Setting port to %s\n", new); rc = ocs_xport_control(ocs->xport, OCS_XPORT_PORT_OFFLINE); if (rc != 0) { ocs_log_err(ocs, "Setting port to offline failed\n"); } } else if (ocs_strcasecmp(new, "online") == 0) { if (old.value == OCS_XPORT_PORT_ONLINE) { return (0); } ocs_log_debug(ocs, "Setting port to %s\n", new); rc = ocs_xport_control(ocs->xport, OCS_XPORT_PORT_ONLINE); if (rc != 0) { ocs_log_err(ocs, "Setting port to online failed\n"); } } else { ocs_log_err(ocs, "Unsupported link state %s\n", new); rc = 1; } return (rc); } static int ocs_sysctl_vport_wwpn(SYSCTL_HANDLER_ARGS) { ocs_fcport *fcp = oidp->oid_arg1; char str_wwpn[64]; memset(str_wwpn, 0, sizeof(str_wwpn)); snprintf(str_wwpn, sizeof(str_wwpn), "0x%llx", (unsigned long long)fcp->vport->wwpn); return (sysctl_handle_string(oidp, str_wwpn, sizeof(str_wwpn), req)); } static int ocs_sysctl_vport_wwnn(SYSCTL_HANDLER_ARGS) { ocs_fcport *fcp = oidp->oid_arg1; char str_wwnn[64]; memset(str_wwnn, 0, sizeof(str_wwnn)); snprintf(str_wwnn, sizeof(str_wwnn), "0x%llx", (unsigned long long)fcp->vport->wwnn); return (sysctl_handle_string(oidp, str_wwnn, sizeof(str_wwnn), req)); } /** * @brief Initialize sysctl * * Initialize sysctl so elxsdkutil can query device information. * * @param ocs pointer to ocs * @return void */ static void ocs_sysctl_init(ocs_t *ocs) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(ocs->dev); struct sysctl_oid *tree = device_get_sysctl_tree(ocs->dev); struct sysctl_oid *vtree; const char *str = NULL; char sli_intf[16], name[16]; uint32_t rev, if_type, family, i; ocs_fcport *fcp = NULL; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "devid", CTLFLAG_RD, NULL, pci_get_devid(ocs->dev), "Device ID"); memset(ocs->modeldesc, 0, sizeof(ocs->modeldesc)); if (0 == pci_get_vpd_ident(ocs->dev, &str)) { snprintf(ocs->modeldesc, sizeof(ocs->modeldesc), "%s", str); } SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "modeldesc", CTLFLAG_RD, ocs->modeldesc, 0, "Model Description"); memset(ocs->serialnum, 0, sizeof(ocs->serialnum)); if (0 == pci_get_vpd_readonly(ocs->dev, "SN", &str)) { snprintf(ocs->serialnum, sizeof(ocs->serialnum), "%s", str); } SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "sn", CTLFLAG_RD, ocs->serialnum, 0, "Serial Number"); ocs_hw_get(&ocs->hw, OCS_HW_SLI_REV, &rev); ocs_hw_get(&ocs->hw, OCS_HW_IF_TYPE, &if_type); ocs_hw_get(&ocs->hw, OCS_HW_SLI_FAMILY, &family); memset(ocs->fwrev, 0, sizeof(ocs->fwrev)); snprintf(ocs->fwrev, sizeof(ocs->fwrev), "%s, sli-%d:%d:%x", (char *)ocs_hw_get_ptr(&ocs->hw, OCS_HW_FW_REV), rev, if_type, family); SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "fwrev", CTLFLAG_RD, ocs->fwrev, 0, "Firmware Revision"); memset(ocs->sli_intf, 0, sizeof(ocs->sli_intf)); snprintf(ocs->sli_intf, sizeof(sli_intf), "%08x", ocs_config_read32(ocs, SLI4_INTF_REG)); SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "sli_intf", CTLFLAG_RD, ocs->sli_intf, 0, "SLI Interface"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "fw_upgrade", CTLTYPE_STRING | CTLFLAG_RW, (void *)ocs, 0, ocs_sys_fwupgrade, "A", "Firmware grp file"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "wwnn", CTLTYPE_STRING | CTLFLAG_RW, ocs, 0, ocs_sysctl_wwnn, "A", "World 
Wide Node Name, wwnn should be in the format 0x"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "wwpn", CTLTYPE_STRING | CTLFLAG_RW, ocs, 0, ocs_sysctl_wwpn, "A", "World Wide Port Name, wwpn should be in the format 0x"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "current_topology", CTLTYPE_UINT | CTLFLAG_RD, ocs, 0, ocs_sysctl_current_topology, "IU", "Current Topology, 1-NPort; 2-Loop; 3-None"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "current_speed", CTLTYPE_UINT | CTLFLAG_RD, ocs, 0, ocs_sysctl_current_speed, "IU", "Current Speed"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "configured_topology", CTLTYPE_UINT | CTLFLAG_RW, ocs, 0, ocs_sysctl_config_topology, "IU", "Configured Topology, 0-Auto; 1-NPort; 2-Loop"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "configured_speed", CTLTYPE_UINT | CTLFLAG_RW, ocs, 0, ocs_sysctl_config_speed, "IU", "Configured Speed, 0-Auto, 2000, 4000, 8000, 16000, 32000"); SYSCTL_ADD_STRING(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "businfo", CTLFLAG_RD, ocs->businfo, 0, "Bus Info"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "fcid", CTLTYPE_STRING | CTLFLAG_RD, ocs, 0, ocs_sysctl_fcid, "A", "Port FC ID"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "port_state", CTLTYPE_STRING | CTLFLAG_RW, ocs, 0, ocs_sysctl_port_state, "A", "configured port state"); for (i = 0; i < ocs->num_vports; i++) { fcp = FCPORT(ocs, i+1); memset(name, 0, sizeof(name)); snprintf(name, sizeof(name), "vport%d", i); vtree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, name, CTLFLAG_RW, 0, "Virtual port"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(vtree), OID_AUTO, "wwnn", CTLTYPE_STRING | CTLFLAG_RW, fcp, 0, ocs_sysctl_vport_wwnn, "A", "World Wide Node Name"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(vtree), OID_AUTO, "wwpn", CTLTYPE_STRING | CTLFLAG_RW, fcp, 0, ocs_sysctl_vport_wwpn, "A", "World Wide Port Name"); } } /** * @brief Initialize the debug module * * Parse device hints (similar to Linux module parameters) here. To use, * run the command * kenv hint.ocs.U.P=V * from the command line replacing U with the unit # (0,1,...), * P with the parameter name (debug_mask), and V with the value */ void ocs_debug_attach(void *os) { struct ocs_softc *ocs = os; int error = 0; char *resname = NULL; int32_t unit = INT32_MAX; uint32_t ocs_debug_mask = 0; resname = "debug_mask"; if (0 == (error = resource_int_value(device_get_name(ocs->dev), device_get_unit(ocs->dev), resname, &ocs_debug_mask))) { device_printf(ocs->dev, "setting %s to %010x\n", resname, ocs_debug_mask); ocs_debug_enable(ocs_debug_mask); } unit = device_get_unit(ocs->dev); ocs->cdev = make_dev(&ocs_cdevsw, unit, UID_ROOT, GID_OPERATOR, 0640, "ocs%d", unit); if (ocs->cdev) { ocs->cdev->si_drv1 = ocs; } /* initialize sysctl interface */ ocs_sysctl_init(ocs); mtx_init(&ocs->dbg_lock, "ocs_dbg_lock", NULL, MTX_DEF); } /** * @brief Free the debug module */ void ocs_debug_detach(void *os) { struct ocs_softc *ocs = os; mtx_destroy(&ocs->dbg_lock); if (ocs->cdev) { ocs->cdev->si_drv1 = NULL; destroy_dev(ocs->cdev); } } Index: projects/pnfs-planb-server/sys/dev/ocs_fc/ocs_sport.c =================================================================== --- projects/pnfs-planb-server/sys/dev/ocs_fc/ocs_sport.c (revision 334966) +++ projects/pnfs-planb-server/sys/dev/ocs_fc/ocs_sport.c (revision 334967) @@ -1,1926 +1,1926 @@ /*- * Copyright (c) 2017 Broadcom. All rights reserved. * The term "Broadcom" refers to Broadcom Limited and/or its subsidiaries. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * @file * Details SLI port (sport) functions. */ #include "ocs.h" #include "ocs_fabric.h" #include "ocs_els.h" #include "ocs_device.h" static void ocs_vport_update_spec(ocs_sport_t *sport); static void ocs_vport_link_down(ocs_sport_t *sport); void ocs_mgmt_sport_list(ocs_textbuf_t *textbuf, void *sport); void ocs_mgmt_sport_get_all(ocs_textbuf_t *textbuf, void *sport); int ocs_mgmt_sport_get(ocs_textbuf_t *textbuf, char *parent, char *name, void *sport); int ocs_mgmt_sport_set(char *parent, char *name, char *value, void *sport); int ocs_mgmt_sport_exec(char *parent, char *action, void *arg_in, uint32_t arg_in_length, void *arg_out, uint32_t arg_out_length, void *sport); static ocs_mgmt_functions_t sport_mgmt_functions = { .get_list_handler = ocs_mgmt_sport_list, .get_handler = ocs_mgmt_sport_get, .get_all_handler = ocs_mgmt_sport_get_all, .set_handler = ocs_mgmt_sport_set, .exec_handler = ocs_mgmt_sport_exec, }; /*! @defgroup sport_sm SLI Port (sport) State Machine: States */ /** * @ingroup sport_sm * @brief SLI port HW callback. * * @par Description * This function is called in response to a HW sport event. This code resolves * the reference to the sport object, and posts the corresponding event. * * @param arg Pointer to the OCS context. * @param event HW sport event. * @param data Application-specific event (pointer to the sport). * * @return Returns 0 on success, or a negative error value on failure. 
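 *
 * @par Example
 * A minimal illustrative sketch (not taken from the driver source) of the
 * translation this callback performs; registration of the callback with the
 * HW layer is assumed to have happened elsewhere:
 * @code
 * // HW layer reports that a port attach completed successfully:
 * ocs_port_cb(ocs, OCS_HW_PORT_ATTACH_OK, sport);
 * // ...which posts OCS_EVT_SPORT_ATTACH_OK to sport->sm.
 * @endcode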
*/ int32_t ocs_port_cb(void *arg, ocs_hw_port_event_e event, void *data) { ocs_t *ocs = arg; ocs_sli_port_t *sport = data; switch (event) { case OCS_HW_PORT_ALLOC_OK: ocs_log_debug(ocs, "OCS_HW_PORT_ALLOC_OK\n"); ocs_sm_post_event(&sport->sm, OCS_EVT_SPORT_ALLOC_OK, NULL); break; case OCS_HW_PORT_ALLOC_FAIL: ocs_log_debug(ocs, "OCS_HW_PORT_ALLOC_FAIL\n"); ocs_sm_post_event(&sport->sm, OCS_EVT_SPORT_ALLOC_FAIL, NULL); break; case OCS_HW_PORT_ATTACH_OK: ocs_log_debug(ocs, "OCS_HW_PORT_ATTACH_OK\n"); ocs_sm_post_event(&sport->sm, OCS_EVT_SPORT_ATTACH_OK, NULL); break; case OCS_HW_PORT_ATTACH_FAIL: ocs_log_debug(ocs, "OCS_HW_PORT_ATTACH_FAIL\n"); ocs_sm_post_event(&sport->sm, OCS_EVT_SPORT_ATTACH_FAIL, NULL); break; case OCS_HW_PORT_FREE_OK: ocs_log_debug(ocs, "OCS_HW_PORT_FREE_OK\n"); ocs_sm_post_event(&sport->sm, OCS_EVT_SPORT_FREE_OK, NULL); break; case OCS_HW_PORT_FREE_FAIL: ocs_log_debug(ocs, "OCS_HW_PORT_FREE_FAIL\n"); ocs_sm_post_event(&sport->sm, OCS_EVT_SPORT_FREE_FAIL, NULL); break; default: ocs_log_test(ocs, "unknown event %#x\n", event); } return 0; } /** * @ingroup sport_sm * @brief Allocate a SLI port object. * * @par Description * A sport object is allocated and associated with the domain. Various * structure members are initialized. * * @param domain Pointer to the domain structure. * @param wwpn World wide port name in host endian. * @param wwnn World wide node name in host endian. * @param fc_id Port ID of the sport; may be specified, or UINT32_MAX to let the fabric choose. * @param enable_ini Enables initiator capability on this port using a non-zero value. * @param enable_tgt Enables target capability on this port using a non-zero value. * * @return Pointer to an ocs_sport_t object; or NULL. */ ocs_sport_t * ocs_sport_alloc(ocs_domain_t *domain, uint64_t wwpn, uint64_t wwnn, uint32_t fc_id, uint8_t enable_ini, uint8_t enable_tgt) { ocs_sport_t *sport; if (domain->ocs->ctrlmask & OCS_CTRLMASK_INHIBIT_INITIATOR) { enable_ini = 0; } /* Return a failure if this sport has already been allocated */ if (wwpn != 0) { sport = ocs_sport_find_wwn(domain, wwnn, wwpn); if (sport != NULL) { ocs_log_test(domain->ocs, "Failed: SPORT %016llx %016llx already allocated\n", (unsigned long long)wwnn, (unsigned long long)wwpn); return NULL; } } sport = ocs_malloc(domain->ocs, sizeof(*sport), OCS_M_NOWAIT | OCS_M_ZERO); if (sport) { sport->ocs = domain->ocs; ocs_snprintf(sport->display_name, sizeof(sport->display_name), "------"); sport->domain = domain; sport->lookup = spv_new(domain->ocs); sport->instance_index = domain->sport_instance_count++; ocs_sport_lock_init(sport); ocs_list_init(&sport->node_list, ocs_node_t, link); sport->sm.app = sport; sport->enable_ini = enable_ini; sport->enable_tgt = enable_tgt; sport->enable_rscn = (sport->enable_ini || (sport->enable_tgt && enable_target_rscn(sport->ocs))); /* Copy service parameters from domain */ ocs_memcpy(sport->service_params, domain->service_params, sizeof(fc_plogi_payload_t)); /* Update requested fc_id */ sport->fc_id = fc_id; /* Update the sport's service parameters for the new wwn's */ sport->wwpn = wwpn; sport->wwnn = wwnn; ocs_snprintf(sport->wwnn_str, sizeof(sport->wwnn_str), "%016llx" , (unsigned long long)wwnn); /* Initialize node group list */ ocs_lock_init(sport->ocs, &sport->node_group_lock, "node_group_lock[%d]", sport->instance_index); ocs_list_init(&sport->node_group_dir_list, ocs_node_group_dir_t, link); /* if this is the "first" sport of the domain, then make it the "phys" sport */ ocs_domain_lock(domain); if
(ocs_list_empty(&domain->sport_list)) { domain->sport = sport; } ocs_list_add_tail(&domain->sport_list, sport); ocs_domain_unlock(domain); sport->mgmt_functions = &sport_mgmt_functions; ocs_log_debug(domain->ocs, "[%s] allocate sport\n", sport->display_name); } return sport; } /** * @ingroup sport_sm * @brief Free a SLI port object. * * @par Description * The sport object is freed. * * @param sport Pointer to the SLI port object. * * @return None. */ void ocs_sport_free(ocs_sport_t *sport) { ocs_domain_t *domain; ocs_node_group_dir_t *node_group_dir; ocs_node_group_dir_t *node_group_dir_next; int post_all_free = FALSE; if (sport) { domain = sport->domain; ocs_log_debug(domain->ocs, "[%s] free sport\n", sport->display_name); ocs_domain_lock(domain); ocs_list_remove(&domain->sport_list, sport); ocs_sport_lock(sport); spv_del(sport->lookup); sport->lookup = NULL; ocs_lock(&domain->lookup_lock); /* Remove the sport from the domain's sparse vector lookup table */ spv_set(domain->lookup, sport->fc_id, NULL); ocs_unlock(&domain->lookup_lock); /* if this is the physical sport, then clear it out of the domain */ if (sport == domain->sport) { domain->sport = NULL; } /* * If the domain's sport_list is empty, then post the ALL_CHILD_NODES_FREE event to the domain, * after the lock is released. The domain may be freed as a result of the event. */ if (ocs_list_empty(&domain->sport_list)) { post_all_free = TRUE; } /* Free any node group directories */ ocs_lock(&sport->node_group_lock); ocs_list_foreach_safe(&sport->node_group_dir_list, node_group_dir, node_group_dir_next) { ocs_unlock(&sport->node_group_lock); ocs_node_group_dir_free(node_group_dir); ocs_lock(&sport->node_group_lock); } ocs_unlock(&sport->node_group_lock); ocs_sport_unlock(sport); ocs_domain_unlock(domain); if (post_all_free) { ocs_domain_post_event(domain, OCS_EVT_ALL_CHILD_NODES_FREE, NULL); } ocs_sport_lock_free(sport); ocs_lock_free(&sport->node_group_lock); ocs_scsi_sport_deleted(sport); ocs_free(domain->ocs, sport, sizeof(*sport)); } } /** * @ingroup sport_sm * @brief Free memory resources of a SLI port object. * * @par Description * The sport's child nodes are freed first, and then the sport object itself is freed. * * @param sport Pointer to the SLI port object. * * @return None. */ void ocs_sport_force_free(ocs_sport_t *sport) { ocs_node_t *node; ocs_node_t *next; /* shutdown sm processing */ ocs_sm_disable(&sport->sm); ocs_scsi_notify_sport_force_free(sport); ocs_sport_lock(sport); ocs_list_foreach_safe(&sport->node_list, node, next) { ocs_node_force_free(node); } ocs_sport_unlock(sport); ocs_sport_free(sport); } /** * @ingroup sport_sm * @brief Return a SLI port object, given an instance index. * * @par Description * A pointer to a sport object is returned, given its instance @c index. * * @param domain Pointer to the domain. * @param index Instance index value to find. * * @return Returns a pointer to the ocs_sport_t object; or NULL. */ ocs_sport_t * ocs_sport_get_instance(ocs_domain_t *domain, uint32_t index) { ocs_sport_t *sport; ocs_domain_lock(domain); ocs_list_foreach(&domain->sport_list, sport) { if (sport->instance_index == index) { ocs_domain_unlock(domain); return sport; } } ocs_domain_unlock(domain); return NULL; } /** * @ingroup sport_sm * @brief Find a SLI port object, given an FC_ID. * * @par Description * Returns a pointer to the sport object, given an FC_ID. * * @param domain Pointer to the domain. * @param d_id FC_ID to find. * * @return Returns a pointer to the ocs_sport_t; or NULL.
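 *
 * @par Example
 * An illustrative lookup sketch; the FC_ID value is hypothetical:
 * @code
 * ocs_sport_t *s = ocs_sport_find(domain, 0x010200);
 * if (s == NULL) {
 *     // no sport is currently mapped to this FC_ID
 * }
 * @endcode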
*/ ocs_sport_t * ocs_sport_find(ocs_domain_t *domain, uint32_t d_id) { ocs_sport_t *sport; ocs_assert(domain, NULL); ocs_lock(&domain->lookup_lock); if (domain->lookup == NULL) { ocs_log_test(domain->ocs, "assertion failed: domain->lookup is not valid\n"); ocs_unlock(&domain->lookup_lock); return NULL; } sport = spv_get(domain->lookup, d_id); ocs_unlock(&domain->lookup_lock); return sport; } /** * @ingroup sport_sm * @brief Find a SLI port, given the WWNN and WWPN. * * @par Description * Return a pointer to a sport, given the WWNN and WWPN. * * @param domain Pointer to the domain. * @param wwnn World wide node name. * @param wwpn World wide port name. * * @return Returns a pointer to a SLI port, if found; or NULL. */ ocs_sport_t * ocs_sport_find_wwn(ocs_domain_t *domain, uint64_t wwnn, uint64_t wwpn) { ocs_sport_t *sport = NULL; ocs_domain_lock(domain); ocs_list_foreach(&domain->sport_list, sport) { if ((sport->wwnn == wwnn) && (sport->wwpn == wwpn)) { ocs_domain_unlock(domain); return sport; } } ocs_domain_unlock(domain); return NULL; } /** * @ingroup sport_sm * @brief Request a SLI port attach. * * @par Description * External call to request an attach for a sport, given an FC_ID. * * @param sport Pointer to the sport context. * @param fc_id FC_ID of which to attach. * * @return Returns 0 on success, or a negative error value on failure. */ int32_t ocs_sport_attach(ocs_sport_t *sport, uint32_t fc_id) { ocs_hw_rtn_e rc; ocs_node_t *node; /* Set our lookup */ ocs_lock(&sport->domain->lookup_lock); spv_set(sport->domain->lookup, fc_id, sport); ocs_unlock(&sport->domain->lookup_lock); /* Update our display_name */ ocs_node_fcid_display(fc_id, sport->display_name, sizeof(sport->display_name)); ocs_sport_lock(sport); ocs_list_foreach(&sport->node_list, node) { ocs_node_update_display_name(node); } ocs_sport_unlock(sport); ocs_log_debug(sport->ocs, "[%s] attach sport: fc_id x%06x\n", sport->display_name, fc_id); rc = ocs_hw_port_attach(&sport->ocs->hw, sport, fc_id); if (rc != OCS_HW_RTN_SUCCESS) { ocs_log_err(sport->ocs, "ocs_hw_port_attach failed: %d\n", rc); return -1; } return 0; } /** * @brief Common SLI port state machine declarations and initialization. */ #define std_sport_state_decl() \ ocs_sport_t *sport = NULL; \ ocs_domain_t *domain = NULL; \ ocs_t *ocs = NULL; \ \ ocs_assert(ctx, NULL); \ sport = ctx->app; \ ocs_assert(sport, NULL); \ \ domain = sport->domain; \ ocs_assert(domain, NULL); \ ocs = sport->ocs; \ ocs_assert(ocs, NULL); /** * @brief Common SLI port state machine trace logging. */ #define sport_sm_trace(sport) \ do { \ if (OCS_LOG_ENABLE_DOMAIN_SM_TRACE(ocs)) \ ocs_log_debug(ocs, "[%s] %-20s\n", sport->display_name, ocs_sm_event_name(evt)); \ } while (0) /** * @brief SLI port state machine: Common event handler. * * @par Description * Handle common sport events. * * @param funcname Function name to display. * @param ctx Sport state machine context. * @param evt Event to process. * @param arg Per event optional argument. * * @return Returns NULL. 
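 *
 * @par Example
 * Each concrete state in this file delegates events it does not handle to
 * this function with the following idiom:
 * @code
 * default:
 *     __ocs_sport_common(__func__, ctx, evt, arg);
 *     return NULL;
 * @endcode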
*/ static void * __ocs_sport_common(const char *funcname, ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) { std_sport_state_decl(); switch(evt) { case OCS_EVT_ENTER: case OCS_EVT_REENTER: case OCS_EVT_EXIT: case OCS_EVT_ALL_CHILD_NODES_FREE: break; case OCS_EVT_SPORT_ATTACH_OK: ocs_sm_transition(ctx, __ocs_sport_attached, NULL); break; case OCS_EVT_SHUTDOWN: { ocs_node_t *node; ocs_node_t *node_next; int node_list_empty; /* Flag this sport as shutting down */ sport->shutting_down = 1; if (sport->is_vport) { ocs_vport_link_down(sport); } ocs_sport_lock(sport); node_list_empty = ocs_list_empty(&sport->node_list); ocs_sport_unlock(sport); if (node_list_empty) { /* sm: node list is empty / ocs_hw_port_free * Remove the sport from the domain's sparse vector lookup table */ ocs_lock(&domain->lookup_lock); spv_set(domain->lookup, sport->fc_id, NULL); ocs_unlock(&domain->lookup_lock); ocs_sm_transition(ctx, __ocs_sport_wait_port_free, NULL); if (ocs_hw_port_free(&ocs->hw, sport)) { ocs_log_test(sport->ocs, "ocs_hw_port_free failed\n"); /* Not much we can do, free the sport anyways */ ocs_sport_free(sport); } } else { /* sm: node list is not empty / shutdown nodes */ ocs_sm_transition(ctx, __ocs_sport_wait_shutdown, NULL); ocs_sport_lock(sport); ocs_list_foreach_safe(&sport->node_list, node, node_next) { /* * If this is a vport, log out of the fabric controller so that it * deletes the vport on the switch. */ if ((node->rnode.fc_id == FC_ADDR_FABRIC) && (sport->is_vport)) { /* if link is down, don't send logo */ if (sport->ocs->hw.link.status == SLI_LINK_STATUS_DOWN) { ocs_node_post_event(node, OCS_EVT_SHUTDOWN, NULL); } else { ocs_log_debug(ocs, "[%s] sport shutdown vport, sending logo to node\n", node->display_name); if (ocs_send_logo(node, OCS_FC_ELS_SEND_DEFAULT_TIMEOUT, 0, NULL, NULL) == NULL) { /* failed to send LOGO, go ahead and cleanup node anyways */ node_printf(node, "Failed to send LOGO\n"); ocs_node_post_event(node, OCS_EVT_SHUTDOWN_EXPLICIT_LOGO, NULL); } else { /* sent LOGO, wait for response */ ocs_node_transition(node, __ocs_d_wait_logo_rsp, NULL); } } } else { ocs_node_post_event(node, OCS_EVT_SHUTDOWN, NULL); } } ocs_sport_unlock(sport); } break; } default: ocs_log_test(sport->ocs, "[%s] %-20s %-20s not handled\n", sport->display_name, funcname, ocs_sm_event_name(evt)); break; } return NULL; } /** * @ingroup sport_sm * @brief SLI port state machine: Physical sport allocated. * * @par Description * This is the initial state for sport objects. * * @param ctx Remote node state machine context. * @param evt Event to process. * @param arg Per event optional argument. * * @return Returns NULL. */ void * __ocs_sport_allocated(ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) { std_sport_state_decl(); sport_sm_trace(sport); switch(evt) { /* the physical sport is attached */ case OCS_EVT_SPORT_ATTACH_OK: ocs_assert(sport == domain->sport, NULL); ocs_sm_transition(ctx, __ocs_sport_attached, NULL); break; case OCS_EVT_SPORT_ALLOC_OK: /* ignore */ break; default: __ocs_sport_common(__func__, ctx, evt, arg); return NULL; } return NULL; } /** * @ingroup sport_sm * @brief SLI port state machine: Handle initial virtual port events. * * @par Description * This state is entered when a virtual port is instantiated. * * @param ctx Remote node state machine context. * @param evt Event to process. * @param arg Per event optional argument. * * @return Returns NULL.
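 *
 * @par Example
 * A newly allocated vport sport is driven into this state with a state
 * machine transition, as ocs_vport_start() does:
 * @code
 * ocs_sm_transition(&sport->sm, __ocs_sport_vport_init, NULL);
 * @endcode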
*/ void * __ocs_sport_vport_init(ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) { std_sport_state_decl(); sport_sm_trace(sport); switch(evt) { case OCS_EVT_ENTER: { uint64_t be_wwpn = ocs_htobe64(sport->wwpn); if (sport->wwpn == 0) { ocs_log_debug(ocs, "vport: letting f/w select WWN\n"); } if (sport->fc_id != UINT32_MAX) { ocs_log_debug(ocs, "vport: hard coding port id: %x\n", sport->fc_id); } ocs_sm_transition(ctx, __ocs_sport_vport_wait_alloc, NULL); /* If wwpn is zero, then we'll let the f/w select the WWN */ if (ocs_hw_port_alloc(&ocs->hw, sport, sport->domain, (sport->wwpn == 0) ? NULL : (uint8_t *)&be_wwpn)) { ocs_log_err(ocs, "Can't allocate port\n"); break; } break; } default: __ocs_sport_common(__func__, ctx, evt, arg); return NULL; } return NULL; } /** * @ingroup sport_sm * @brief SLI port state machine: Wait for the HW SLI port allocation to complete. * * @par Description * Waits for the HW sport allocation request to complete. * * @param ctx Remote node state machine context. * @param evt Event to process. * @param arg Per event optional argument. * * @return Returns NULL. */ void * __ocs_sport_vport_wait_alloc(ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) { std_sport_state_decl(); sport_sm_trace(sport); switch(evt) { case OCS_EVT_SPORT_ALLOC_OK: { fc_plogi_payload_t *sp = (fc_plogi_payload_t*) sport->service_params; ocs_node_t *fabric; /* If we let the f/w assign the wwn's, then update the sport wwn's with those returned by the hw */ if (sport->wwnn == 0) { sport->wwnn = ocs_be64toh(sport->sli_wwnn); sport->wwpn = ocs_be64toh(sport->sli_wwpn); ocs_snprintf(sport->wwnn_str, sizeof(sport->wwnn_str), "%016llx", (unsigned long long) sport->wwnn); } /* Update the sport's service parameters */ sp->port_name_hi = ocs_htobe32((uint32_t) (sport->wwpn >> 32ll)); sp->port_name_lo = ocs_htobe32((uint32_t) sport->wwpn); sp->node_name_hi = ocs_htobe32((uint32_t) (sport->wwnn >> 32ll)); sp->node_name_lo = ocs_htobe32((uint32_t) sport->wwnn); /* if sport->fc_id is uninitialized, then request that the fabric node use FDISC * to find an fc_id. Otherwise we're restoring vports, or we're in * fabric emulation mode, so attach the fc_id */ if (sport->fc_id == UINT32_MAX) { fabric = ocs_node_alloc(sport, FC_ADDR_FABRIC, FALSE, FALSE); if (fabric == NULL) { ocs_log_err(ocs, "ocs_node_alloc() failed\n"); return NULL; } ocs_node_transition(fabric, __ocs_vport_fabric_init, NULL); } else { ocs_snprintf(sport->wwnn_str, sizeof(sport->wwnn_str), "%016llx", (unsigned long long)sport->wwnn); ocs_sport_attach(sport, sport->fc_id); } ocs_sm_transition(ctx, __ocs_sport_vport_allocated, NULL); break; } default: __ocs_sport_common(__func__, ctx, evt, arg); return NULL; } return NULL; } /** * @ingroup sport_sm * @brief SLI port state machine: virtual sport allocated. * * @par Description * This state is entered after the sport is allocated; it then waits for a fabric node * FDISC to complete, which requests a sport attach. * The sport attach complete is handled in this state. * * @param ctx Remote node state machine context. * @param evt Event to process. * @param arg Per event optional argument. * * @return Returns NULL.
*/ void * __ocs_sport_vport_allocated(ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) { std_sport_state_decl(); sport_sm_trace(sport); switch(evt) { case OCS_EVT_SPORT_ATTACH_OK: { ocs_node_t *node; if (!(domain->femul_enable)) { /* Find our fabric node, and forward this event */ node = ocs_node_find(sport, FC_ADDR_FABRIC); if (node == NULL) { ocs_log_test(ocs, "can't find node %06x\n", FC_ADDR_FABRIC); break; } /* sm: / forward sport attach to fabric node */ ocs_node_post_event(node, evt, NULL); } ocs_sm_transition(ctx, __ocs_sport_attached, NULL); break; } default: __ocs_sport_common(__func__, ctx, evt, arg); return NULL; } return NULL; } /** * @ingroup sport_sm * @brief SLI port state machine: Attached. * * @par Description * State entered after the sport attach has completed. * * @param ctx Remote node state machine context. * @param evt Event to process. * @param arg Per event optional argument. * * @return Returns NULL. */ void * __ocs_sport_attached(ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) { std_sport_state_decl(); sport_sm_trace(sport); switch(evt) { case OCS_EVT_ENTER: { ocs_node_t *node; - ocs_log_debug(ocs, "[%s] SPORT attached WWPN %016llx WWNN %016llx \n", (unsigned long long)sport->display_name, + ocs_log_debug(ocs, "[%s] SPORT attached WWPN %016llx WWNN %016llx \n", sport->display_name, sport->wwpn, sport->wwnn); ocs_sport_lock(sport); ocs_list_foreach(&sport->node_list, node) { ocs_node_update_display_name(node); } ocs_sport_unlock(sport); sport->tgt_id = sport->fc_id; if (sport->enable_ini) { ocs_scsi_ini_new_sport(sport); } if (sport->enable_tgt) { ocs_scsi_tgt_new_sport(sport); } /* Update the vport (if its not the physical sport) parameters */ if (sport->is_vport) { ocs_vport_update_spec(sport); } break; } case OCS_EVT_EXIT: - ocs_log_debug(ocs, "[%s] SPORT deattached WWPN %016llx WWNN %016llx \n", (unsigned long long)sport->display_name, + ocs_log_debug(ocs, "[%s] SPORT deattached WWPN %016llx WWNN %016llx \n", sport->display_name, sport->wwpn, sport->wwnn); if (sport->enable_ini) { ocs_scsi_ini_del_sport(sport); } if (sport->enable_tgt) { ocs_scsi_tgt_del_sport(sport); } break; default: __ocs_sport_common(__func__, ctx, evt, arg); return NULL; } return NULL; } /** * @ingroup sport_sm * @brief SLI port state machine: Wait for the node shutdowns to complete. * * @par Description * Waits for the ALL_CHILD_NODES_FREE event to be posted from the node * shutdown process. * * @param ctx Remote node state machine context. * @param evt Event to process. * @param arg Per event optional argument. * * @return Returns NULL. 
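 *
 * @par Example
 * A sketch of how a teardown reaches this state; the shutdown event is
 * posted from elsewhere in the driver:
 * @code
 * ocs_sm_post_event(&sport->sm, OCS_EVT_SHUTDOWN, NULL);
 * // once every child node is freed, this state receives
 * // OCS_EVT_ALL_CHILD_NODES_FREE and invokes ocs_hw_port_free()
 * @endcode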
*/ void * __ocs_sport_wait_shutdown(ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) { std_sport_state_decl(); sport_sm_trace(sport); switch(evt) { case OCS_EVT_SPORT_ALLOC_OK: case OCS_EVT_SPORT_ALLOC_FAIL: case OCS_EVT_SPORT_ATTACH_OK: case OCS_EVT_SPORT_ATTACH_FAIL: /* ignore these events - just wait for the all free event */ break; case OCS_EVT_ALL_CHILD_NODES_FREE: { /* Remove the sport from the domain's sparse vector lookup table */ ocs_lock(&domain->lookup_lock); spv_set(domain->lookup, sport->fc_id, NULL); ocs_unlock(&domain->lookup_lock); ocs_sm_transition(ctx, __ocs_sport_wait_port_free, NULL); if (ocs_hw_port_free(&ocs->hw, sport)) { ocs_log_err(sport->ocs, "ocs_hw_port_free failed\n"); /* Not much we can do, free the sport anyways */ ocs_sport_free(sport); } break; } default: __ocs_sport_common(__func__, ctx, evt, arg); return NULL; } return NULL; } /** * @ingroup sport_sm * @brief SLI port state machine: Wait for the HW's port free to complete. * * @par Description * Waits for the HW's port free to complete. * * @param ctx Remote node state machine context. * @param evt Event to process. * @param arg Per event optional argument. * * @return Returns NULL. */ void * __ocs_sport_wait_port_free(ocs_sm_ctx_t *ctx, ocs_sm_event_t evt, void *arg) { std_sport_state_decl(); sport_sm_trace(sport); switch(evt) { case OCS_EVT_SPORT_ATTACH_OK: /* Ignore as we are waiting for the free CB */ break; case OCS_EVT_SPORT_FREE_OK: { /* All done, free myself */ ocs_sport_free(sport); break; } default: __ocs_sport_common(__func__, ctx, evt, arg); return NULL; } return NULL; } /** * @ingroup sport_sm * @brief Start the vports on a domain * * @par Description * Use the vport specification to find the associated vports and start them. * * @param domain Pointer to the domain context. * * @return Returns 0 on success, or a negative error value on failure. */ int32_t ocs_vport_start(ocs_domain_t *domain) { ocs_t *ocs = domain->ocs; ocs_xport_t *xport = ocs->xport; ocs_vport_spec_t *vport; ocs_vport_spec_t *next; ocs_sport_t *sport; int32_t rc = 0; ocs_device_lock(ocs); ocs_list_foreach_safe(&xport->vport_list, vport, next) { if (vport->domain_instance == domain->instance_index && vport->sport == NULL) { /* If role not set, skip this vport */ if (!(vport->enable_ini || vport->enable_tgt)) { continue; } /* Allocate a sport */ vport->sport = sport = ocs_sport_alloc(domain, vport->wwpn, vport->wwnn, vport->fc_id, vport->enable_ini, vport->enable_tgt); if (sport == NULL) { rc = -1; } else { sport->is_vport = 1; sport->tgt_data = vport->tgt_data; sport->ini_data = vport->ini_data; /* Transition to vport_init */ ocs_sm_transition(&sport->sm, __ocs_sport_vport_init, NULL); } } } ocs_device_unlock(ocs); return rc; } /** * @ingroup sport_sm * @brief Clear the sport reference in the vport specification. * * @par Description * Clear the sport pointer on the vport specification when the vport is torn down. This allows it to be * re-created when the link is re-established. * * @param sport Pointer to the sport context. */ static void ocs_vport_link_down(ocs_sport_t *sport) { ocs_t *ocs = sport->ocs; ocs_xport_t *xport = ocs->xport; ocs_vport_spec_t *vport; ocs_device_lock(ocs); ocs_list_foreach(&xport->vport_list, vport) { if (vport->sport == sport) { vport->sport = NULL; break; } } ocs_device_unlock(ocs); } /** * @ingroup sport_sm * @brief Allocate a new virtual SLI port. * * @par Description * A new sport is created, in response to an external management request. 
* * @n @b Note: If the WWPN is zero, the firmware will assign the WWNs. * * @param domain Pointer to the domain context. * @param wwpn World wide port name. * @param wwnn World wide node name. * @param fc_id Requested port ID (used in fabric emulation mode). * @param ini TRUE, if port is created as an initiator node. * @param tgt TRUE, if port is created as a target node. * @param tgt_data Pointer to target specific data. * @param ini_data Pointer to initiator specific data. * @param restore_vport If TRUE, then the vport will be re-created automatically * on link disruption. * * @return Returns 0 on success; or a negative error value on failure. */ int32_t ocs_sport_vport_new(ocs_domain_t *domain, uint64_t wwpn, uint64_t wwnn, uint32_t fc_id, uint8_t ini, uint8_t tgt, void *tgt_data, void *ini_data, uint8_t restore_vport) { ocs_sport_t *sport; if (ini && (domain->ocs->enable_ini == 0)) { ocs_log_test(domain->ocs, "driver initiator functionality not enabled\n"); return -1; } if (tgt && (domain->ocs->enable_tgt == 0)) { ocs_log_test(domain->ocs, "driver target functionality not enabled\n"); return -1; } /* Create a vport spec if we need to recreate this vport after a link up event */ if (restore_vport) { if (ocs_vport_create_spec(domain->ocs, wwnn, wwpn, fc_id, ini, tgt, tgt_data, ini_data)) { ocs_log_test(domain->ocs, "failed to create vport object entry\n"); return -1; } return ocs_vport_start(domain); } /* Allocate a sport */ sport = ocs_sport_alloc(domain, wwpn, wwnn, fc_id, ini, tgt); if (sport == NULL) { return -1; } sport->is_vport = 1; sport->tgt_data = tgt_data; sport->ini_data = ini_data; /* Transition to vport_init */ ocs_sm_transition(&sport->sm, __ocs_sport_vport_init, NULL); return 0; } int32_t ocs_sport_vport_alloc(ocs_domain_t *domain, ocs_vport_spec_t *vport) { ocs_sport_t *sport = NULL; if (domain == NULL) { return (0); } ocs_assert((vport->sport == NULL), -1); /* Allocate a sport */ vport->sport = sport = ocs_sport_alloc(domain, vport->wwpn, vport->wwnn, UINT32_MAX, vport->enable_ini, vport->enable_tgt); if (sport == NULL) { return -1; } sport->is_vport = 1; sport->tgt_data = vport->tgt_data; sport->ini_data = vport->ini_data; /* Transition to vport_init */ ocs_sm_transition(&sport->sm, __ocs_sport_vport_init, NULL); return (0); } /** * @ingroup sport_sm * @brief Remove a previously-allocated virtual port. * * @par Description * A previously-allocated virtual port is removed by posting the shutdown event to the * sport with a matching WWN. * * @param ocs Pointer to the device object. * @param domain Pointer to the domain structure (may be NULL). * @param wwpn World wide port name of the port to delete (host endian). * @param wwnn World wide node name of the port to delete (host endian). * * @return Returns 0 on success, or a negative error value on failure.
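 *
 * @par Example
 * An illustrative call; wwpn and wwnn are hypothetical host-endian values
 * matching a previously created vport:
 * @code
 * (void)ocs_sport_vport_del(ocs, domain, wwpn, wwnn);
 * @endcode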
*/ int32_t ocs_sport_vport_del(ocs_t *ocs, ocs_domain_t *domain, uint64_t wwpn, uint64_t wwnn) { ocs_xport_t *xport = ocs->xport; ocs_sport_t *sport; int found = 0; ocs_vport_spec_t *vport; ocs_vport_spec_t *next; uint32_t instance; /* If no domain is given, use instance 0, otherwise use domain instance */ if (domain == NULL) { instance = 0; } else { instance = domain->instance_index; } /* walk the ocs_vport_list and remove from there */ ocs_device_lock(ocs); ocs_list_foreach_safe(&xport->vport_list, vport, next) { if ((vport->domain_instance == instance) && (vport->wwpn == wwpn) && (vport->wwnn == wwnn)) { vport->sport = NULL; break; } } ocs_device_unlock(ocs); if (domain == NULL) { /* No domain means no sport to look for */ return 0; } ocs_domain_lock(domain); ocs_list_foreach(&domain->sport_list, sport) { if ((sport->wwpn == wwpn) && (sport->wwnn == wwnn)) { found = 1; break; } } if (found) { /* Shutdown this SPORT */ ocs_sm_post_event(&sport->sm, OCS_EVT_SHUTDOWN, NULL); } ocs_domain_unlock(domain); return 0; } /** * @brief Force free all saved vports. * * @par Description * Delete all device vports. * * @param ocs Pointer to the device object. * * @return None. */ void ocs_vport_del_all(ocs_t *ocs) { ocs_xport_t *xport = ocs->xport; ocs_vport_spec_t *vport; ocs_vport_spec_t *next; ocs_device_lock(ocs); ocs_list_foreach_safe(&xport->vport_list, vport, next) { ocs_list_remove(&xport->vport_list, vport); ocs_free(ocs, vport, sizeof(*vport)); } ocs_device_unlock(ocs); } /** * @ingroup sport_sm * @brief Generate a SLI port ddump. * * @par Description * Generates the SLI port ddump data. * * @param textbuf Pointer to the text buffer. * @param sport Pointer to the SLI-4 port. * * @return Returns 0 on success, or a negative value on failure. */ int ocs_ddump_sport(ocs_textbuf_t *textbuf, ocs_sli_port_t *sport) { ocs_node_t *node; ocs_node_group_dir_t *node_group_dir; int retval = 0; ocs_ddump_section(textbuf, "sport", sport->instance_index); ocs_ddump_value(textbuf, "display_name", "%s", sport->display_name); ocs_ddump_value(textbuf, "is_vport", "%d", sport->is_vport); ocs_ddump_value(textbuf, "enable_ini", "%d", sport->enable_ini); ocs_ddump_value(textbuf, "enable_tgt", "%d", sport->enable_tgt); ocs_ddump_value(textbuf, "shutting_down", "%d", sport->shutting_down); ocs_ddump_value(textbuf, "topology", "%d", sport->topology); ocs_ddump_value(textbuf, "p2p_winner", "%d", sport->p2p_winner); ocs_ddump_value(textbuf, "p2p_port_id", "%06x", sport->p2p_port_id); ocs_ddump_value(textbuf, "p2p_remote_port_id", "%06x", sport->p2p_remote_port_id); ocs_ddump_value(textbuf, "wwpn", "%016llx", (unsigned long long)sport->wwpn); ocs_ddump_value(textbuf, "wwnn", "%016llx", (unsigned long long)sport->wwnn); /*TODO: service_params */ ocs_ddump_value(textbuf, "indicator", "x%x", sport->indicator); ocs_ddump_value(textbuf, "fc_id", "x%06x", sport->fc_id); ocs_ddump_value(textbuf, "index", "%d", sport->index); ocs_display_sparams(NULL, "sport_sparams", 1, textbuf, sport->service_params+4); /* HLM dump */ ocs_ddump_section(textbuf, "hlm", sport->instance_index); ocs_lock(&sport->node_group_lock); ocs_list_foreach(&sport->node_group_dir_list, node_group_dir) { ocs_remote_node_group_t *remote_node_group; ocs_ddump_section(textbuf, "node_group_dir", node_group_dir->instance_index); ocs_ddump_value(textbuf, "node_group_list_count", "%d", node_group_dir->node_group_list_count); ocs_ddump_value(textbuf, "next_idx", "%d", node_group_dir->next_idx); ocs_list_foreach(&node_group_dir->node_group_list, remote_node_group) { 
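/* Emit a nested ddump section for each remote node group in this directory entry. */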
ocs_ddump_section(textbuf, "node_group", remote_node_group->instance_index); ocs_ddump_value(textbuf, "indicator", "x%x", remote_node_group->indicator); ocs_ddump_value(textbuf, "index", "x%x", remote_node_group->index); ocs_ddump_value(textbuf, "instance_index", "x%x", remote_node_group->instance_index); ocs_ddump_endsection(textbuf, "node_group", 0); } ocs_ddump_endsection(textbuf, "node_group_dir", 0); } ocs_unlock(&sport->node_group_lock); ocs_ddump_endsection(textbuf, "hlm", sport->instance_index); ocs_scsi_ini_ddump(textbuf, OCS_SCSI_DDUMP_SPORT, sport); ocs_scsi_tgt_ddump(textbuf, OCS_SCSI_DDUMP_SPORT, sport); /* Dump all the nodes */ if (ocs_sport_lock_try(sport) != TRUE) { /* Didn't get lock */ return -1; } /* Here the sport lock is held */ ocs_list_foreach(&sport->node_list, node) { retval = ocs_ddump_node(textbuf, node); if (retval != 0) { break; } } ocs_sport_unlock(sport); ocs_ddump_endsection(textbuf, "sport", sport->index); return retval; } void ocs_mgmt_sport_list(ocs_textbuf_t *textbuf, void *object) { ocs_node_t *node; ocs_sport_t *sport = (ocs_sport_t *)object; ocs_mgmt_start_section(textbuf, "sport", sport->instance_index); /* Add my status values to textbuf */ ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "indicator"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "fc_id"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "index"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "display_name"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "is_vport"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "enable_ini"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "enable_tgt"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "p2p"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "p2p_winner"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "p2p_port_id"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "p2p_remote_port_id"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "wwpn"); ocs_mgmt_emit_property_name(textbuf, MGMT_MODE_RD, "wwnn"); if (ocs_sport_lock_try(sport) == TRUE) { /* If we get here, then we are holding the sport lock */ ocs_list_foreach(&sport->node_list, node) { if ((node->mgmt_functions) && (node->mgmt_functions->get_list_handler)) { node->mgmt_functions->get_list_handler(textbuf, node); } } ocs_sport_unlock(sport); } ocs_mgmt_end_section(textbuf, "sport", sport->instance_index); } int ocs_mgmt_sport_get(ocs_textbuf_t *textbuf, char *parent, char *name, void *object) { ocs_node_t *node; ocs_sport_t *sport = (ocs_sport_t *)object; char qualifier[80]; int retval = -1; ocs_mgmt_start_section(textbuf, "sport", sport->instance_index); snprintf(qualifier, sizeof(qualifier), "%s/sport[%d]", parent, sport->instance_index); /* If it doesn't start with my qualifier I don't know what to do with it */ if (ocs_strncmp(name, qualifier, strlen(qualifier)) == 0) { char *unqualified_name = name + strlen(qualifier) +1; /* See if it's a value I can supply */ if (ocs_strcmp(unqualified_name, "indicator") == 0) { ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "indicator", "0x%x", sport->indicator); retval = 0; } else if (ocs_strcmp(unqualified_name, "fc_id") == 0) { ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "fc_id", "0x%06x", sport->fc_id); retval = 0; } else if (ocs_strcmp(unqualified_name, "index") == 0) { ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "index", "%d", sport->index); retval = 0; } else if (ocs_strcmp(unqualified_name, "display_name") == 0) { ocs_mgmt_emit_string(textbuf, MGMT_MODE_RD, "display_name", sport->display_name); 
retval = 0; } else if (ocs_strcmp(unqualified_name, "is_vport") == 0) { ocs_mgmt_emit_boolean(textbuf, MGMT_MODE_RD, "is_vport", sport->is_vport); retval = 0; } else if (ocs_strcmp(unqualified_name, "enable_ini") == 0) { ocs_mgmt_emit_boolean(textbuf, MGMT_MODE_RD, "enable_ini", sport->enable_ini); retval = 0; } else if (ocs_strcmp(unqualified_name, "enable_tgt") == 0) { ocs_mgmt_emit_boolean(textbuf, MGMT_MODE_RD, "enable_tgt", sport->enable_tgt); retval = 0; } else if (ocs_strcmp(unqualified_name, "p2p_winner") == 0) { ocs_mgmt_emit_boolean(textbuf, MGMT_MODE_RD, "p2p_winner", sport->p2p_winner); retval = 0; } else if (ocs_strcmp(unqualified_name, "p2p_port_id") == 0) { ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "p2p_port_id", "0x%06x", sport->p2p_port_id); retval = 0; } else if (ocs_strcmp(unqualified_name, "p2p_remote_port_id") == 0) { ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "p2p_remote_port_id", "0x%06x", sport->p2p_remote_port_id); retval = 0; } else if (ocs_strcmp(unqualified_name, "wwpn") == 0) { ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "wwpn", "0x%016llx", (unsigned long long)sport->wwpn); retval = 0; } else if (ocs_strcmp(unqualified_name, "wwnn") == 0) { ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "wwnn", "0x%016llx", (unsigned long long)sport->wwnn); retval = 0; } else { /* If I didn't know the value of this status pass the request to each of my children */ ocs_sport_lock(sport); ocs_list_foreach(&sport->node_list, node) { if ((node->mgmt_functions) && (node->mgmt_functions->get_handler)) { retval = node->mgmt_functions->get_handler(textbuf, qualifier, name, node); } if (retval == 0) { break; } } ocs_sport_unlock(sport); } } ocs_mgmt_end_section(textbuf, "sport", sport->instance_index); return retval; } void ocs_mgmt_sport_get_all(ocs_textbuf_t *textbuf, void *object) { ocs_node_t *node; ocs_sport_t *sport = (ocs_sport_t *)object; ocs_mgmt_start_section(textbuf, "sport", sport->instance_index); ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "indicator", "0x%x", sport->indicator); ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "fc_id", "0x%06x", sport->fc_id); ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "index", "%d", sport->index); ocs_mgmt_emit_string(textbuf, MGMT_MODE_RD, "display_name", sport->display_name); ocs_mgmt_emit_boolean(textbuf, MGMT_MODE_RD, "is_vport", sport->is_vport); ocs_mgmt_emit_boolean(textbuf, MGMT_MODE_RD, "enable_ini", sport->enable_ini); ocs_mgmt_emit_boolean(textbuf, MGMT_MODE_RD, "enable_tgt", sport->enable_tgt); ocs_mgmt_emit_boolean(textbuf, MGMT_MODE_RD, "p2p_winner", sport->p2p_winner); ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "p2p_port_id", "0x%06x", sport->p2p_port_id); ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "p2p_remote_port_id", "0x%06x", sport->p2p_remote_port_id); ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "wwpn", "0x%016llx" , (unsigned long long)sport->wwpn); ocs_mgmt_emit_int(textbuf, MGMT_MODE_RD, "wwnn", "0x%016llx", (unsigned long long)sport->wwnn); ocs_sport_lock(sport); ocs_list_foreach(&sport->node_list, node) { if ((node->mgmt_functions) && (node->mgmt_functions->get_all_handler)) { node->mgmt_functions->get_all_handler(textbuf, node); } } ocs_sport_unlock(sport); ocs_mgmt_end_section(textbuf, "sport", sport->instance_index); } int ocs_mgmt_sport_set(char *parent, char *name, char *value, void *object) { ocs_node_t *node; ocs_sport_t *sport = (ocs_sport_t *)object; char qualifier[80]; int retval = -1; snprintf(qualifier, sizeof(qualifier), "%s/sport[%d]", parent, sport->instance_index); /* If it doesn't start with my qualifier I don't know what to do 
with it */ if (ocs_strncmp(name, qualifier, strlen(qualifier)) == 0) { /* The sport has no settable values. Pass the request to each node. */ ocs_sport_lock(sport); ocs_list_foreach(&sport->node_list, node) { if ((node->mgmt_functions) && (node->mgmt_functions->set_handler)) { retval = node->mgmt_functions->set_handler(qualifier, name, value, node); } if (retval == 0) { break; } } ocs_sport_unlock(sport); } return retval; } int ocs_mgmt_sport_exec(char *parent, char *action, void *arg_in, uint32_t arg_in_length, void *arg_out, uint32_t arg_out_length, void *object) { ocs_node_t *node; ocs_sport_t *sport = (ocs_sport_t *)object; char qualifier[80]; int retval = -1; snprintf(qualifier, sizeof(qualifier), "%s.sport%d", parent, sport->instance_index); /* If it doesn't start with my qualifier I don't know what to do with it */ if (ocs_strncmp(action, qualifier, strlen(qualifier)) == 0) { /* See if it's an action I can perform */ /* if (ocs_strcmp .... * { * } else */ { /* If I didn't know how to do this action, pass the request to each of my children */ ocs_sport_lock(sport); ocs_list_foreach(&sport->node_list, node) { if ((node->mgmt_functions) && (node->mgmt_functions->exec_handler)) { retval = node->mgmt_functions->exec_handler(qualifier, action, arg_in, arg_in_length, arg_out, arg_out_length, node); } if (retval == 0) { break; } } ocs_sport_unlock(sport); } } return retval; } /** * @brief Save the virtual port's parameters. * * @par Description * The information required to restore a virtual port is saved. * * @param sport Pointer to the sport context. * * @return None. */ static void ocs_vport_update_spec(ocs_sport_t *sport) { ocs_t *ocs = sport->ocs; ocs_xport_t *xport = ocs->xport; ocs_vport_spec_t *vport; ocs_device_lock(ocs); ocs_list_foreach(&xport->vport_list, vport) { if (vport->sport == sport) { vport->wwnn = sport->wwnn; vport->wwpn = sport->wwpn; vport->tgt_data = sport->tgt_data; vport->ini_data = sport->ini_data; break; } } ocs_device_unlock(ocs); } /** * @brief Create a saved vport entry. * * A saved vport entry is added to the vport list, which is restored following * a link up. This function is used to allow vports to be created the first time * the link comes up without having to go through the ioctl() API. * * @param ocs Pointer to device context. * @param wwnn World wide node name (may be zero for auto-select). * @param wwpn World wide port name (may be zero for auto-select). * @param fc_id Requested port ID (used in fabric emulation mode). * @param enable_ini TRUE if vport is to be an initiator port. * @param enable_tgt TRUE if vport is to be a target port. * @param tgt_data Pointer to target specific data. * @param ini_data Pointer to initiator specific data. * * @return Returns 0 on success, or a negative error value on failure.
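 *
 * @par Example
 * A sketch of the restore path also taken by ocs_sport_vport_new(): save a
 * spec for an initiator-only vport with a fabric-assigned fc_id, then start it:
 * @code
 * if (ocs_vport_create_spec(ocs, wwnn, wwpn, UINT32_MAX, 1, 0, NULL, NULL) == 0) {
 *     ocs_vport_start(domain);
 * }
 * @endcode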
*/ int8_t ocs_vport_create_spec(ocs_t *ocs, uint64_t wwnn, uint64_t wwpn, uint32_t fc_id, uint32_t enable_ini, uint32_t enable_tgt, void *tgt_data, void *ini_data) { ocs_xport_t *xport = ocs->xport; ocs_vport_spec_t *vport; /* Walk the ocs_vport_list and return failure if a valid vport entry (one with a non-zero WWPN and WWNN) has already been created */ ocs_list_foreach(&xport->vport_list, vport) { if ((wwpn && (vport->wwpn == wwpn)) && (wwnn && (vport->wwnn == wwnn))) { ocs_log_test(ocs, "Failed: VPORT %016llx %016llx already allocated\n", (unsigned long long)wwnn, (unsigned long long)wwpn); return -1; } } vport = ocs_malloc(ocs, sizeof(*vport), OCS_M_ZERO | OCS_M_NOWAIT); if (vport == NULL) { ocs_log_err(ocs, "ocs_malloc failed\n"); return -1; } vport->wwnn = wwnn; vport->wwpn = wwpn; vport->fc_id = fc_id; vport->domain_instance = 0; /*TODO: may need to change this */ vport->enable_tgt = enable_tgt; vport->enable_ini = enable_ini; vport->tgt_data = tgt_data; vport->ini_data = ini_data; ocs_device_lock(ocs); ocs_list_add_tail(&xport->vport_list, vport); ocs_device_unlock(ocs); return 0; } /* node group api */ /** * @brief Perform the AND operation on source vectors. * * @par Description * Performs an AND operation on the 8-bit values in source vectors @c b and @c c. * The resulting value is stored in @c a. * * @param a Destination-byte vector. * @param b Source-byte vector. * @param c Source-byte vector. * @param n Byte count. * * @return None. */ static void and8(uint8_t *a, uint8_t *b, uint8_t *c, uint32_t n) { uint32_t i; for (i = 0; i < n; i++) { *a = *b & *c; a++; b++; c++; } } /** * @brief Service parameters mask data. */ static fc_sparms_t sparms_cmp_mask = { 0, /*uint32_t command_code: 8, */ 0, /* resv1: 24; */ {~0, ~0, ~0, ~0}, /* uint32_t common_service_parameters[4]; */ 0, /* uint32_t port_name_hi; */ 0, /* uint32_t port_name_lo; */ 0, /* uint32_t node_name_hi; */ 0, /* uint32_t node_name_lo; */ {~0, ~0, ~0, ~0}, /* uint32_t class1_service_parameters[4]; */ {~0, ~0, ~0, ~0}, /* uint32_t class2_service_parameters[4]; */ {~0, ~0, ~0, ~0}, /* uint32_t class3_service_parameters[4]; */ {~0, ~0, ~0, ~0}, /* uint32_t class4_service_parameters[4]; */ {~0, ~0, ~0, ~0}}; /* uint32_t vendor_version_level[4]; */ /** * @brief Compare service parameters. * * @par Description * Returns 0 if the two service parameters are the same, excluding the port/node name * elements. * * @param sp1 Pointer to service parameters 1. * @param sp2 Pointer to service parameters 2. * * @return Returns 0 if parameters match; otherwise, returns a positive or negative value, * depending on the arithmetic magnitude of the first mismatching byte. */ int ocs_sparm_cmp(uint8_t *sp1, uint8_t *sp2) { int i; int v = 0; uint8_t *sp3 = (uint8_t*) &sparms_cmp_mask; for (i = 0; i < OCS_SERVICE_PARMS_LENGTH; i++) { v = ((int)(sp1[i] & sp3[i])) - ((int)(sp2[i] & sp3[i])); if (v) { break; } } return v; } /** * @brief Allocate a node group directory entry. * * @par Description * A node group directory entry is allocated, initialized, and added to the sport's * node group directory list. * * @param sport Pointer to the sport object. * @param sparms Pointer to the service parameters. * * @return Returns a pointer to the allocated ocs_node_group_dir_t; or NULL.
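 *
 * @par Example
 * The find-or-allocate idiom used by the high login mode initialization
 * described later in this file:
 * @code
 * node_group_dir = ocs_node_group_dir_find(sport, sparms);
 * if (node_group_dir == NULL) {
 *     node_group_dir = ocs_node_group_dir_alloc(sport, sparms);
 * }
 * @endcode
 */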
*/ ocs_node_group_dir_t * ocs_node_group_dir_alloc(ocs_sport_t *sport, uint8_t *sparms) { ocs_node_group_dir_t *node_group_dir; node_group_dir = ocs_malloc(sport->ocs, sizeof(*node_group_dir), OCS_M_ZERO | OCS_M_NOWAIT); if (node_group_dir != NULL) { node_group_dir->sport = sport; ocs_lock(&sport->node_group_lock); node_group_dir->instance_index = sport->node_group_dir_next_instance++; and8(node_group_dir->service_params, sparms, (uint8_t*)&sparms_cmp_mask, OCS_SERVICE_PARMS_LENGTH); ocs_list_init(&node_group_dir->node_group_list, ocs_remote_node_group_t, link); node_group_dir->node_group_list_count = 0; node_group_dir->next_idx = 0; ocs_list_add_tail(&sport->node_group_dir_list, node_group_dir); ocs_unlock(&sport->node_group_lock); ocs_log_debug(sport->ocs, "[%s] [%d] allocating node group directory\n", sport->display_name, node_group_dir->instance_index); } return node_group_dir; } /** * @brief Free a node group directory entry. * * @par Description * The node group directory entry @c node_group_dir is removed * from the sport's node group directory list and freed. * * @param node_group_dir Pointer to the node group directory entry. * * @return None. */ void ocs_node_group_dir_free(ocs_node_group_dir_t *node_group_dir) { ocs_sport_t *sport; if (node_group_dir != NULL) { sport = node_group_dir->sport; ocs_log_debug(sport->ocs, "[%s] [%d] freeing node group directory\n", sport->display_name, node_group_dir->instance_index); ocs_lock(&sport->node_group_lock); if (!ocs_list_empty(&node_group_dir->node_group_list)) { ocs_log_test(sport->ocs, "[%s] WARNING: node group list not empty\n", sport->display_name); } ocs_list_remove(&sport->node_group_dir_list, node_group_dir); ocs_unlock(&sport->node_group_lock); ocs_free(sport->ocs, node_group_dir, sizeof(*node_group_dir)); } } /** * @brief Find a matching node group directory entry. * * @par Description * The sport's node group directory list is searched for a matching set of * service parameters. The first matching entry is returned; otherwise * NULL is returned. * * @param sport Pointer to the sport object. * @param sparms Pointer to the sparams to match. * * @return Returns a pointer to the first matching entry found; or NULL. */ ocs_node_group_dir_t * ocs_node_group_dir_find(ocs_sport_t *sport, uint8_t *sparms) { ocs_node_group_dir_t *node_dir = NULL; ocs_lock(&sport->node_group_lock); ocs_list_foreach(&sport->node_group_dir_list, node_dir) { if (ocs_sparm_cmp(sparms, node_dir->service_params) == 0) { ocs_unlock(&sport->node_group_lock); return node_dir; } } ocs_unlock(&sport->node_group_lock); return NULL; } /** * @brief Allocate a remote node group object. * * @par Description * A remote node group object is allocated, initialized, and placed on the node group * list of @c node_group_dir. The HW remote node group @b alloc function is called. * * @param node_group_dir Pointer to the node group directory. * * @return Returns a pointer to the allocated remote node group object; or NULL. 
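 *
 * Allocation is normally paired with a HW attach of each node that
 * joins the group, as in this sketch (mirroring ocs_node_group_init()
 * below):
 *
 *   node_group = ocs_remote_node_group_alloc(node_group_dir);
 *   if (node_group != NULL) {
 *           hrc = ocs_hw_node_group_attach(&ocs->hw, node_group,
 *               &node->rnode);
 *   }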
*/ ocs_remote_node_group_t * ocs_remote_node_group_alloc(ocs_node_group_dir_t *node_group_dir) { ocs_t *ocs; ocs_sport_t *sport; ocs_remote_node_group_t *node_group; ocs_hw_rtn_e hrc; ocs_assert(node_group_dir, NULL); ocs_assert(node_group_dir->sport, NULL); ocs_assert(node_group_dir->sport->ocs, NULL); sport = node_group_dir->sport; ocs = sport->ocs; node_group = ocs_malloc(ocs, sizeof(*node_group), OCS_M_ZERO | OCS_M_NOWAIT); if (node_group != NULL) { /* set pointer to node group directory */ node_group->node_group_dir = node_group_dir; ocs_lock(&node_group_dir->sport->node_group_lock); node_group->instance_index = sport->node_group_next_instance++; ocs_unlock(&node_group_dir->sport->node_group_lock); /* invoke HW node group inialization */ hrc = ocs_hw_node_group_alloc(&ocs->hw, node_group); if (hrc != OCS_HW_RTN_SUCCESS) { ocs_log_err(ocs, "ocs_hw_node_group_alloc() failed: %d\n", hrc); ocs_free(ocs, node_group, sizeof(*node_group)); return NULL; } ocs_log_debug(ocs, "[%s] [%d] indicator x%03x allocating node group\n", sport->display_name, node_group->indicator, node_group->instance_index); /* add to the node group directory entry node group list */ ocs_lock(&node_group_dir->sport->node_group_lock); ocs_list_add_tail(&node_group_dir->node_group_list, node_group); node_group_dir->node_group_list_count ++; ocs_unlock(&node_group_dir->sport->node_group_lock); } return node_group; } /** * @brief Free a remote node group object. * * @par Description * The remote node group object @c node_group is removed from its * node group directory entry and freed. * * @param node_group Pointer to the remote node group object. * * @return None. */ void ocs_remote_node_group_free(ocs_remote_node_group_t *node_group) { ocs_sport_t *sport; ocs_node_group_dir_t *node_group_dir; if (node_group != NULL) { ocs_assert(node_group->node_group_dir); ocs_assert(node_group->node_group_dir->sport); ocs_assert(node_group->node_group_dir->sport->ocs); node_group_dir = node_group->node_group_dir; sport = node_group_dir->sport; ocs_log_debug(sport->ocs, "[%s] [%d] freeing node group\n", sport->display_name, node_group->instance_index); /* Remove from node group directory node group list */ ocs_lock(&sport->node_group_lock); ocs_list_remove(&node_group_dir->node_group_list, node_group); node_group_dir->node_group_list_count --; /* TODO: note that we're going to have the node_group_dir entry persist forever ... we could delete it if * the group_list_count goes to zero (or the linked list is empty */ ocs_unlock(&sport->node_group_lock); ocs_free(sport->ocs, node_group, sizeof(*node_group)); } } /** * @brief Initialize a node for high login mode. * * @par Description * The @c node is initialized for high login mode. The following steps are performed: * 1. The sports node group directory is searched for a matching set of service parameters. * 2. If a matching set is not found, a node group directory entry is allocated. * 3. If less than the @c hlm_group_size number of remote node group objects is present in the * node group directory, a new remote node group object is allocated and added to the list. * 4. A remote node group object is selected, and the node is attached to the node group. * * @param node Pointer to the node. * * @return Returns 0 on success, or a negative error value on failure. 
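 *
 * Caller sketch (illustrative; assumes a node whose device has
 * enable_hlm set, as the function asserts):
 *
 *   if (node->ocs->enable_hlm && (ocs_node_group_init(node) != 0)) {
 *           ocs_log_err(node->ocs, "HLM node group init failed\n");
 *   }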
*/ int ocs_node_group_init(ocs_node_t *node) { ocs_t *ocs; ocs_sport_t *sport; ocs_node_group_dir_t *node_group_dir; ocs_remote_node_group_t *node_group; ocs_hw_rtn_e hrc; ocs_assert(node, -1); ocs_assert(node->sport, -1); ocs_assert(node->ocs, -1); ocs = node->ocs; sport = node->sport; ocs_assert(ocs->enable_hlm, -1); /* see if there's a node group directory allocated for this service parameter set */ node_group_dir = ocs_node_group_dir_find(sport, node->service_params); if (node_group_dir == NULL) { /* not found, so allocate one */ node_group_dir = ocs_node_group_dir_alloc(sport, node->service_params); if (node_group_dir == NULL) { /* node group directory allocation failed ... can't continue, however, * the node will be allocated with a normal (not shared) RPI */ ocs_log_err(ocs, "ocs_node_group_dir_alloc() failed\n"); return -1; } } /* check to see if we've allocated hlm_group_size's worth of node group structures for this * directory entry, if not, then allocate and use a new one, otherwise pick the next one. */ ocs_lock(&node->sport->node_group_lock); if (node_group_dir->node_group_list_count < ocs->hlm_group_size) { ocs_unlock(&node->sport->node_group_lock); node_group = ocs_remote_node_group_alloc(node_group_dir); if (node_group == NULL) { ocs_log_err(ocs, "ocs_remote_node_group_alloc() failed\n"); return -1; } ocs_lock(&node->sport->node_group_lock); } else { uint32_t idx = 0; ocs_list_foreach(&node_group_dir->node_group_list, node_group) { if (idx >= ocs->hlm_group_size) { ocs_log_err(node->ocs, "assertion failed: idx >= ocs->hlm_group_size\n"); ocs_unlock(&node->sport->node_group_lock); return -1; } if (idx == node_group_dir->next_idx) { break; } idx ++; } if (idx == ocs->hlm_group_size) { node_group = ocs_list_get_head(&node_group_dir->node_group_list); } if (++node_group_dir->next_idx >= node_group_dir->node_group_list_count) { node_group_dir->next_idx = 0; } } ocs_unlock(&node->sport->node_group_lock); /* Initialize a pointer in the node back to the node group */ node->node_group = node_group; /* Join this node into the group */ hrc = ocs_hw_node_group_attach(&ocs->hw, node_group, &node->rnode); return (hrc == OCS_HW_RTN_SUCCESS) ? 0 : -1; } Index: projects/pnfs-planb-server/sys/dev/syscons/sysmouse.c =================================================================== --- projects/pnfs-planb-server/sys/dev/syscons/sysmouse.c (revision 334966) +++ projects/pnfs-planb-server/sys/dev/syscons/sysmouse.c (revision 334967) @@ -1,337 +1,331 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Kazutaka YOKOTA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_evdev.h" #include "opt_syscons.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef EVDEV_SUPPORT #include #include #endif #ifndef SC_NO_SYSMOUSE /* local variables */ static struct tty *sysmouse_tty; static int mouse_level; /* sysmouse protocol level */ static mousestatus_t mouse_status; #ifdef EVDEV_SUPPORT static struct evdev_dev *sysmouse_evdev; static void smdev_evdev_init(void) { int i; sysmouse_evdev = evdev_alloc(); evdev_set_name(sysmouse_evdev, "System mouse"); evdev_set_phys(sysmouse_evdev, "sysmouse"); evdev_set_id(sysmouse_evdev, BUS_VIRTUAL, 0, 0, 0); evdev_support_prop(sysmouse_evdev, INPUT_PROP_POINTER); evdev_support_event(sysmouse_evdev, EV_SYN); evdev_support_event(sysmouse_evdev, EV_REL); evdev_support_event(sysmouse_evdev, EV_KEY); evdev_support_rel(sysmouse_evdev, REL_X); evdev_support_rel(sysmouse_evdev, REL_Y); evdev_support_rel(sysmouse_evdev, REL_WHEEL); evdev_support_rel(sysmouse_evdev, REL_HWHEEL); for (i = 0; i < 8; i++) evdev_support_key(sysmouse_evdev, BTN_MOUSE + i); if (evdev_register(sysmouse_evdev)) { evdev_free(sysmouse_evdev); sysmouse_evdev = NULL; } } static void smdev_evdev_write(int x, int y, int z, int buttons) { if (sysmouse_evdev == NULL || !(evdev_rcpt_mask & EVDEV_RCPT_SYSMOUSE)) return; evdev_push_event(sysmouse_evdev, EV_REL, REL_X, x); evdev_push_event(sysmouse_evdev, EV_REL, REL_Y, y); switch (evdev_sysmouse_t_axis) { case EVDEV_SYSMOUSE_T_AXIS_PSM: switch (z) { case 1: case -1: evdev_push_rel(sysmouse_evdev, REL_WHEEL, -z); break; case 2: case -2: evdev_push_rel(sysmouse_evdev, REL_HWHEEL, z / 2); break; } break; case EVDEV_SYSMOUSE_T_AXIS_UMS: if (buttons & (1 << 6)) evdev_push_rel(sysmouse_evdev, REL_HWHEEL, 1); else if (buttons & (1 << 5)) evdev_push_rel(sysmouse_evdev, REL_HWHEEL, -1); buttons &= ~((1 << 5)|(1 << 6)); /* PASSTHROUGH */ case EVDEV_SYSMOUSE_T_AXIS_NONE: default: evdev_push_rel(sysmouse_evdev, REL_WHEEL, -z); } evdev_push_mouse_btn(sysmouse_evdev, buttons); evdev_sync(sysmouse_evdev); } #endif static void smdev_close(struct tty *tp) { mouse_level = 0; } static int smdev_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td) { mousehw_t *hw; mousemode_t *mode; switch (cmd) { case MOUSE_GETHWINFO: /* get device information */ hw = (mousehw_t *)data; hw->buttons = 10; /* XXX unknown */ hw->iftype = MOUSE_IF_SYSMOUSE; hw->type = MOUSE_MOUSE; hw->model = MOUSE_MODEL_GENERIC; hw->hwid = 0; return 0; case MOUSE_GETMODE: /* get protocol/mode */ mode = (mousemode_t *)data; mode->level = mouse_level; switch (mode->level) { case 0: /* emulate MouseSystems protocol */ mode->protocol = MOUSE_PROTO_MSC; mode->rate = -1; /* unknown */ mode->resolution = -1; /* unknown */ mode->accelfactor = 0; /* disabled */ mode->packetsize = MOUSE_MSC_PACKETSIZE; mode->syncmask[0] = MOUSE_MSC_SYNCMASK; mode->syncmask[1] = MOUSE_MSC_SYNC; break; case 1: /* sysmouse protocol */ mode->protocol = MOUSE_PROTO_SYSMOUSE; mode->rate = -1; 
mode->resolution = -1; mode->accelfactor = 0; mode->packetsize = MOUSE_SYS_PACKETSIZE; mode->syncmask[0] = MOUSE_SYS_SYNCMASK; mode->syncmask[1] = MOUSE_SYS_SYNC; break; } return 0; case MOUSE_SETMODE: /* set protocol/mode */ mode = (mousemode_t *)data; if (mode->level == -1) ; /* don't change the current setting */ else if ((mode->level < 0) || (mode->level > 1)) return EINVAL; else mouse_level = mode->level; return 0; case MOUSE_GETLEVEL: /* get operation level */ *(int *)data = mouse_level; return 0; case MOUSE_SETLEVEL: /* set operation level */ if ((*(int *)data < 0) || (*(int *)data > 1)) return EINVAL; mouse_level = *(int *)data; return 0; case MOUSE_GETSTATUS: /* get accumulated mouse events */ *(mousestatus_t *)data = mouse_status; mouse_status.flags = 0; mouse_status.obutton = mouse_status.button; mouse_status.dx = 0; mouse_status.dy = 0; mouse_status.dz = 0; return 0; -#ifdef notyet - case MOUSE_GETVARS: /* get internal mouse variables */ - case MOUSE_SETVARS: /* set internal mouse variables */ - return ENODEV; -#endif - case MOUSE_READSTATE: /* read status from the device */ case MOUSE_READDATA: /* read data from the device */ return ENODEV; } return (ENOIOCTL); } static int smdev_param(struct tty *tp, struct termios *t) { /* * Set the output baud rate to zero. The mouse device supports * no output, so we don't want to waste buffers. */ t->c_ispeed = TTYDEF_SPEED; t->c_ospeed = B0; return (0); } static struct ttydevsw smdev_ttydevsw = { .tsw_flags = TF_NOPREFIX, .tsw_close = smdev_close, .tsw_ioctl = smdev_ioctl, .tsw_param = smdev_param, }; static void sm_attach_mouse(void *unused) { if (!vty_enabled(VTY_SC)) return; sysmouse_tty = tty_alloc(&smdev_ttydevsw, NULL); tty_makedev(sysmouse_tty, NULL, "sysmouse"); #ifdef EVDEV_SUPPORT smdev_evdev_init(); #endif } SYSINIT(sysmouse, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, sm_attach_mouse, NULL); int sysmouse_event(mouse_info_t *info) { /* MOUSE_BUTTON?DOWN -> MOUSE_MSC_BUTTON?UP */ static int butmap[8] = { MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON2UP | MOUSE_MSC_BUTTON3UP, MOUSE_MSC_BUTTON2UP | MOUSE_MSC_BUTTON3UP, MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON3UP, MOUSE_MSC_BUTTON3UP, MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON2UP, MOUSE_MSC_BUTTON2UP, MOUSE_MSC_BUTTON1UP, 0, }; u_char buf[8]; int x, y, z; int i, flags = 0; tty_lock(sysmouse_tty); switch (info->operation) { case MOUSE_ACTION: mouse_status.button = info->u.data.buttons; /* FALL THROUGH */ case MOUSE_MOTION_EVENT: x = info->u.data.x; y = info->u.data.y; z = info->u.data.z; break; case MOUSE_BUTTON_EVENT: x = y = z = 0; if (info->u.event.value > 0) mouse_status.button |= info->u.event.id; else mouse_status.button &= ~info->u.event.id; break; default: goto done; } mouse_status.dx += x; mouse_status.dy += y; mouse_status.dz += z; mouse_status.flags |= ((x || y || z) ? 
MOUSE_POSCHANGED : 0) | (mouse_status.obutton ^ mouse_status.button); flags = mouse_status.flags; if (flags == 0) goto done; #ifdef EVDEV_SUPPORT smdev_evdev_write(x, y, z, mouse_status.button); #endif if (!tty_opened(sysmouse_tty)) goto done; /* the first five bytes are compatible with MouseSystems' */ buf[0] = MOUSE_MSC_SYNC | butmap[mouse_status.button & MOUSE_STDBUTTONS]; x = imax(imin(x, 255), -256); buf[1] = x >> 1; buf[3] = x - buf[1]; y = -imax(imin(y, 255), -256); buf[2] = y >> 1; buf[4] = y - buf[2]; for (i = 0; i < MOUSE_MSC_PACKETSIZE; ++i) ttydisc_rint(sysmouse_tty, buf[i], 0); if (mouse_level >= 1) { /* extended part */ z = imax(imin(z, 127), -128); buf[5] = (z >> 1) & 0x7f; buf[6] = (z - (z >> 1)) & 0x7f; /* buttons 4-10 */ buf[7] = (~mouse_status.button >> 3) & 0x7f; for (i = MOUSE_MSC_PACKETSIZE; i < MOUSE_SYS_PACKETSIZE; ++i) ttydisc_rint(sysmouse_tty, buf[i], 0); } ttydisc_rint_done(sysmouse_tty); done: tty_unlock(sysmouse_tty); return (flags); } #endif /* !SC_NO_SYSMOUSE */ Index: projects/pnfs-planb-server/sys/dev/usb/net/if_muge.c =================================================================== --- projects/pnfs-planb-server/sys/dev/usb/net/if_muge.c (revision 334966) +++ projects/pnfs-planb-server/sys/dev/usb/net/if_muge.c (revision 334967) @@ -1,2202 +1,2218 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012 Ben Gray . * Copyright (C) 2018 The FreeBSD Foundation. * * This software was developed by Arshan Khanifar * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* * USB-To-Ethernet adapter driver for Microchip's LAN78XX and related families. * * USB 3.1 to 10/100/1000 Mbps Ethernet * LAN7800 http://www.microchip.com/wwwproducts/en/LAN7800 * + * USB 2.0 to 10/100/1000 Mbps Ethernet + * LAN7850 http://www.microchip.com/wwwproducts/en/LAN7850 + * * USB 2 to 10/100/1000 Mbps Ethernet with built-in USB hub * LAN7515 (no datasheet available, but probes and functions as LAN7800) * * This driver is based on the if_smsc driver, with lan78xx-specific * functionality modelled on Microchip's Linux lan78xx driver. * * UNIMPLEMENTED FEATURES * ------------------ * A number of features supported by the lan78xx are not yet implemented in * this driver: * * 1. 
RX/TX checksum offloading: Nothing has been implemented yet for * TX checksumming. RX checksumming works with ICMP messages, but is broken * for TCP/UDP packets. * 2. Direct address translation filtering: Implemented but untested. * 3. VLAN tag removal. * 4. Reading MAC address from the device tree: Specific to the RPi 3B+. * Currently, the driver assigns a random MAC address itself. * 5. Support for USB interrupt endpoints. * 6. Latency Tolerance Messaging (LTM) support. * 7. TCP LSO support. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_platform.h" #include #include #include #include "usbdevs.h" #define USB_DEBUG_VAR lan78xx_debug #include #include #include #include #ifdef USB_DEBUG static int muge_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, muge, CTLFLAG_RW, 0, "Microchip LAN78xx USB-GigE"); SYSCTL_INT(_hw_usb_muge, OID_AUTO, debug, CTLFLAG_RWTUN, &muge_debug, 0, "Debug level"); #endif #define MUGE_DEFAULT_RX_CSUM_ENABLE (false) #define MUGE_DEFAULT_TX_CSUM_ENABLE (false) #define MUGE_DEFAULT_TSO_CSUM_ENABLE (false) /* Supported Vendor and Product IDs. */ static const struct usb_device_id lan78xx_devs[] = { #define MUGE_DEV(p,i) { USB_VPI(USB_VENDOR_SMC2, USB_PRODUCT_SMC2_##p, i) } MUGE_DEV(LAN7800_ETH, 0), MUGE_DEV(LAN7801_ETH, 0), MUGE_DEV(LAN7850_ETH, 0), #undef MUGE_DEV }; #ifdef USB_DEBUG #define muge_dbg_printf(sc, fmt, args...) \ do { \ if (muge_debug > 0) \ device_printf((sc)->sc_ue.ue_dev, "debug: " fmt, ##args); \ } while(0) #else #define muge_dbg_printf(sc, fmt, args...) do { } while (0) #endif #define muge_warn_printf(sc, fmt, args...) \ device_printf((sc)->sc_ue.ue_dev, "warning: " fmt, ##args) #define muge_err_printf(sc, fmt, args...) \ device_printf((sc)->sc_ue.ue_dev, "error: " fmt, ##args) #define ETHER_IS_ZERO(addr) \ (!(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5])) #define ETHER_IS_VALID(addr) \ (!ETHER_IS_MULTICAST(addr) && !ETHER_IS_ZERO(addr)) /* USB endpoints. */ enum { MUGE_BULK_DT_RD, MUGE_BULK_DT_WR, #if 0 /* Ignore interrupt endpoints for now as we poll on MII status. */ MUGE_INTR_DT_WR, MUGE_INTR_DT_RD, #endif MUGE_N_TRANSFER, }; struct muge_softc { struct usb_ether sc_ue; struct mtx sc_mtx; struct usb_xfer *sc_xfer[MUGE_N_TRANSFER]; int sc_phyno; /* Settings for the mac control (MAC_CSR) register. 
*/ uint32_t sc_rfe_ctl; uint32_t sc_mdix_ctl; uint16_t chipid; uint16_t chiprev; uint32_t sc_mchash_table[ETH_DP_SEL_VHF_HASH_LEN]; uint32_t sc_pfilter_table[MUGE_NUM_PFILTER_ADDRS_][2]; uint32_t sc_flags; #define MUGE_FLAG_LINK 0x0001 #define MUGE_FLAG_INIT_DONE 0x0002 }; #define MUGE_IFACE_IDX 0 #define MUGE_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define MUGE_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define MUGE_LOCK_ASSERT(_sc, t) mtx_assert(&(_sc)->sc_mtx, t) static device_probe_t muge_probe; static device_attach_t muge_attach; static device_detach_t muge_detach; static usb_callback_t muge_bulk_read_callback; static usb_callback_t muge_bulk_write_callback; static miibus_readreg_t lan78xx_miibus_readreg; static miibus_writereg_t lan78xx_miibus_writereg; static miibus_statchg_t lan78xx_miibus_statchg; static int muge_attach_post_sub(struct usb_ether *ue); static uether_fn_t muge_attach_post; static uether_fn_t muge_init; static uether_fn_t muge_stop; static uether_fn_t muge_start; static uether_fn_t muge_tick; static uether_fn_t muge_setmulti; static uether_fn_t muge_setpromisc; static int muge_ifmedia_upd(struct ifnet *); static void muge_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int lan78xx_chip_init(struct muge_softc *sc); static int muge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static const struct usb_config muge_config[MUGE_N_TRANSFER] = { [MUGE_BULK_DT_WR] = { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_OUT, .frames = 16, .bufsize = 16 * (MCLBYTES + 16), .flags = {.pipe_bof = 1,.force_short_xfer = 1,}, .callback = muge_bulk_write_callback, .timeout = 10000, /* 10 seconds */ }, [MUGE_BULK_DT_RD] = { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_IN, .bufsize = 20480, /* bytes */ .flags = {.pipe_bof = 1,.short_xfer_ok = 1,}, .callback = muge_bulk_read_callback, .timeout = 0, /* no timeout */ }, /* * The chip supports interrupt endpoints, however they aren't * needed as we poll on the MII status. */ }; static const struct usb_ether_methods muge_ue_methods = { .ue_attach_post = muge_attach_post, .ue_attach_post_sub = muge_attach_post_sub, .ue_start = muge_start, .ue_ioctl = muge_ioctl, .ue_init = muge_init, .ue_stop = muge_stop, .ue_tick = muge_tick, .ue_setmulti = muge_setmulti, .ue_setpromisc = muge_setpromisc, .ue_mii_upd = muge_ifmedia_upd, .ue_mii_sts = muge_ifmedia_sts, }; /** * lan78xx_read_reg - Read a 32-bit register on the device * @sc: driver soft context * @off: offset of the register * @data: pointer a value that will be populated with the register value * * LOCKING: * The device lock must be held before calling this function. * * RETURNS: * 0 on success, a USB_ERR_?? error code on failure. */ static int lan78xx_read_reg(struct muge_softc *sc, uint32_t off, uint32_t *data) { struct usb_device_request req; uint32_t buf; usb_error_t err; MUGE_LOCK_ASSERT(sc, MA_OWNED); req.bmRequestType = UT_READ_VENDOR_DEVICE; req.bRequest = UVR_READ_REG; USETW(req.wValue, 0); USETW(req.wIndex, off); USETW(req.wLength, 4); err = uether_do_request(&sc->sc_ue, &req, &buf, 1000); if (err != 0) muge_warn_printf(sc, "Failed to read register 0x%0x\n", off); *data = le32toh(buf); return (err); } /** * lan78xx_write_reg - Write a 32-bit register on the device * @sc: driver soft context * @off: offset of the register * @data: the 32-bit value to write into the register * * LOCKING: * The device lock must be held before calling this function. * * RETURNS: * 0 on success, a USB_ERR_?? error code on failure. 
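 *
 * The register helpers are typically used in read-modify-write
 * sequences, e.g. (this exact sequence appears in lan78xx_chip_init()):
 *
 *	err = lan78xx_read_reg(sc, ETH_HW_CFG, &buf);
 *	buf |= ETH_HW_CFG_MEF_;
 *	err = lan78xx_write_reg(sc, ETH_HW_CFG, buf);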
*/ static int lan78xx_write_reg(struct muge_softc *sc, uint32_t off, uint32_t data) { struct usb_device_request req; uint32_t buf; usb_error_t err; MUGE_LOCK_ASSERT(sc, MA_OWNED); buf = htole32(data); req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = UVR_WRITE_REG; USETW(req.wValue, 0); USETW(req.wIndex, off); USETW(req.wLength, 4); err = uether_do_request(&sc->sc_ue, &req, &buf, 1000); if (err != 0) muge_warn_printf(sc, "Failed to write register 0x%0x\n", off); return (err); } /** * lan78xx_wait_for_bits - Poll on a register value until bits are cleared * @sc: soft context * @reg: offset of the register * @bits: if the bits are clear the function returns * * LOCKING: * The device lock must be held before calling this function. * * RETURNS: * 0 on success, or a USB_ERR_?? error code on failure. */ static int lan78xx_wait_for_bits(struct muge_softc *sc, uint32_t reg, uint32_t bits) { usb_ticks_t start_ticks; const usb_ticks_t max_ticks = USB_MS_TO_TICKS(1000); uint32_t val; int err; MUGE_LOCK_ASSERT(sc, MA_OWNED); start_ticks = (usb_ticks_t)ticks; do { if ((err = lan78xx_read_reg(sc, reg, &val)) != 0) return (err); if (!(val & bits)) return (0); uether_pause(&sc->sc_ue, hz / 100); } while (((usb_ticks_t)(ticks - start_ticks)) < max_ticks); return (USB_ERR_TIMEOUT); } /** * lan78xx_eeprom_read_raw - Read the attached EEPROM * @sc: soft context * @off: the eeprom address offset * @buf: stores the bytes * @buflen: the number of bytes to read * * Simply reads bytes from an attached eeprom. * * LOCKING: * The function takes and releases the device lock if not already held. * * RETURNS: * 0 on success, or a USB_ERR_?? error code on failure. */ static int lan78xx_eeprom_read_raw(struct muge_softc *sc, uint16_t off, uint8_t *buf, uint16_t buflen) { usb_ticks_t start_ticks; const usb_ticks_t max_ticks = USB_MS_TO_TICKS(1000); int err, locked; uint32_t val, saved; uint16_t i; locked = mtx_owned(&sc->sc_mtx); /* XXX */ if (!locked) MUGE_LOCK(sc); - err = lan78xx_read_reg(sc, ETH_HW_CFG, &val); - saved = val; + if (sc->chipid == ETH_ID_REV_CHIP_ID_7800_) { + /* EEDO/EECLK muxed with LED0/LED1 on LAN7800. */ + err = lan78xx_read_reg(sc, ETH_HW_CFG, &val); + saved = val; - val &= ~(ETH_HW_CFG_LEDO_EN_ | ETH_HW_CFG_LED1_EN_); - err = lan78xx_write_reg(sc, ETH_HW_CFG, val); + val &= ~(ETH_HW_CFG_LEDO_EN_ | ETH_HW_CFG_LED1_EN_); + err = lan78xx_write_reg(sc, ETH_HW_CFG, val); + } err = lan78xx_wait_for_bits(sc, ETH_E2P_CMD, ETH_E2P_CMD_BUSY_); if (err != 0) { muge_warn_printf(sc, "eeprom busy, failed to read data\n"); goto done; } /* Start reading the bytes, one at a time. */ for (i = 0; i < buflen; i++) { val = ETH_E2P_CMD_BUSY_ | ETH_E2P_CMD_READ_; val |= (ETH_E2P_CMD_ADDR_MASK_ & (off + i)); if ((err = lan78xx_write_reg(sc, ETH_E2P_CMD, val)) != 0) goto done; start_ticks = (usb_ticks_t)ticks; do { if ((err = lan78xx_read_reg(sc, ETH_E2P_CMD, &val)) != 0) goto done; if (!(val & ETH_E2P_CMD_BUSY_) || (val & ETH_E2P_CMD_TIMEOUT_)) break; uether_pause(&sc->sc_ue, hz / 100); } while (((usb_ticks_t)(ticks - start_ticks)) < max_ticks); if (val & (ETH_E2P_CMD_BUSY_ | ETH_E2P_CMD_TIMEOUT_)) { muge_warn_printf(sc, "eeprom command failed\n"); err = USB_ERR_IOERROR; break; } if ((err = lan78xx_read_reg(sc, ETH_E2P_DATA, &val)) != 0) goto done; buf[i] = (val & 0xff); } done: if (!locked) MUGE_UNLOCK(sc); - lan78xx_write_reg(sc, ETH_HW_CFG, saved); + if (sc->chipid == ETH_ID_REV_CHIP_ID_7800_) { + /* Restore saved LED configuration. 
*/ + lan78xx_write_reg(sc, ETH_HW_CFG, saved); + } return (err); } /** * lan78xx_eeprom_read - Read EEPROM and confirm it is programmed * @sc: soft context * @off: the eeprom address offset * @buf: stores the bytes * @buflen: the number of bytes to read * * RETURNS: * 0 on success, or a USB_ERR_?? error code on failure. */ static int lan78xx_eeprom_read(struct muge_softc *sc, uint16_t off, uint8_t *buf, uint16_t buflen) { uint8_t sig; int ret; ret = lan78xx_eeprom_read_raw(sc, ETH_E2P_INDICATOR_OFFSET, &sig, 1); if ((ret == 0) && (sig == ETH_E2P_INDICATOR)) { ret = lan78xx_eeprom_read_raw(sc, off, buf, buflen); muge_dbg_printf(sc, "EEPROM present\n"); } else { ret = -EINVAL; muge_dbg_printf(sc, "EEPROM not present\n"); } return (ret); } /** * lan78xx_otp_read_raw * @sc: soft context * @off: the otp address offset * @buf: stores the bytes * @buflen: the number of bytes to read * * Simply reads bytes from the OTP. * * LOCKING: * The function takes and releases the device lock if not already held. * * RETURNS: * 0 on success, or a USB_ERR_?? error code on failure. * */ static int lan78xx_otp_read_raw(struct muge_softc *sc, uint16_t off, uint8_t *buf, uint16_t buflen) { int locked, err; uint32_t val; uint16_t i; locked = mtx_owned(&sc->sc_mtx); if (!locked) MUGE_LOCK(sc); err = lan78xx_read_reg(sc, OTP_PWR_DN, &val); /* Checking if bit is set. */ if (val & OTP_PWR_DN_PWRDN_N) { /* Clear it, then wait for it to be cleared. */ lan78xx_write_reg(sc, OTP_PWR_DN, 0); err = lan78xx_wait_for_bits(sc, OTP_PWR_DN, OTP_PWR_DN_PWRDN_N); if (err != 0) { muge_warn_printf(sc, "OTP off? failed to read data\n"); goto done; } } /* Start reading the bytes, one at a time. */ for (i = 0; i < buflen; i++) { err = lan78xx_write_reg(sc, OTP_ADDR1, ((off + i) >> 8) & OTP_ADDR1_15_11); err = lan78xx_write_reg(sc, OTP_ADDR2, ((off + i) & OTP_ADDR2_10_3)); err = lan78xx_write_reg(sc, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); err = lan78xx_write_reg(sc, OTP_CMD_GO, OTP_CMD_GO_GO_); err = lan78xx_wait_for_bits(sc, OTP_STATUS, OTP_STATUS_BUSY_); if (err != 0) { muge_warn_printf(sc, "OTP busy failed to read data\n"); goto done; } if ((err = lan78xx_read_reg(sc, OTP_RD_DATA, &val)) != 0) goto done; buf[i] = (uint8_t)(val & 0xff); } done: if (!locked) MUGE_UNLOCK(sc); return (err); } /** * lan78xx_otp_read * @sc: soft context * @off: the otp address offset * @buf: stores the bytes * @buflen: the number of bytes to read * * Simply reads bytes from the otp. * * LOCKING: * The function takes and releases device lock if it is not already held. * * RETURNS: * 0 on success, or a USB_ERR_?? error code on failure. */ static int lan78xx_otp_read(struct muge_softc *sc, uint16_t off, uint8_t *buf, uint16_t buflen) { uint8_t sig; int err; err = lan78xx_otp_read_raw(sc, OTP_INDICATOR_OFFSET, &sig, 1); if (err == 0) { if (sig == OTP_INDICATOR_1) { } else if (sig == OTP_INDICATOR_2) { off += 0x100; /* XXX */ } else { err = -EINVAL; } if (!err) err = lan78xx_otp_read_raw(sc, off, buf, buflen); } return (err); } /** * lan78xx_setmacaddress - Set the mac address in the device * @sc: driver soft context * @addr: pointer to array contain at least 6 bytes of the mac * * LOCKING: * Should be called with the MUGE lock held. * * RETURNS: * Returns 0 on success or a negative error code. 
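 *
 * The 6-byte MAC address is split across two registers, packed
 * little-endian (matching the shifts below):
 *
 *	ETH_RX_ADDRL = addr[3]<<24 | addr[2]<<16 | addr[1]<<8 | addr[0]
 *	ETH_RX_ADDRH = addr[5]<<8 | addr[4]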
*/ static int lan78xx_setmacaddress(struct muge_softc *sc, const uint8_t *addr) { int err; uint32_t val; muge_dbg_printf(sc, "setting mac address to %02x:%02x:%02x:%02x:%02x:%02x\n", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); MUGE_LOCK_ASSERT(sc, MA_OWNED); val = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0]; if ((err = lan78xx_write_reg(sc, ETH_RX_ADDRL, val)) != 0) goto done; val = (addr[5] << 8) | addr[4]; err = lan78xx_write_reg(sc, ETH_RX_ADDRH, val); done: return (err); } /** * lan78xx_set_rx_max_frame_length * @sc: driver soft context * @size: maximum frame size, in bytes, to accept * * Sets the maximum frame length to be received. Frames bigger than * this size are aborted. * * RETURNS: * Returns 0 on success or an error code on failure. */ static int lan78xx_set_rx_max_frame_length(struct muge_softc *sc, int size) { int err = 0; uint32_t buf; bool rxenabled; /* First we have to disable rx before changing the length. */ err = lan78xx_read_reg(sc, ETH_MAC_RX, &buf); rxenabled = ((buf & ETH_MAC_RX_EN_) != 0); if (rxenabled) { buf &= ~ETH_MAC_RX_EN_; err = lan78xx_write_reg(sc, ETH_MAC_RX, buf); } /* Setting max frame length. */ buf &= ~ETH_MAC_RX_MAX_FR_SIZE_MASK_; buf |= (((size + 4) << ETH_MAC_RX_MAX_FR_SIZE_SHIFT_) & ETH_MAC_RX_MAX_FR_SIZE_MASK_); err = lan78xx_write_reg(sc, ETH_MAC_RX, buf); /* If it was enabled before, re-enable it. */ if (rxenabled) { buf |= ETH_MAC_RX_EN_; err = lan78xx_write_reg(sc, ETH_MAC_RX, buf); } return (err); } /** * lan78xx_miibus_readreg - Read a MII/MDIO register * @dev: usb ether device * @phy: the PHY number to read from * @reg: the register address * * LOCKING: * Takes and releases the device mutex lock if not already held. * * RETURNS: * Returns the 16 bits read from the MII register; if this function fails, * 0 is returned. */ static int lan78xx_miibus_readreg(device_t dev, int phy, int reg) { struct muge_softc *sc = device_get_softc(dev); int locked; uint32_t addr, val; val = 0; locked = mtx_owned(&sc->sc_mtx); if (!locked) MUGE_LOCK(sc); if (lan78xx_wait_for_bits(sc, ETH_MII_ACC, ETH_MII_ACC_MII_BUSY_) != 0) { muge_warn_printf(sc, "MII is busy\n"); goto done; } addr = (phy << 11) | (reg << 6) | ETH_MII_ACC_MII_READ_ | ETH_MII_ACC_MII_BUSY_; lan78xx_write_reg(sc, ETH_MII_ACC, addr); if (lan78xx_wait_for_bits(sc, ETH_MII_ACC, ETH_MII_ACC_MII_BUSY_) != 0) { muge_warn_printf(sc, "MII read timeout\n"); goto done; } lan78xx_read_reg(sc, ETH_MII_DATA, &val); val = le32toh(val); done: if (!locked) MUGE_UNLOCK(sc); return (val & 0xFFFF); } /** * lan78xx_miibus_writereg - Writes a MII/MDIO register * @dev: usb ether device * @phy: the PHY number to write to * @reg: the register address * @val: the value to write * * Attempts to write a PHY register through the USB controller registers. * * LOCKING: * Takes and releases the device mutex lock if not already held. * * RETURNS: * Always returns 0 regardless of success or failure.
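 *
 * The MII access word packs the PHY and register numbers around the
 * command bits, just as in the read path above:
 *
 *	addr = (phy << 11) | (reg << 6) |
 *	    ETH_MII_ACC_MII_WRITE_ | ETH_MII_ACC_MII_BUSY_;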
*/ static int lan78xx_miibus_writereg(device_t dev, int phy, int reg, int val) { struct muge_softc *sc = device_get_softc(dev); int locked; uint32_t addr; if (sc->sc_phyno != phy) return (0); locked = mtx_owned(&sc->sc_mtx); if (!locked) MUGE_LOCK(sc); if (lan78xx_wait_for_bits(sc, ETH_MII_ACC, ETH_MII_ACC_MII_BUSY_) != 0) { muge_warn_printf(sc, "MII is busy\n"); goto done; } val = htole32(val); lan78xx_write_reg(sc, ETH_MII_DATA, val); addr = (phy << 11) | (reg << 6) | ETH_MII_ACC_MII_WRITE_ | ETH_MII_ACC_MII_BUSY_; lan78xx_write_reg(sc, ETH_MII_ACC, addr); if (lan78xx_wait_for_bits(sc, ETH_MII_ACC, ETH_MII_ACC_MII_BUSY_) != 0) muge_warn_printf(sc, "MII write timeout\n"); done: if (!locked) MUGE_UNLOCK(sc); return (0); } /* * lan78xx_miibus_statchg - Called to detect phy status change * @dev: usb ether device * * This function is called periodically by the system to poll for status * changes of the link. * * LOCKING: * Takes and releases the device mutex lock if not already held. */ static void lan78xx_miibus_statchg(device_t dev) { struct muge_softc *sc = device_get_softc(dev); struct mii_data *mii = uether_getmii(&sc->sc_ue); struct ifnet *ifp; int locked; int err; uint32_t flow = 0; uint32_t fct_flow = 0; locked = mtx_owned(&sc->sc_mtx); if (!locked) MUGE_LOCK(sc); ifp = uether_getifp(&sc->sc_ue); if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* Use the MII status to determine link status */ sc->sc_flags &= ~MUGE_FLAG_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { muge_dbg_printf(sc, "media is active\n"); switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->sc_flags |= MUGE_FLAG_LINK; muge_dbg_printf(sc, "10/100 ethernet\n"); break; case IFM_1000_T: sc->sc_flags |= MUGE_FLAG_LINK; muge_dbg_printf(sc, "Gigabit ethernet\n"); break; default: break; } } /* Lost link, do nothing. */ if ((sc->sc_flags & MUGE_FLAG_LINK) == 0) { muge_dbg_printf(sc, "link flag not set\n"); goto done; } err = lan78xx_read_reg(sc, ETH_FCT_FLOW, &fct_flow); if (err) { muge_warn_printf(sc, "failed to read initial flow control thresholds, error %d\n", err); goto done; } /* Enable/disable full duplex operation and TX/RX pause. */ if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { muge_dbg_printf(sc, "full duplex operation\n"); /* Enable transmit MAC flow control function. */ if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) flow |= ETH_FLOW_CR_TX_FCEN_ | 0xFFFF; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) flow |= ETH_FLOW_CR_RX_FCEN_; } /* XXX Flow control settings obtained from Microchip's driver. */ switch(usbd_get_speed(sc->sc_ue.ue_udev)) { case USB_SPEED_SUPER: fct_flow = 0x817; break; case USB_SPEED_HIGH: fct_flow = 0x211; break; default: break; } err += lan78xx_write_reg(sc, ETH_FLOW, flow); err += lan78xx_write_reg(sc, ETH_FCT_FLOW, fct_flow); if (err) muge_warn_printf(sc, "media change failed, error %d\n", err); done: if (!locked) MUGE_UNLOCK(sc); } /* * lan78xx_set_mdix_auto - Configure the device to enable automatic * crossover and polarity detection. LAN7800 provides HP Auto-MDIX * functionality for seamless crossover and polarity detection. * * @sc: driver soft context * * LOCKING: * Takes and releases the device mutex lock if not already held. 
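 *
 * The MDIX control bits live in the PHY's extended register space, so
 * the sequence below is: select extended page 1, update
 * MUGE_EXT_MODE_CTRL, then select page 0 again.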
*/ static void lan78xx_set_mdix_auto(struct muge_softc *sc) { uint32_t buf, err; err = lan78xx_miibus_writereg(sc->sc_ue.ue_dev, sc->sc_phyno, MUGE_EXT_PAGE_ACCESS, MUGE_EXT_PAGE_SPACE_1); buf = lan78xx_miibus_readreg(sc->sc_ue.ue_dev, sc->sc_phyno, MUGE_EXT_MODE_CTRL); buf &= ~MUGE_EXT_MODE_CTRL_MDIX_MASK_; buf |= MUGE_EXT_MODE_CTRL_AUTO_MDIX_; lan78xx_miibus_readreg(sc->sc_ue.ue_dev, sc->sc_phyno, MII_BMCR); err += lan78xx_miibus_writereg(sc->sc_ue.ue_dev, sc->sc_phyno, MUGE_EXT_MODE_CTRL, buf); err += lan78xx_miibus_writereg(sc->sc_ue.ue_dev, sc->sc_phyno, MUGE_EXT_PAGE_ACCESS, MUGE_EXT_PAGE_SPACE_0); if (err != 0) muge_warn_printf(sc, "error setting PHY's MDIX status\n"); sc->sc_mdix_ctl = buf; } /** * lan78xx_phy_init - Initialises the in-built MUGE phy * @sc: driver soft context * * Resets the PHY part of the chip and then initialises it to default * values. The 'link down' and 'auto-negotiation complete' interrupts * from the PHY are also enabled, however we don't monitor the interrupt * endpoints for the moment. * * RETURNS: * Returns 0 on success or EIO if failed to reset the PHY. */ static int lan78xx_phy_init(struct muge_softc *sc) { muge_dbg_printf(sc, "Initializing PHY.\n"); uint16_t bmcr; usb_ticks_t start_ticks; const usb_ticks_t max_ticks = USB_MS_TO_TICKS(1000); MUGE_LOCK_ASSERT(sc, MA_OWNED); /* Reset phy and wait for reset to complete. */ lan78xx_miibus_writereg(sc->sc_ue.ue_dev, sc->sc_phyno, MII_BMCR, BMCR_RESET); start_ticks = ticks; do { uether_pause(&sc->sc_ue, hz / 100); bmcr = lan78xx_miibus_readreg(sc->sc_ue.ue_dev, sc->sc_phyno, MII_BMCR); } while ((bmcr & BMCR_RESET) && ((ticks - start_ticks) < max_ticks)); if (((usb_ticks_t)(ticks - start_ticks)) >= max_ticks) { muge_err_printf(sc, "PHY reset timed-out\n"); return (EIO); } /* Setup phy to interrupt upon link down or autoneg completion. */ lan78xx_miibus_readreg(sc->sc_ue.ue_dev, sc->sc_phyno, MUGE_PHY_INTR_STAT); lan78xx_miibus_writereg(sc->sc_ue.ue_dev, sc->sc_phyno, MUGE_PHY_INTR_MASK, (MUGE_PHY_INTR_ANEG_COMP | MUGE_PHY_INTR_LINK_CHANGE)); /* Enable Auto-MDIX for crossover and polarity detection. */ lan78xx_set_mdix_auto(sc); /* Enable all modes. */ lan78xx_miibus_writereg(sc->sc_ue.ue_dev, sc->sc_phyno, MII_ANAR, ANAR_10 | ANAR_10_FD | ANAR_TX | ANAR_TX_FD | ANAR_CSMA | ANAR_FC | ANAR_PAUSE_ASYM); /* Restart auto-negotation. */ bmcr |= BMCR_STARTNEG; bmcr |= BMCR_AUTOEN; lan78xx_miibus_writereg(sc->sc_ue.ue_dev, sc->sc_phyno, MII_BMCR, bmcr); bmcr = lan78xx_miibus_readreg(sc->sc_ue.ue_dev, sc->sc_phyno, MII_BMCR); return (0); } /** * lan78xx_chip_init - Initialises the chip after power on * @sc: driver soft context * * This initialisation sequence is modelled on the procedure in the Linux * driver. * * RETURNS: * Returns 0 on success or an error code on failure. */ static int lan78xx_chip_init(struct muge_softc *sc) { int err; uint32_t buf; uint32_t burst_cap; MUGE_LOCK_ASSERT(sc, MA_OWNED); /* Enter H/W config mode. */ lan78xx_write_reg(sc, ETH_HW_CFG, ETH_HW_CFG_LRST_); if ((err = lan78xx_wait_for_bits(sc, ETH_HW_CFG, ETH_HW_CFG_LRST_)) != 0) { muge_warn_printf(sc, "timed-out waiting for lite reset to complete\n"); goto init_failed; } /* Set the mac address. */ if ((err = lan78xx_setmacaddress(sc, sc->sc_ue.ue_eaddr)) != 0) { muge_warn_printf(sc, "failed to set the MAC address\n"); goto init_failed; } /* Read and display the revision register. 
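 * ETH_ID_REV packs the chip ID into its upper 16 bits and the silicon
 * revision into its lower 16 bits.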
*/ if ((err = lan78xx_read_reg(sc, ETH_ID_REV, &buf)) < 0) { muge_warn_printf(sc, "failed to read ETH_ID_REV (err = %d)\n", err); goto init_failed; } sc->chipid = (buf & ETH_ID_REV_CHIP_ID_MASK_) >> 16; sc->chiprev = buf & ETH_ID_REV_CHIP_REV_MASK_; - if (sc->chipid != ETH_ID_REV_CHIP_ID_7800_) { + switch (sc->chipid) { + case ETH_ID_REV_CHIP_ID_7800_: + case ETH_ID_REV_CHIP_ID_7850_: + break; + default: muge_warn_printf(sc, "Chip ID 0x%04x not yet supported\n", sc->chipid); goto init_failed; } device_printf(sc->sc_ue.ue_dev, "Chip ID 0x%04x rev %04x\n", sc->chipid, sc->chiprev); /* Respond to BULK-IN tokens with a NAK when RX FIFO is empty. */ if ((err = lan78xx_read_reg(sc, ETH_USB_CFG0, &buf)) != 0) { muge_warn_printf(sc, "failed to read ETH_USB_CFG0 (err=%d)\n", err); goto init_failed; } buf |= ETH_USB_CFG_BIR_; lan78xx_write_reg(sc, ETH_USB_CFG0, buf); /* * XXX LTM support will go here. */ /* Configuring the burst cap. */ switch (usbd_get_speed(sc->sc_ue.ue_udev)) { case USB_SPEED_SUPER: burst_cap = MUGE_DEFAULT_BURST_CAP_SIZE/MUGE_SS_USB_PKT_SIZE; break; case USB_SPEED_HIGH: burst_cap = MUGE_DEFAULT_BURST_CAP_SIZE/MUGE_HS_USB_PKT_SIZE; break; default: burst_cap = MUGE_DEFAULT_BURST_CAP_SIZE/MUGE_FS_USB_PKT_SIZE; } lan78xx_write_reg(sc, ETH_BURST_CAP, burst_cap); /* Set the default bulk in delay (same value from Linux driver). */ lan78xx_write_reg(sc, ETH_BULK_IN_DLY, MUGE_DEFAULT_BULK_IN_DELAY); /* Multiple ethernet frames per USB packets. */ err = lan78xx_read_reg(sc, ETH_HW_CFG, &buf); buf |= ETH_HW_CFG_MEF_; err = lan78xx_write_reg(sc, ETH_HW_CFG, buf); /* Enable burst cap. */ if ((err = lan78xx_read_reg(sc, ETH_USB_CFG0, &buf)) < 0) { muge_warn_printf(sc, "failed to read ETH_USB_CFG0 (err=%d)\n", err); goto init_failed; } buf |= ETH_USB_CFG_BCE_; err = lan78xx_write_reg(sc, ETH_USB_CFG0, buf); /* * Set FCL's RX and TX FIFO sizes: according to data sheet this is * already the default value. But we initialize it to the same value * anyways, as that's what the Linux driver does. * */ buf = (MUGE_MAX_RX_FIFO_SIZE - 512) / 512; err = lan78xx_write_reg(sc, ETH_FCT_RX_FIFO_END, buf); buf = (MUGE_MAX_TX_FIFO_SIZE - 512) / 512; err = lan78xx_write_reg(sc, ETH_FCT_TX_FIFO_END, buf); /* Enabling interrupts. (Not using them for now) */ err = lan78xx_write_reg(sc, ETH_INT_STS, ETH_INT_STS_CLEAR_ALL_); /* * Initializing flow control registers to 0. These registers are * properly set is handled in link-reset function in the Linux driver. */ err = lan78xx_write_reg(sc, ETH_FLOW, 0); err = lan78xx_write_reg(sc, ETH_FCT_FLOW, 0); /* * Settings for the RFE, we enable broadcast and destination address * perfect filtering. */ err = lan78xx_read_reg(sc, ETH_RFE_CTL, &buf); buf |= ETH_RFE_CTL_BCAST_EN_ | ETH_RFE_CTL_DA_PERFECT_; err = lan78xx_write_reg(sc, ETH_RFE_CTL, buf); /* * At this point the Linux driver writes multicast tables, and enables * checksum engines. But in FreeBSD that gets done in muge_init, * which gets called when the interface is brought up. */ /* Reset the PHY. */ lan78xx_write_reg(sc, ETH_PMT_CTL, ETH_PMT_CTL_PHY_RST_); if ((err = lan78xx_wait_for_bits(sc, ETH_PMT_CTL, ETH_PMT_CTL_PHY_RST_)) != 0) { muge_warn_printf(sc, "timed-out waiting for phy reset to complete\n"); goto init_failed; } - /* Enable automatic duplex detection and automatic speed detection. 
*/ err = lan78xx_read_reg(sc, ETH_MAC_CR, &buf); - buf |= ETH_MAC_CR_AUTO_DUPLEX_ | ETH_MAC_CR_AUTO_SPEED_; + if (sc->chipid == ETH_ID_REV_CHIP_ID_7800_ && + !lan78xx_eeprom_present(sc)) { + /* Set automatic duplex and speed on LAN7800 without EEPROM. */ + buf |= ETH_MAC_CR_AUTO_DUPLEX_ | ETH_MAC_CR_AUTO_SPEED_; + } err = lan78xx_write_reg(sc, ETH_MAC_CR, buf); /* * Enable PHY interrupts (Not really getting used for now) * ETH_INT_EP_CTL: interrupt endpoint control register * phy events cause interrupts to be issued */ err = lan78xx_read_reg(sc, ETH_INT_EP_CTL, &buf); buf |= ETH_INT_ENP_PHY_INT; err = lan78xx_write_reg(sc, ETH_INT_EP_CTL, buf); /* * Enables mac's transmitter. It will transmit frames from the buffer * onto the cable. */ err = lan78xx_read_reg(sc, ETH_MAC_TX, &buf); buf |= ETH_MAC_TX_TXEN_; err = lan78xx_write_reg(sc, ETH_MAC_TX, buf); /* FIFO is capable of transmitting frames to MAC. */ err = lan78xx_read_reg(sc, ETH_FCT_TX_CTL, &buf); buf |= ETH_FCT_TX_CTL_EN_; err = lan78xx_write_reg(sc, ETH_FCT_TX_CTL, buf); /* * Set max frame length. In linux this is dev->mtu (which by default * is 1500) + VLAN_ETH_HLEN = 1518. */ err = lan78xx_set_rx_max_frame_length(sc, ETHER_MAX_LEN); /* Initialise the PHY. */ if ((err = lan78xx_phy_init(sc)) != 0) goto init_failed; /* Enable MAC RX. */ err = lan78xx_read_reg(sc, ETH_MAC_RX, &buf); buf |= ETH_MAC_RX_EN_; err = lan78xx_write_reg(sc, ETH_MAC_RX, buf); /* Enable FIFO controller RX. */ err = lan78xx_read_reg(sc, ETH_FCT_RX_CTL, &buf); buf |= ETH_FCT_TX_CTL_EN_; err = lan78xx_write_reg(sc, ETH_FCT_RX_CTL, buf); sc->sc_flags |= MUGE_FLAG_INIT_DONE; return (0); init_failed: muge_err_printf(sc, "lan78xx_chip_init failed (err=%d)\n", err); return (err); } static void muge_bulk_read_callback(struct usb_xfer *xfer, usb_error_t error) { struct muge_softc *sc = usbd_xfer_softc(xfer); struct usb_ether *ue = &sc->sc_ue; struct ifnet *ifp = uether_getifp(ue); struct mbuf *m; struct usb_page_cache *pc; uint16_t pktlen; uint32_t rx_cmd_a, rx_cmd_b; uint16_t rx_cmd_c; int off; int actlen; usbd_xfer_status(xfer, &actlen, NULL, NULL, NULL); muge_dbg_printf(sc, "rx : actlen %d\n", actlen); switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: /* * There is always a zero length frame after bringing the * interface up. */ if (actlen < (sizeof(rx_cmd_a) + ETHER_CRC_LEN)) goto tr_setup; /* * There may be multiple packets in the USB frame. Each will * have a header and each needs to have its own mbuf allocated * and populated for it. */ pc = usbd_xfer_get_frame(xfer, 0); off = 0; while (off < actlen) { /* The frame header is aligned on a 4 byte boundary. */ off = ((off + 0x3) & ~0x3); /* Extract RX CMD A. */ if (off + sizeof(rx_cmd_a) > actlen) goto tr_setup; usbd_copy_out(pc, off, &rx_cmd_a, sizeof(rx_cmd_a)); off += (sizeof(rx_cmd_a)); rx_cmd_a = le32toh(rx_cmd_a); /* Extract RX CMD B. */ if (off + sizeof(rx_cmd_b) > actlen) goto tr_setup; usbd_copy_out(pc, off, &rx_cmd_b, sizeof(rx_cmd_b)); off += (sizeof(rx_cmd_b)); rx_cmd_b = le32toh(rx_cmd_b); /* Extract RX CMD C. 
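 * Each packet in the USB frame is preceded by a 10-byte header: the
 * 32-bit RX CMD A and RX CMD B words followed by the 16-bit RX CMD C.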
*/ if (off + sizeof(rx_cmd_c) > actlen) goto tr_setup; usbd_copy_out(pc, off, &rx_cmd_c, sizeof(rx_cmd_c)); off += (sizeof(rx_cmd_c)); rx_cmd_c = le32toh(rx_cmd_c); if (off > actlen) goto tr_setup; pktlen = (rx_cmd_a & RX_CMD_A_LEN_MASK_); muge_dbg_printf(sc, "rx_cmd_a 0x%08x rx_cmd_b 0x%08x rx_cmd_c 0x%04x " " pktlen %d actlen %d off %d\n", rx_cmd_a, rx_cmd_b, rx_cmd_c, pktlen, actlen, off); if (rx_cmd_a & RX_CMD_A_RED_) { muge_dbg_printf(sc, "rx error (hdr 0x%08x)\n", rx_cmd_a); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } else { /* Ethernet frame too big or too small? */ if ((pktlen < ETHER_HDR_LEN) || (pktlen > (actlen - off))) goto tr_setup; /* Create a new mbuf to store the packet. */ m = uether_newbuf(); if (m == NULL) { muge_warn_printf(sc, "failed to create new mbuf\n"); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); goto tr_setup; } usbd_copy_out(pc, off, mtod(m, uint8_t *), pktlen); /* * Check if RX checksums are computed, and * offload them */ if ((ifp->if_capabilities & IFCAP_RXCSUM) && !(rx_cmd_a & RX_CMD_A_ICSM_)) { struct ether_header *eh; eh = mtod(m, struct ether_header *); /* * Remove the extra 2 bytes of the csum * * The checksum appears to be * simplistically calculated over the * protocol headers up to the end of the * eth frame. Which means if the eth * frame is padded the csum calculation * is incorrectly performed over the * padding bytes as well. Therefore to * be safe we ignore the H/W csum on * frames less than or equal to * 64 bytes. * * Protocols checksummed: * TCP, UDP, ICMP, IGMP, IP */ if (pktlen > ETHER_MIN_LEN) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; /* * Copy the checksum from the * last 2 bytes of the transfer * and put in the csum_data * field. */ usbd_copy_out(pc, (off + pktlen), &m->m_pkthdr.csum_data, 2); /* * The data is copied in network * order, but the csum algorithm * in the kernel expects it to * be in host network order. */ m->m_pkthdr.csum_data = ntohs(m->m_pkthdr.csum_data); muge_dbg_printf(sc, "RX checksum offloaded (0x%04x)\n", m->m_pkthdr.csum_data); } } /* Enqueue the mbuf on the receive queue. */ if (pktlen < (4 + ETHER_HDR_LEN)) { m_freem(m); goto tr_setup; } /* Remove 4 trailing bytes */ uether_rxmbuf(ue, m, pktlen - 4); } /* * Update the offset to move to the next potential * packet. */ off += pktlen; } /* FALLTHROUGH */ case USB_ST_SETUP: tr_setup: usbd_xfer_set_frame_len(xfer, 0, usbd_xfer_max_len(xfer)); usbd_transfer_submit(xfer); uether_rxflush(ue); return; default: if (error != USB_ERR_CANCELLED) { muge_warn_printf(sc, "bulk read error, %s\n", usbd_errstr(error)); usbd_xfer_set_stall(xfer); goto tr_setup; } return; } } /** * muge_bulk_write_callback - Write callback used to send ethernet frame(s) * @xfer: the USB transfer * @error: error code if the transfers is in an errored state * * The main write function that pulls ethernet frames off the queue and * sends them out. 
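 *
 * Each frame handed to the chip is prefixed with two little-endian
 * 32-bit command words, built as in the loop below:
 *
 *	tx_cmd_a = (m->m_pkthdr.len & TX_CMD_A_LEN_MASK_) | TX_CMD_A_FCS_;
 *	tx_cmd_b = 0;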
* */ static void muge_bulk_write_callback(struct usb_xfer *xfer, usb_error_t error) { struct muge_softc *sc = usbd_xfer_softc(xfer); struct ifnet *ifp = uether_getifp(&sc->sc_ue); struct usb_page_cache *pc; struct mbuf *m; int nframes; uint32_t frm_len = 0, tx_cmd_a = 0, tx_cmd_b = 0; switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: muge_dbg_printf(sc, "USB TRANSFER status: USB_ST_TRANSFERRED\n"); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* FALLTHROUGH */ case USB_ST_SETUP: muge_dbg_printf(sc, "USB TRANSFER status: USB_ST_SETUP\n"); tr_setup: if ((sc->sc_flags & MUGE_FLAG_LINK) == 0 || (ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { muge_dbg_printf(sc, "sc->sc_flags & MUGE_FLAG_LINK: %d\n", (sc->sc_flags & MUGE_FLAG_LINK)); muge_dbg_printf(sc, "ifp->if_drv_flags & IFF_DRV_OACTIVE: %d\n", (ifp->if_drv_flags & IFF_DRV_OACTIVE)); muge_dbg_printf(sc, "USB TRANSFER not sending: no link or controller is busy \n"); /* * Don't send anything if there is no link or * controller is busy. */ return; } for (nframes = 0; nframes < 16 && !IFQ_DRV_IS_EMPTY(&ifp->if_snd); nframes++) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; usbd_xfer_set_frame_offset(xfer, nframes * MCLBYTES, nframes); frm_len = 0; pc = usbd_xfer_get_frame(xfer, nframes); /* * Each frame is prefixed with two 32-bit values * describing the length of the packet and buffer. */ tx_cmd_a = (m->m_pkthdr.len & TX_CMD_A_LEN_MASK_) | TX_CMD_A_FCS_; tx_cmd_a = htole32(tx_cmd_a); usbd_copy_in(pc, 0, &tx_cmd_a, sizeof(tx_cmd_a)); tx_cmd_b = 0; /* TCP LSO Support will probably be implemented here. */ tx_cmd_b = htole32(tx_cmd_b); usbd_copy_in(pc, 4, &tx_cmd_b, sizeof(tx_cmd_b)); frm_len += 8; /* Next copy in the actual packet */ usbd_m_copy_in(pc, frm_len, m, 0, m->m_pkthdr.len); frm_len += m->m_pkthdr.len; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* * If there's a BPF listener, bounce a copy of this * frame to it. */ BPF_MTAP(ifp, m); m_freem(m); /* Set frame length. */ usbd_xfer_set_frame_len(xfer, nframes, frm_len); } muge_dbg_printf(sc, "USB TRANSFER nframes: %d\n", nframes); if (nframes != 0) { muge_dbg_printf(sc, "USB TRANSFER submit attempt\n"); usbd_xfer_set_frames(xfer, nframes); usbd_transfer_submit(xfer); ifp->if_drv_flags |= IFF_DRV_OACTIVE; } return; default: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (error != USB_ERR_CANCELLED) { muge_err_printf(sc, "usb error on tx: %s\n", usbd_errstr(error)); usbd_xfer_set_stall(xfer); goto tr_setup; } return; } } /** * muge_attach_post - Called after the driver attached to the USB interface * @ue: the USB ethernet device * * This is where the chip is intialised for the first time. This is * different from the muge_init() function in that that one is designed to * setup the H/W to match the UE settings and can be called after a reset. * */ static void muge_attach_post(struct usb_ether *ue) { struct muge_softc *sc = uether_getsc(ue); uint32_t mac_h, mac_l; muge_dbg_printf(sc, "Calling muge_attach_post.\n"); /* Setup some of the basics */ sc->sc_phyno = 1; /* * Attempt to get the mac address, if an EEPROM is not attached this * will just return FF:FF:FF:FF:FF:FF, so in such cases we invent a MAC * address based on urandom. */ memset(sc->sc_ue.ue_eaddr, 0xff, ETHER_ADDR_LEN); uint32_t val; lan78xx_read_reg(sc, 0, &val); /* Read current MAC address from RX_ADDRx registers. 
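 * The sources are tried in order: the RX_ADDRx registers, then the
 * EEPROM, then the OTP, and finally a randomly generated
 * locally-administered address.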
*/ if ((lan78xx_read_reg(sc, ETH_RX_ADDRL, &mac_l) == 0) && (lan78xx_read_reg(sc, ETH_RX_ADDRH, &mac_h) == 0)) { sc->sc_ue.ue_eaddr[5] = (uint8_t)((mac_h >> 8) & 0xff); sc->sc_ue.ue_eaddr[4] = (uint8_t)((mac_h) & 0xff); sc->sc_ue.ue_eaddr[3] = (uint8_t)((mac_l >> 24) & 0xff); sc->sc_ue.ue_eaddr[2] = (uint8_t)((mac_l >> 16) & 0xff); sc->sc_ue.ue_eaddr[1] = (uint8_t)((mac_l >> 8) & 0xff); sc->sc_ue.ue_eaddr[0] = (uint8_t)((mac_l) & 0xff); } /* If RX_ADDRx did not provide a valid MAC address, try EEPROM. */ if (!ETHER_IS_VALID(sc->sc_ue.ue_eaddr)) { if ((lan78xx_eeprom_read(sc, ETH_E2P_MAC_OFFSET, sc->sc_ue.ue_eaddr, ETHER_ADDR_LEN) == 0) || (lan78xx_otp_read(sc, OTP_MAC_OFFSET, sc->sc_ue.ue_eaddr, ETHER_ADDR_LEN) == 0)) { if (ETHER_IS_VALID(sc->sc_ue.ue_eaddr)) { muge_dbg_printf(sc, "MAC read from EEPROM\n"); } else { muge_dbg_printf(sc, "MAC assigned randomly\n"); read_random(sc->sc_ue.ue_eaddr, ETHER_ADDR_LEN); sc->sc_ue.ue_eaddr[0] &= ~0x01; /* unicast */ sc->sc_ue.ue_eaddr[0] |= 0x02;/* locally administered */ } } else { muge_dbg_printf(sc, "MAC assigned randomly\n"); arc4rand(sc->sc_ue.ue_eaddr, ETHER_ADDR_LEN, 0); sc->sc_ue.ue_eaddr[0] &= ~0x01; /* unicast */ sc->sc_ue.ue_eaddr[0] |= 0x02; /* locally administered */ } } else { muge_dbg_printf(sc, "MAC assigned from registers\n"); } /* Initialise the chip for the first time */ lan78xx_chip_init(sc); } /** * muge_attach_post_sub - Called after attach to the USB interface * @ue: the USB ethernet device * * Most of this is boilerplate code and copied from the base USB ethernet * driver. It has been overriden so that we can indicate to the system * that the chip supports H/W checksumming. * * RETURNS: * Returns 0 on success or a negative error code. */ static int muge_attach_post_sub(struct usb_ether *ue) { struct muge_softc *sc; struct ifnet *ifp; int error; sc = uether_getsc(ue); muge_dbg_printf(sc, "Calling muge_attach_post_sub.\n"); ifp = ue->ue_ifp; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_start = uether_start; ifp->if_ioctl = muge_ioctl; ifp->if_init = uether_init; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); /* * The chip supports TCP/UDP checksum offloading on TX and RX paths, * however currently only RX checksum is supported in the driver * (see top of file). */ ifp->if_hwassist = 0; if (MUGE_DEFAULT_RX_CSUM_ENABLE) ifp->if_capabilities |= IFCAP_RXCSUM; if (MUGE_DEFAULT_TX_CSUM_ENABLE) ifp->if_capabilities |= IFCAP_TXCSUM; /* * In the Linux driver they also enable scatter/gather (NETIF_F_SG) * here, that's something related to socket buffers used in Linux. * FreeBSD doesn't have that as an interface feature. */ if (MUGE_DEFAULT_TSO_CSUM_ENABLE) ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; #if 0 /* TX checksuming is disabled since not yet implemented. */ ifp->if_capabilities |= IFCAP_TXCSUM; ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist = CSUM_TCP | CSUM_UDP; #endif ifp->if_capenable = ifp->if_capabilities; mtx_lock(&Giant); error = mii_attach(ue->ue_dev, &ue->ue_miibus, ifp, uether_ifmedia_upd, ue->ue_methods->ue_mii_sts, BMSR_DEFCAPMASK, sc->sc_phyno, MII_OFFSET_ANY, 0); mtx_unlock(&Giant); return (0); } /** * muge_start - Starts communication with the LAN78xx chip * @ue: USB ether interface */ static void muge_start(struct usb_ether *ue) { struct muge_softc *sc = uether_getsc(ue); /* * Start the USB transfers, if not already started. 
/**
 * muge_attach_post_sub - Called after attach to the USB interface
 * @ue: the USB ethernet device
 *
 * Most of this is boilerplate code and copied from the base USB ethernet
 * driver.  It has been overridden so that we can indicate to the system
 * that the chip supports H/W checksumming.
 *
 * RETURNS:
 * Returns 0 on success or a negative error code.
 */
static int
muge_attach_post_sub(struct usb_ether *ue)
{
	struct muge_softc *sc;
	struct ifnet *ifp;
	int error;

	sc = uether_getsc(ue);
	muge_dbg_printf(sc, "Calling muge_attach_post_sub.\n");
	ifp = ue->ue_ifp;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_start = uether_start;
	ifp->if_ioctl = muge_ioctl;
	ifp->if_init = uether_init;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * The chip supports TCP/UDP checksum offloading on TX and RX paths,
	 * however currently only RX checksum is supported in the driver
	 * (see top of file).
	 */
	ifp->if_hwassist = 0;
	if (MUGE_DEFAULT_RX_CSUM_ENABLE)
		ifp->if_capabilities |= IFCAP_RXCSUM;

	if (MUGE_DEFAULT_TX_CSUM_ENABLE)
		ifp->if_capabilities |= IFCAP_TXCSUM;

	/*
	 * In the Linux driver they also enable scatter/gather (NETIF_F_SG)
	 * here, that's something related to socket buffers used in Linux.
	 * FreeBSD doesn't have that as an interface feature.
	 */
	if (MUGE_DEFAULT_TSO_CSUM_ENABLE)
		ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;

#if 0
	/* TX checksumming is disabled since not yet implemented. */
	ifp->if_capabilities |= IFCAP_TXCSUM;
	ifp->if_capenable |= IFCAP_TXCSUM;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP;
#endif

	ifp->if_capenable = ifp->if_capabilities;

	mtx_lock(&Giant);
	error = mii_attach(ue->ue_dev, &ue->ue_miibus, ifp,
	    uether_ifmedia_upd, ue->ue_methods->ue_mii_sts,
	    BMSR_DEFCAPMASK, sc->sc_phyno, MII_OFFSET_ANY, 0);
	mtx_unlock(&Giant);

	return (0);
}

/**
 * muge_start - Starts communication with the LAN78xx chip
 * @ue: USB ether interface
 */
static void
muge_start(struct usb_ether *ue)
{
	struct muge_softc *sc = uether_getsc(ue);

	/*
	 * Start the USB transfers, if not already started.
	 */
	usbd_transfer_start(sc->sc_xfer[MUGE_BULK_DT_RD]);
	usbd_transfer_start(sc->sc_xfer[MUGE_BULK_DT_WR]);
}

/**
 * muge_ioctl - ioctl function for the device
 * @ifp: interface pointer
 * @cmd: the ioctl command
 * @data: data passed in the ioctl call, typically a pointer to struct
 * ifreq.
 *
 * The ioctl routine is overridden to detect change requests for the H/W
 * checksum capabilities.
 *
 * RETURNS:
 * 0 on success and an error code on failure.
 */
static int
muge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct usb_ether *ue = ifp->if_softc;
	struct muge_softc *sc;
	struct ifreq *ifr;
	int rc;
	int mask;
	int reinit;

	if (cmd == SIOCSIFCAP) {
		sc = uether_getsc(ue);
		ifr = (struct ifreq *)data;

		MUGE_LOCK(sc);

		rc = 0;
		reinit = 0;

		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		/* Modify the RX CSUM enable bits. */
		if ((mask & IFCAP_RXCSUM) != 0 &&
		    (ifp->if_capabilities & IFCAP_RXCSUM) != 0) {
			ifp->if_capenable ^= IFCAP_RXCSUM;

			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
				reinit = 1;
			}
		}

		MUGE_UNLOCK(sc);
		if (reinit)
			uether_init(ue);
	} else {
		rc = uether_ioctl(ifp, cmd, data);
	}

	return (rc);
}

/**
 * muge_reset - Reset the LAN78xx chip
 * @sc: device soft context
 *
 * LOCKING:
 * Should be called with the MUGE lock held.
 */
static void
muge_reset(struct muge_softc *sc)
{
	struct usb_config_descriptor *cd;
	usb_error_t err;

	cd = usbd_get_config_descriptor(sc->sc_ue.ue_udev);
	err = usbd_req_set_config(sc->sc_ue.ue_udev, &sc->sc_mtx,
	    cd->bConfigurationValue);
	if (err)
		muge_warn_printf(sc, "reset failed (ignored)\n");

	/* Wait a little while for the chip to get its brains in order. */
	uether_pause(&sc->sc_ue, hz / 100);

	/* Reinitialize controller to achieve full reset. */
	lan78xx_chip_init(sc);
}

/**
 * muge_set_addr_filter
 *
 * @sc: device soft context
 * @index: index of the entry to the perfect address table
 * @addr: address to be written
 */
static void
muge_set_addr_filter(struct muge_softc *sc, int index,
    uint8_t addr[ETHER_ADDR_LEN])
{
	uint32_t tmp;

	if ((sc) && (index > 0) && (index < MUGE_NUM_PFILTER_ADDRS_)) {
		tmp = addr[3];
		tmp = addr[2] | (tmp << 8);
		tmp = addr[1] | (tmp << 8);
		tmp = addr[0] | (tmp << 8);
		sc->sc_pfilter_table[index][1] = tmp;
		tmp = addr[5];
		tmp = addr[4] | (tmp << 8);
		tmp |= ETH_MAF_HI_VALID_ | ETH_MAF_HI_TYPE_DST_;
		sc->sc_pfilter_table[index][0] = tmp;
	}
}

/**
 * lan78xx_dataport_write - write to the selected RAM
 * @sc: The device soft context.
 * @ram_select: Select which RAM to access.
 * @addr: Starting address to write to.
 * @buf: word-sized buffer to write to RAM, starting at @addr.
 * @length: length of @buf
 *
 * RETURNS:
 * 0 if write successful.
 */
static int
lan78xx_dataport_write(struct muge_softc *sc, uint32_t ram_select,
    uint32_t addr, uint32_t length, uint32_t *buf)
{
	uint32_t dp_sel;
	int i, ret;

	MUGE_LOCK_ASSERT(sc, MA_OWNED);
	ret = lan78xx_wait_for_bits(sc, ETH_DP_SEL, ETH_DP_SEL_DPRDY_);
	if (ret < 0)
		goto done;

	ret = lan78xx_read_reg(sc, ETH_DP_SEL, &dp_sel);
	dp_sel &= ~ETH_DP_SEL_RSEL_MASK_;
	dp_sel |= ram_select;

	ret = lan78xx_write_reg(sc, ETH_DP_SEL, dp_sel);

	for (i = 0; i < length; i++) {
		ret = lan78xx_write_reg(sc, ETH_DP_ADDR, addr + i);
		ret = lan78xx_write_reg(sc, ETH_DP_DATA, buf[i]);
		ret = lan78xx_write_reg(sc, ETH_DP_CMD, ETH_DP_CMD_WRITE_);
		ret = lan78xx_wait_for_bits(sc, ETH_DP_SEL, ETH_DP_SEL_DPRDY_);
		if (ret != 0)
			goto done;
	}

done:
	return (ret);
}
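/*
 * Editor's sketch (not part of the driver): the kind of bounded polling
 * loop that lan78xx_wait_for_bits(), called twice by
 * lan78xx_dataport_write() above, is assumed to perform: re-read the
 * register until the requested bits are set or a retry budget runs out.
 * Guarded by #if 0; the name, retry count, and delay are hypothetical,
 * not the driver's actual implementation.
 */
#if 0
static int
sketch_wait_for_bits(struct muge_softc *sc, uint32_t reg, uint32_t bits)
{
	uint32_t val;
	int i;

	for (i = 0; i < 100; i++) {		/* arbitrary retry budget */
		if (lan78xx_read_reg(sc, reg, &val) != 0)
			return (EIO);
		if ((val & bits) == bits)	/* all requested bits set */
			return (0);
		uether_pause(&sc->sc_ue, hz / 100);	/* ~10 ms per poll */
	}
	return (ETIMEDOUT);
}
#endif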
/**
 * muge_multicast_write
 * @sc: device's soft context
 *
 * Writes perfect address filters and hash address filters to their
 * corresponding registers and RAMs.
 */
static void
muge_multicast_write(struct muge_softc *sc)
{
	int i, ret;

	lan78xx_dataport_write(sc, ETH_DP_SEL_RSEL_VLAN_DA_,
	    ETH_DP_SEL_VHF_VLAN_LEN, ETH_DP_SEL_VHF_HASH_LEN,
	    sc->sc_mchash_table);

	for (i = 1; i < MUGE_NUM_PFILTER_ADDRS_; i++) {
		ret = lan78xx_write_reg(sc, PFILTER_HI(i), 0);
		ret = lan78xx_write_reg(sc, PFILTER_LO(i),
		    sc->sc_pfilter_table[i][1]);
		ret = lan78xx_write_reg(sc, PFILTER_HI(i),
		    sc->sc_pfilter_table[i][0]);
	}
}

/**
 * muge_hash - Calculate the hash of a mac address
 * @addr: The mac address to calculate the hash on
 *
 * This function is used when configuring a range of multicast mac
 * addresses to filter on.  The hash of the mac address is put in the
 * device's mac hash table.
 *
 * RETURNS:
 * Returns a value from 0-63 which is the hash of the mac address.
 */
static inline uint32_t
muge_hash(uint8_t addr[ETHER_ADDR_LEN])
{

	return (ether_crc32_be(addr, ETHER_ADDR_LEN) >> 26) & 0x3f;
}

/**
 * muge_setmulti - Setup multicast
 * @ue: usb ethernet device context
 *
 * Tells the device to either accept frames with a multicast mac address,
 * a select group of m'cast mac addresses or just the device's mac address.
 *
 * LOCKING:
 * Should be called with the MUGE lock held.
 */
static void
muge_setmulti(struct usb_ether *ue)
{
	struct muge_softc *sc = uether_getsc(ue);
	struct ifnet *ifp = uether_getifp(ue);
	uint8_t i, *addr;
	struct ifmultiaddr *ifma;

	MUGE_LOCK_ASSERT(sc, MA_OWNED);

	sc->sc_rfe_ctl &= ~(ETH_RFE_CTL_UCAST_EN_ | ETH_RFE_CTL_MCAST_EN_ |
	    ETH_RFE_CTL_DA_PERFECT_ | ETH_RFE_CTL_MCAST_HASH_);

	/* Initialize hash filter table. */
	for (i = 0; i < ETH_DP_SEL_VHF_HASH_LEN; i++)
		sc->sc_mchash_table[i] = 0;

	/* Initialize perfect filter table. */
	for (i = 1; i < MUGE_NUM_PFILTER_ADDRS_; i++) {
		sc->sc_pfilter_table[i][0] =
		    sc->sc_pfilter_table[i][1] = 0;
	}

	sc->sc_rfe_ctl |= ETH_RFE_CTL_BCAST_EN_;

	if (ifp->if_flags & IFF_PROMISC) {
		muge_dbg_printf(sc, "promiscuous mode enabled\n");
		sc->sc_rfe_ctl |= ETH_RFE_CTL_MCAST_EN_ | ETH_RFE_CTL_UCAST_EN_;
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		muge_dbg_printf(sc, "receive all multicast enabled\n");
		sc->sc_rfe_ctl |= ETH_RFE_CTL_MCAST_EN_;
	} else {
		/* Lock the mac address list before hashing each of them. */
		if_maddr_rlock(ifp);
		if (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) {
			i = 1;
			CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs,
			    ifma_link) {
				/* First fill up the perfect address table. */
				addr = LLADDR((struct sockaddr_dl *)
				    ifma->ifma_addr);
				if (i < 33 /* XXX */) {
					muge_set_addr_filter(sc, i, addr);
				} else {
					uint32_t bitnum = muge_hash(addr);
					sc->sc_mchash_table[bitnum / 32] |=
					    (1 << (bitnum % 32));
					sc->sc_rfe_ctl |=
					    ETH_RFE_CTL_MCAST_HASH_;
				}
				i++;
			}
		}
		if_maddr_runlock(ifp);
		muge_multicast_write(sc);
	}
	lan78xx_write_reg(sc, ETH_RFE_CTL, sc->sc_rfe_ctl);
}

/**
 * muge_setpromisc - Enables/disables promiscuous mode
 * @ue: usb ethernet device context
 *
 * LOCKING:
 * Should be called with the MUGE lock held.
 */
static void
muge_setpromisc(struct usb_ether *ue)
{
	struct muge_softc *sc = uether_getsc(ue);
	struct ifnet *ifp = uether_getifp(ue);

	muge_dbg_printf(sc, "promiscuous mode %sabled\n",
	    (ifp->if_flags & IFF_PROMISC) ? "en" : "dis");

	MUGE_LOCK_ASSERT(sc, MA_OWNED);

	if (ifp->if_flags & IFF_PROMISC)
		sc->sc_rfe_ctl |= ETH_RFE_CTL_MCAST_EN_ | ETH_RFE_CTL_UCAST_EN_;
	else
		sc->sc_rfe_ctl &= ~(ETH_RFE_CTL_MCAST_EN_);

	lan78xx_write_reg(sc, ETH_RFE_CTL, sc->sc_rfe_ctl);
}
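/*
 * Editor's sketch (not part of the driver): how the 0-63 value from
 * muge_hash() above (the top six bits of the big-endian CRC32 of the
 * address) selects one bit in the 64-bit multicast hash table, which is
 * kept as an array of 32-bit words and uploaded by muge_multicast_write().
 * Guarded by #if 0; the helper name is hypothetical.
 */
#if 0
static void
sketch_set_mchash_bit(uint32_t table[2], uint32_t hash /* 0..63 */)
{
	/* hash / 32 picks the word, hash % 32 picks the bit within it. */
	table[hash / 32] |= 1U << (hash % 32);
}
#endif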
/**
 * muge_sethwcsum - Enable or disable H/W UDP and TCP checksumming
 * @sc: driver soft context
 *
 * LOCKING:
 * Should be called with the MUGE lock held.
 *
 * RETURNS:
 * Returns 0 on success or a negative error code.
 */
static int
muge_sethwcsum(struct muge_softc *sc)
{
	struct ifnet *ifp = uether_getifp(&sc->sc_ue);
	int err;

	if (!ifp)
		return (-EIO);

	MUGE_LOCK_ASSERT(sc, MA_OWNED);

	if (ifp->if_capenable & IFCAP_RXCSUM) {
		sc->sc_rfe_ctl |= ETH_RFE_CTL_IGMP_COE_ | ETH_RFE_CTL_ICMP_COE_;
		sc->sc_rfe_ctl |= ETH_RFE_CTL_TCPUDP_COE_ | ETH_RFE_CTL_IP_COE_;
	} else {
		sc->sc_rfe_ctl &=
		    ~(ETH_RFE_CTL_IGMP_COE_ | ETH_RFE_CTL_ICMP_COE_);
		sc->sc_rfe_ctl &=
		    ~(ETH_RFE_CTL_TCPUDP_COE_ | ETH_RFE_CTL_IP_COE_);
	}

	sc->sc_rfe_ctl &= ~ETH_RFE_CTL_VLAN_FILTER_;

	err = lan78xx_write_reg(sc, ETH_RFE_CTL, sc->sc_rfe_ctl);
	if (err != 0) {
		muge_warn_printf(sc, "failed to write ETH_RFE_CTL (err=%d)\n",
		    err);
		return (err);
	}

	return (0);
}

/**
 * muge_ifmedia_upd - Set media options
 * @ifp: interface pointer
 *
 * Basically boilerplate code that simply calls the mii functions to set
 * the media options.
 *
 * LOCKING:
 * The device lock must be held before this function is called.
 *
 * RETURNS:
 * Returns 0 on success or a negative error code.
 */
static int
muge_ifmedia_upd(struct ifnet *ifp)
{
	struct muge_softc *sc = ifp->if_softc;
	muge_dbg_printf(sc, "Calling muge_ifmedia_upd.\n");
	struct mii_data *mii = uether_getmii(&sc->sc_ue);
	struct mii_softc *miisc;
	int err;

	MUGE_LOCK_ASSERT(sc, MA_OWNED);

	LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
		PHY_RESET(miisc);
	err = mii_mediachg(mii);
	return (err);
}

/**
 * muge_init - Initialises the LAN78xx chip
 * @ue: USB ether interface
 *
 * Called when the interface is brought up (i.e. ifconfig ue0 up), this
 * initialises the interface and the rx/tx pipes.
 *
 * LOCKING:
 * Should be called with the MUGE lock held.
 */
static void
muge_init(struct usb_ether *ue)
{
	struct muge_softc *sc = uether_getsc(ue);
	muge_dbg_printf(sc, "Calling muge_init.\n");
	struct ifnet *ifp = uether_getifp(ue);
	MUGE_LOCK_ASSERT(sc, MA_OWNED);

	if (lan78xx_setmacaddress(sc, IF_LLADDR(ifp)))
		muge_dbg_printf(sc, "setting MAC address failed\n");

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
		return;

	/* Cancel pending I/O. */
	muge_stop(ue);

	/* Reset the ethernet interface. */
	muge_reset(sc);

	/* Load the multicast filter. */
	muge_setmulti(ue);

	/* TCP/UDP checksum offload engines. */
	muge_sethwcsum(sc);

	usbd_xfer_set_stall(sc->sc_xfer[MUGE_BULK_DT_WR]);

	/* Indicate we are up and running. */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;

	/* Switch to selected media. */
	muge_ifmedia_upd(ifp);
	muge_start(ue);
}

/**
 * muge_stop - Stops communication with the LAN78xx chip
 * @ue: USB ether interface
 */
static void
muge_stop(struct usb_ether *ue)
{
	struct muge_softc *sc = uether_getsc(ue);
	struct ifnet *ifp = uether_getifp(ue);

	MUGE_LOCK_ASSERT(sc, MA_OWNED);

	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	sc->sc_flags &= ~MUGE_FLAG_LINK;

	/*
	 * Stop all the transfers, if not already stopped.
	 */
	usbd_transfer_stop(sc->sc_xfer[MUGE_BULK_DT_WR]);
	usbd_transfer_stop(sc->sc_xfer[MUGE_BULK_DT_RD]);
}
/**
 * muge_tick - Called periodically to monitor the state of the LAN78xx chip
 * @ue: USB ether interface
 *
 * Simply calls the mii status functions to check the state of the link.
 *
 * LOCKING:
 * Should be called with the MUGE lock held.
 */
static void
muge_tick(struct usb_ether *ue)
{
	struct muge_softc *sc = uether_getsc(ue);
	struct mii_data *mii = uether_getmii(&sc->sc_ue);

	MUGE_LOCK_ASSERT(sc, MA_OWNED);

	mii_tick(mii);
	if ((sc->sc_flags & MUGE_FLAG_LINK) == 0) {
		lan78xx_miibus_statchg(ue->ue_dev);
		if ((sc->sc_flags & MUGE_FLAG_LINK) != 0)
			muge_start(ue);
	}
}

/**
 * muge_ifmedia_sts - Report current media status
 * @ifp: interface pointer
 * @ifmr: interface media request
 *
 * Call the mii functions to get the media status.
 *
 * LOCKING:
 * Internally takes and releases the device lock.
 */
static void
muge_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct muge_softc *sc = ifp->if_softc;
	struct mii_data *mii = uether_getmii(&sc->sc_ue);

	MUGE_LOCK(sc);
	mii_pollstat(mii);
	ifmr->ifm_active = mii->mii_media_active;
	ifmr->ifm_status = mii->mii_media_status;
	MUGE_UNLOCK(sc);
}

/**
 * muge_probe - Probe the interface.
 * @dev: muge device handle
 *
 * Checks if the device is a match for this driver.
 *
 * RETURNS:
 * Returns 0 on success or an error code on failure.
 */
static int
muge_probe(device_t dev)
{
	struct usb_attach_arg *uaa = device_get_ivars(dev);

	if (uaa->usb_mode != USB_MODE_HOST)
		return (ENXIO);
	if (uaa->info.bConfigIndex != MUGE_CONFIG_INDEX)
		return (ENXIO);
	if (uaa->info.bIfaceIndex != MUGE_IFACE_IDX)
		return (ENXIO);

	return (usbd_lookup_id_by_uaa(lan78xx_devs, sizeof(lan78xx_devs),
	    uaa));
}

/**
 * muge_attach - Attach the interface.
 * @dev: muge device handle
 *
 * Allocate softc structures, do ifmedia setup and ethernet/BPF attach.
 *
 * RETURNS:
 * Returns 0 on success or a negative error code.
 */
static int
muge_attach(device_t dev)
{
	struct usb_attach_arg *uaa = device_get_ivars(dev);
	struct muge_softc *sc = device_get_softc(dev);
	struct usb_ether *ue = &sc->sc_ue;
	uint8_t iface_index;
	int err;

	sc->sc_flags = USB_GET_DRIVER_INFO(uaa);

	device_set_usb_desc(dev);
	mtx_init(&sc->sc_mtx, device_get_nameunit(dev), NULL, MTX_DEF);

	/* Setup the endpoints for the Microchip LAN78xx device. */
	iface_index = MUGE_IFACE_IDX;
	err = usbd_transfer_setup(uaa->device, &iface_index, sc->sc_xfer,
	    muge_config, MUGE_N_TRANSFER, sc, &sc->sc_mtx);
	if (err) {
		device_printf(dev, "error: allocating USB transfers failed\n");
		goto err;
	}

	ue->ue_sc = sc;
	ue->ue_dev = dev;
	ue->ue_udev = uaa->device;
	ue->ue_mtx = &sc->sc_mtx;
	ue->ue_methods = &muge_ue_methods;

	err = uether_ifattach(ue);
	if (err) {
		device_printf(dev, "error: could not attach interface\n");
		goto err_usbd;
	}

	/* Wait for lan78xx_chip_init from post-attach callback to complete. */
	uether_ifattach_wait(ue);
	if (!(sc->sc_flags & MUGE_FLAG_INIT_DONE))
		goto err_attached;

	return (0);

err_attached:
	uether_ifdetach(ue);
err_usbd:
	usbd_transfer_unsetup(sc->sc_xfer, MUGE_N_TRANSFER);
err:
	mtx_destroy(&sc->sc_mtx);
	return (ENXIO);
}
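/*
 * Editor's sketch (not part of the driver): the shape of the lan78xx_devs
 * table that muge_probe() above hands to usbd_lookup_id_by_uaa() and that
 * muge_attach() queries via USB_GET_DRIVER_INFO().  The real table is
 * defined earlier in the file; the entry below is only a representative
 * example and is guarded by #if 0.
 */
#if 0
static const STRUCT_USB_HOST_ID lan78xx_devs_sketch[] = {
	/* Vendor ID, product ID, and per-device driver_info flags. */
	{USB_VPI(USB_VENDOR_SMC2, USB_PRODUCT_SMC2_LAN7800_ETH, 0)},
};
#endif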
/**
 * muge_detach - Detach the interface.
 * @dev: muge device handle
 *
 * RETURNS:
 * Returns 0.
 */
static int
muge_detach(device_t dev)
{
	struct muge_softc *sc = device_get_softc(dev);
	struct usb_ether *ue = &sc->sc_ue;

	usbd_transfer_unsetup(sc->sc_xfer, MUGE_N_TRANSFER);
	uether_ifdetach(ue);
	mtx_destroy(&sc->sc_mtx);

	return (0);
}

static device_method_t muge_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, muge_probe),
	DEVMETHOD(device_attach, muge_attach),
	DEVMETHOD(device_detach, muge_detach),

	/* Bus interface */
	DEVMETHOD(bus_print_child, bus_generic_print_child),
	DEVMETHOD(bus_driver_added, bus_generic_driver_added),

	/* MII interface */
	DEVMETHOD(miibus_readreg, lan78xx_miibus_readreg),
	DEVMETHOD(miibus_writereg, lan78xx_miibus_writereg),
	DEVMETHOD(miibus_statchg, lan78xx_miibus_statchg),

	DEVMETHOD_END
};

static driver_t muge_driver = {
	.name = "muge",
	.methods = muge_methods,
	.size = sizeof(struct muge_softc),
};

static devclass_t muge_devclass;

DRIVER_MODULE(muge, uhub, muge_driver, muge_devclass, NULL, 0);
DRIVER_MODULE(miibus, muge, miibus_driver, miibus_devclass, 0, 0);
MODULE_DEPEND(muge, uether, 1, 1, 1);
MODULE_DEPEND(muge, usb, 1, 1, 1);
MODULE_DEPEND(muge, ether, 1, 1, 1);
MODULE_DEPEND(muge, miibus, 1, 1, 1);
MODULE_VERSION(muge, 1);
USB_PNP_HOST_INFO(lan78xx_devs);

Index: projects/pnfs-planb-server/sys/dev/usb/usbdevs
===================================================================
--- projects/pnfs-planb-server/sys/dev/usb/usbdevs	(revision 334966)
+++ projects/pnfs-planb-server/sys/dev/usb/usbdevs	(revision 334967)
@@ -1,4893 +1,4894 @@
$FreeBSD$
/* $NetBSD: usbdevs,v 1.392 2004/12/29 08:38:44 imp Exp $ */

/*-
 * Copyright (c) 1998-2004 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Lennart Augustsson (lennart@augustsson.net) at
 * Carlstedt Research & Technology.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * List of known USB vendors
 *
 * USB.org publishes a VID list of USB-IF member companies at
 * http://www.usb.org/developers/tools
 * Note that it does not show companies that have obtained a Vendor ID
 * without becoming full members.
 *
 * Please note that these IDs do not do anything.
Adding an ID here and * regenerating the usbdevs.h and usbdevs_data.h only makes a symbolic name * available to the source code and does not change any functionality, nor * does it make your device available to a specific driver. * It will however make the descriptive string available if a device does not * provide the string itself. * * After adding a vendor ID VNDR and a product ID PRDCT you will have the * following extra defines: * #define USB_VENDOR_VNDR 0x???? * #define USB_PRODUCT_VNDR_PRDCT 0x???? * * You may have to add these defines to the respective probe routines to * make the device recognised by the appropriate device driver. */ vendor UNKNOWN1 0x0053 Unknown vendor vendor UNKNOWN2 0x0105 Unknown vendor vendor EGALAX2 0x0123 eGalax, Inc. vendor CHIPSBANK 0x0204 Chipsbank Microelectronics Co. vendor HUMAX 0x02ad HUMAX vendor QUAN 0x01e1 Quan vendor LTS 0x0386 LTS vendor BWCT 0x03da Bernd Walter Computer Technology vendor AOX 0x03e8 AOX vendor THESYS 0x03e9 Thesys vendor DATABROADCAST 0x03ea Data Broadcasting vendor ATMEL 0x03eb Atmel vendor IWATSU 0x03ec Iwatsu America vendor MITSUMI 0x03ee Mitsumi vendor HP 0x03f0 Hewlett Packard vendor GENOA 0x03f1 Genoa vendor OAK 0x03f2 Oak vendor ADAPTEC 0x03f3 Adaptec vendor DIEBOLD 0x03f4 Diebold vendor SIEMENSELECTRO 0x03f5 Siemens Electromechanical vendor EPSONIMAGING 0x03f8 Epson Imaging vendor KEYTRONIC 0x03f9 KeyTronic vendor OPTI 0x03fb OPTi vendor ELITEGROUP 0x03fc Elitegroup vendor XILINX 0x03fd Xilinx vendor FARALLON 0x03fe Farallon Communications vendor NATIONAL 0x0400 National Semiconductor vendor NATIONALREG 0x0401 National Registry vendor ACERLABS 0x0402 Acer Labs vendor FTDI 0x0403 Future Technology Devices vendor NCR 0x0404 NCR vendor SYNOPSYS2 0x0405 Synopsys vendor FUJITSUICL 0x0406 Fujitsu-ICL vendor FUJITSU2 0x0407 Fujitsu Personal Systems vendor QUANTA 0x0408 Quanta vendor NEC 0x0409 NEC vendor KODAK 0x040a Eastman Kodak vendor WELTREND 0x040b Weltrend Semiconductor vendor VIA 0x040d VIA vendor MCCI 0x040e MCCI vendor MELCO 0x0411 Melco vendor LEADTEK 0x0413 Leadtek vendor WINBOND 0x0416 Winbond vendor PHOENIX 0x041a Phoenix vendor CREATIVE 0x041e Creative Labs vendor NOKIA 0x0421 Nokia vendor ADI 0x0422 ADI Systems vendor CATC 0x0423 Computer Access Technology vendor SMC2 0x0424 Microchip (Standard Microsystems) vendor MOTOROLA_HK 0x0425 Motorola HK vendor GRAVIS 0x0428 Advanced Gravis Computer vendor CIRRUSLOGIC 0x0429 Cirrus Logic vendor INNOVATIVE 0x042c Innovative Semiconductors vendor MOLEX 0x042f Molex vendor SUN 0x0430 Sun Microsystems vendor UNISYS 0x0432 Unisys vendor TAUGA 0x0436 Taugagreining HF vendor AMD 0x0438 Advanced Micro Devices vendor LEXMARK 0x043d Lexmark International vendor LG 0x043e LG Electronics vendor NANAO 0x0440 NANAO vendor GATEWAY 0x0443 Gateway 2000 vendor NMB 0x0446 NMB vendor ALPS 0x044e Alps Electric vendor THRUST 0x044f Thrustmaster vendor TI 0x0451 Texas Instruments vendor ANALOGDEVICES 0x0456 Analog Devices vendor SIS 0x0457 Silicon Integrated Systems Corp. vendor KYE 0x0458 KYE Systems vendor DIAMOND2 0x045a Diamond (Supra) vendor RENESAS 0x045b Renesas vendor MICROSOFT 0x045e Microsoft vendor PRIMAX 0x0461 Primax Electronics vendor MGE 0x0463 MGE UPS Systems vendor AMP 0x0464 AMP vendor CHERRY 0x046a Cherry Mikroschalter vendor MEGATRENDS 0x046b American Megatrends vendor LOGITECH 0x046d Logitech vendor BTC 0x046e Behavior Tech. 
Computer vendor PHILIPS 0x0471 Philips vendor SUN2 0x0472 Sun Microsystems (official) vendor SANYO 0x0474 Sanyo Electric vendor SEAGATE 0x0477 Seagate vendor CONNECTIX 0x0478 Connectix vendor SEMTECH 0x047a Semtech vendor KENSINGTON 0x047d Kensington vendor LUCENT 0x047e Lucent vendor PLANTRONICS 0x047f Plantronics vendor KYOCERA 0x0482 Kyocera Wireless Corp. vendor STMICRO 0x0483 STMicroelectronics vendor FOXCONN 0x0489 Foxconn / Hon Hai vendor MEIZU 0x0492 Meizu Electronics vendor YAMAHA 0x0499 YAMAHA vendor COMPAQ 0x049f Compaq vendor HITACHI 0x04a4 Hitachi vendor ACERP 0x04a5 Acer Peripherals vendor DAVICOM 0x04a6 Davicom vendor VISIONEER 0x04a7 Visioneer vendor CANON 0x04a9 Canon vendor NIKON 0x04b0 Nikon vendor PAN 0x04b1 Pan International vendor IBM 0x04b3 IBM vendor CYPRESS 0x04b4 Cypress Semiconductor vendor ROHM 0x04b5 ROHM vendor COMPAL 0x04b7 Compal vendor EPSON 0x04b8 Seiko Epson vendor RAINBOW 0x04b9 Rainbow Technologies vendor IODATA 0x04bb I-O Data vendor TDK 0x04bf TDK vendor 3COMUSR 0x04c1 U.S. Robotics vendor METHODE 0x04c2 Methode Electronics Far East vendor MAXISWITCH 0x04c3 Maxi Switch vendor LOCKHEEDMER 0x04c4 Lockheed Martin Energy Research vendor FUJITSU 0x04c5 Fujitsu vendor TOSHIBAAM 0x04c6 Toshiba America vendor MICROMACRO 0x04c7 Micro Macro Technologies vendor KONICA 0x04c8 Konica vendor LITEON 0x04ca Lite-On Technology vendor FUJIPHOTO 0x04cb Fuji Photo Film vendor PHILIPSSEMI 0x04cc Philips Semiconductors vendor TATUNG 0x04cd Tatung Co. Of America vendor SCANLOGIC 0x04ce ScanLogic vendor MYSON 0x04cf Myson Technology vendor DIGI2 0x04d0 Digi vendor ITTCANON 0x04d1 ITT Canon vendor ALTEC 0x04d2 Altec Lansing vendor LSI 0x04d4 LSI vendor MENTORGRAPHICS 0x04d6 Mentor Graphics vendor ITUNERNET 0x04d8 I-Tuner Networks vendor HOLTEK 0x04d9 Holtek Semiconductor, Inc. vendor PANASONIC 0x04da Panasonic (Matsushita) vendor HUANHSIN 0x04dc Huan Hsin vendor SHARP 0x04dd Sharp vendor IIYAMA 0x04e1 Iiyama vendor SHUTTLE 0x04e6 Shuttle Technology vendor ELO 0x04e7 Elo TouchSystems vendor SAMSUNG 0x04e8 Samsung Electronics vendor NORTHSTAR 0x04eb Northstar vendor TOKYOELECTRON 0x04ec Tokyo Electron vendor ANNABOOKS 0x04ed Annabooks vendor JVC 0x04f1 JVC vendor CHICONY 0x04f2 Chicony Electronics vendor ELAN 0x04f3 ELAN Microelectronics vendor NEWNEX 0x04f7 Newnex vendor BROTHER 0x04f9 Brother Industries vendor DALLAS 0x04fa Dallas Semiconductor vendor AIPTEK2 0x04fc AIPTEK International vendor PFU 0x04fe PFU vendor FUJIKURA 0x0501 Fujikura/DDK vendor ACER 0x0502 Acer vendor 3COM 0x0506 3Com vendor HOSIDEN 0x0507 Hosiden Corporation vendor AZTECH 0x0509 Aztech Systems vendor BELKIN 0x050d Belkin Components vendor KAWATSU 0x050f Kawatsu Semiconductor vendor FCI 0x0514 FCI vendor LONGWELL 0x0516 Longwell vendor COMPOSITE 0x0518 Composite vendor STAR 0x0519 Star Micronics vendor APC 0x051d American Power Conversion vendor SCIATLANTA 0x051e Scientific Atlanta vendor TSM 0x0520 TSM vendor CONNECTEK 0x0522 Advanced Connectek USA vendor NETCHIP 0x0525 NetChip Technology vendor ALTRA 0x0527 ALTRA vendor ATI 0x0528 ATI Technologies vendor AKS 0x0529 Aladdin Knowledge Systems vendor TEKOM 0x052b Tekom vendor CANONDEV 0x052c Canon vendor WACOMTECH 0x0531 Wacom vendor INVENTEC 0x0537 Inventec vendor SHYHSHIUN 0x0539 Shyh Shiun Terminals vendor PREHWERKE 0x053a Preh Werke Gmbh & Co. 
KG vendor SYNOPSYS 0x053f Synopsys vendor UNIACCESS 0x0540 Universal Access vendor VIEWSONIC 0x0543 ViewSonic vendor XIRLINK 0x0545 Xirlink vendor ANCHOR 0x0547 Anchor Chips vendor SONY 0x054c Sony vendor FUJIXEROX 0x0550 Fuji Xerox vendor VISION 0x0553 VLSI Vision vendor ASAHIKASEI 0x0556 Asahi Kasei Microsystems vendor ATEN 0x0557 ATEN International vendor SAMSUNG2 0x055d Samsung Electronics vendor MUSTEK 0x055f Mustek Systems vendor TELEX 0x0562 Telex Communications vendor CHINON 0x0564 Chinon vendor PERACOM 0x0565 Peracom Networks vendor ALCOR2 0x0566 Alcor Micro vendor XYRATEX 0x0567 Xyratex vendor WACOM 0x056a WACOM vendor ETEK 0x056c e-TEK Labs vendor EIZO 0x056d EIZO vendor ELECOM 0x056e Elecom vendor CONEXANT 0x0572 Conexant vendor HAUPPAUGE 0x0573 Hauppauge Computer Works vendor BAFO 0x0576 BAFO/Quality Computer Accessories vendor YEDATA 0x057b Y-E Data vendor AVM 0x057c AVM vendor NINTENDO 0x057e Nintendo vendor QUICKSHOT 0x057f Quickshot vendor ROLAND 0x0582 Roland vendor ROCKFIRE 0x0583 Rockfire vendor RATOC 0x0584 RATOC Systems vendor ZYXEL 0x0586 ZyXEL Communication vendor INFINEON 0x058b Infineon vendor MICREL 0x058d Micrel vendor ALCOR 0x058f Alcor Micro vendor OMRON 0x0590 OMRON vendor ZORAN 0x0595 Zoran Microelectronics vendor NIIGATA 0x0598 Niigata vendor IOMEGA 0x059b Iomega vendor ATREND 0x059c A-Trend Technology vendor AID 0x059d Advanced Input Devices vendor LACIE 0x059f LaCie vendor FUJIFILM 0x05a2 Fuji Film vendor ARC 0x05a3 ARC vendor ORTEK 0x05a4 Ortek vendor CISCOLINKSYS3 0x05a6 Cisco-Linksys vendor BOSE 0x05a7 Bose vendor OMNIVISION 0x05a9 OmniVision vendor INSYSTEM 0x05ab In-System Design vendor APPLE 0x05ac Apple Computer vendor YCCABLE 0x05ad Y.C. Cable vendor DIGITALPERSONA 0x05ba DigitalPersona vendor 3G 0x05bc 3G Green Green Globe vendor RAFI 0x05bd RAFI vendor TYCO 0x05be Tyco vendor KAWASAKI 0x05c1 Kawasaki vendor DIGI 0x05c5 Digi International vendor QUALCOMM2 0x05c6 Qualcomm vendor QTRONIX 0x05c7 Qtronix vendor FOXLINK 0x05c8 Foxlink vendor RICOH 0x05ca Ricoh vendor ELSA 0x05cc ELSA vendor SCIWORX 0x05ce sci-worx vendor BRAINBOXES 0x05d1 Brainboxes Limited vendor ULTIMA 0x05d8 Ultima vendor AXIOHM 0x05d9 Axiohm Transaction Solutions vendor MICROTEK 0x05da Microtek vendor SUNTAC 0x05db SUN Corporation vendor LEXAR 0x05dc Lexar Media vendor ADDTRON 0x05dd Addtron vendor SYMBOL 0x05e0 Symbol Technologies vendor SYNTEK 0x05e1 Syntek vendor GENESYS 0x05e3 Genesys Logic vendor FUJI 0x05e5 Fuji Electric vendor KEITHLEY 0x05e6 Keithley Instruments vendor EIZONANAO 0x05e7 EIZO Nanao vendor KLSI 0x05e9 Kawasaki LSI vendor FFC 0x05eb FFC vendor ANKO 0x05ef Anko Electronic vendor PIENGINEERING 0x05f3 P.I. Engineering vendor AOC 0x05f6 AOC International vendor CHIC 0x05fe Chic Technology vendor BARCO 0x0600 Barco Display Systems vendor BRIDGE 0x0607 Bridge Information vendor SMK 0x0609 SMK vendor SOLIDYEAR 0x060b Solid Year vendor BIORAD 0x0614 Bio-Rad Laboratories vendor MACALLY 0x0618 Macally vendor ACTLABS 0x061c Act Labs vendor ALARIS 0x0620 Alaris vendor APEX 0x0624 Apex vendor CREATIVE3 0x062a Creative Labs vendor MICRON 0x0634 Micron Technology vendor VIVITAR 0x0636 Vivitar vendor GUNZE 0x0637 Gunze Electronics USA vendor AVISION 0x0638 Avision vendor TEAC 0x0644 TEAC vendor ACTON 0x0647 Acton Research Corp. 
vendor OPTO 0x065a Optoelectronics Co., Ltd vendor SGI 0x065e Silicon Graphics vendor SANWASUPPLY 0x0663 Sanwa Supply vendor MEGATEC 0x0665 Megatec vendor LINKSYS 0x066b Linksys vendor ACERSA 0x066e Acer Semiconductor America vendor SIGMATEL 0x066f Sigmatel vendor DRAYTEK 0x0675 DrayTek vendor AIWA 0x0677 Aiwa vendor ACARD 0x0678 ACARD Technology vendor PROLIFIC 0x067b Prolific Technology vendor SIEMENS 0x067c Siemens vendor AVANCELOGIC 0x0680 Avance Logic vendor SIEMENS2 0x0681 Siemens vendor MINOLTA 0x0686 Minolta vendor CHPRODUCTS 0x068e CH Products vendor HAGIWARA 0x0693 Hagiwara Sys-Com vendor CTX 0x0698 Chuntex vendor ASKEY 0x069a Askey Computer vendor SAITEK 0x06a3 Saitek vendor ALCATELT 0x06b9 Alcatel Telecom vendor AGFA 0x06bd AGFA-Gevaert vendor ASIAMD 0x06be Asia Microelectronic Development vendor BIZLINK 0x06c4 Bizlink International vendor KEYSPAN 0x06cd Keyspan / InnoSys Inc. vendor CONTEC 0x06ce Contec products vendor AASHIMA 0x06d6 Aashima Technology vendor LIEBERT 0x06da Liebert vendor MULTITECH 0x06e0 MultiTech vendor ADS 0x06e1 ADS Technologies vendor ALCATELM 0x06e4 Alcatel Microelectronics vendor SIRIUS 0x06ea Sirius Technologies vendor GUILLEMOT 0x06f8 Guillemot vendor BOSTON 0x06fd Boston Acoustics vendor SMC 0x0707 Standard Microsystems vendor PUTERCOM 0x0708 Putercom vendor MCT 0x0711 MCT vendor IMATION 0x0718 Imation vendor TECLAST 0x071b Teclast vendor SONYERICSSON 0x0731 Sony Ericsson vendor EICON 0x0734 Eicon Networks vendor MADCATZ 0x0738 Mad Catz, Inc. vendor SYNTECH 0x0745 Syntech Information vendor DIGITALSTREAM 0x074e Digital Stream vendor AUREAL 0x0755 Aureal Semiconductor vendor MAUDIO 0x0763 M-Audio vendor CYBERPOWER 0x0764 Cyber Power Systems, Inc. vendor SURECOM 0x0769 Surecom Technology vendor HIDGLOBAL 0x076b HID Global vendor LINKSYS2 0x077b Linksys vendor GRIFFIN 0x077d Griffin Technology vendor SANDISK 0x0781 SanDisk vendor JENOPTIK 0x0784 Jenoptik vendor LOGITEC 0x0789 Logitec vendor NOKIA2 0x078b Nokia vendor BRIMAX 0x078e Brimax vendor AXIS 0x0792 Axis Communications vendor ABL 0x0794 ABL Electronics vendor SAGEM 0x079b Sagem vendor SUNCOMM 0x079c Sun Communications, Inc. vendor ALFADATA 0x079d Alfadata Computer vendor NATIONALTECH 0x07a2 National Technical Systems vendor ONNTO 0x07a3 Onnto vendor BE 0x07a4 Be vendor ADMTEK 0x07a6 ADMtek vendor COREGA 0x07aa Corega vendor FREECOM 0x07ab Freecom vendor MICROTECH 0x07af Microtech vendor GENERALINSTMNTS 0x07b2 General Instruments (Motorola) vendor OLYMPUS 0x07b4 Olympus vendor ABOCOM 0x07b8 AboCom Systems vendor KINGSUN 0x07c0 KingSun vendor KEISOKUGIKEN 0x07c1 Keisokugiken vendor ONSPEC 0x07c4 OnSpec vendor APG 0x07c5 APG Cash Drawer vendor BUG 0x07c8 B.U.G. vendor ALLIEDTELESYN 0x07c9 Allied Telesyn International vendor AVERMEDIA 0x07ca AVerMedia Technologies vendor SIIG 0x07cc SIIG vendor CASIO 0x07cf CASIO vendor DLINK2 0x07d1 D-Link vendor APTIO 0x07d2 Aptio Products vendor ARASAN 0x07da Arasan Chip Systems vendor ALLIEDCABLE 0x07e6 Allied Cable vendor STSN 0x07ef STSN vendor BEWAN 0x07fa Bewan vendor CENTURY 0x07f7 Century Corp vendor NEWLINK 0x07ff NEWlink vendor MAGTEK 0x0801 Mag-Tek vendor ZOOM 0x0803 Zoom Telephonics vendor PCS 0x0810 Personal Communication Systems vendor SYNET 0x0812 Synet Electronics vendor ALPHASMART 0x081e AlphaSmart, Inc. 
vendor BROADLOGIC 0x0827 BroadLogic vendor HANDSPRING 0x082d Handspring vendor PALM 0x0830 Palm Computing vendor SOURCENEXT 0x0833 SOURCENEXT vendor ACTIONSTAR 0x0835 Action Star Enterprise vendor SAMSUNG_TECHWIN 0x0839 Samsung Techwin vendor ACCTON 0x083a Accton Technology vendor DIAMOND 0x0841 Diamond vendor NETGEAR 0x0846 BayNETGEAR vendor TOPRE 0x0853 Topre Corporation vendor ACTIVEWIRE 0x0854 ActiveWire vendor BBELECTRONICS 0x0856 B&B Electronics vendor PORTGEAR 0x085a PortGear vendor NETGEAR2 0x0864 Netgear vendor SYSTEMTALKS 0x086e System Talks vendor METRICOM 0x0870 Metricom vendor ADESSOKBTEK 0x087c ADESSO/Kbtek America vendor JATON 0x087d Jaton vendor APT 0x0880 APT Technologies vendor BOCARESEARCH 0x0885 Boca Research vendor ANDREA 0x08a8 Andrea Electronics vendor BURRBROWN 0x08bb Burr-Brown Japan vendor 2WIRE 0x08c8 2Wire vendor AIPTEK 0x08ca AIPTEK International vendor SMARTBRIDGES 0x08d1 SmartBridges vendor FUJITSUSIEMENS 0x08d4 Fujitsu-Siemens vendor BILLIONTON 0x08dd Billionton Systems vendor GEMALTO 0x08e6 Gemalto SA vendor EXTENDED 0x08e9 Extended Systems vendor MSYSTEMS 0x08ec M-Systems vendor DIGIANSWER 0x08fd Digianswer vendor AUTHENTEC 0x08ff AuthenTec vendor AUDIOTECHNICA 0x0909 Audio-Technica vendor TRUMPION 0x090a Trumpion Microelectronics vendor FEIYA 0x090c Feiya vendor ALATION 0x0910 Alation Systems vendor GLOBESPAN 0x0915 Globespan vendor CONCORDCAMERA 0x0919 Concord Camera vendor GARMIN 0x091e Garmin International vendor GOHUBS 0x0921 GoHubs vendor DYMO 0x0922 DYMO vendor XEROX 0x0924 Xerox vendor BIOMETRIC 0x0929 American Biometric Company vendor TOSHIBA 0x0930 Toshiba vendor PLEXTOR 0x093b Plextor vendor INTREPIDCS 0x093c Intrepid vendor YANO 0x094f Yano vendor KINGSTON 0x0951 Kingston Technology vendor BLUEWATER 0x0956 BlueWater Systems vendor AGILENT 0x0957 Agilent Technologies vendor GUDE 0x0959 Gude ADS vendor PORTSMITH 0x095a Portsmith vendor ACERW 0x0967 Acer vendor ADIRONDACK 0x0976 Adirondack Wire & Cable vendor BECKHOFF 0x0978 Beckhoff vendor MINDSATWORK 0x097a Minds At Work vendor ZIPPY 0x099a Zippy Technology Corporation vendor POINTCHIPS 0x09a6 PointChips vendor INTERSIL 0x09aa Intersil vendor TRIPPLITE2 0x09ae Tripp Lite vendor ALTIUS 0x09b3 Altius Solutions vendor ARRIS 0x09c1 Arris Interactive vendor ACTIVCARD 0x09c3 ACTIVCARD vendor ACTISYS 0x09c4 ACTiSYS vendor NOVATEL2 0x09d7 Novatel Wireless vendor AFOURTECH 0x09da A-FOUR TECH vendor AIMEX 0x09dc AIMEX vendor ADDONICS 0x09df Addonics Technologies vendor AKAI 0x09e8 AKAI professional M.I. vendor ARESCOM 0x09f5 ARESCOM vendor BAY 0x09f9 Bay Associates vendor ALTERA 0x09fb Altera vendor CSR 0x0a12 Cambridge Silicon Radio vendor TREK 0x0a16 Trek Technology vendor ASAHIOPTICAL 0x0a17 Asahi Optical vendor BOCASYSTEMS 0x0a43 Boca Systems vendor SHANTOU 0x0a46 ShanTou vendor MEDIAGEAR 0x0a48 MediaGear vendor BROADCOM 0x0a5c Broadcom vendor GREENHOUSE 0x0a6b GREENHOUSE vendor MEDELI 0x0a67 Medeli vendor GEOCAST 0x0a79 Geocast Network Systems vendor EGO 0x0a92 EGO systems vendor IDQUANTIQUE 0x0aba ID Quantique vendor IDTECH 0x0acd ID TECH vendor ZYDAS 0x0ace Zydas Technology Corporation vendor NEODIO 0x0aec Neodio vendor OPTION 0x0af0 Option N.V. vendor ASUS 0x0b05 ASUSTeK Computer vendor TODOS 0x0b0c Todos Data System vendor SIIG2 0x0b39 SIIG vendor TEKRAM 0x0b3b Tekram Technology vendor HAL 0x0b41 HAL Corporation vendor EMS 0x0b43 EMS Production vendor NEC2 0x0b62 NEC vendor ADLINK 0x0b63 ADLINK Technoligy, Inc. vendor ATI2 0x0b6f ATI Technologies vendor ZEEVO 0x0b7a Zeevo, Inc. 
vendor KURUSUGAWA 0x0b7e Kurusugawa Electronics, Inc. vendor SMART 0x0b8c Smart Technologies vendor ASIX 0x0b95 ASIX Electronics vendor O2MICRO 0x0b97 O2 Micro, Inc. vendor USR 0x0baf U.S. Robotics vendor AMBIT 0x0bb2 Ambit Microsystems vendor HTC 0x0bb4 HTC vendor REALTEK 0x0bda Realtek vendor ERICSSON2 0x0bdb Ericsson vendor MEI 0x0bed MEI vendor ADDONICS2 0x0bf6 Addonics Technology vendor FSC 0x0bf8 Fujitsu Siemens Computers vendor AGATE 0x0c08 Agate Technologies vendor DMI 0x0c0b DMI vendor CANYON 0x0c10 Canyon vendor ICOM 0x0c26 Icom Inc. vendor GNOTOMETRICS 0x0c33 GN Otometrics vendor CHICONY2 0x0c45 Chicony / Microdia / Sonix Technology Co., Ltd. vendor REINERSCT 0x0c4b Reiner-SCT vendor SEALEVEL 0x0c52 Sealevel System vendor JETI 0x0c6c Jeti vendor LUWEN 0x0c76 Luwen vendor ELEKTOR 0x0c7d ELEKTOR Electronics vendor KYOCERA2 0x0c88 Kyocera Wireless Corp. vendor ZCOM 0x0cde Z-Com vendor ATHEROS2 0x0cf3 Atheros Communications vendor POSIFLEX 0x0d3a POSIFLEX vendor TANGTOP 0x0d3d Tangtop vendor KOBIL 0x0d46 KOBIL vendor SMC3 0x0d5c Standard Microsystems vendor ADDON 0x0d7d Add-on Technology vendor ACDC 0x0d7e American Computer & Digital Components vendor CMEDIA 0x0d8c CMEDIA vendor CONCEPTRONIC 0x0d8e Conceptronic vendor SKANHEX 0x0d96 Skanhex Technology, Inc. vendor MSI 0x0db0 Micro Star International vendor ELCON 0x0db7 ELCON Systemtechnik vendor UNKNOWN4 0x0dcd Unknown vendor vendor NETAC 0x0dd8 Netac vendor SITECOMEU 0x0df6 Sitecom Europe vendor MOBILEACTION 0x0df7 Mobile Action vendor AMIGO 0x0e0b Amigo Technology vendor SPEEDDRAGON 0x0e55 Speed Dragon Multimedia vendor HAWKING 0x0e66 Hawking vendor FOSSIL 0x0e67 Fossil, Inc vendor GMATE 0x0e7e G.Mate, Inc vendor MEDIATEK 0x0e8d MediaTek, Inc. vendor OTI 0x0ea0 Ours Technology vendor YISO 0x0eab Yiso Wireless Co. vendor PILOTECH 0x0eaf Pilotech vendor NOVATECH 0x0eb0 NovaTech vendor ITEGNO 0x0eba iTegno vendor WINMAXGROUP 0x0ed1 WinMaxGroup vendor TOD 0x0ede TOD vendor EGALAX 0x0eef eGalax, Inc. vendor AIRPRIME 0x0f3d AirPrime, Inc. vendor MICROTUNE 0x0f4d Microtune vendor VTECH 0x0f88 VTech vendor FALCOM 0x0f94 Falcom Wireless Communications GmbH vendor RIM 0x0fca Research In Motion vendor DYNASTREAM 0x0fcf Dynastream Innovations vendor LARSENBRUSGAARD 0x0fd8 Larsen and Brusgaard vendor OWL 0x0fde OWL vendor KONTRON 0x0fe6 Kontron AG vendor DVICO 0x0fe9 DViCO vendor QUALCOMM 0x1004 Qualcomm vendor APACER 0x1005 Apacer vendor MOTOROLA4 0x100d Motorola vendor HP3 0x103c Hewlett Packard vendor AIRPLUS 0x1011 Airplus vendor DESKNOTE 0x1019 Desknote vendor AMD2 0x1022 Advanced Micro Devices vendor NEC3 0x1033 NEC vendor TTI 0x103e Thurlby Thandar Instruments vendor GIGABYTE 0x1044 GIGABYTE vendor WESTERN 0x1058 Western Digital vendor MOTOROLA 0x1063 Motorola vendor CCYU 0x1065 CCYU Technology vendor CURITEL 0x106c Curitel Communications Inc vendor SILABS2 0x10a6 SILABS2 vendor USI 0x10ab USI vendor HONEYWELL 0x10ac Honeywell vendor LIEBERT2 0x10af Liebert vendor PLX 0x10b5 PLX vendor ASANTE 0x10bd Asante vendor SILABS 0x10c4 Silicon Labs vendor SILABS3 0x10c5 Silicon Labs vendor SILABS4 0x10ce Silicon Labs vendor ACTIONS 0x10d6 Actions vendor MOXA 0x110a Moxa vendor ANALOG 0x1110 Analog Devices vendor TENX 0x1130 Ten X Technology, Inc. vendor ISSC 0x1131 Integrated System Solution Corp. 
vendor JRC 0x1145 Japan Radio Company vendor SPHAIRON 0x114b Sphairon Access Systems GmbH vendor DELORME 0x1163 DeLorme vendor SERVERWORKS 0x1166 ServerWorks vendor DLINK3 0x1186 Dlink vendor ACERCM 0x1189 Acer Communications & Multimedia vendor SIERRA 0x1199 Sierra Wireless vendor SANWA 0x11ad Sanwa Electric Instrument Co., Ltd. vendor TOPFIELD 0x11db Topfield Co., Ltd vendor SIEMENS3 0x11f5 Siemens vendor NETINDEX 0x11f6 NetIndex vendor ALCATEL 0x11f7 Alcatel vendor INTERBIOMETRICS 0x1209 Interbiometrics vendor FUJITSU3 0x1221 Fujitsu Ltd. vendor UNKNOWN3 0x1233 Unknown vendor vendor TSUNAMI 0x1241 Tsunami vendor PHEENET 0x124a Pheenet vendor TARGUS 0x1267 Targus vendor TWINMOS 0x126f TwinMOS vendor TENDA 0x1286 Tenda vendor TESTO 0x128d Testo products vendor CREATIVE2 0x1292 Creative Labs vendor BELKIN2 0x1293 Belkin Components vendor CYBERTAN 0x129b CyberTAN Technology vendor HUAWEI 0x12d1 Huawei Technologies vendor ARANEUS 0x12d8 Araneus Information Systems vendor TAPWAVE 0x12ef Tapwave vendor AINCOMM 0x12fd Aincomm vendor MOBILITY 0x1342 Mobility vendor DICKSMITH 0x1371 Dick Smith Electronics vendor NETGEAR3 0x1385 Netgear vendor VALIDITY 0x138a Validity Sensors, Inc. vendor BALTECH 0x13ad Baltech vendor CISCOLINKSYS 0x13b1 Cisco-Linksys vendor SHARK 0x13d2 Shark vendor AZUREWAVE 0x13d3 AsureWave vendor INITIO 0x13fd Initio Corporation vendor EMTEC 0x13fe Emtec vendor NOVATEL 0x1410 Novatel Wireless vendor OMNIVISION2 0x1415 OmniVision Technologies, Inc. vendor MERLIN 0x1416 Merlin vendor REDOCTANE 0x1430 RedOctane vendor WISTRONNEWEB 0x1435 Wistron NeWeb vendor RADIOSHACK 0x1453 Radio Shack vendor FIC 0x1457 FIC / OpenMoko vendor HUAWEI3COM 0x1472 Huawei-3Com vendor ABOCOM2 0x1482 AboCom Systems vendor SILICOM 0x1485 Silicom vendor RALINK 0x148f Ralink Technology vendor IMAGINATION 0x149a Imagination Technologies vendor ATP 0x14af ATP Electronics vendor CONCEPTRONIC2 0x14b2 Conceptronic vendor SUPERTOP 0x14cd Super Top vendor PLANEX3 0x14ea Planex Communications vendor SILICONPORTALS 0x1527 Silicon Portals vendor UBIQUAM 0x1529 UBIQUAM Co., Ltd. vendor JMICRON 0x152d JMicron vendor UBLOX 0x1546 U-blox vendor PNY 0x154b PNY vendor OWEN 0x1555 Owen vendor OQO 0x1557 OQO vendor UMEDIA 0x157e U-MEDIA Communications vendor FIBERLINE 0x1582 Fiberline vendor FREESCALE 0x15a2 Freescale Semiconductor, Inc. vendor AFATECH 0x15a4 Afatech Technologies, Inc. vendor SPARKLAN 0x15a9 SparkLAN vendor OLIMEX 0x15ba Olimex vendor SOUNDGRAPH 0x15c2 Soundgraph, Inc. vendor AMIT2 0x15c5 AMIT vendor TEXTECH 0x15ca Textech International Ltd. vendor SOHOWARE 0x15e8 SOHOware vendor ABIT 0x15eb ABIT Corporation vendor UMAX 0x1606 UMAX Data Systems vendor INSIDEOUT 0x1608 Inside Out Networks vendor AMOI 0x1614 Amoi Electronics vendor GOODWAY 0x1631 Good Way Technology vendor ENTREGA 0x1645 Entrega vendor ACTIONTEC 0x1668 Actiontec Electronics vendor CLIPSAL 0x166a Clipsal vendor CISCOLINKSYS2 0x167b Cisco-Linksys vendor ATHEROS 0x168c Atheros Communications vendor GIGASET 0x1690 Gigaset vendor GLOBALSUN 0x16ab Global Sun Technology vendor ANYDATA 0x16d5 AnyDATA Corporation vendor JABLOTRON 0x16d6 Jablotron vendor CMOTECH 0x16d8 C-motech vendor WIENERPLEINBAUS 0x16dc WIENER Plein & Baus GmbH. vendor AXESSTEL 0x1726 Axesstel Co., Ltd. 
vendor LINKSYS4 0x1737 Linksys vendor SENAO 0x1740 Senao vendor ASUS2 0x1761 ASUS vendor SWEEX2 0x177f Sweex vendor METAGEEK 0x1781 MetaGeek vendor KAMSTRUP 0x17a8 Kamstrup A/S vendor MISC 0x1781 Misc Vendors vendor DISPLAYLINK 0x17e9 DisplayLink vendor LENOVO 0x17ef Lenovo vendor WAVESENSE 0x17f4 WaveSense vendor VAISALA 0x1843 Vaisala vendor E3C 0x18b4 E3C Technologies vendor AMIT 0x18c5 AMIT vendor GOOGLE 0x18d1 Google vendor QCOM 0x18e8 Qcom vendor ELV 0x18ef ELV vendor LINKSYS3 0x1915 Linksys vendor MEINBERG 0x1938 Meinberg Funkuhren vendor BECEEM 0x198f Beceem Communications +vendor ZTE 0x19d2 ZTE vendor QUALCOMMINC 0x19d2 Qualcomm, Incorporated vendor QUALCOMM3 0x19f5 Qualcomm, Inc. vendor QUANTA2 0x1a32 Quanta vendor TERMINUS 0x1a40 Terminus Technology vendor ABBOTT 0x1a61 Abbott Diabetics vendor BAYER 0x1a79 Bayer vendor WCH2 0x1a86 QinHeng Electronics vendor STELERA 0x1a8d Stelera Wireless vendor SEL 0x1adb Schweitzer Engineering Laboratories vendor CORSAIR 0x1b1c Corsair vendor ASM 0x1b21 ASMedia Technology vendor MATRIXORBITAL 0x1b3d Matrix Orbital vendor OVISLINK 0x1b75 OvisLink vendor TML 0x1b91 The Mobility Lab vendor TCTMOBILE 0x1bbb TCT Mobile vendor ALTI2 0x1bc9 Alti-2 products vendor SUNPLUS 0x1bcf Sunplus Innovation Technology Inc. vendor WAGO 0x1be3 WAGO Kontakttechnik GmbH. vendor TELIT 0x1bc7 Telit vendor IONICS 0x1c0c Ionics PlugComputer vendor LONGCHEER 0x1c9e Longcheer Holdings, Ltd. vendor MPMAN 0x1cae MpMan vendor DRESDENELEKTRONIK 0x1cf1 dresden elektronik vendor NEOTEL 0x1d09 Neotel vendor DREAMLINK 0x1d34 Dream Link vendor PEGATRON 0x1d4d Pegatron vendor QISDA 0x1da5 Qisda vendor METAGEEK2 0x1dd5 MetaGeek vendor ALINK 0x1e0e Alink vendor AIRTIES 0x1eda AirTies vendor FESTO 0x1e29 Festo vendor LAKESHORE 0x1fb9 Lake Shore Cryotronics, Inc. vendor VERTEX 0x1fe7 Vertex Wireless Co., Ltd. vendor DLINK 0x2001 D-Link vendor PLANEX2 0x2019 Planex Communications vendor HAUPPAUGE2 0x2040 Hauppauge Computer Works vendor TLAYTECH 0x20b9 Tlay Tech vendor ENCORE 0x203d Encore vendor QIHARDWARE 0x20b7 QI-hardware vendor PARA 0x20b8 PARA Industrial vendor SIMTEC 0x20df Simtec Electronics vendor TRENDNET 0x20f4 TRENDnet vendor RTSYSTEMS 0x2100 RT Systems vendor DLINK4 0x2101 D-Link vendor INTENSO 0x2109 INTENSO vendor VIALABS 0x2109 VIA Labs vendor ERICSSON 0x2282 Ericsson vendor MOTOROLA2 0x22b8 Motorola vendor WETELECOM 0x22de WeTelecom vendor PINNACLE 0x2304 Pinnacle Systems vendor ARDUINO 0x2341 Arduino SA vendor TPLINK 0x2357 TP-Link vendor WESTMOUNTAIN 0x2405 West Mountain Radio vendor TRIPPLITE 0x2478 Tripp-Lite vendor HIROSE 0x2631 Hirose Electric vendor NHJ 0x2770 NHJ vendor THINGM 0x27b8 ThingM vendor PERASO 0x2932 Peraso Technologies, Inc. vendor PLANEX 0x2c02 Planex Communications vendor QUECTEL 0x2c7c Quectel Wireless Solutions vendor VIDZMEDIA 0x3275 VidzMedia Pte Ltd vendor LINKINSTRUMENTS 0x3195 Link Instruments Inc. vendor AEI 0x3334 AEI vendor HANK 0x3353 Hank Connection vendor PQI 0x3538 PQI vendor DAISY 0x3579 Daisy Technology vendor NI 0x3923 National Instruments vendor MICRONET 0x3980 Micronet Communications vendor IODATA2 0x40bb I-O Data vendor IRIVER 0x4102 iRiver vendor DELL 0x413c Dell vendor WCH 0x4348 QinHeng Electronics vendor ACEECA 0x4766 Aceeca vendor FEIXUN 0x4855 FeiXun Communication vendor PAPOUCH 0x5050 Papouch products vendor AVERATEC 0x50c2 Averatec vendor SWEEX 0x5173 Sweex vendor PROLIFIC2 0x5372 Prolific Technologies vendor ONSPEC2 0x55aa OnSpec Electronic Inc. 
vendor ZINWELL 0x5a57 Zinwell vendor INGENIC 0x601a Ingenic Semiconductor Ltd. vendor SITECOM 0x6189 Sitecom vendor SPRINGERDESIGN 0x6400 Springer Design, Inc. vendor ARKMICRO 0x6547 Arkmicro Technologies Inc. vendor 3COM2 0x6891 3Com vendor EDIMAX 0x7392 Edimax vendor INTEL 0x8086 Intel vendor INTEL2 0x8087 Intel vendor ALLWIN 0x8516 ALLWIN Tech vendor SITECOM2 0x9016 Sitecom vendor MOSCHIP 0x9710 MosChip Semiconductor vendor NETGEAR4 0x9846 Netgear vendor MARVELL 0x9e88 Marvell Technology Group Ltd. vendor 3COM3 0xa727 3Com vendor CACE 0xcace CACE Technologies vendor COMPARE 0xcdab Compare vendor DATAAPEX 0xdaae DataApex vendor EVOLUTION 0xdeee Evolution Robotics vendor EMPIA 0xeb1a eMPIA Technology vendor HP2 0xf003 Hewlett Packard vendor LOGILINK 0xfc08 LogiLink vendor USRP 0xfffe GNU Radio USRP /* * List of known products. Grouped by vendor. */ /* 3Com products */ product 3COM HOMECONN 0x009d HomeConnect USB Camera product 3COM 3CREB96 0x00a0 Bluetooth USB Adapter product 3COM 3C19250 0x03e8 3C19250 Ethernet Adapter product 3COM 3CRSHEW696 0x0a01 3CRSHEW696 Wireless Adapter product 3COM 3C460 0x11f8 HomeConnect 3C460 product 3COM USR56K 0x3021 U.S.Robotics 56000 Voice FaxModem Pro product 3COM 3C460B 0x4601 HomeConnect 3C460B product 3COM2 3CRUSB10075 0xa727 3CRUSB10075 product 3COM3 AR5523_1 0x6893 AR5523 product 3COM3 AR5523_2 0x6895 AR5523 product 3COM3 AR5523_3 0x6897 AR5523 product 3COMUSR OFFICECONN 0x0082 3Com OfficeConnect Analog Modem product 3COMUSR USRISDN 0x008f 3Com U.S. Robotics Pro ISDN TA product 3COMUSR HOMECONN 0x009d 3Com HomeConnect Camera product 3COMUSR USR56K 0x3021 U.S. Robotics 56000 Voice FaxModem Pro /* Abbott Diabetics */ product ABBOTT STEREO_PLUG 0x3410 Abbott Diabetics Stereo Plug product ABBOTT STRIP_PORT 0x3420 Abbott Diabetics Strip Port /* ABIT products */ product ABIT AK_020 0x7d0e 3G modem product ACDC HUB 0x2315 USB Pen Drive HUB product ACDC SECWRITE 0x2316 USB Pen Drive Secure Write product ACDC PEN 0x2317 USB Pen Drive with Secure Write /* AboCom products */ product ABOCOM XX1 0x110c XX1 product ABOCOM XX2 0x200c XX2 product ABOCOM RT2770 0x2770 RT2770 product ABOCOM RT2870 0x2870 RT2870 product ABOCOM RT3070 0x3070 RT3070 product ABOCOM RT3071 0x3071 RT3071 product ABOCOM RT3072 0x3072 RT3072 product ABOCOM2 RT2870_1 0x3c09 RT2870 product ABOCOM URE450 0x4000 URE450 Ethernet Adapter product ABOCOM UFE1000 0x4002 UFE1000 Fast Ethernet Adapter product ABOCOM DSB650TX_PNA 0x4003 1/10/100 Ethernet Adapter product ABOCOM XX4 0x4004 XX4 product ABOCOM XX5 0x4007 XX5 product ABOCOM XX6 0x400b XX6 product ABOCOM XX7 0x400c XX7 product ABOCOM RTL8151 0x401a RTL8151 product ABOCOM XX8 0x4102 XX8 product ABOCOM XX9 0x4104 XX9 product ABOCOM UF200 0x420a UF200 Ethernet product ABOCOM WL54 0x6001 WL54 product ABOCOM XX10 0xabc1 XX10 product ABOCOM BWU613 0xb000 BWU613 product ABOCOM HWU54DM 0xb21b HWU54DM product ABOCOM RT2573_2 0xb21c RT2573 product ABOCOM RT2573_3 0xb21d RT2573 product ABOCOM RT2573_4 0xb21e RT2573 product ABOCOM RTL8188CU_1 0x8188 RTL8188CU product ABOCOM RTL8188CU_2 0x8189 RTL8188CU product ABOCOM RTL8192CU 0x8178 RTL8192CU product ABOCOM RTL8188EU 0x8179 RTL8188EU product ABOCOM WUG2700 0xb21f WUG2700 /* Acton Research Corp. 
*/ product ACTON SPECTRAPRO 0x0100 FTDI compatible adapter /* Accton products */ product ACCTON USB320_EC 0x1046 USB320-EC Ethernet Adapter product ACCTON 2664W 0x3501 2664W product ACCTON 111 0x3503 T-Sinus 111 Wireless Adapter product ACCTON SMCWUSBG_NF 0x4505 SMCWUSB-G (no firmware) product ACCTON SMCWUSBG 0x4506 SMCWUSB-G product ACCTON SMCWUSBTG2_NF 0x4507 SMCWUSBT-G2 (no firmware) product ACCTON SMCWUSBTG2 0x4508 SMCWUSBT-G2 product ACCTON PRISM_GT 0x4521 PrismGT USB 2.0 WLAN product ACCTON SS1001 0x5046 SpeedStream Ethernet Adapter product ACCTON RT2870_2 0x6618 RT2870 product ACCTON RT3070 0x7511 RT3070 product ACCTON RT2770 0x7512 RT2770 product ACCTON RT2870_3 0x7522 RT2870 product ACCTON RT2870_5 0x8522 RT2870 product ACCTON RT3070_4 0xa512 RT3070 product ACCTON RT2870_4 0xa618 RT2870 product ACCTON RT3070_1 0xa701 RT3070 product ACCTON RT3070_2 0xa702 RT3070 product ACCTON RT2870_1 0xb522 RT2870 product ACCTON RT3070_3 0xc522 RT3070 product ACCTON RT3070_5 0xd522 RT3070 product ACCTON RTL8192SU 0xc512 RTL8192SU product ACCTON ZD1211B 0xe501 ZD1211B product ACCTON WN7512 0xf522 WN7512 /* Aceeca products */ product ACEECA MEZ1000 0x0001 MEZ1000 RDA /* Acer Communications & Multimedia (oemd by Surecom) */ product ACERCM EP1427X2 0x0893 EP-1427X-2 Ethernet Adapter /* Acer Labs products */ product ACERLABS M5632 0x5632 USB 2.0 Data Link /* Acer Peripherals, Inc. products */ product ACERP ACERSCAN_C310U 0x12a6 Acerscan C310U product ACERP ACERSCAN_320U 0x2022 Acerscan 320U product ACERP ACERSCAN_640U 0x2040 Acerscan 640U product ACERP ACERSCAN_620U 0x2060 Acerscan 620U product ACERP ACERSCAN_4300U 0x20b0 Benq 3300U/4300U product ACERP ACERSCAN_640BT 0x20be Acerscan 640BT product ACERP ACERSCAN_1240U 0x20c0 Acerscan 1240U product ACERP S81 0x4027 BenQ S81 phone product ACERP H10 0x4068 AWL400 Wireless Adapter product ACERP ATAPI 0x6003 ATA/ATAPI Adapter product ACERP AWL300 0x9000 AWL300 Wireless Adapter product ACERP AWL400 0x9001 AWL400 Wireless Adapter /* Acer Warp products */ product ACERW WARPLINK 0x0204 Warplink /* Actions products */ product ACTIONS MP4 0x1101 Actions MP4 Player /* Actiontec, Inc. products */ product ACTIONTEC PRISM_25 0x0408 Prism2.5 Wireless Adapter product ACTIONTEC PRISM_25A 0x0421 Prism2.5 Wireless Adapter A product ACTIONTEC FREELAN 0x6106 ROPEX FreeLan 802.11b product ACTIONTEC UAT1 0x7605 UAT1 Wireless Ethernet Adapter /* ACTiSYS products */ product ACTISYS IR2000U 0x0011 ACT-IR2000U FIR /* ActiveWire, Inc. products */ product ACTIVEWIRE IOBOARD 0x0100 I/O Board product ACTIVEWIRE IOBOARD_FW1 0x0101 I/O Board, rev. 
1 firmware /* Adaptec products */ product ADAPTEC AWN8020 0x0020 AWN-8020 WLAN /* Addonics products */ product ADDONICS2 205 0xa001 Cable 205 /* Addtron products */ product ADDTRON AWU120 0xff31 AWU-120 /* ADLINK Texhnology products */ product ADLINK ND6530 0x6530 ND-6530 USB-Serial /* ADMtek products */ product ADMTEK PEGASUSII_4 0x07c2 AN986A Ethernet product ADMTEK PEGASUS 0x0986 AN986 Ethernet product ADMTEK PEGASUSII 0x8511 AN8511 Ethernet product ADMTEK PEGASUSII_2 0x8513 AN8513 Ethernet product ADMTEK PEGASUSII_3 0x8515 AN8515 Ethernet /* ADDON products */ /* PNY OEMs these */ product ADDON ATTACHE 0x1300 USB 2.0 Flash Drive product ADDON ATTACHE 0x1300 USB 2.0 Flash Drive product ADDON A256MB 0x1400 Attache 256MB USB 2.0 Flash Drive product ADDON DISKPRO512 0x1420 USB 2.0 Flash Drive (DANE-ELEC zMate 512MB USB flash drive) /* Addonics products */ product ADDONICS2 CABLE_205 0xa001 Cable 205 /* ADS products */ product ADS UBS10BT 0x0008 UBS-10BT Ethernet product ADS UBS10BTX 0x0009 UBS-10BT Ethernet /* AEI products */ product AEI FASTETHERNET 0x1701 Fast Ethernet /* Afatech Technologies, Inc. */ product AFATECH AFATECH1336 0x1336 Flash Card Reader /* Agate Technologies products */ product AGATE QDRIVE 0x0378 Q-Drive /* AGFA products */ product AGFA SNAPSCAN1212U 0x0001 SnapScan 1212U product AGFA SNAPSCAN1236U 0x0002 SnapScan 1236U product AGFA SNAPSCANTOUCH 0x0100 SnapScan Touch product AGFA SNAPSCAN1212U2 0x2061 SnapScan 1212U product AGFA SNAPSCANE40 0x208d SnapScan e40 product AGFA SNAPSCANE50 0x208f SnapScan e50 product AGFA SNAPSCANE20 0x2091 SnapScan e20 product AGFA SNAPSCANE25 0x2095 SnapScan e25 product AGFA SNAPSCANE26 0x2097 SnapScan e26 product AGFA SNAPSCANE52 0x20fd SnapScan e52 /* Ain Communication Technology products */ product AINCOMM AWU2000B 0x1001 AWU2000B Wireless Adapter /* AIPTEK products */ product AIPTEK POCKETCAM3M 0x2011 PocketCAM 3Mega product AIPTEK2 PENCAM_MEGA_1_3 0x504a PenCam Mega 1.3 product AIPTEK2 SUNPLUS_TECH 0x0c15 Sunplus Technology Inc. /* AirPlis products */ product AIRPLUS MCD650 0x3198 MCD650 modem /* AirPrime products */ product AIRPRIME PC5220 0x0112 CDMA Wireless PC Card product AIRPRIME USB308 0x68A3 USB308 HSPA+ USB Modem product AIRPRIME AC313U 0x68aa Sierra Wireless AirCard 313U /* AirTies products */ product AIRTIES RT3070 0x2310 RT3070 /* AKS products */ product AKS USBHASP 0x0001 USB-HASP 0.06 /* Alcatel products */ product ALCATEL OT535 0x02df One Touch 535/735 /* Alcor Micro, Inc. 
products */ product ALCOR2 KBD_HUB 0x2802 Kbd Hub product ALCOR DUMMY 0x0000 Dummy product product ALCOR SDCR_6335 0x6335 SD/MMC Card Reader product ALCOR SDCR_6362 0x6362 SD/MMC Card Reader product ALCOR SDCR_6366 0x6366 SD/MMC Card Reader product ALCOR TRANSCEND 0x6387 Transcend JetFlash Drive product ALCOR MA_KBD_HUB 0x9213 MacAlly Kbd Hub product ALCOR AU9814 0x9215 AU9814 Hub product ALCOR UMCR_9361 0x9361 USB Multimedia Card Reader product ALCOR SM_KBD 0x9410 MicroConnectors/StrongMan Keyboard product ALCOR NEC_KBD_HUB 0x9472 NEC Kbd Hub product ALCOR AU9720 0x9720 USB2 - RS-232 product ALCOR AU6390 0x6390 AU6390 USB-IDE converter /* Alink products */ product ALINK DWM652U5 0xce16 DWM-652 product ALINK 3G 0x9000 3G modem product ALINK SIM7600E 0x9001 LTE modem product ALINK 3GU 0x9200 3G modem /* Altec Lansing products */ product ALTEC ADA70 0x0070 ADA70 Speakers product ALTEC ASC495 0xff05 ASC495 Speakers /* Alti-2 products */ product ALTI2 N3 0x6001 FTDI compatible adapter /* Allied Telesyn International products */ product ALLIEDTELESYN ATUSB100 0xb100 AT-USB100 /* ALLWIN Tech products */ product ALLWIN RT2070 0x2070 RT2070 product ALLWIN RT2770 0x2770 RT2770 product ALLWIN RT2870 0x2870 RT2870 product ALLWIN RT3070 0x3070 RT3070 product ALLWIN RT3071 0x3071 RT3071 product ALLWIN RT3072 0x3072 RT3072 product ALLWIN RT3572 0x3572 RT3572 /* AlphaSmart, Inc. products */ product ALPHASMART DANA_KB 0xdbac AlphaSmart Dana Keyboard product ALPHASMART DANA_SYNC 0xdf00 AlphaSmart Dana HotSync /* Amoi products */ product AMOI H01 0x0800 H01 3G modem product AMOI H01A 0x7002 H01A 3G modem product AMOI H02 0x0802 H02 3G modem /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Ambit Microsystems products */ product AMBIT WLAN 0x0302 WLAN product AMBIT NTL_250 0x6098 NTL 250 cable modem /* Apacer products */ product APACER HT202 0xb113 USB 2.0 Flash Drive /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Amigo Technology products */ product AMIGO RT2870_1 0x9031 RT2870 product AMIGO RT2870_2 0x9041 RT2870 /* AMIT products */ product AMIT CGWLUSB2GO 0x0002 CG-WLUSB2GO product AMIT CGWLUSB2GNR 0x0008 CG-WLUSB2GNR product AMIT RT2870_1 0x0012 RT2870 /* AMIT(2) products */ product AMIT2 RT2870 0x0008 RT2870 /* Analog Devices products */ product ANALOGDEVICES GNICE 0xf000 FTDI compatible adapter product ANALOGDEVICES GNICEPLUS 0xf001 FTDI compatible adapter /* Anchor products */ product ANCHOR SERIAL 0x2008 Serial product ANCHOR EZUSB 0x2131 EZUSB product ANCHOR EZLINK 0x2720 EZLINK /* AnyData products */ product ANYDATA ADU_620UW 0x6202 CDMA 2000 EV-DO USB Modem product ANYDATA ADU_E100X 0x6501 CDMA 2000 1xRTT/EV-DO USB Modem product ANYDATA ADU_500A 0x6502 CDMA 2000 EV-DO USB Modem /* AOX, Inc. 
products */ product AOX USB101 0x0008 Ethernet /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Apple Computer products */ product APPLE DUMMY 0x0000 Dummy product product APPLE IMAC_KBD 0x0201 USB iMac Keyboard product APPLE KBD 0x0202 USB Keyboard M2452 product APPLE EXT_KBD 0x020c Apple Extended USB Keyboard /* MacbookAir, aka wellspring */ product APPLE WELLSPRING_ANSI 0x0223 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING_ISO 0x0224 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING_JIS 0x0225 Apple Internal Keyboard/Trackpad /* MacbookProPenryn, aka wellspring2 */ product APPLE WELLSPRING2_ANSI 0x0230 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING2_ISO 0x0231 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING2_JIS 0x0232 Apple Internal Keyboard/Trackpad /* Macbook5,1 (unibody), aka wellspring3 */ product APPLE WELLSPRING3_ANSI 0x0236 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING3_ISO 0x0237 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING3_JIS 0x0238 Apple Internal Keyboard/Trackpad /* MacbookAir3,2 (unibody), aka wellspring4 */ product APPLE WELLSPRING4_ANSI 0x023f Apple Internal Keyboard/Trackpad product APPLE WELLSPRING4_ISO 0x0240 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING4_JIS 0x0241 Apple Internal Keyboard/Trackpad /* MacbookAir3,1 (unibody), aka wellspring4 */ product APPLE WELLSPRING4A_ANSI 0x0242 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING4A_ISO 0x0243 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING4A_JIS 0x0244 Apple Internal Keyboard/Trackpad /* Macbook8 (unibody, March 2011) */ product APPLE WELLSPRING5_ANSI 0x0245 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING5_ISO 0x0246 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING5_JIS 0x0247 Apple Internal Keyboard/Trackpad /* MacbookAir4,1 (unibody, July 2011) */ product APPLE WELLSPRING6A_ANSI 0x0249 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING6A_ISO 0x024a Apple Internal Keyboard/Trackpad product APPLE WELLSPRING6A_JIS 0x024b Apple Internal Keyboard/Trackpad /* MacbookAir4,2 (unibody, July 2011) */ product APPLE WELLSPRING6_ANSI 0x024c Apple Internal Keyboard/Trackpad product APPLE WELLSPRING6_ISO 0x024d Apple Internal Keyboard/Trackpad product APPLE WELLSPRING6_JIS 0x024e Apple Internal Keyboard/Trackpad /* Macbook8,2 (unibody) */ product APPLE WELLSPRING5A_ANSI 0x0252 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING5A_ISO 0x0253 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING5A_JIS 0x0254 Apple Internal Keyboard/Trackpad /* MacbookPro10,1 (unibody, June 2012) */ product APPLE WELLSPRING7_ANSI 0x0262 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING7_ISO 0x0263 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING7_JIS 0x0264 Apple Internal Keyboard/Trackpad /* MacbookPro10,2 (unibody, October 2012) */ product APPLE WELLSPRING7A_ANSI 0x0259 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING7A_ISO 0x025a Apple Internal Keyboard/Trackpad product APPLE WELLSPRING7A_JIS 0x025b Apple Internal Keyboard/Trackpad /* MacbookAir6,2 (unibody, June 2013) */ product APPLE WELLSPRING8_ANSI 0x0290 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING8_ISO 0x0291 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING8_JIS 0x0292 Apple Internal Keyboard/Trackpad /* MacbookPro12,1 */ product APPLE WELLSPRING9_ANSI 0x0272 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING9_ISO 0x0273 Apple Internal Keyboard/Trackpad 
product APPLE WELLSPRING9_JIS 0x0274 Apple Internal Keyboard/Trackpad
product APPLE MOUSE 0x0301 Mouse M4848
product APPLE OPTMOUSE 0x0302 Optical mouse
product APPLE MIGHTYMOUSE 0x0304 Mighty Mouse
product APPLE KBD_HUB 0x1001 Hub in Apple USB Keyboard
product APPLE EXT_KBD_HUB 0x1003 Hub in Apple Extended USB Keyboard
product APPLE SPEAKERS 0x1101 Speakers
product APPLE IPOD 0x1201 iPod
product APPLE IPOD2G 0x1202 iPod 2G
product APPLE IPOD3G 0x1203 iPod 3G
product APPLE IPOD_04 0x1204 iPod '04'
product APPLE IPODMINI 0x1205 iPod Mini
product APPLE IPOD_06 0x1206 iPod '06'
product APPLE IPOD_07 0x1207 iPod '07'
product APPLE IPOD_08 0x1208 iPod '08'
product APPLE IPODVIDEO 0x1209 iPod Video
product APPLE IPODNANO 0x120a iPod Nano
product APPLE IPHONE 0x1290 iPhone
product APPLE IPOD_TOUCH 0x1291 iPod Touch
product APPLE IPHONE_3G 0x1292 iPhone 3G
product APPLE IPHONE_3GS 0x1294 iPhone 3GS
product APPLE IPHONE_4 0x1297 iPhone 4
product APPLE IPHONE_4S 0x12a0 iPhone 4S
product APPLE IPHONE_5 0x12a8 iPhone 5
product APPLE IPAD 0x129a iPad
product APPLE ETHERNET 0x1402 Ethernet A1277

/* Arkmicro Technologies */
product ARKMICRO ARK3116 0x0232 ARK3116 Serial

/* Asahi Optical products */
product ASAHIOPTICAL OPTIO230 0x0004 Digital camera
product ASAHIOPTICAL OPTIO330 0x0006 Digital camera

/* Asante products */
product ASANTE EA 0x1427 Ethernet

/* ASIX Electronics products */
product ASIX AX88172 0x1720 10/100 Ethernet
product ASIX AX88178 0x1780 AX88178
product ASIX AX88178A 0x178a AX88178A USB 2.0 10/100/1000 Ethernet
product ASIX AX88179 0x1790 AX88179 USB 3.0 10/100/1000 Ethernet
product ASIX AX88772 0x7720 AX88772
product ASIX AX88772A 0x772a AX88772A USB 2.0 10/100 Ethernet
product ASIX AX88772B 0x772b AX88772B USB 2.0 10/100 Ethernet
product ASIX AX88772B_1 0x7e2b AX88772B USB 2.0 10/100 Ethernet

/* ASUS products */
product ASUS2 USBN11 0x0b05 USB-N11
product ASUS RT2570 0x1706 RT2500USB Wireless Adapter
product ASUS WL167G 0x1707 WL-167g Wireless Adapter
product ASUS WL159G 0x170c WL-159g
product ASUS A9T_WIFI 0x171b A9T wireless
product ASUS P5B_WIFI 0x171d P5B wireless
product ASUS RT2573_1 0x1723 RT2573
product ASUS RT2573_2 0x1724 RT2573
product ASUS LCM 0x1726 LCM display
product ASUS RT2870_1 0x1731 RT2870
product ASUS RT2870_2 0x1732 RT2870
product ASUS RT2870_3 0x1742 RT2870
product ASUS RT2870_4 0x1760 RT2870
product ASUS RT2870_5 0x1761 RT2870
product ASUS USBN13 0x1784 USB-N13
product ASUS USBN10 0x1786 USB-N10
product ASUS RT3070_1 0x1790 RT3070
product ASUS RTL8192SU 0x1791 RTL8192SU
product ASUS USB_N53 0x179d ASUS Black Diamond Dual Band USB-N53
product ASUS RTL8192CU 0x17ab RTL8192CU
product ASUS USBN66 0x17ad USB-N66
product ASUS USBN10NANO 0x17ba USB-N10 Nano
product ASUS USBAC51 0x17d1 USB-AC51
product ASUS USBAC56 0x17d2 USB-AC56
product ASUS A730W 0x4202 ASUS MyPal A730W
product ASUS P535 0x420f ASUS P535 PDA
product ASUS GMSC 0x422f ASUS Generic Mass Storage

/* ATen products */
product ATEN UC1284 0x2001 Parallel printer
product ATEN UC10T 0x2002 10Mbps Ethernet
product ATEN UC110T 0x2007 UC-110T Ethernet
product ATEN UC232A 0x2008 Serial
product ATEN UC210T 0x2009 UC-210T Ethernet
product ATEN DSB650C 0x4000 DSB-650C

/* ATP Electronics products */
product ATP EUSB 0xaf01 ATP IG eUSB SSD

/* Atheros Communications products */
product ATHEROS AR5523 0x0001 AR5523
product ATHEROS AR5523_NF 0x0002 AR5523 (no firmware)
product ATHEROS2 AR5523_1 0x0001 AR5523
product ATHEROS2 AR5523_1_NF 0x0002 AR5523 (no firmware)
product ATHEROS2 AR5523_2 0x0003 AR5523
product ATHEROS2 AR5523_2_NF 0x0004 AR5523 (no firmware)
product ATHEROS2 AR5523_3 0x0005 AR5523
product ATHEROS2 AR5523_3_NF 0x0006 AR5523 (no firmware)
product ATHEROS2 TG121N 0x1001 TG121N
product ATHEROS2 WN821NV2 0x1002 WN821NV2
product ATHEROS2 3CRUSBN275 0x1010 3CRUSBN275
product ATHEROS2 WN612 0x1011 WN612
product ATHEROS2 AR9170 0x9170 AR9170

/* Atmel Comp. products */
product ATMEL STK541 0x2109 Zigbee Controller
product ATMEL UHB124 0x3301 AT43301 USB 1.1 Hub
product ATMEL DWL120 0x7603 DWL-120 Wireless Adapter
product ATMEL BW002 0x7605 BW002 Wireless Adapter
product ATMEL WL1130USB 0x7613 WL-1130 USB
product ATMEL AT76C505A 0x7614 AT76c505a Wireless Adapter

/* AuthenTec products */
product AUTHENTEC AES1610 0x1600 AES1610 Fingerprint Sensor

/* Avision products */
product AVISION 1200U 0x0268 1200U scanner

/* AVM products */
product AVM FRITZWLAN 0x8401 FRITZ!WLAN N

/* Axesstel products */
product AXESSTEL DATAMODEM 0x1000 Data Modem

/* AzureWave products */
product AZUREWAVE RT2870_1 0x3247 RT2870
product AZUREWAVE RT2870_2 0x3262 RT2870
product AZUREWAVE RT3070_1 0x3273 RT3070
product AZUREWAVE RT3070_2 0x3284 RT3070
product AZUREWAVE RT3070_3 0x3305 RT3070
product AZUREWAVE RTL8188CU 0x3357 RTL8188CU
product AZUREWAVE RTL8188CE_1 0x3358 RTL8188CE
product AZUREWAVE RTL8188CE_2 0x3359 RTL8188CE
product AZUREWAVE RTL8192SU_1 0x3306 RTL8192SU
product AZUREWAVE RTL8192SU_2 0x3309 RTL8192SU
product AZUREWAVE RTL8192SU_3 0x3310 RTL8192SU
product AZUREWAVE RTL8192SU_4 0x3311 RTL8192SU
product AZUREWAVE RTL8192SU_5 0x3325 RTL8192SU

/* Baltech products */
product BALTECH CARDREADER 0x9999 Card reader

/* Bayer products */
product BAYER CONTOUR_CABLE 0x6001 FTDI compatible adapter

/* B&B Electronics products */
product BBELECTRONICS USOTL4 0xac01 RS-422/485
product BBELECTRONICS 232USB9M 0xac27 FTDI compatible adapter
product BBELECTRONICS 485USB9F_2W 0xac25 FTDI compatible adapter
product BBELECTRONICS 485USB9F_4W 0xac26 FTDI compatible adapter
product BBELECTRONICS 485USBTB_2W 0xac33 FTDI compatible adapter
product BBELECTRONICS 485USBTB_4W 0xac34 FTDI compatible adapter
product BBELECTRONICS TTL3USB9M 0xac50 FTDI compatible adapter
product BBELECTRONICS TTL5USB9M 0xac49 FTDI compatible adapter
product BBELECTRONICS USO9ML2 0xac03 FTDI compatible adapter
product BBELECTRONICS USO9ML2DR 0xac17 FTDI compatible adapter
product BBELECTRONICS USO9ML2DR_2 0xac16 FTDI compatible adapter
product BBELECTRONICS USOPTL4 0xac11 FTDI compatible adapter
product BBELECTRONICS USOPTL4DR 0xac19 FTDI compatible adapter
product BBELECTRONICS USOPTL4DR2 0xac18 FTDI compatible adapter
product BBELECTRONICS USPTL4 0xac12 FTDI compatible adapter
product BBELECTRONICS USTL4 0xac02 FTDI compatible adapter
product BBELECTRONICS ZZ_PROG1_USB 0xba02 FTDI compatible adapter

/* Belkin products */
/*product BELKIN F5U111 0x???? F5U111 Ethernet*/
product BELKIN F5D6050 0x0050 F5D6050 802.11b Wireless Adapter
product BELKIN FBT001V 0x0081 FBT001v2 Bluetooth
product BELKIN FBT003V 0x0084 FBT003v2 Bluetooth
product BELKIN F5U103 0x0103 F5U103 Serial
product BELKIN F5U109 0x0109 F5U109 Serial
product BELKIN USB2SCSI 0x0115 USB to SCSI
product BELKIN F8T012 0x0121 F8T012xx1 Bluetooth USB Adapter
product BELKIN USB2LAN 0x0121 USB to LAN
product BELKIN F5U208 0x0208 F5U208 VideoBus II
product BELKIN F5U237 0x0237 F5U237 USB 2.0 7-Port Hub
product BELKIN F5U257 0x0257 F5U257 Serial
product BELKIN F5U409 0x0409 F5U409 Serial
product BELKIN F6C550AVR 0x0551 F6C550-AVR UPS
product BELKIN F5U120 0x1203 F5U120-PC Hub
product BELKIN RTL8188CU 0x1102 RTL8188CU Wireless Adapter
product BELKIN F9L1103 0x1103 F9L1103 Wireless Adapter
product BELKIN RTL8192CU 0x2102 RTL8192CU Wireless Adapter
product BELKIN F7D2102 0x2103 F7D2102 Wireless Adapter
product BELKIN F5U258 0x258a F5U258 Host to Host cable
product BELKIN ZD1211B 0x4050 ZD1211B
product BELKIN F5D5055 0x5055 F5D5055
product BELKIN F5D7050 0x7050 F5D7050 Wireless Adapter
product BELKIN F5D7051 0x7051 F5D7051 54g USB Network Adapter
product BELKIN F5D7050A 0x705a F5D7050A Wireless Adapter
/* Also sold as 'Ativa 802.11g wireless card' */
product BELKIN F5D7050_V4000 0x705c F5D7050 v4000 Wireless Adapter
product BELKIN F5D7050E 0x705e F5D7050E Wireless Adapter
product BELKIN RT2870_1 0x8053 RT2870
product BELKIN RT2870_2 0x805c RT2870
product BELKIN F5D8053V3 0x815c F5D8053 v3
product BELKIN RTL8192SU_1 0x815f RTL8192SU
product BELKIN RTL8192SU_2 0x845a RTL8192SU
product BELKIN RTL8192SU_3 0x945a RTL8192SU
product BELKIN F5D8055 0x825a F5D8055
product BELKIN F5D8055V2 0x825b F5D8055 v2
product BELKIN F5D9050V3 0x905b F5D9050 ver 3 Wireless Adapter
product BELKIN2 F5U002 0x0002 F5U002 Parallel printer
product BELKIN F6D4050V1 0x935a F6D4050 v1
product BELKIN F6D4050V2 0x935b F6D4050 v2

/* Billionton products */
product BILLIONTON USB100 0x0986 USB100N 10/100 FastEthernet
product BILLIONTON USBLP100 0x0987 USB100LP
product BILLIONTON USBEL100 0x0988 USB100EL
product BILLIONTON USBE100 0x8511 USBE100
product BILLIONTON USB2AR 0x90ff USB2AR Ethernet

/* Broadcom products */
product BROADCOM BCM2033 0x2033 BCM2033 Bluetooth USB dongle

/* Brother Industries products */
product BROTHER HL1050 0x0002 HL-1050 laser printer
product BROTHER MFC8600_9650 0x0100 MFC8600/9650 multifunction device

/* Behavior Technology Computer products */
product BTC BTC6100 0x5550 6100C Keyboard
product BTC BTC7932 0x6782 Keyboard with mouse port

/* CACE Technologies products */
product CACE AIRPCAPNX 0x0300 AirPcap NX

/* Canon, Inc. products */
product CANON N656U 0x2206 CanoScan N656U
product CANON N1220U 0x2207 CanoScan N1220U
product CANON D660U 0x2208 CanoScan D660U
product CANON N676U 0x220d CanoScan N676U
product CANON N1240U 0x220e CanoScan N1240U
product CANON LIDE25 0x2220 CanoScan LIDE 25
product CANON S10 0x3041 PowerShot S10
product CANON S100 0x3045 PowerShot S100
product CANON S200 0x3065 PowerShot S200
product CANON REBELXT 0x30ef Digital Rebel XT

/* CATC products */
product CATC NETMATE 0x000a Netmate Ethernet
product CATC NETMATE2 0x000c Netmate2 Ethernet
product CATC CHIEF 0x000d USB Chief Bus & Protocol Analyzer
product CATC ANDROMEDA 0x1237 Andromeda hub

/* CASIO products */
product CASIO QV_DIGICAM 0x1001 QV DigiCam
product CASIO EXS880 0x1105 Exilim EX-S880
product CASIO BE300 0x2002 BE-300 PDA
product CASIO NAMELAND 0x4001 CASIO Nameland EZ-USB

/* CCYU products */
product CCYU ED1064 0x2136 EasyDisk ED1064

/* Century products */
product CENTURY EX35QUAT 0x011e Century USB Disk Enclosure
product CENTURY EX35SW4_SB4 0x011f Century USB Disk Enclosure

/* Cherry products */
product CHERRY MY3000KBD 0x0001 My3000 keyboard
product CHERRY MY3000HUB 0x0003 My3000 hub
product CHERRY CYBOARD 0x0004 CyBoard Keyboard

/* Chic Technology products */
product CHIC MOUSE1 0x0001 mouse
product CHIC CYPRESS 0x0003 Cypress USB Mouse

/* Chicony products */
product CHICONY KB8933 0x0001 KB-8933 keyboard
product CHICONY KU0325 0x0116 KU-0325 keyboard
product CHICONY CNF7129 0xb071 Notebook Web Camera
product CHICONY HDUVCCAM 0xb40a HD UVC WebCam
product CHICONY RTL8188CUS_1 0xaff7 RTL8188CUS
product CHICONY RTL8188CUS_2 0xaff8 RTL8188CUS
product CHICONY RTL8188CUS_3 0xaff9 RTL8188CUS
product CHICONY RTL8188CUS_4 0xaffa RTL8188CUS
product CHICONY RTL8188CUS_5 0xaffb RTL8188CUS
product CHICONY2 TWINKLECAM 0x600d TwinkleCam USB camera

/* CH Products */
product CHPRODUCTS PROTHROTTLE 0x00f1 Pro Throttle
product CHPRODUCTS PROPEDALS 0x00f2 Pro Pedals
product CHPRODUCTS FIGHTERSTICK 0x00f3 Fighterstick
product CHPRODUCTS FLIGHTYOKE 0x00ff Flight Sim Yoke

/* Cisco-Linksys products */
product CISCOLINKSYS WUSB54AG 0x000c WUSB54AG Wireless Adapter
product CISCOLINKSYS WUSB54G 0x000d WUSB54G Wireless Adapter
product CISCOLINKSYS WUSB54GP 0x0011 WUSB54GP Wireless Adapter
product CISCOLINKSYS USB200MV2 0x0018 USB200M v2
product CISCOLINKSYS HU200TS 0x001a HU200TS Wireless Adapter
product CISCOLINKSYS WUSB54GC 0x0020 WUSB54GC
product CISCOLINKSYS WUSB54GR 0x0023 WUSB54GR
product CISCOLINKSYS WUSBF54G 0x0024 WUSBF54G
product CISCOLINKSYS AE1000 0x002f AE1000
product CISCOLINKSYS WUSB6300 0x003f WUSB6300
product CISCOLINKSYS USB3GIGV1 0x0041 USB3GIGV1 USB Ethernet Adapter
product CISCOLINKSYS2 RT3070 0x4001 RT3070
product CISCOLINKSYS3 RT3070 0x0101 RT3070

/* Clipsal products */
product CLIPSAL 560884 0x0101 560884 C-Bus Audio Matrix Switch
product CLIPSAL 5500PACA 0x0201 5500PACA C-Bus Pascal Automation Controller
product CLIPSAL 5800PC 0x0301 5800PC C-Bus Wireless Interface
product CLIPSAL 5500PCU 0x0303 5500PCU C-Bus Interface
product CLIPSAL 5000CT2 0x0304 5000CT2 C-Bus Touch Screen
product CLIPSAL C5000CT2 0x0305 C5000CT2 C-Bus Touch Screen
product CLIPSAL L51xx 0x0401 L51xx C-Bus Dimmer

/* C-Media products */
product CMEDIA CM6206 0x0102 CM106 compatible sound device

/* CMOTECH products */
product CMOTECH CNU510 0x5141 CDMA Technologies USB modem
product CMOTECH CNU550 0x5543 CDMA 2000 1xRTT/1xEVDO USB modem
product CMOTECH CGU628 0x6006 CGU-628
product CMOTECH CDMA_MODEM1 0x6280 CDMA Technologies USB modem
product CMOTECH DISK 0xf000 disk mode

/* Compaq products */
product COMPAQ IPAQPOCKETPC 0x0003 iPAQ PocketPC
product COMPAQ PJB100 0x504a Personal Jukebox PJB100
product COMPAQ IPAQLINUX 0x505a iPAQ Linux

/* Composite Corp products (look the same as "TANGTOP") */
product COMPOSITE USBPS2 0x0001 USB to PS2 Adaptor

/* Conceptronic products */
product CONCEPTRONIC PRISM_GT 0x3762 PrismGT USB 2.0 WLAN
product CONCEPTRONIC C11U 0x7100 C11U
product CONCEPTRONIC WL210 0x7110 WL-210
product CONCEPTRONIC AR5523_1 0x7801 AR5523
product CONCEPTRONIC AR5523_1_NF 0x7802 AR5523 (no firmware)
product CONCEPTRONIC AR5523_2 0x7811 AR5523
product CONCEPTRONIC AR5523_2_NF 0x7812 AR5523 (no firmware)
product CONCEPTRONIC2 RTL8192SU_1 0x3300 RTL8192SU
product CONCEPTRONIC2 RTL8192SU_2 0x3301 RTL8192SU
product CONCEPTRONIC2 RTL8192SU_3 0x3302 RTL8192SU
product CONCEPTRONIC2 C54RU 0x3c02 C54RU WLAN
product CONCEPTRONIC2 C54RU2 0x3c22 C54RU
product CONCEPTRONIC2 RT3070_1 0x3c08 RT3070
product CONCEPTRONIC2 RT3070_2 0x3c11 RT3070
product CONCEPTRONIC2 VIGORN61 0x3c25 VIGORN61
product CONCEPTRONIC2 RT2870_1 0x3c06 RT2870
product CONCEPTRONIC2 RT2870_2 0x3c07 RT2870
product CONCEPTRONIC2 RT2870_7 0x3c09 RT2870
product CONCEPTRONIC2 RT2870_8 0x3c12 RT2870
product CONCEPTRONIC2 RT2870_3 0x3c23 RT2870
product CONCEPTRONIC2 RT2870_4 0x3c25 RT2870
product CONCEPTRONIC2 RT2870_5 0x3c27 RT2870
product CONCEPTRONIC2 RT2870_6 0x3c28 RT2870

/* Connectix products */
product CONNECTIX QUICKCAM 0x0001 QuickCam

/* Contec products */
product CONTEC COM1USBH 0x8311 FTDI compatible adapter

/* Corega products */
product COREGA ETHER_USB_T 0x0001 Ether USB-T
product COREGA FETHER_USB_TX 0x0004 FEther USB-TX
product COREGA WLAN_USB_USB_11 0x000c WirelessLAN USB-11
product COREGA FETHER_USB_TXS 0x000d FEther USB-TXS
product COREGA WLANUSB 0x0012 Wireless LAN Stick-11
product COREGA FETHER_USB2_TX 0x0017 FEther USB2-TX
product COREGA WLUSB_11_KEY 0x001a ULUSB-11 Key
product COREGA CGUSBRS232R 0x002a CG-USBRS232R
product COREGA CGWLUSB2GL 0x002d CG-WLUSB2GL
product COREGA CGWLUSB2GPX 0x002e CG-WLUSB2GPX
product COREGA RT2870_1 0x002f RT2870
product COREGA RT2870_2 0x003c RT2870
product COREGA RT2870_3 0x003f RT2870
product COREGA RT3070 0x0041 RT3070
product COREGA CGWLUSB300GNM 0x0042 CG-WLUSB300GNM
product COREGA RTL8192SU 0x0047 RTL8192SU
product COREGA RTL8192CU 0x0056 RTL8192CU
product COREGA WLUSB_11_STICK 0x7613 WLAN USB Stick 11
product COREGA FETHER_USB_TXC 0x9601 FEther USB-TXC

/* Corsair products */
product CORSAIR K60 0x0a60 Corsair Vengeance K60 keyboard
product CORSAIR K70 0x1b09 Corsair Vengeance K70 keyboard
product CORSAIR K70_RGB 0x1b13 Corsair K70 RGB Keyboard
product CORSAIR STRAFE 0x1b15 Corsair STRAFE Gaming keyboard

/* Creative products */
product CREATIVE NOMAD_II 0x1002 Nomad II MP3 player
product CREATIVE NOMAD_IIMG 0x4004 Nomad II MG
product CREATIVE NOMAD 0x4106 Nomad
product CREATIVE2 VOIP_BLASTER 0x0258 Voip Blaster
product CREATIVE3 OPTICAL_MOUSE 0x0001 Notebook Optical Mouse

/* Cambridge Silicon Radio Ltd. products */
product CSR BT_DONGLE 0x0001 Bluetooth USB dongle
product CSR CSRDFU 0xffff USB Bluetooth Device in DFU State

/* Chipsbank Microelectronics Co., Ltd */
product CHIPSBANK USBMEMSTICK 0x6025 CBM2080 Flash drive controller
product CHIPSBANK USBMEMSTICK1 0x6026 CBM1180 Flash drive controller

/* CTX products */
product CTX EX1300 0x9999 Ex1300 hub

/* Curitel products */
product CURITEL HX550C 0x1101 CDMA 2000 1xRTT USB modem (HX-550C)
product CURITEL HX57XB 0x2101 CDMA 2000 1xRTT USB modem (HX-570/575B/PR-600)
product CURITEL PC5740 0x3701 Broadband Wireless modem
product CURITEL UM150 0x3711 EVDO modem
product CURITEL UM175 0x3714 EVDO modem

/* CyberPower products */
product CYBERPOWER 1500CAVRLCD 0x0501 1500CAVRLCD

/* CyberTAN Technology products */
product CYBERTAN TG54USB 0x1666 TG54USB
product CYBERTAN RT2870 0x1828 RT2870

/* Cypress Semiconductor products */
product CYPRESS MOUSE 0x0001 mouse
product CYPRESS THERMO 0x0002 thermometer
product CYPRESS WISPY1A 0x0bad MetaGeek Wi-Spy
product CYPRESS KBDHUB 0x0101 Keyboard/Hub
product CYPRESS FMRADIO 0x1002 FM Radio
product CYPRESS IKARILASER 0x121f Ikari Laser SteelSeries ApS
product CYPRESS USBRS232 0x5500 USB-RS232 Interface
product CYPRESS SLIM_HUB 0x6560 Slim Hub
product CYPRESS XX6830XX 0x6830 PATA Storage Device
product CYPRESS SILVERSHIELD 0xfd13 Gembird Silver Shield PM

/* Daisy Technology products */
product DAISY DMC 0x6901 USB MultiMedia Reader

/* Dallas Semiconductor products */
product DALLAS J6502 0x4201 J-6502 speakers

/* DataApex products */
product DATAAPEX MULTICOM 0xead6 MultiCom

/* Dell products */
product DELL PORT 0x0058 Port Replicator
product DELL AIO926 0x5115 Photo AIO Printer 926
product DELL BC02 0x8000 BC02 Bluetooth USB Adapter
product DELL PRISM_GT_1 0x8102 PrismGT USB 2.0 WLAN
product DELL TM350 0x8103 TrueMobile 350 Bluetooth USB Adapter
product DELL PRISM_GT_2 0x8104 PrismGT USB 2.0 WLAN
product DELL U5700 0x8114 Dell 5700 3G
product DELL U5500 0x8115 Dell 5500 3G
product DELL U5505 0x8116 Dell 5505 3G
product DELL U5700_2 0x8117 Dell 5700 3G
product DELL U5510 0x8118 Dell 5510 3G
product DELL U5700_3 0x8128 Dell 5700 3G
product DELL U5700_4 0x8129 Dell 5700 3G
product DELL U5720 0x8133 Dell 5720 3G
product DELL U5720_2 0x8134 Dell 5720 3G
product DELL U740 0x8135 Dell U740 CDMA
product DELL U5520 0x8136 Dell 5520 3G
product DELL U5520_2 0x8137 Dell 5520 3G
product DELL U5520_3 0x8138 Dell 5520 3G
product DELL U5730 0x8180 Dell 5730 3G
product DELL U5730_2 0x8181 Dell 5730 3G
product DELL U5730_3 0x8182 Dell 5730 3G
product DELL DW700 0x9500 Dell DW700 GPS

/* DeLorme Publishing products */
product DELORME EARTHMATE 0x0100 Earthmate GPS

/* Desknote products */
product DESKNOTE UCR_61S2B 0x0c55 UCR-61S2B

/* Diamond products */
product DIAMOND RIO500USB 0x0001 Rio 500 USB

/* Dick Smith Electronics (really C-Net) products */
product DICKSMITH RT2573 0x9022 RT2573
product DICKSMITH CWD854F 0x9032 C-Net CWD-854 rev F

/* Digi International products */
product DIGI ACCELEPORT2 0x0002 AccelePort USB 2
product DIGI ACCELEPORT4 0x0004 AccelePort USB 4
product DIGI ACCELEPORT8 0x0008 AccelePort USB 8

/* Digianswer A/S products */
product DIGIANSWER ZIGBEE802154 0x000a ZigBee/802.15.4 MAC

/* D-Link products */
/*product DLINK DSBS25 0x0100 DSB-S25 serial*/
product DLINK DUBE100 0x1a00 10/100 Ethernet
product DLINK DUBE100C1 0x1a02 DUB-E100 rev C1
product DLINK DSB650TX4 0x200c 10/100 Ethernet
product DLINK DWL120E 0x3200 DWL-120 rev E
product DLINK RTL8192CU_1 0x3307 RTL8192CU
product DLINK RTL8188CU 0x3308 RTL8188CU
product DLINK RTL8192CU_2 0x3309 RTL8192CU
product DLINK RTL8192CU_3 0x330a RTL8192CU
product DLINK DWA131B 0x330d DWA-131 rev B
product DLINK DWA125D1 0x330f DWA-125 rev D1
product DLINK DWA123D1 0x3310 DWA-123 rev D1
product DLINK DWA171A1 0x3314 DWA-171 rev A1
product DLINK DWA182C1 0x3315 DWA-182 rev C1
product DLINK DWA180A1 0x3316 DWA-180 rev A1
product DLINK DWA172A1 0x3318 DWA-172 rev A1
product DLINK DWA131E1 0x3319 DWA-131 rev E1
product DLINK DWL122 0x3700 DWL-122
product DLINK DWLG120 0x3701 DWL-G120
product DLINK DWL120F 0x3702 DWL-120 rev F
product DLINK DWLAG132 0x3a00 DWL-AG132
product DLINK DWLAG132_NF 0x3a01 DWL-AG132 (no firmware)
product DLINK DWLG132 0x3a02 DWL-G132
product DLINK DWLG132_NF 0x3a03 DWL-G132 (no firmware)
product DLINK DWLAG122 0x3a04 DWL-AG122
product DLINK DWLAG122_NF 0x3a05 DWL-AG122 (no firmware)
product DLINK DWLG122 0x3c00 DWL-G122 b1 Wireless Adapter
product DLINK DUBE100B1 0x3c05 DUB-E100 rev B1
product DLINK RT2870 0x3c09 RT2870
product DLINK RT3072 0x3c0a RT3072
product DLINK DWA140B3 0x3c15 DWA-140 rev B3
product DLINK DWA160B2 0x3c1a DWA-160 rev B2
product DLINK DWA127 0x3c1b DWA-127 Wireless Adapter
product DLINK DWA162 0x3c1f DWA-162 Wireless Adapter
product DLINK DWA140D1 0x3c20 DWA-140 rev D1
product DLINK DSB650C 0x4000 10Mbps Ethernet
product DLINK DSB650TX1 0x4001 10/100 Ethernet
product DLINK DSB650TX 0x4002 10/100 Ethernet
product DLINK DSB650TX_PNA 0x4003 1/10/100 Ethernet
product DLINK DSB650TX3 0x400b 10/100 Ethernet
product DLINK DSB650TX2 0x4102 10/100 Ethernet
product DLINK DUB1312 0x4a00 10/100/1000 Ethernet
product DLINK DWM157 0x7d02 DWM-157
product DLINK DWR510 0x7e12 DWR-510
product DLINK DWM222 0x7e35 DWM-222
product DLINK DWM157_CD 0xa707 DWM-157 CD-ROM Mode
product DLINK DWR510_CD 0xa805 DWR-510 CD-ROM Mode
product DLINK DWM222_CD 0xab00 DWM-222 CD-ROM Mode
product DLINK DSB650 0xabc1 10/100 Ethernet
product DLINK DUBH7 0xf103 DUB-H7 USB 2.0 7-Port Hub
product DLINK2 RTL8192SU_1 0x3300 RTL8192SU
product DLINK2 RTL8192SU_2 0x3302 RTL8192SU
product DLINK2 DWA131A1 0x3303 DWA-131 A1
product DLINK2 DWA160A2 0x3a09 DWA-160 A2
product DLINK2 DWA120 0x3a0c DWA-120
product DLINK2 DWA120_NF 0x3a0d DWA-120 (no firmware)
product DLINK2 DWA130D1 0x3a0f DWA-130 D1
product DLINK2 DWLG122C1 0x3c03 DWL-G122 c1
product DLINK2 WUA1340 0x3c04 WUA-1340
product DLINK2 DWA111 0x3c06 DWA-111
product DLINK2 DWA110 0x3c07 DWA-110
product DLINK2 RT2870_1 0x3c09 RT2870
product DLINK2 RT3072 0x3c0a RT3072
product DLINK2 RT3072_1 0x3c0b RT3072
product DLINK2 RT3070_1 0x3c0d RT3070
product DLINK2 RT3070_2 0x3c0e RT3070
product DLINK2 RT3070_3 0x3c0f RT3070
product DLINK2 DWA160A1 0x3c10 DWA-160 A1
product DLINK2 RT2870_2 0x3c11 RT2870
product DLINK2 DWA130 0x3c13 DWA-130
product DLINK2 RT3070_4 0x3c15 RT3070
product DLINK2 RT3070_5 0x3c16 RT3070
product DLINK3 DWM652 0x3e04 DWM-652

/* DisplayLink products */
product DISPLAYLINK LCD4300U 0x01ba LCD-4300U
product DISPLAYLINK LCD8000U 0x01bb LCD-8000U
product DISPLAYLINK LD220 0x0100 Samsung LD220
product DISPLAYLINK GUC2020 0x0059 IOGEAR DVI GUC2020
product DISPLAYLINK VCUD60 0x0136 Rextron DVI
product DISPLAYLINK CONV 0x0138 StarTech CONV-USB2DVI
product DISPLAYLINK DLDVI 0x0141 DisplayLink DVI
product DISPLAYLINK VGA10 0x015a CMP-USBVGA10
product DISPLAYLINK WSDVI 0x0198 WS Tech DVI
product DISPLAYLINK EC008 0x019b EasyCAP008 DVI
product DISPLAYLINK HPDOCK 0x01d4 HP USB Docking
product DISPLAYLINK NL571 0x01d7 HP USB DVI
product DISPLAYLINK M01061 0x01e2 Lenovo DVI
product DISPLAYLINK SWDVI 0x024c SUNWEIT DVI
product DISPLAYLINK NBDOCK 0x0215 VideoHome NBdock1920
product DISPLAYLINK LUM70 0x02a9 Lilliput UM-70
product DISPLAYLINK UM7X0 0x401a nanovision MiMo
product DISPLAYLINK LT1421 0x03e0 Lenovo ThinkVision LT1421
product DISPLAYLINK POLARIS2 0x0117 Polaris2 USB dock
product DISPLAYLINK PLUGABLE 0x0377 Plugable docking station
product DISPLAYLINK ITEC 0x02e9 i-tec USB 2.0 Docking Station

/* DMI products */
product DMI CFSM_RW 0xa109 CF/SM Reader/Writer
product DMI DISK 0x2bcf Generic Disk

/* DrayTek products */
product DRAYTEK VIGOR550 0x0550 Vigor550

/* Dream Link products */
product DREAMLINK DL100B 0x0004 USB Webmail Notifier

/* dresden elektronik products */
product DRESDENELEKTRONIK SENSORTERMINALBOARD 0x0001 SensorTerminalBoard
product DRESDENELEKTRONIK WIRELESSHANDHELDTERMINAL 0x0004 Wireless Handheld Terminal
product DRESDENELEKTRONIK DE_RFNODE 0x001c deRFnode
product DRESDENELEKTRONIK LEVELSHIFTERSTICKLOWCOST 0x0022 Levelshifter Stick Low Cost

/* DYMO */
product DYMO LABELMANAGERPNP 0x1001 DYMO LabelManager PnP

/* Dynastream Innovations */
product DYNASTREAM ANTDEVBOARD 0x1003 ANT dev board
product DYNASTREAM ANT2USB 0x1004 ANT2USB
product DYNASTREAM ANTDEVBOARD2 0x1006 ANT dev board

/* Edimax products */
product EDIMAX EW7318USG 0x7318 USB Wireless dongle
product EDIMAX RTL8192SU_1 0x7611 RTL8192SU
product EDIMAX RTL8192SU_2 0x7612 RTL8192SU
product EDIMAX EW7622UMN 0x7622 EW-7622UMn
product EDIMAX RT2870_1 0x7711 RT2870
product EDIMAX EW7717 0x7717 EW-7717
product EDIMAX EW7718 0x7718 EW-7718
product EDIMAX EW7733UND 0x7733 EW-7733UnD
product EDIMAX EW7811UN 0x7811 EW-7811Un
product EDIMAX RTL8192CU 0x7822 RTL8192CU
product EDIMAX EW7811UTC_1 0xa811 EW-7811UTC
product EDIMAX EW7811UTC_2 0xa812 EW-7811UTC
product EDIMAX EW7822UAC 0xa822 EW-7822UAC

/* eGalax Products */
product EGALAX TPANEL 0x0001 Touch Panel
product EGALAX TPANEL2 0x0002 Touch Panel
product EGALAX2 TPANEL 0x0001 Touch Panel

/* EGO Products */
product EGO DUMMY 0x0000 Dummy Product
product EGO M4U 0x1020 ESI M4U

/* Eicon Networks */
product EICON DIVA852 0x4905 Diva 852 ISDN TA

/* EIZO products */
product EIZO HUB 0x0000 hub
product EIZO MONITOR 0x0001 monitor

/* ELCON Systemtechnik products */
product ELCON PLAN 0x0002 Goldpfeil P-LAN

/* Elecom products */
product ELECOM MOUSE29UO 0x0002 mouse 29UO
product ELECOM LDUSBTX0 0x200c LD-USB/TX
product ELECOM LDUSBTX1 0x4002 LD-USB/TX
product ELECOM LDUSBLTX 0x4005 LD-USBL/TX
product ELECOM WDC150SU2M 0x4008 WDC-150SU2M
product ELECOM LDUSBTX2 0x400b LD-USB/TX
product ELECOM LDUSB20 0x4010 LD-USB20
product ELECOM UCSGT 0x5003 UC-SGT
product ELECOM UCSGT0 0x5004 UC-SGT
product ELECOM LDUSBTX3 0xabc1 LD-USB/TX

/* Elektor products */
product ELEKTOR FT323R 0x0005 FTDI compatible adapter

/* Elsa products */
product ELSA MODEM1 0x2265 ELSA Modem Board
product ELSA USB2ETHERNET 0x3000 Microlink USB2Ethernet

/* ELV products */
product ELV USBI2C 0xe00f USB-I2C interface

/* EMS products */
product EMS DUAL_SHOOTER 0x0003 PSX gun controller converter

/* Emtec products */
product EMTEC RUF2PS 0x2240 Flash Drive

/* Encore products */
product ENCORE RT3070_1 0x1480 RT3070
product ENCORE RT3070_2 0x14a1 RT3070
product ENCORE RT3070_3 0x14a9 RT3070

/* Entrega products */
product ENTREGA 1S 0x0001 1S serial
product ENTREGA 2S 0x0002 2S serial
product ENTREGA 1S25 0x0003 1S25 serial
product ENTREGA 4S 0x0004 4S serial
product ENTREGA E45 0x0005 E45 Ethernet
product ENTREGA CENTRONICS 0x0006 Parallel Port
product ENTREGA XX1 0x0008 Ethernet
product ENTREGA 1S9 0x0093 1S9 serial
product ENTREGA EZUSB 0x8000 EZ-USB
/*product ENTREGA SERIAL 0x8001 DB25 Serial*/
product ENTREGA 2U4S 0x8004 2U4S serial/usb hub
product ENTREGA XX2 0x8005 Ethernet
/*product ENTREGA SERIAL_DB9 0x8093 DB9 Serial*/

/* Epson products */
product EPSON PRINTER1 0x0001 USB Printer
product EPSON PRINTER2 0x0002 ISD USB Smart Cable for Mac
product EPSON PRINTER3 0x0003 ISD USB Smart Cable
product EPSON PRINTER5 0x0005 USB Printer
product EPSON 636 0x0101 Perfection 636U / 636Photo scanner
product EPSON 610 0x0103 Perfection 610 scanner
product EPSON 1200 0x0104 Perfection 1200U / 1200Photo scanner
product EPSON 1600 0x0107 Expression 1600 scanner
product EPSON 1640 0x010a Perfection 1640SU scanner
product EPSON 1240 0x010b Perfection 1240U / 1240Photo scanner
product EPSON 640U 0x010c Perfection 640U scanner
product EPSON 1250 0x010f Perfection 1250U / 1250Photo scanner
product EPSON 1650 0x0110 Perfection 1650 scanner
product EPSON GT9700F 0x0112 GT-9700F scanner
product EPSON GT9300UF 0x011b GT-9300UF scanner
product EPSON 3200 0x011c Perfection 3200 scanner
product EPSON 1260 0x011d Perfection 1260 scanner
product EPSON 1660 0x011e Perfection 1660 scanner
product EPSON 1670 0x011f Perfection 1670 scanner
product EPSON 1270 0x0120 Perfection 1270 scanner
product EPSON 2480 0x0121 Perfection 2480 scanner
product EPSON 3590 0x0122 Perfection 3590 scanner
product EPSON 4990 0x012a Perfection 4990 Photo scanner
product EPSON CRESSI_EDY 0x0521 Cressi Edy diving computer
product EPSON N2ITION3 0x0522 Zeagle N2iTion3 diving computer
product EPSON STYLUS_875DC 0x0601 Stylus Photo 875DC Card Reader
product EPSON STYLUS_895 0x0602 Stylus Photo 895 Card Reader
product EPSON CX5400 0x0808 CX5400 scanner
product EPSON 3500 0x080e CX-3500/3600/3650 MFP
product EPSON RX425 0x080f Stylus Photo RX425 scanner
product EPSON DX3800 0x0818 CX3700/CX3800/DX38x0 MFP scanner
product EPSON 4800 0x0819 CX4700/CX4800/DX48x0 MFP scanner
product EPSON 4200 0x0820 CX4100/CX4200/DX4200 MFP scanner
product EPSON 5000 0x082b CX4900/CX5000/DX50x0 MFP scanner
product EPSON 6000 0x082e CX5900/CX6000/DX60x0 MFP scanner
product EPSON DX4000 0x082f DX4000 MFP scanner
product EPSON DX7400 0x0838 CX7300/CX7400/DX7400 MFP scanner
product EPSON DX8400 0x0839 CX8300/CX8400/DX8400 MFP scanner
product EPSON SX100 0x0841 SX100/NX100 MFP scanner
product EPSON NX300 0x0848 NX300 MFP scanner
product EPSON SX200 0x0849 SX200/SX205 MFP scanner
product EPSON SX400 0x084a SX400/NX400/TX400 MFP scanner

/* e-TEK Labs products */
product ETEK 1COM 0x8007 Serial

/* Evolution products */
product EVOLUTION ER1 0x0300 FTDI compatible adapter
product EVOLUTION HYBRID 0x0302 FTDI compatible adapter
product EVOLUTION RCM4 0x0303 FTDI compatible adapter

/* Extended Systems products */
product EXTENDED XTNDACCESS 0x0100 XTNDAccess IrDA

/* Falcom products */
product FALCOM TWIST 0x0001 USB GSM/GPRS Modem
product FALCOM SAMBA 0x0005 FTDI compatible adapter

/* FEIYA products */
product FEIYA DUMMY 0x0000 Dummy product
product FEIYA 5IN1 0x1132 5-in-1 Card Reader
product FEIYA ELANGO 0x6200 MicroSDHC Card Reader
product FEIYA AC110 0x6300 AC-110 Card Reader

/* FeiXun Communication products */
product FEIXUN RTL8188CU 0x0090 RTL8188CU
product FEIXUN RTL8192CU 0x0091 RTL8192CU

/* Festo */
product FESTO CPX_USB 0x0102 CPX-USB
product FESTO CMSP 0x0501 CMSP

/* Fiberline */
product FIBERLINE WL430U 0x6003 WL-430U

/* FIC / OpenMoko */
product FIC NEO1973_DEBUG 0x5118 FTDI compatible adapter

/* Fossil, Inc products */
product FOSSIL WRISTPDA 0x0002 Wrist PDA

/* Foxconn products */
product FOXCONN TCOM_TC_300 0xe000 T-Com TC 300
product FOXCONN PIRELLI_DP_L10 0xe003 Pirelli DP-L10

/* Freecom products */
product FREECOM DVD 0xfc01 DVD drive
product FREECOM HDD 0xfc05 Classic SL Hard Drive

/* Fujitsu Siemens Computers products */
product FSC E5400 0x1009 PrismGT USB 2.0 WLAN

/* Future Technology Devices products */
product FTDI SCX8_USB_PHOENIX 0x5259 SCx8 USB Phoenix interface
product FTDI SERIAL_8U100AX 0x8372 8U100AX Serial
product FTDI SERIAL_8U232AM 0x6001 8U232AM Serial
product FTDI SERIAL_8U232AM4 0x6004 8U232AM Serial
product FTDI SERIAL_232RL 0x6006 FT232RL Serial
product FTDI SERIAL_2232C 0x6010 FT2232C Dual port Serial
product FTDI 232H 0x6014 FTDI compatible adapter
product FTDI 232EX 0x6015 FTDI compatible adapter
product FTDI SERIAL_2232D 0x9e90 FT2232D Dual port Serial
product FTDI SERIAL_4232H 0x6011 FT4232H Quad port Serial
product FTDI XDS100V2 0xa6d0 TI XDS100V1/V2 and early Beaglebones
product FTDI XDS100V3 0xa6d1 TI XDS100V3
product FTDI KTLINK 0xbbe2 KT-LINK Embedded Hackers Multitool
product FTDI TURTELIZER2 0xbdc8 egnite Turtelizer 2 JTAG/RS232 Adapter
/* Gude Analog- und Digitalsysteme products also use FTDI's id: */
product FTDI TACTRIX_OPENPORT_13M 0xcc48 OpenPort 1.3 Mitsubishi
product FTDI TACTRIX_OPENPORT_13S 0xcc49 OpenPort 1.3 Subaru
product FTDI TACTRIX_OPENPORT_13U 0xcc4a OpenPort 1.3 Universal
product FTDI GAMMASCOUT 0xd678 Gamma-Scout
product FTDI KBS 0xe6c8 Pyramid KBS USB LCD
product FTDI EISCOU 0xe888 Expert ISDN Control USB
product FTDI UOPTBR 0xe889 USB-RS232 OptoBridge
product FTDI EMCU2D 0xe88a Expert mouseCLOCK USB II
product FTDI PCMSFU 0xe88b Precision Clock MSF USB
product FTDI EMCU2H 0xe88c Expert mouseCLOCK USB II HBG
product FTDI MAXSTREAM 0xee18 Maxstream PKG-U
product FTDI USB_UIRT 0xf850 USB-UIRT
product FTDI USBSERIAL 0xfa00 Matrix Orbital USB Serial
product FTDI MX2_3 0xfa01 Matrix Orbital MX2 or MX3
product FTDI MX4_5 0xfa02 Matrix Orbital MX4 or MX5
product FTDI LK202 0xfa03 Matrix Orbital VK/LK202 Family
product FTDI LK204 0xfa04 Matrix Orbital VK/LK204 Family
product FTDI CFA_632 0xfc08 Crystalfontz CFA-632 USB LCD
product FTDI CFA_634 0xfc09 Crystalfontz CFA-634 USB LCD
product FTDI CFA_633 0xfc0b Crystalfontz CFA-633 USB LCD
product FTDI CFA_631 0xfc0c Crystalfontz CFA-631 USB LCD
product FTDI CFA_635 0xfc0d Crystalfontz CFA-635 USB LCD
product FTDI SEMC_DSS20 0xfc82 SEMC DSS-20 SyncStation
/* Commerzielle und Technische Informationssysteme GmbH products */
product FTDI CTI_USB_NANO_485 0xf60b CTI USB-Nano 485
product FTDI CTI_USB_MINI_485 0xf608 CTI USB-Mini 485
/* Other products */
product FTDI 232RL 0xfbfa FTDI compatible adapter
product FTDI 4N_GALAXY_DE_1 0xf3c0 FTDI compatible adapter
product FTDI 4N_GALAXY_DE_2 0xf3c1 FTDI compatible adapter
product FTDI 4N_GALAXY_DE_3 0xf3c2 FTDI compatible adapter
product FTDI 8U232AM_ALT 0x6006 FTDI compatible adapter
product FTDI ACCESSO 0xfad0 FTDI compatible adapter
product FTDI ACG_HFDUAL 0xdd20 FTDI compatible adapter
product FTDI ACTIVE_ROBOTS 0xe548 FTDI compatible adapter
product FTDI ACTZWAVE 0xf2d0 FTDI compatible adapter
product FTDI AMC232 0xff00 FTDI compatible adapter
product FTDI ARTEMIS 0xdf28 FTDI compatible adapter
product FTDI ASK_RDR400 0xc991 FTDI compatible adapter
product FTDI ATIK_ATK16 0xdf30 FTDI compatible adapter
product FTDI ATIK_ATK16C 0xdf32 FTDI compatible adapter
product FTDI ATIK_ATK16HR 0xdf31 FTDI compatible adapter
product FTDI ATIK_ATK16HRC 0xdf33 FTDI compatible adapter
product FTDI ATIK_ATK16IC 0xdf35 FTDI compatible adapter
product FTDI BCS_SE923 0xfb99 FTDI compatible adapter
product FTDI CANDAPTER 0x9f80 FTDI compatible adapter
product FTDI CANUSB 0xffa8 FTDI compatible adapter
product FTDI CCSICDU20_0 0xf9d0 FTDI compatible adapter
product FTDI CCSICDU40_1 0xf9d1 FTDI compatible adapter
product FTDI CCSICDU64_4 0xf9d4 FTDI compatible adapter
product FTDI CCSLOAD_N_GO_3 0xf9d3 FTDI compatible adapter
product FTDI CCSMACHX_2 0xf9d2 FTDI compatible adapter
product FTDI CCSPRIME8_5 0xf9d5 FTDI compatible adapter
product FTDI CHAMSYS_24_MASTER_WING 0xdaf8 FTDI compatible adapter
product FTDI CHAMSYS_MAXI_WING 0xdafd FTDI compatible adapter
product FTDI CHAMSYS_MEDIA_WING 0xdafe FTDI compatible adapter
product FTDI CHAMSYS_MIDI_TIMECODE 0xdafb FTDI compatible adapter
product FTDI CHAMSYS_MINI_WING 0xdafc FTDI compatible adapter
product FTDI CHAMSYS_PC_WING 0xdaf9 FTDI compatible adapter
product FTDI CHAMSYS_USB_DMX 0xdafa FTDI compatible adapter
product FTDI CHAMSYS_WING 0xdaff FTDI compatible adapter
product FTDI COM4SM 0xd578 FTDI compatible adapter
product FTDI CONVERTER_0 0xd388 FTDI compatible adapter
product FTDI CONVERTER_1 0xd389 FTDI compatible adapter
product FTDI CONVERTER_2 0xd38a FTDI compatible adapter
product FTDI CONVERTER_3 0xd38b FTDI compatible adapter
product FTDI CONVERTER_4 0xd38c FTDI compatible adapter
product FTDI CONVERTER_5 0xd38d FTDI compatible adapter
product FTDI CONVERTER_6 0xd38e FTDI compatible adapter
product FTDI CONVERTER_7 0xd38f FTDI compatible adapter
product FTDI DMX4ALL 0xc850 FTDI compatible adapter
product FTDI DOMINTELL_DGQG 0xef50 FTDI compatible adapter
product FTDI DOMINTELL_DUSB 0xef51 FTDI compatible adapter
product FTDI DOTEC 0x9868 FTDI compatible adapter
product FTDI ECLO_COM_1WIRE 0xea90 FTDI compatible adapter
product FTDI ECO_PRO_CDS 0xe520 FTDI compatible adapter
product FTDI ELSTER_UNICOM 0xe700 FTDI compatible adapter
product FTDI ELV_ALC8500 0xf06e FTDI compatible adapter
product FTDI ELV_CLI7000 0xfb59 FTDI compatible adapter
product FTDI ELV_CSI8 0xe0f0 FTDI compatible adapter
product FTDI ELV_EC3000 0xe006 FTDI compatible adapter
product FTDI ELV_EM1000DL 0xe0f1 FTDI compatible adapter
product FTDI ELV_EM1010PC 0xe0ef FTDI compatible adapter
product FTDI ELV_FEM 0xe00a FTDI compatible adapter
product FTDI ELV_FHZ1000PC 0xf06f FTDI compatible adapter
product FTDI ELV_FHZ1300PC 0xe0e8 FTDI compatible adapter
product FTDI ELV_FM3RX 0xe0ed FTDI compatible adapter
product FTDI ELV_FS20SIG 0xe0f4 FTDI compatible adapter
product FTDI ELV_HS485 0xe0ea FTDI compatible adapter
product FTDI ELV_KL100 0xe002 FTDI compatible adapter
product FTDI ELV_MSM1 0xe001 FTDI compatible adapter
product FTDI ELV_PCD200 0xf06c FTDI compatible adapter
product FTDI ELV_PCK100 0xe0f2 FTDI compatible adapter
product FTDI ELV_PPS7330 0xfb5c FTDI compatible adapter
product FTDI ELV_RFP500 0xe0f3 FTDI compatible adapter
product FTDI ELV_T1100 0xf06b FTDI compatible adapter
product FTDI ELV_TFD128 0xe0ec FTDI compatible adapter
product FTDI ELV_TFM100 0xfb5d FTDI compatible adapter
product FTDI ELV_TWS550 0xe009 FTDI compatible adapter
product FTDI ELV_UAD8 0xf068 FTDI compatible adapter
product FTDI ELV_UDA7 0xf069 FTDI compatible adapter
product FTDI ELV_UDF77 0xfb5e FTDI compatible adapter
product FTDI ELV_UIO88 0xfb5f FTDI compatible adapter
product FTDI ELV_ULA200 0xf06d FTDI compatible adapter
product FTDI ELV_UM100 0xfb5a FTDI compatible adapter
product FTDI ELV_UMS100 0xe0eb FTDI compatible adapter
product FTDI ELV_UO100 0xfb5b FTDI compatible adapter
product FTDI ELV_UR100 0xfb58 FTDI compatible adapter
product FTDI ELV_USI2 0xf06a FTDI compatible adapter
product FTDI ELV_USR 0xe000 FTDI compatible adapter
product FTDI ELV_UTP8 0xe0f5 FTDI compatible adapter
product FTDI ELV_WS300PC 0xe0f6 FTDI compatible adapter
product FTDI ELV_WS444PC 0xe0f7 FTDI compatible adapter
product FTDI ELV_WS500 0xe0e9 FTDI compatible adapter
product FTDI ELV_WS550 0xe004 FTDI compatible adapter
product FTDI ELV_WS777 0xe0ee FTDI compatible adapter
product FTDI ELV_WS888 0xe008 FTDI compatible adapter
product FTDI FUTURE_0 0xf44a FTDI compatible adapter
product FTDI FUTURE_1 0xf44b FTDI compatible adapter
product FTDI FUTURE_2 0xf44c FTDI compatible adapter
product FTDI GENERIC 0x9378 FTDI compatible adapter
product FTDI GUDEADS_E808 0xe808 FTDI compatible adapter
product FTDI GUDEADS_E809 0xe809 FTDI compatible adapter
product FTDI GUDEADS_E80A 0xe80a FTDI compatible adapter
product FTDI GUDEADS_E80B 0xe80b FTDI compatible adapter
product FTDI GUDEADS_E80C 0xe80c FTDI compatible adapter
product FTDI GUDEADS_E80D 0xe80d FTDI compatible adapter
product FTDI GUDEADS_E80E 0xe80e FTDI compatible adapter
product FTDI GUDEADS_E80F 0xe80f FTDI compatible adapter
product FTDI GUDEADS_E88D 0xe88d FTDI compatible adapter
product FTDI GUDEADS_E88E 0xe88e FTDI compatible adapter
product FTDI GUDEADS_E88F 0xe88f FTDI compatible adapter
product FTDI HD_RADIO 0x937c FTDI compatible adapter
product FTDI HO720 0xed72 FTDI compatible adapter
product FTDI HO730 0xed73 FTDI compatible adapter
product FTDI HO820 0xed74 FTDI compatible adapter
product FTDI HO870 0xed71 FTDI compatible adapter
product FTDI IBS_APP70 0xff3d FTDI compatible adapter
product FTDI IBS_PCMCIA 0xff3a FTDI compatible adapter
product FTDI IBS_PEDO 0xff3e FTDI compatible adapter
product FTDI IBS_PICPRO 0xff39 FTDI compatible adapter
product FTDI IBS_PK1 0xff3b FTDI compatible adapter
product FTDI IBS_PROD 0xff3f FTDI compatible adapter
product FTDI IBS_RS232MON 0xff3c FTDI compatible adapter
product FTDI IBS_US485 0xff38 FTDI compatible adapter
product FTDI IPLUS 0xd070 FTDI compatible adapter
product FTDI IPLUS2 0xd071 FTDI compatible adapter
product FTDI IRTRANS 0xfc60 FTDI compatible adapter
product FTDI LENZ_LIUSB 0xd780 FTDI compatible adapter
product FTDI LM3S_DEVEL_BOARD 0xbcd8 FTDI compatible adapter
product FTDI LM3S_EVAL_BOARD 0xbcd9 FTDI compatible adapter
product FTDI LM3S_ICDI_B_BOARD 0xbcda FTDI compatible adapter
product FTDI MASTERDEVEL2 0xf449 FTDI compatible adapter
product FTDI MHAM_DB9 0xeeed FTDI compatible adapter
product FTDI MHAM_IC 0xeeec FTDI compatible adapter
product FTDI MHAM_KW 0xeee8 FTDI compatible adapter
product FTDI MHAM_RS232 0xeeee FTDI compatible adapter
product FTDI MHAM_Y6 0xeeea FTDI compatible adapter
product FTDI MHAM_Y8 0xeeeb FTDI compatible adapter
product FTDI MHAM_Y9 0xeeef FTDI compatible adapter
product FTDI MHAM_YS 0xeee9 FTDI compatible adapter
product FTDI MICRO_CHAMELEON 0xcaa0 FTDI compatible adapter
product FTDI MTXORB_5 0xfa05 FTDI compatible adapter
product FTDI MTXORB_6 0xfa06 FTDI compatible adapter
product FTDI NXTCAM 0xabb8 FTDI compatible adapter
product FTDI OCEANIC 0xf460 FTDI compatible adapter
product FTDI OOCDLINK 0xbaf8 FTDI compatible adapter
product FTDI OPENDCC 0xbfd8 FTDI compatible adapter
product FTDI OPENDCC_GATEWAY 0xbfdb FTDI compatible adapter
product FTDI OPENDCC_GBM 0xbfdc FTDI compatible adapter
product FTDI OPENDCC_SNIFFER 0xbfd9 FTDI compatible adapter
product FTDI OPENDCC_THROTTLE 0xbfda FTDI compatible adapter
product FTDI PCDJ_DAC2 0xfa88 FTDI compatible adapter
product FTDI PERLE_ULTRAPORT 0xf0c0 FTDI compatible adapter
product FTDI PHI_FISCO 0xe40b FTDI compatible adapter
product FTDI PIEGROUP 0xf208 FTDI compatible adapter
product FTDI PROPOX_JTAGCABLEII 0xd738 FTDI compatible adapter
product FTDI R2000KU_TRUE_RNG 0xfb80 FTDI compatible adapter
product FTDI R2X0 0xfc71 FTDI compatible adapter
product FTDI RELAIS 0xfa10 FTDI compatible adapter
product FTDI REU_TINY 0xed22 FTDI compatible adapter
product FTDI RMP200 0xe729 FTDI compatible adapter
product FTDI RM_CANVIEW 0xfd60 FTDI compatible adapter
product FTDI RRCIRKITS_LOCOBUFFER 0xc7d0 FTDI compatible adapter
product FTDI SCIENCESCOPE_HS_LOGBOOK 0xff1d FTDI compatible adapter
product FTDI SCIENCESCOPE_LOGBOOKML 0xff18 FTDI compatible adapter
product FTDI SCIENCESCOPE_LS_LOGBOOK 0xff1c FTDI compatible adapter
product FTDI SCS_DEVICE_0 0xd010 FTDI compatible adapter
product FTDI SCS_DEVICE_1 0xd011 FTDI compatible adapter
product FTDI SCS_DEVICE_2 0xd012 FTDI compatible adapter
product FTDI SCS_DEVICE_3 0xd013 FTDI compatible adapter
product FTDI SCS_DEVICE_4 0xd014 FTDI compatible adapter
product FTDI SCS_DEVICE_5 0xd015 FTDI compatible adapter
product FTDI SCS_DEVICE_6 0xd016 FTDI compatible adapter
product FTDI SCS_DEVICE_7 0xd017 FTDI compatible adapter
product FTDI SDMUSBQSS 0xf448 FTDI compatible adapter
product FTDI SIGNALYZER_SH2 0xbca2 FTDI compatible adapter
product FTDI SIGNALYZER_SH4 0xbca4 FTDI compatible adapter
product FTDI SIGNALYZER_SLITE 0xbca1 FTDI compatible adapter
product FTDI SIGNALYZER_ST 0xbca0 FTDI compatible adapter
product FTDI SPECIAL_1 0xfc70 FTDI compatible adapter
product FTDI SPECIAL_3 0xfc72 FTDI compatible adapter
product FTDI SPECIAL_4 0xfc73 FTDI compatible adapter
product FTDI SPROG_II 0xf0c8 FTDI compatible adapter
product FTDI SR_RADIO 0x9379 FTDI compatible adapter
product FTDI SUUNTO_SPORTS 0xf680 FTDI compatible adapter
product FTDI TAVIR_STK500 0xfa33 FTDI compatible adapter
product FTDI TERATRONIK_D2XX 0xec89 FTDI compatible adapter
product FTDI TERATRONIK_VCP 0xec88 FTDI compatible adapter
product FTDI THORLABS 0xfaf0 FTDI compatible adapter
product FTDI TIAO 0x8a98 FTDI compatible adapter
product FTDI TNC_X 0xebe0 FTDI compatible adapter
product FTDI TTUSB 0xff20 FTDI compatible adapter
product FTDI USBX_707 0xf857 FTDI compatible adapter
product FTDI USINT_CAT 0xb810 FTDI compatible adapter
product FTDI USINT_RS232 0xb812 FTDI compatible adapter
product FTDI USINT_WKEY 0xb811 FTDI compatible adapter
product FTDI VARDAAN 0xf070 FTDI compatible adapter
product FTDI VNHCPCUSB_D 0xfe38 FTDI compatible adapter
product FTDI WESTREX_MODEL_777 0xdc00 FTDI compatible adapter
product FTDI WESTREX_MODEL_8900F 0xdc01 FTDI compatible adapter
product FTDI XF_547 0xfc0a FTDI compatible adapter
product FTDI XF_640 0xfc0e FTDI compatible adapter
product FTDI XF_642 0xfc0f FTDI compatible adapter
product FTDI XM_RADIO 0x937a FTDI compatible adapter
product FTDI YEI_SERVOCENTER31 0xe050 FTDI compatible adapter

/* Fuji Photo products */
product FUJIPHOTO MASS0100 0x0100 Mass Storage

/* Fujitsu products */
product FUJITSU AH_F401U 0x105b AH-F401U Air H device

/* Fujitsu-Siemens products */
product FUJITSUSIEMENS SCR 0x0009 Fujitsu-Siemens SCR USB Reader

/* Garmin products */
product GARMIN FORERUNNER230 0x086d ForeRunner 230
product GARMIN IQUE_3600 0x0004 iQue 3600

/* Gemalto products */
product GEMALTO PROXPU 0x5501 Prox-PU/CU RFID Card Reader

/* General Instruments (Motorola) products */
product GENERALINSTMNTS SB5100 0x5100 SURFboard SB5100 Cable modem

/* Genesys Logic products */
product GENESYS GL620USB 0x0501 GL620USB Host-Host interface
product GENESYS GL650 0x0604 GL650 HUB
product GENESYS GL606 0x0606 GL606 USB 2.0 HUB
product GENESYS GL850G 0x0608 GL850G USB 2.0 HUB
product GENESYS GL3520_2 0x0610 GL3520 4-Port USB 2.0 DataPath
product GENESYS GL3520_SS 0x0616 GL3520 4-Port USB 3.0 DataPath
product GENESYS GL641USB 0x0700 GL641USB CompactFlash Card Reader
product GENESYS GL641USB2IDE_2 0x0701 GL641USB USB-IDE Bridge No 2
product GENESYS GL641USB2IDE 0x0702 GL641USB USB-IDE Bridge
product GENESYS GL3233 0x0743 GL3233 USB 3.0 AiO Card Reader
product GENESYS GL641USB_2 0x0760 GL641USB 6-in-1 Card Reader

/* GIGABYTE products */
product GIGABYTE GN54G 0x8001 GN-54G
product GIGABYTE GNBR402W 0x8002 GN-BR402W
product GIGABYTE GNWLBM101 0x8003 GN-WLBM101
product GIGABYTE GNWBKG 0x8007 GN-WBKG
product GIGABYTE GNWB01GS 0x8008 GN-WB01GS
product GIGABYTE GNWI05GS 0x800a GN-WI05GS

/* Gigaset products */
product GIGASET WLAN 0x0701 WLAN
product GIGASET SMCWUSBTG 0x0710 SMCWUSBT-G
product GIGASET SMCWUSBTG_NF 0x0711 SMCWUSBT-G (no firmware)
product GIGASET AR5523 0x0712 AR5523
product GIGASET AR5523_NF 0x0713 AR5523 (no firmware)
product GIGASET RT2573 0x0722 RT2573
product GIGASET RT3070_1 0x0740 RT3070
product GIGASET RT3070_2 0x0744 RT3070
product GIGABYTE RT2870_1 0x800b RT2870
product GIGABYTE GNWB31N 0x800c GN-WB31N
product GIGABYTE GNWB32L 0x800d GN-WB32L

/* Global Sun Technology products */
product GLOBALSUN AR5523_1 0x7801 AR5523
product GLOBALSUN AR5523_1_NF 0x7802 AR5523 (no firmware)
product GLOBALSUN AR5523_2 0x7811 AR5523
product GLOBALSUN AR5523_2_NF 0x7812 AR5523 (no firmware)

/* Globespan products */
product GLOBESPAN MODEM_1 0x1329 USB Modem
product GLOBESPAN PRISM_GT_1 0x2000 PrismGT USB 2.0 WLAN
product GLOBESPAN PRISM_GT_2 0x2002 PrismGT USB 2.0 WLAN

/* G.Mate, Inc products */
product GMATE YP3X00 0x1001 YP3X00 PDA

/* GN Otometrics */
product GNOTOMETRICS USB 0x0010 FTDI compatible adapter

/* GoHubs products */
product GOHUBS GOCOM232 0x1001 GoCOM232 Serial

/* Good Way Technology products */
product GOODWAY GWUSB2E 0x6200 GWUSB2E
product GOODWAY RT2573 0xc019 RT2573

/* Google products */
product GOOGLE NEXUSONE 0x4e11 Nexus One

/* Gravis products */
product GRAVIS GAMEPADPRO 0x4001 GamePad Pro

/* GREENHOUSE products */
product GREENHOUSE KANA21 0x0001 CF-writer with MP3

/* Griffin Technology */
product GRIFFIN IMATE 0x0405 iMate, ADB Adapter

/* Guillemot Corporation */
product GUILLEMOT DALEADER 0xa300 DA Leader
product GUILLEMOT HWGUSB254 0xe000 HWGUSB2-54 WLAN
product GUILLEMOT HWGUSB254LB 0xe010 HWGUSB2-54-LB
product GUILLEMOT HWGUSB254V2AP 0xe020 HWGUSB2-54V2-AP
product GUILLEMOT HWNU300 0xe030 HWNU-300
product GUILLEMOT HWNUM300 0xe031 HWNUm-300
product GUILLEMOT HWGUN54 0xe032 HWGUn-54
product GUILLEMOT HWNUP150 0xe033 HWNUP-150

/* Hagiwara products */
product HAGIWARA FGSM 0x0002 FlashGate SmartMedia Card Reader
product HAGIWARA FGCF 0x0003 FlashGate CompactFlash Card Reader
product HAGIWARA FG 0x0005 FlashGate

/* HAL Corporation products */
product HAL IMR001 0x0011 Crossam2+USB IR commander

/* Handspring, Inc. */
product HANDSPRING VISOR 0x0100 Handspring Visor
product HANDSPRING TREO 0x0200 Handspring Treo
product HANDSPRING TREO600 0x0300 Handspring Treo 600

/* Hauppauge Computer Works */
product HAUPPAUGE WINTV_USB_FM 0x4d12 WinTV USB FM
product HAUPPAUGE2 NOVAT500 0x9580 NovaT 500Stick

/* Hawking Technologies products */
product HAWKING RT2870_1 0x0001 RT2870
product HAWKING RT2870_2 0x0003 RT2870
product HAWKING HWUN2 0x0009 HWUN2
product HAWKING RT3070 0x000b RT3070
product HAWKING RTL8192CU 0x0019 RTL8192CU
product HAWKING UF100 0x400c 10/100 USB Ethernet
product HAWKING RTL8192SU_1 0x0015 RTL8192SU
product HAWKING RTL8192SU_2 0x0016 RTL8192SU
product HAWKING HD65U 0x0023 HD65U

/* HID Global GmbH products */
product HIDGLOBAL CM2020 0x0596 Omnikey Cardman 2020
product HIDGLOBAL CM6020 0x1784 Omnikey Cardman 6020

/* Hitachi, Ltd. products */
product HITACHI DVDCAM_DZ_MV100A 0x0004 DVD-CAM DZ-MV100A Camcorder
product HITACHI DVDCAM_USB 0x001e DVDCAM USB HS Interface

/* Holtek products */
product HOLTEK F85 0xa030 Holtek USB gaming keyboard

/* Honeywell */
product HONEYWELL HGI80 0x0102 Honeywell HGI80 Wireless USB Gateway

/* HP products */
product HP 895C 0x0004 DeskJet 895C
product HP 4100C 0x0101 Scanjet 4100C
product HP S20 0x0102 Photosmart S20
product HP 880C 0x0104 DeskJet 880C
product HP 4200C 0x0105 ScanJet 4200C
product HP CDWRITERPLUS 0x0107 CD-Writer Plus
product HP KBDHUB 0x010c Multimedia Keyboard Hub
product HP G55XI 0x0111 OfficeJet G55xi
product HP HN210W 0x011c HN210W 802.11b WLAN
product HP 49GPLUS 0x0121 49g+ graphing calculator
product HP 6200C 0x0201 ScanJet 6200C
product HP S20b 0x0202 PhotoSmart S20
product HP 815C 0x0204 DeskJet 815C
product HP 3300C 0x0205 ScanJet 3300C
product HP CDW8200 0x0207 CD-Writer Plus 8200e
product HP MMKEYB 0x020c Multimedia keyboard
product HP 1220C 0x0212 DeskJet 1220C
product HP UN2420_QDL 0x241d UN2420 QDL Firmware Loader
product HP UN2420 0x251d UN2420 WWAN/GPS Module
product HP 810C 0x0304 DeskJet 810C/812C
product HP 4300C 0x0305 Scanjet 4300C
product HP CDW4E 0x0307 CD-Writer+ CD-4e
product HP G85XI 0x0311 OfficeJet G85xi
product HP 1200 0x0317 LaserJet 1200
product HP 5200C 0x0401 Scanjet 5200C
product HP 830C 0x0404 DeskJet 830C
product HP 3400CSE 0x0405 ScanJet 3400cse
product HP 6300C 0x0601 Scanjet 6300C
product HP 840C 0x0604 DeskJet 840c
product HP 2200C 0x0605 ScanJet 2200C
product HP 5300C 0x0701 Scanjet 5300C
product HP 4400C 0x0705 Scanjet 4400C
product HP 4470C 0x0805 Scanjet 4470C
product HP 82x0C 0x0b01 Scanjet 82x0C
product HP 2300D 0x0b17 Laserjet 2300d
product HP 970CSE 0x1004 Deskjet 970Cse
product HP 5400C 0x1005 Scanjet 5400C
product HP 2215 0x1016 iPAQ 22xx/Jornada 548
product HP 568J 0x1116 Jornada 568
product HP 930C 0x1204 DeskJet 930c
product HP3 RTL8188CU 0x1629 RTL8188CU
product HP P2000U 0x1801 Inkjet P-2000U
product HP HS2300 0x1e1d HS2300 HSDPA (aka MC8775)
product HP 640C 0x2004 DeskJet 640c
product HP 4670V 0x3005 ScanJet 4670v
product HP P1100 0x3102 Photosmart P1100
product HP LD220 0x3524 LD220 POS Display
product HP OJ4215 0x3d11 OfficeJet 4215
product HP HN210E 0x811c Ethernet HN210E
product HP2 C500 0x6002 PhotoSmart C500
product HP EV2200 0x1b1d ev2200 HSDPA (aka MC5720)

/* HTC products */
product HTC WINMOBILE 0x00ce HTC USB Sync
product HTC PPC6700MODEM 0x00cf PPC6700 Modem
product HTC SMARTPHONE 0x0a51 SmartPhone USB Sync
product HTC WIZARD 0x0bce HTC Wizard USB Sync
product HTC LEGENDSYNC 0x0c97 HTC Legend USB Sync
product HTC LEGEND 0x0ff9 HTC Legend
product HTC LEGENDINTERNET 0x0ffe HTC Legend Internet Sharing

/* HUAWEI products */
product HUAWEI MOBILE 0x1001 Huawei Mobile
product HUAWEI E220 0x1003 HSDPA modem
product HUAWEI E220BIS 0x1004 HSDPA modem
product HUAWEI E1401 0x1401 3G modem
product HUAWEI E1402 0x1402 3G modem
product HUAWEI E1403 0x1403 3G modem
product HUAWEI E1404 0x1404 3G modem
product HUAWEI E1405 0x1405 3G modem
product HUAWEI E1406 0x1406 3G modem
product HUAWEI E1407 0x1407 3G modem
product HUAWEI E1408 0x1408 3G modem
product HUAWEI E1409 0x1409 3G modem
product HUAWEI E140A 0x140a 3G modem
product HUAWEI E140B 0x140b 3G modem
product HUAWEI E180V 0x140c E180V
product HUAWEI E140D 0x140d 3G modem
product HUAWEI E140E 0x140e 3G modem
product HUAWEI E140F 0x140f 3G modem
product HUAWEI E1410 0x1410 3G modem
product HUAWEI E1411 0x1411 3G modem
product HUAWEI E1412 0x1412 3G modem
product HUAWEI E1413 0x1413 3G modem
product HUAWEI E1414 0x1414 3G modem
product HUAWEI E1415 0x1415 3G modem
product HUAWEI E1416 0x1416 3G modem
product HUAWEI E1417 0x1417 3G modem
product HUAWEI E1418 0x1418 3G modem
product HUAWEI E1419 0x1419 3G modem
product HUAWEI E141A 0x141a 3G modem
product HUAWEI E141B 0x141b 3G modem
product HUAWEI E141C 0x141c 3G modem
product HUAWEI E141D 0x141d 3G modem
product HUAWEI E141E 0x141e 3G modem
product HUAWEI E141F 0x141f 3G modem
product HUAWEI E1420 0x1420 3G modem
product HUAWEI E1421 0x1421 3G modem
product HUAWEI E1422 0x1422 3G modem
product HUAWEI E1423 0x1423 3G modem
product HUAWEI E1424 0x1424 3G modem
product HUAWEI E1425 0x1425 3G modem
product HUAWEI E1426 0x1426 3G modem
product HUAWEI E1427 0x1427 3G modem
product HUAWEI E1428 0x1428 3G modem
product HUAWEI E1429 0x1429 3G modem
product HUAWEI E142A 0x142a 3G modem
product HUAWEI E142B 0x142b 3G modem
product HUAWEI E142C 0x142c 3G modem
product HUAWEI E142D 0x142d 3G modem
product HUAWEI E142E 0x142e 3G modem
product HUAWEI E142F 0x142f 3G modem
product HUAWEI E1430 0x1430 3G modem
product HUAWEI E1431 0x1431 3G modem
product HUAWEI E1432 0x1432 3G modem
product HUAWEI E1433 0x1433 3G modem
product HUAWEI E1434 0x1434 3G modem
product HUAWEI E1435 0x1435 3G modem
product HUAWEI E1436 0x1436 3G modem
product HUAWEI E1437 0x1437 3G modem
product HUAWEI E1438 0x1438 3G modem
product HUAWEI E1439 0x1439 3G modem
product HUAWEI E143A 0x143a 3G modem
product HUAWEI E143B 0x143b 3G modem
product HUAWEI E143C 0x143c 3G modem
product HUAWEI E143D 0x143d 3G modem
product HUAWEI E143E 0x143e 3G modem
product HUAWEI E143F 0x143f 3G modem
product HUAWEI E1752 0x1446 3G modem
product HUAWEI K4505 0x1464 3G modem
product HUAWEI K3765 0x1465 3G modem
product HUAWEI E1820 0x14ac E1820 HSPA+ USB Slider
product HUAWEI K3771_INIT 0x14c4 K3771 Initial
product HUAWEI K3770 0x14c9 3G modem
product HUAWEI K3771 0x14ca K3771
product HUAWEI K3772 0x14cf K3772
product HUAWEI K3770_INIT 0x14d1 K3770 Initial
product HUAWEI E3131_INIT 0x14fe 3G modem initial
product HUAWEI E392 0x1505 LTE modem
product HUAWEI E3131 0x1506 3G modem
product HUAWEI K3765_INIT 0x1520 K3765 Initial
product HUAWEI K4505_INIT 0x1521 K4505 Initial
product HUAWEI K3772_INIT 0x1526 K3772 Initial
product HUAWEI E3272_INIT 0x155b LTE modem initial
product HUAWEI ME909U 0x1573 LTE modem
product HUAWEI R215_INIT 0x1582 LTE modem initial
product HUAWEI R215 0x1588 LTE modem
product HUAWEI ME909S 0x15c1 LTE modem
product HUAWEI ETS2055 0x1803 CDMA modem
product HUAWEI E173 0x1c05 3G modem
product HUAWEI E173_INIT 0x1c0b 3G modem initial
product HUAWEI E3272 0x1c1e LTE modem
/* HUAWEI 3com products */
product HUAWEI3COM WUB320G 0x0009 Aolynk WUB320g

/* IBM Corporation */
product IBM USBCDROMDRIVE 0x4427 USB CD-ROM Drive
product IBM USB4543 0x4543 TI IBM USB 4543 Modem
product IBM USB454B 0x454b TI IBM USB 454B Modem
product IBM USB454C 0x454c TI IBM USB 454C Modem

/* Icom products */
product ICOM SP1 0x0004 FTDI compatible adapter
product ICOM OPC_U_UC 0x0018 FTDI compatible adapter
product ICOM RP2C1 0x0009 FTDI compatible adapter
product ICOM RP2C2 0x000a FTDI compatible adapter
product ICOM RP2D 0x000b FTDI compatible adapter
product ICOM RP2KVR 0x0013 FTDI compatible adapter
product ICOM RP2KVT 0x0012 FTDI compatible adapter
product ICOM RP2VR 0x000d FTDI compatible adapter
product ICOM RP2VT 0x000c FTDI compatible adapter
product ICOM RP4KVR 0x0011 FTDI compatible adapter
product ICOM RP4KVT 0x0010 FTDI compatible adapter

/* ID-tech products */
product IDTECH IDT1221U 0x0300 FTDI compatible adapter

/* Imagination Technologies products */
product IMAGINATION DBX1 0x2107 DBX1 DSP core

/* Initio Corporation products */
product INITIO DUMMY 0x0000 Dummy product
product INITIO INIC_1610P 0x1e40 USB to SATA Bridge

/* Inside Out Networks products */
product INSIDEOUT EDGEPORT4 0x0001 EdgePort/4 serial ports

/* In-System products */
product INSYSTEM F5U002 0x0002 Parallel printer
product INSYSTEM ATAPI 0x0031 ATAPI Adapter
product INSYSTEM ISD110 0x0200 IDE Adapter ISD110
product INSYSTEM ISD105 0x0202 IDE Adapter ISD105
product INSYSTEM USBCABLE 0x081a USB cable
product INSYSTEM STORAGE_V2 0x5701 USB Storage Adapter V2

/* Intenso products */
product INTENSO MEMORY_BOX 0x0701 External disk

/* Intel products */
product INTEL EASYPC_CAMERA 0x0110 Easy PC Camera
product INTEL TESTBOARD 0x9890 82930 test board
product INTEL2 IRMH 0x0020 Integrated Rate Matching Hub
product INTEL2 IRMH2 0x0024 Integrated Rate Matching Hub
product INTEL2 IRMH3 0x8000 Integrated Rate Matching Hub
product INTEL2 IRMH4 0x8008 Integrated Rate Matching Hub
product INTEL2 SFP 0x0aa7 Sandy Peak (3168) Bluetooth Module
product INTEL2 JFP 0x0aaa Jefferson Peak (9460/9560) Bluetooth Module
product INTEL2 THP 0x0025 Thunder Peak (9160/9260) Bluetooth Module
product INTEL2 HSP 0x0026 Harrison Peak (22560) Bluetooth Module

/* Interbiometric products */
product INTERBIOMETRICS IOBOARD 0x1002 FTDI compatible adapter
product INTERBIOMETRICS MINI_IOBOARD 0x1006 FTDI compatible adapter

/* Intersil products */
product INTERSIL PRISM_GT 0x1000 PrismGT USB 2.0 WLAN
product INTERSIL PRISM_2X 0x3642 Prism2.x or Atmel WLAN

/* Intrepid Control Systems products */
product INTREPIDCS VALUECAN 0x0601 ValueCAN CAN bus interface
product INTREPIDCS NEOVI 0x0701 NeoVI Blue vehicle bus interface

/* I/O DATA products */
product IODATA IU_CD2 0x0204 DVD Multi-plus unit iU-CD2
product IODATA DVR_UEH8 0x0206 DVD Multi-plus unit DVR-UEH8
product IODATA USBSSMRW 0x0314 USB-SSMRW SD-card
product IODATA USBSDRW 0x031e USB-SDRW SD-card
product IODATA USBETT 0x0901 USB ETT
product IODATA USBETTX 0x0904 USB ETTX
product IODATA USBETTXS 0x0913 USB ETTX
product IODATA USBWNB11A 0x0919 USB WN-B11
product IODATA USBWNB11 0x0922 USB Airport WN-B11
product IODATA ETGUS2 0x0930 ETG-US2
product IODATA WNGDNUS2 0x093f WN-GDN/US2
product IODATA RT3072_1 0x0944 RT3072
product IODATA RT3072_2 0x0945 RT3072
product IODATA RT3072_3 0x0947 RT3072
product IODATA RT3072_4 0x0948 RT3072
product IODATA WNAC867U 0x0952 WN-AC867U
product IODATA USBRSAQ 0x0a03 Serial USB-RSAQ1
product IODATA USBRSAQ5 0x0a0e Serial USB-RSAQ5

/* Iomega products */
product IOMEGA ZIP100	0x0001	Zip 100
product IOMEGA ZIP250	0x0030	Zip 250

/* Ionics products */
product IONICS PLUGCOMPUTER	0x0102	FTDI compatible adapter

/* Integrated System Solution Corp. products */
product ISSC ISSCBTA	0x1001	Bluetooth USB Adapter

/* iTegno products */
product ITEGNO WM1080A	0x1080	WM1080A GSM/GPRS modem
product ITEGNO WM2080A	0x2080	WM2080A CDMA modem

/* Ituner networks products */
product ITUNERNET USBLCD2X20	0x0002	USB-LCD 2x20
product ITUNERNET USBLCD4X20	0xc001	USB-LCD 4x20

/* Jablotron products */
product JABLOTRON PC60B	0x0001	PC-60B

/* Jaton products */
product JATON EDA	0x5704	Ethernet

/* Jeti products */
product JETI SPC1201	0x04b2	FTDI compatible adapter

/* JMicron products */
product JMICRON JMS567	0x0567	USB to SATA 6.0Gb/s bridge
product JMICRON JM20336	0x2336	USB to SATA Bridge
product JMICRON JM20337	0x2338	USB to ATA/ATAPI Bridge

/* JVC products */
product JVC GR_DX95	0x000a	GR-DX95
product JVC MP_PRX1	0x3008	MP-PRX1 Ethernet

/* JRC products */
product JRC AH_J3001V_J3002V	0x0001	AirH PHONE AH-J3001V/J3002V

/* Kamstrup products */
product KAMSTRUP OPTICALEYE	0x0001	Optical Eye/3-wire
product KAMSTRUP MBUS_250D	0x0005	M-Bus Master MultiPort 250D

/* Kawatsu products */
product KAWATSU MH4000P	0x0003	MiniHub 4000P

/* Keisokugiken Corp. products */
product KEISOKUGIKEN USBDAQ	0x0068	HKS-0200 USBDAQ

/* Kensington products */
product KENSINGTON ORBIT	0x1003	Orbit USB/PS2 trackball
product KENSINGTON TURBOBALL	0x1005	TurboBall

/* Keyspan products */
product KEYSPAN USA28_NF	0x0101	USA-28 serial Adapter (no firmware)
product KEYSPAN USA28X_NF	0x0102	USA-28X serial Adapter (no firmware)
product KEYSPAN USA19_NF	0x0103	USA-19 serial Adapter (no firmware)
product KEYSPAN USA18_NF	0x0104	USA-18 serial Adapter (no firmware)
product KEYSPAN USA18X_NF	0x0105	USA-18X serial Adapter (no firmware)
product KEYSPAN USA19W_NF	0x0106	USA-19W serial Adapter (no firmware)
product KEYSPAN USA19	0x0107	USA-19 serial Adapter
product KEYSPAN USA19W	0x0108	USA-19W serial Adapter
product KEYSPAN USA49W_NF	0x0109	USA-49W serial Adapter (no firmware)
product KEYSPAN USA49W	0x010a	USA-49W serial Adapter
product KEYSPAN USA19QI_NF	0x010b	USA-19QI serial Adapter (no firmware)
product KEYSPAN USA19QI	0x010c	USA-19QI serial Adapter
product KEYSPAN USA19Q_NF	0x010d	USA-19Q serial Adapter (no firmware)
product KEYSPAN USA19Q	0x010e	USA-19Q serial Adapter
product KEYSPAN USA28	0x010f	USA-28 serial Adapter
product KEYSPAN USA28XXB	0x0110	USA-28X/XB serial Adapter
product KEYSPAN USA18	0x0111	USA-18 serial Adapter
product KEYSPAN USA18X	0x0112	USA-18X serial Adapter
product KEYSPAN USA28XB_NF	0x0113	USA-28XB serial Adapter (no firmware)
product KEYSPAN USA28XA_NF	0x0114	USA-28XA serial Adapter (no firmware)
product KEYSPAN USA28XA	0x0115	USA-28XA serial Adapter
product KEYSPAN USA18XA_NF	0x0116	USA-18XA serial Adapter (no firmware)
product KEYSPAN USA18XA	0x0117	USA-18XA serial Adapter
product KEYSPAN USA19QW_NF	0x0118	USA-19WQ serial Adapter (no firmware)
product KEYSPAN USA19QW	0x0119	USA-19WQ serial Adapter
product KEYSPAN USA19HA	0x0121	USA-19HS serial Adapter
product KEYSPAN UIA10	0x0201	UIA-10 remote control
product KEYSPAN UIA11	0x0202	UIA-11 remote control

/* Kingston products */
product KINGSTON XX1	0x0008	Ethernet
product KINGSTON KNU101TX	0x000a	KNU101TX USB Ethernet
product KINGSTON HYPERX3_0	0x162b	DT HyperX 3.0

/* Kawasaki LSI products */
product KLSI DUH3E10BT	0x0008	USB Ethernet
product KLSI DUH3E10BTN	0x0009	USB Ethernet

/* Kobil products */
product KOBIL CONV_B1	0x2020	FTDI compatible adapter
product KOBIL CONV_KAAN	0x2021	FTDI compatible adapter

/* Kodak products */
product KODAK DC220	0x0100	Digital Science DC220
product KODAK DC260	0x0110	Digital Science DC260
product KODAK DC265	0x0111	Digital Science DC265
product KODAK DC290	0x0112	Digital Science DC290
product KODAK DC240	0x0120	Digital Science DC240
product KODAK DC280	0x0130	Digital Science DC280

/* Kontron AG products */
product KONTRON DM9601	0x8101	USB Ethernet
product KONTRON JP1082	0x9700	USB Ethernet

/* Konica Corp. products */
product KONICA CAMERA	0x0720	Digital Color Camera

/* KYE products */
product KYE NICHE	0x0001	Niche mouse
product KYE NETSCROLL	0x0003	Genius NetScroll mouse
product KYE FLIGHT2000	0x1004	Flight 2000 joystick
product KYE VIVIDPRO	0x2001	ColorPage Vivid-Pro scanner

/* Kyocera products */
product KYOCERA FINECAM_S3X	0x0100	Finecam S3x
product KYOCERA FINECAM_S4	0x0101	Finecam S4
product KYOCERA FINECAM_S5	0x0103	Finecam S5
product KYOCERA FINECAM_L3	0x0105	Finecam L3
product KYOCERA AHK3001V	0x0203	AH-K3001V
product KYOCERA2 CDMA_MSM_K	0x17da	Qualcomm Kyocera CDMA Technologies MSM
product KYOCERA2 KPC680	0x180a	Qualcomm Kyocera CDMA Technologies MSM

/* LaCie products */
product LACIE HD	0xa601	Hard Disk
product LACIE CDRW	0xa602	CD R/W

/* Lake Shore Cryotronics products */
product LAKESHORE 121	0x0100	121 Current Source
product LAKESHORE 218A	0x0200	218A Temperature Monitor
product LAKESHORE 219	0x0201	219 Temperature Monitor
product LAKESHORE 233	0x0202	233 Temperature Transmitter
product LAKESHORE 235	0x0203	235 Temperature Transmitter
product LAKESHORE 335	0x0300	335 Temperature Controller
product LAKESHORE 336	0x0301	336 Temperature Controller
product LAKESHORE 350	0x0302	350 Temperature Controller
product LAKESHORE 371	0x0303	371 AC Bridge
product LAKESHORE 411	0x0400	411 Handheld Gaussmeter
product LAKESHORE 425	0x0401	425 Gaussmeter
product LAKESHORE 455A	0x0402	455A DSP Gaussmeter
product LAKESHORE 475A	0x0403	475A DSP Gaussmeter
product LAKESHORE 465	0x0404	465 Gaussmeter
product LAKESHORE 625A	0x0600	625A Magnet PSU
product LAKESHORE 642A	0x0601	642A Magnet PSU
product LAKESHORE 648	0x0602	648 Magnet PSU
product LAKESHORE 737	0x0700	737 VSM Controller
product LAKESHORE 776	0x0701	776 Matrix Switch

/* Larsen and Brusgaard products */
product LARSENBRUSGAARD ALTITRACK	0x0001	FTDI compatible adapter

/* Leadtek products */
product LEADTEK 9531	0x2101	9531 GPS

/* Lenovo products */
product LENOVO GIGALAN	0x304b	USB 3.0 Ethernet
product LENOVO ETHERNET	0x7203	USB 2.0 Ethernet
product LENOVO RTL8153	0x7205	USB 3.0 Ethernet

/* Lexar products */
product LEXAR JUMPSHOT	0x0001	jumpSHOT CompactFlash Reader
product LEXAR CF_READER	0xb002	USB CF Reader
product LEXAR JUMPDRIVE	0xa833	USB Jumpdrive Flash Drive

/* Lexmark products */
product LEXMARK S2450	0x0009	Optra S 2450

/* Liebert products */
product LIEBERT POWERSURE_PXT	0xffff	PowerSure Personal XT
product LIEBERT2 PSI1000	0x0004	UPS PSI 1000 FW:08

/* Link Instruments Inc. products */
product LINKINSTRUMENTS MSO19	0xf190	Link Instruments MSO-19
product LINKINSTRUMENTS MSO28	0xf280	Link Instruments MSO-28
product LINKINSTRUMENTS MSO28_2	0xf281	Link Instruments MSO-28

/* Linksys products */
product LINKSYS MAUSB2	0x0105	Camedia MAUSB-2
product LINKSYS USB10TX1	0x200c	USB10TX
product LINKSYS USB10T	0x2202	USB10T Ethernet
product LINKSYS USB100TX	0x2203	USB100TX Ethernet
product LINKSYS USB100H1	0x2204	USB100H1 Ethernet/HPNA
product LINKSYS USB10TA	0x2206	USB10TA Ethernet
product LINKSYS USB10TX2	0x400b	USB10TX
product LINKSYS2 WUSB11	0x2219	WUSB11 Wireless Adapter
product LINKSYS2 USB200M	0x2226	USB 2.0 10/100 Ethernet
product LINKSYS3 WUSB11v28	0x2233	WUSB11 v2.8 Wireless Adapter
product LINKSYS4 USB1000	0x0039	USB1000
product LINKSYS4 WUSB100	0x0070	WUSB100
product LINKSYS4 WUSB600N	0x0071	WUSB600N
product LINKSYS4 WUSB54GCV2	0x0073	WUSB54GC v2
product LINKSYS4 WUSB54GCV3	0x0077	WUSB54GC v3
product LINKSYS4 RT3070	0x0078	RT3070
product LINKSYS4 WUSB600NV2	0x0079	WUSB600N v2

/* Logilink products */
product LOGILINK DUMMY	0x0000	Dummy product
product LOGILINK U2M	0x0101	LogiLink USB MIDI Cable

/* Logitech products */
product LOGITECH M2452	0x0203	M2452 keyboard
product LOGITECH M4848	0x0301	M4848 mouse
product LOGITECH PAGESCAN	0x040f	PageScan
product LOGITECH QUICKCAMWEB	0x0801	QuickCam Web
product LOGITECH QUICKCAMPRO	0x0810	QuickCam Pro
product LOGITECH WEBCAMC100	0x0817	Webcam C100
product LOGITECH QUICKCAMEXP	0x0840	QuickCam Express
product LOGITECH QUICKCAM	0x0850	QuickCam
product LOGITECH QUICKCAMPRO3	0x0990	QuickCam Pro 9000
product LOGITECH N43	0xc000	N43
product LOGITECH N48	0xc001	N48 mouse
product LOGITECH MBA47	0xc002	M-BA47 mouse
product LOGITECH WMMOUSE	0xc004	WingMan Gaming Mouse
product LOGITECH BD58	0xc00c	BD58 mouse
product LOGITECH UN58A	0xc030	iFeel Mouse
product LOGITECH UN53B	0xc032	iFeel MouseMan
product LOGITECH WMPAD	0xc208	WingMan GamePad Extreme
product LOGITECH WMRPAD	0xc20a	WingMan RumblePad
product LOGITECH WMJOY	0xc281	WingMan Force joystick
product LOGITECH BB13	0xc401	USB-PS/2 Trackball
product LOGITECH RK53	0xc501	Cordless mouse
product LOGITECH RB6	0xc503	Cordless keyboard
product LOGITECH MX700	0xc506	Cordless optical mouse
product LOGITECH UNIFYING	0xc52b	Logitech Unifying Receiver
product LOGITECH QUICKCAMPRO2	0xd001	QuickCam Pro

/* Logitec Corp. products */
product LOGITEC LDR_H443SU2	0x0033	DVD Multi-plus unit LDR-H443SU2
product LOGITEC LDR_H443U2	0x00b3	DVD Multi-plus unit LDR-H443U2
product LOGITEC LAN_GTJU2A	0x0160	LAN-GTJ/U2A Ethernet
product LOGITEC RT2870_1	0x0162	RT2870
product LOGITEC RT2870_2	0x0163	RT2870
product LOGITEC RT2870_3	0x0164	RT2870
product LOGITEC LANW300NU2	0x0166	LAN-W300N/U2
product LOGITEC LANW150NU2	0x0168	LAN-W150N/U2
product LOGITEC LANW300NU2S	0x0169	LAN-W300N/U2S

/* Longcheer Holdings, Ltd. products */
product LONGCHEER WM66	0x6061	Longcheer WM66 HSDPA
product LONGCHEER W14	0x9603	Mobilcom W14
product LONGCHEER DISK	0xf000	Driver disk
product LONGCHEER XSSTICK	0x9605	4G Systems XSStick P14

/* Lucent products */
product LUCENT EVALKIT	0x1001	USS-720 evaluation kit

/* Luwen products */
product LUWEN EASYDISK	0x0005	EasyDisk

/* Macally products */
product MACALLY MOUSE1	0x0101	mouse

/* Mag-Tek products */
product MAGTEK USBSWIPE	0x0002	USB Mag Stripe Swipe Reader

/* Marvell Technology Group, Ltd. products */
product MARVELL SHEEVAPLUG	0x9e8f	SheevaPlug serial interface

/* Matrix Orbital products */
product MATRIXORBITAL FTDI_RANGE_0100	0x0100	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0101	0x0101	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0102	0x0102	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0103	0x0103	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0104	0x0104	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0105	0x0105	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0106	0x0106	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0107	0x0107	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0108	0x0108	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0109	0x0109	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_010A	0x010a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_010B	0x010b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_010C	0x010c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_010D	0x010d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_010E	0x010e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_010F	0x010f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0110	0x0110	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0111	0x0111	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0112	0x0112	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0113	0x0113	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0114	0x0114	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0115	0x0115	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0116	0x0116	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0117	0x0117	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0118	0x0118	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0119	0x0119	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_011A	0x011a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_011B	0x011b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_011C	0x011c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_011D	0x011d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_011E	0x011e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_011F	0x011f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0120	0x0120	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0121	0x0121	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0122	0x0122	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0123	0x0123	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0124	0x0124	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0125	0x0125	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0126	0x0126	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0128	0x0128	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0129	0x0129	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_012A	0x012a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_012B	0x012b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_012D	0x012d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_012E	0x012e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_012F	0x012f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0130	0x0130	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0131	0x0131	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0132	0x0132	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0133	0x0133	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0134	0x0134	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0135	0x0135	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0136	0x0136	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0137	0x0137	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0138	0x0138	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0139	0x0139	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_013A	0x013a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_013B	0x013b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_013C	0x013c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_013D	0x013d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_013E	0x013e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_013F	0x013f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0140	0x0140	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0141	0x0141	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0142	0x0142	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0143	0x0143	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0144	0x0144	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0145	0x0145	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0146	0x0146	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0147	0x0147	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0148	0x0148	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0149	0x0149	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_014A	0x014a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_014B	0x014b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_014C	0x014c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_014D	0x014d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_014E	0x014e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_014F	0x014f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0150	0x0150	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0151	0x0151	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0152	0x0152	FTDI compatible adapter
product MATRIXORBITAL MOUA	0x0153	Matrix Orbital MOU-Axxxx LCD displays
product MATRIXORBITAL FTDI_RANGE_0159	0x0159	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_015A	0x015a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_015B	0x015b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_015C	0x015c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_015D	0x015d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_015E	0x015e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_015F	0x015f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0160	0x0160	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0161	0x0161	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0162	0x0162	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0163	0x0163	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0164	0x0164	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0165	0x0165	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0166	0x0166	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0167	0x0167	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0168	0x0168	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0169	0x0169	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_016A	0x016a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_016B	0x016b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_016C	0x016c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_016D	0x016d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_016E	0x016e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_016F	0x016f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0170	0x0170	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0171	0x0171	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0172	0x0172	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0173	0x0173	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0174	0x0174	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0175	0x0175	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0176	0x0176	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0177	0x0177	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0178	0x0178	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0179	0x0179	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_017A	0x017a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_017B	0x017b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_017C	0x017c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_017D	0x017d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_017E	0x017e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_017F	0x017f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0180	0x0180	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0181	0x0181	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0182	0x0182	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0183	0x0183	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0184	0x0184	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0185	0x0185	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0186	0x0186	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0187	0x0187	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0188	0x0188	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0189	0x0189	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_018A	0x018a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_018B	0x018b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_018C	0x018c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_018D	0x018d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_018E	0x018e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_018F	0x018f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0190	0x0190	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0191	0x0191	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0192	0x0192	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0193	0x0193	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0194	0x0194	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0195	0x0195	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0196	0x0196	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0197	0x0197	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0198	0x0198	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_0199	0x0199	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_019A	0x019a	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_019B	0x019b	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_019C	0x019c	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_019D	0x019d	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_019E	0x019e	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_019F	0x019f	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A0	0x01a0	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A1	0x01a1	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A2	0x01a2	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A3	0x01a3	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A4	0x01a4	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A5	0x01a5	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A6	0x01a6	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A7	0x01a7	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A8	0x01a8	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01A9	0x01a9	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01AA	0x01aa	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01AB	0x01ab	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01AC	0x01ac	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01AD	0x01ad	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01AE	0x01ae	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01AF	0x01af	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B0	0x01b0	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B1	0x01b1	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B2	0x01b2	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B3	0x01b3	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B4	0x01b4	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B5	0x01b5	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B6	0x01b6	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B7	0x01b7	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B8	0x01b8	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01B9	0x01b9	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01BA	0x01ba	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01BB	0x01bb	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01BC	0x01bc	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01BD	0x01bd	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01BE	0x01be	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01BF	0x01bf	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C0	0x01c0	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C1	0x01c1	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C2	0x01c2	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C3	0x01c3	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C4	0x01c4	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C5	0x01c5	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C6	0x01c6	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C7	0x01c7	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C8	0x01c8	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01C9	0x01c9	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01CA	0x01ca	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01CB	0x01cb	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01CC	0x01cc	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01CD	0x01cd	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01CE	0x01ce	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01CF	0x01cf	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D0	0x01d0	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D1	0x01d1	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D2	0x01d2	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D3	0x01d3	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D4	0x01d4	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D5	0x01d5	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D6	0x01d6	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D7	0x01d7	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D8	0x01d8	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01D9	0x01d9	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01DA	0x01da	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01DB	0x01db	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01DC	0x01dc	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01DD	0x01dd	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01DE	0x01de	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01DF	0x01df	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E0	0x01e0	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E1	0x01e1	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E2	0x01e2	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E3	0x01e3	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E4	0x01e4	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E5	0x01e5	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E6	0x01e6	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E7	0x01e7	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E8	0x01e8	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01E9	0x01e9	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01EA	0x01ea	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01EB	0x01eb	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01EC	0x01ec	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01ED	0x01ed	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01EE	0x01ee	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01EF	0x01ef	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F0	0x01f0	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F1	0x01f1	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F2	0x01f2	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F3	0x01f3	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F4	0x01f4	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F5	0x01f5	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F6	0x01f6	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F7	0x01f7	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F8	0x01f8	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01F9	0x01f9	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01FA	0x01fa	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01FB	0x01fb	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01FC	0x01fc	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01FD	0x01fd	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01FE	0x01fe	FTDI compatible adapter
product MATRIXORBITAL FTDI_RANGE_01FF	0x01ff	FTDI compatible adapter

/* MCT Corp. */
product MCT HUB0100	0x0100	Hub
product MCT DU_H3SP_USB232	0x0200	D-Link DU-H3SP USB BAY Hub
product MCT USB232	0x0210	USB-232 Interface
product MCT SITECOM_USB232	0x0230	Sitecom USB-232 Products

/* Medeli */
product MEDELI DD305	0x5011	DD305 Digital Drum Set

/* MediaTek, Inc. */
product MEDIATEK MTK3329	0x3329	MTK II GPS Receiver

/* Meizu Electronics */
product MEIZU M6_SL	0x0140	MiniPlayer M6 (SL)

/* Melco, Inc. products */
product MELCO LUATX1	0x0001	LUA-TX Ethernet
product MELCO LUATX5	0x0005	LUA-TX Ethernet
product MELCO LUA2TX5	0x0009	LUA2-TX Ethernet
product MELCO LUAKTX	0x0012	LUA-KTX Ethernet
product MELCO DUBPXXG	0x001c	DUB-PxxG
product MELCO LUAU2KTX	0x003d	LUA-U2-KTX Ethernet
product MELCO KG54YB	0x005e	WLI-U2-KG54-YB WLAN
product MELCO KG54	0x0066	WLI-U2-KG54 WLAN
product MELCO KG54AI	0x0067	WLI-U2-KG54-AI WLAN
product MELCO LUA3U2AGT	0x006e	LUA3-U2-AGT
product MELCO NINWIFI	0x008b	Nintendo Wi-Fi
product MELCO PCOPRS1	0x00b3	PC-OP-RS1 RemoteStation
product MELCO SG54HP	0x00d8	WLI-U2-SG54HP
product MELCO G54HP	0x00d9	WLI-U2-G54HP
product MELCO KG54L	0x00da	WLI-U2-KG54L
product MELCO WLIUCG300N	0x00e8	WLI-UC-G300N
product MELCO SG54HG	0x00f4	WLI-U2-SG54HG
product MELCO WLRUCG	0x0116	WLR-UC-G
product MELCO WLRUCGAOSS	0x0119	WLR-UC-G-AOSS
product MELCO WLIUCAG300N	0x012e	WLI-UC-AG300N
product MELCO WLIUCG	0x0137	WLI-UC-G
product MELCO WLIUCG300HP	0x0148	WLI-UC-G300HP
product MELCO RT2870_2	0x0150	RT2870
product MELCO WLIUCGN	0x015d	WLI-UC-GN
product MELCO WLIUCG301N	0x016f	WLI-UC-G301N
product MELCO WLIUCGNM	0x01a2	WLI-UC-GNM
product MELCO WLIUCG300HPV1	0x01a8	WLI-UC-G300HP-V1
product MELCO WLIUCGNM2	0x01ee	WLI-UC-GNM2
product MELCO WIU2433DM	0x0242	WI-U2-433DM
product MELCO WIU3866D	0x025d	WI-U3-866D

/* Merlin products */
product MERLIN V620	0x1110	Merlin V620

/* MetaGeek products */
product METAGEEK TELLSTICK	0x0c30	FTDI compatible adapter
product METAGEEK WISPY1B	0x083e	MetaGeek Wi-Spy
product METAGEEK WISPY24X	0x083f	MetaGeek Wi-Spy 2.4x
product METAGEEK2 WISPYDBX	0x5000	MetaGeek Wi-Spy DBx

/* Metricom products */
product METRICOM RICOCHET_GS	0x0001	Ricochet GS

/* MGE UPS Systems */
product MGE UPS1	0x0001	MGE UPS SYSTEMS PROTECTIONCENTER 1
product MGE UPS2	0xffff	MGE UPS SYSTEMS PROTECTIONCENTER 2

/* MEI products */
product MEI CASHFLOW_SC	0x1100	Cashflow-SC Cash Acceptor
product MEI S2000	0x1101	Series 2000 Combo Acceptor

/* Microdia / Sonix Technology Co., Ltd. products */
product CHICONY2 YUREX	0x1010	YUREX
product CHICONY2 CAM_1	0x62c0	CAM_1
product CHICONY2 TEMPER	0x7401	TEMPer sensor

/* Micro Star International products */
product MSI BT_DONGLE	0x1967	Bluetooth USB dongle
product MSI RT3070_1	0x3820	RT3070
product MSI RT3070_2	0x3821	RT3070
product MSI RT3070_8	0x3822	RT3070
product MSI RT3070_3	0x3870	RT3070
product MSI RT3070_9	0x3871	RT3070
product MSI UB11B	0x6823	UB11B
product MSI RT2570	0x6861	RT2570
product MSI RT2570_2	0x6865	RT2570
product MSI RT2570_3	0x6869	RT2570
product MSI RT2573_1	0x6874	RT2573
product MSI RT2573_2	0x6877	RT2573
product MSI RT3070_4	0x6899	RT3070
product MSI RT3070_5	0x821a	RT3070
product MSI RT3070_10	0x822a	RT3070
product MSI RT3070_6	0x870a	RT3070
product MSI RT3070_11	0x871a	RT3070
product MSI RT3070_7	0x899a	RT3070
product MSI RT2573_3	0xa861	RT2573
product MSI RT2573_4	0xa874	RT2573

/* Micron products */
product MICRON REALSSD	0x0655	Real SSD eUSB

/* Microsoft products */
product MICROSOFT SIDEPREC	0x0008	SideWinder Precision Pro
product MICROSOFT INTELLIMOUSE	0x0009	IntelliMouse
product MICROSOFT NATURALKBD	0x000b	Natural Keyboard Elite
product MICROSOFT DDS80	0x0014	Digital Sound System 80
product MICROSOFT SIDEWINDER	0x001a	Sidewinder Precision Racing Wheel
product MICROSOFT INETPRO	0x001c	Internet Keyboard Pro
product MICROSOFT TBEXPLORER	0x0024	Trackball Explorer
product MICROSOFT INTELLIEYE	0x0025	IntelliEye mouse
product MICROSOFT INETPRO2	0x002b	Internet Keyboard Pro
product MICROSOFT INTELLIMOUSE5	0x0039	IntelliMouse 1.1 5-Button Mouse
product MICROSOFT WHEELMOUSE	0x0040	Wheel Mouse Optical
product MICROSOFT MN510	0x006e	MN510 Wireless
product MICROSOFT 700WX	0x0079	Palm 700WX
product MICROSOFT MN110	0x007a	10/100 USB NIC
product MICROSOFT WLINTELLIMOUSE	0x008c	Wireless Optical IntelliMouse
product MICROSOFT WLNOTEBOOK	0x00b9	Wireless Optical Mouse (Model 1023)
product MICROSOFT COMFORT3000	0x00d1	Comfort Optical Mouse 3000 (Model 1043)
product MICROSOFT WLNOTEBOOK3	0x00d2	Wireless Optical Mouse 3000 (Model 1049)
product MICROSOFT NATURAL4000	0x00db	Natural Ergonomic Keyboard 4000
product MICROSOFT WLNOTEBOOK2	0x00e1	Wireless Optical Mouse 3000 (Model 1056)
product MICROSOFT XBOX360	0x0292	XBOX 360 WLAN

/* Microtech products */
product MICROTECH SCSIDB25	0x0004	USB-SCSI-DB25
product MICROTECH SCSIHD50	0x0005	USB-SCSI-HD50
product MICROTECH DPCM	0x0006	USB CameraMate
product MICROTECH FREECOM	0xfc01	Freecom USB-IDE

/* Microtek products */
product MICROTEK 336CX	0x0094	Phantom 336CX - C3 scanner
product MICROTEK X6U	0x0099	ScanMaker X6 - X6U
product MICROTEK C6	0x009a	Phantom C6 scanner
product MICROTEK 336CX2	0x00a0	Phantom 336CX - C3 scanner
product MICROTEK V6USL	0x00a3	ScanMaker V6USL
product MICROTEK V6USL2	0x80a3	ScanMaker V6USL
product MICROTEK V6UL	0x80ac	ScanMaker V6UL

/* Microtune, Inc. products */
product MICROTUNE BT_DONGLE	0x1000	Bluetooth USB dongle

/* Midiman products */
product MAUDIO MIDISPORT2X2	0x1001	Midisport 2x2
product MAUDIO FASTTRACKULTRA	0x2080	Fast Track Ultra
product MAUDIO FASTTRACKULTRA8R	0x2081	Fast Track Ultra 8R

/* MindsAtWork products */
product MINDSATWORK WALLET	0x0001	Digital Wallet

/* Minolta Co., Ltd. */
product MINOLTA 2300	0x4001	Dimage 2300
product MINOLTA S304	0x4007	Dimage S304
product MINOLTA X	0x4009	Dimage X
product MINOLTA 5400	0x400e	Dimage 5400
product MINOLTA F300	0x4011	Dimage F300
product MINOLTA E223	0x4017	Dimage E223

/* Mitsumi products */
product MITSUMI CDRRW	0x0000	CD-R/RW Drive
product MITSUMI BT_DONGLE	0x641f	Bluetooth USB dongle
product MITSUMI FDD	0x6901	USB FDD

/* Mobile Action products */
product MOBILEACTION MA620	0x0620	MA-620 Infrared Adapter

/* Mobility products */
product MOBILITY USB_SERIAL	0x0202	FTDI compatible adapter
product MOBILITY EA	0x0204	Ethernet
product MOBILITY EASIDOCK	0x0304	EasiDock Ethernet

/* MosChip products */
product MOSCHIP MCS7703	0x7703	MCS7703 Serial Port Adapter
product MOSCHIP MCS7730	0x7730	MCS7730 Ethernet
product MOSCHIP MCS7820	0x7820	MCS7820 Serial Port Adapter
product MOSCHIP MCS7830	0x7830	MCS7830 Ethernet
product MOSCHIP MCS7832	0x7832	MCS7832 Ethernet
product MOSCHIP MCS7840	0x7840	MCS7840 Serial Port Adapter

/* Motorola products */
product MOTOROLA MC141555	0x1555	MC141555 hub controller
product MOTOROLA SB4100	0x4100	SB4100 USB Cable Modem
product MOTOROLA2 T720C	0x2822	T720c
product MOTOROLA2 A41XV32X	0x2a22	A41x/V32x Mobile Phones
product MOTOROLA2 E398	0x4810	E398 Mobile Phone
product MOTOROLA2 USBLAN	0x600c	USBLAN
product MOTOROLA2 USBLAN2	0x6027	USBLAN
product MOTOROLA2 MB886	0x710f	MB886 Mobile Phone (Atrix HD)
product MOTOROLA4 RT2770	0x9031	RT2770
product MOTOROLA4 RT3070	0x9032	RT3070

/* Moxa */
product MOXA MXU1_1110	0x1110	Moxa Uport 1110
product MOXA MXU1_1130	0x1130	Moxa Uport 1130
product MOXA MXU1_1131	0x1131	Moxa Uport 1131
product MOXA MXU1_1150	0x1150	Moxa Uport 1150
product MOXA MXU1_1151	0x1151	Moxa Uport 1151

/* MpMan products */
product MPMAN MPF400_2	0x25a8	MPF400 Music Player 2Go
product MPMAN MPF400_1	0x36d0	MPF400 Music Player 1Go

/* MultiTech products */
product MULTITECH MT9234ZBA_2	0x0319	MT9234ZBA USB modem (alt)
product MULTITECH ATLAS	0xf101	MT5634ZBA-USB modem
product MULTITECH GSM	0xf108	GSM USB Modem
product MULTITECH CDMA	0xf109	CDMA USB Modem
product MULTITECH CDMA_FW	0xf110	CDMA USB Modem firmware running
product MULTITECH GSM_FW	0xf111	GSM USB Modem firmware running
product MULTITECH EDGE	0xf112	Edge USB Modem
product MULTITECH MT9234MU	0xf114	MT9234 MU
product MULTITECH MT9234ZBA	0xf115	MT9234 ZBA

/* Mustek products */
product MUSTEK 1200CU	0x0001	1200 CU scanner
product MUSTEK 600CU	0x0002	600 CU scanner
product MUSTEK 1200USB	0x0003	1200 USB scanner
product MUSTEK 1200UB	0x0006	1200 UB scanner
product MUSTEK 1200USBPLUS	0x0007	1200 USB Plus scanner
product MUSTEK 1200CUPLUS	0x0008	1200 CU Plus scanner
product MUSTEK BEARPAW1200F	0x0010	BearPaw 1200F scanner
product MUSTEK BEARPAW2400TA	0x0218	BearPaw 2400TA scanner
product MUSTEK BEARPAW1200TA	0x021e	BearPaw 1200TA scanner
product MUSTEK 600USB	0x0873	600 USB scanner
product MUSTEK MDC800	0xa800	MDC-800 digital camera

/* M-Systems products */
product MSYSTEMS DISKONKEY	0x0010	DiskOnKey
product MSYSTEMS DISKONKEY2	0x0011	DiskOnKey

/* Myson products */
product MYSON HEDEN_8813	0x8813	USB-IDE
product MYSON HEDEN	0x8818	USB-IDE
product MYSON HUBREADER	0x8819	COMBO Card reader with USB HUB
product MYSON STARREADER	0x9920	USB flash card adapter

/* National Semiconductor */
product NATIONAL BEARPAW1200	0x1000	BearPaw 1200
product NATIONAL BEARPAW2400	0x1001	BearPaw 2400

/* NEC products */
product NEC HUB_0050	0x0050	USB 2.0 7-Port Hub
product NEC HUB_005A	0x005a	USB 2.0 4-Port Hub
product NEC WL300NUG	0x0249	WL300NU-G
product NEC WL900U	0x0408	Aterm WL900U
product NEC HUB	0x55aa	hub
product NEC HUB_B	0x55ab	hub

/* NEODIO products */
product NEODIO ND3260	0x3260	8-in-1 Multi-format Flash Controller
product NEODIO ND5010	0x5010	Multi-format Flash Controller

/* Neotel products */
product NEOTEL PRIME	0x4000	Prime USB modem

/* Netac products */
product NETAC CF_CARD	0x1060	USB-CF-Card
product NETAC ONLYDISK	0x0003	OnlyDisk

/* NetChip Technology Products */
product NETCHIP TURBOCONNECT	0x1080	Turbo-Connect
product NETCHIP CLIK_40	0xa140	USB Clik! 40
product NETCHIP GADGETZERO	0xa4a0	Linux Gadget Zero
product NETCHIP ETHERNETGADGET	0xa4a2	Linux Ethernet/RNDIS gadget on pxa210/25x/26x
product NETCHIP POCKETBOOK	0xa4a5	PocketBook

/* Netgear products */
product NETGEAR EA101	0x1001	Ethernet
product NETGEAR EA101X	0x1002	Ethernet
product NETGEAR FA101	0x1020	Ethernet 10/100, USB1.1
product NETGEAR FA120	0x1040	USB 2.0 Ethernet
product NETGEAR M4100	0x1100	M4100/M5300/M7100 series switch
product NETGEAR WG111V1_2	0x4240	PrismGT USB 2.0 WLAN
product NETGEAR WG111V3	0x4260	WG111v3
product NETGEAR WG111U	0x4300	WG111U
product NETGEAR WG111U_NF	0x4301	WG111U (no firmware)
product NETGEAR WG111V2	0x6a00	WG111V2
product NETGEAR WN111V2	0x9001	WN111V2
product NETGEAR WNDA3100	0x9010	WNDA3100
product NETGEAR WNDA4100	0x9012	WNDA4100
product NETGEAR WNDA3200	0x9018	WNDA3200
product NETGEAR RTL8192CU	0x9021	RTL8192CU
product NETGEAR WNA1000	0x9040	WNA1000
product NETGEAR WNA1000M	0x9041	WNA1000M
product NETGEAR A6100	0x9052	A6100
product NETGEAR2 MA101	0x4100	MA101
product NETGEAR2 MA101B	0x4102	MA101 Rev B
product NETGEAR3 WG111T	0x4250	WG111T
product NETGEAR3 WG111T_NF	0x4251	WG111T (no firmware)
product NETGEAR3 WPN111	0x5f00	WPN111
product NETGEAR3 WPN111_NF	0x5f01	WPN111 (no firmware)
product NETGEAR3 WPN111_2	0x5f02	WPN111
product NETGEAR4 RTL8188CU	0x9041	RTL8188CU

/* NetIndex products */
product NETINDEX WS002IN	0x2001	Willcom WS002IN

/* NEWlink */
product NEWLINK USB2IDEBRIDGE	0x00ff	USB 2.0 Hard Drive Enclosure

/* Nikon products */
product NIKON E990	0x0102	Digital Camera E990
product NIKON LS40	0x4000	CoolScan LS40 ED
product NIKON D300	0x041a	Digital Camera D300

/* NovaTech Products */
product NOVATECH NV902	0x9020	NovaTech NV-902W
product NOVATECH RT2573	0x9021	RT2573
product NOVATECH RTL8188CU	0x9071	RTL8188CU

/* Nokia products */
product NOKIA N958GB	0x0070	Nokia N95 8GB
product NOKIA2 CA42	0x1234	CA-42 cable

/* Novatel Wireless products */
product NOVATEL V640	0x1100	Merlin V620
product NOVATEL CDMA_MODEM	0x1110	Novatel Wireless Merlin CDMA
product NOVATEL V620	0x1110	Merlin V620
product NOVATEL V740	0x1120	Merlin V740
product NOVATEL V720	0x1130	Merlin V720
product NOVATEL U740	0x1400	Merlin U740
product NOVATEL U740_2	0x1410	Merlin U740
product NOVATEL U870	0x1420	Merlin U870
product NOVATEL XU870	0x1430	Merlin XU870
product NOVATEL X950D	0x1450	Merlin X950D
product NOVATEL ES620	0x2100	Expedite ES620
product NOVATEL U720	0x2110	Merlin U720
product NOVATEL E725	0x2120	Expedite E725
product NOVATEL ES620_2	0x2130	Expedite ES620
product NOVATEL EU730	0x2400	Expedite EU730
product NOVATEL EU740	0x2410	Expedite EU740
product NOVATEL EU870D	0x2420	Expedite EU870D
product NOVATEL U727	0x4100	Merlin U727 CDMA
product NOVATEL MC950D	0x4400	Novatel MC950D HSUPA
product NOVATEL MC990D	0x7001	Novatel MC990D
product NOVATEL ZEROCD	0x5010	Novatel ZeroCD
product NOVATEL MIFI2200V	0x5020	Novatel MiFi 2200 CDMA Virgin Mobile
product NOVATEL ZEROCD2	0x5030	Novatel ZeroCD
product NOVATEL MIFI2200	0x5041	Novatel MiFi 2200 CDMA
product NOVATEL U727_2	0x5100	Merlin U727 CDMA
product NOVATEL U760	0x6000	Novatel U760
product NOVATEL MC760	0x6002	Novatel MC760
product NOVATEL MC547	0x7042	Novatel MC547
product NOVATEL MC679	0x7031	Novatel MC679
product NOVATEL2 FLEXPACKGPS	0x0100	NovAtel FlexPack GPS receiver

/* O2Micro products */
product O2MICRO OZ776_HUB	0x7761	OZ776 hub
product O2MICRO OZ776_CCID_SC	0x7772	OZ776 CCID SC Reader

/* Olimex products */
product OLIMEX ARM_USB_OCD	0x0003	FTDI compatible adapter
product OLIMEX ARM_USB_OCD_H	0x002b	FTDI compatible adapter

/* Olympus products */
product OLYMPUS C1	0x0102	C-1 Digital Camera
product OLYMPUS C700	0x0105	C-700 Ultra Zoom

/* OmniVision Technologies, Inc. products */
product OMNIVISION OV511	0x0511	OV511 Camera
product OMNIVISION OV511PLUS	0xa511	OV511+ Camera

/* OnSpec Electronic, Inc. */
product ONSPEC SDS_HOTFIND_D	0x0400	SDS-infrared.com Hotfind-D Infrared Camera
product ONSPEC MDCFE_B_CF_READER	0xa000	MDCFE-B USB CF Reader
product ONSPEC CFMS_RW	0xa001	SIIG/Datafab Memory Stick+CF Reader/Writer
product ONSPEC READER	0xa003	Datafab-based Reader
product ONSPEC CFSM_READER	0xa005	PNY/Datafab CF+SM Reader
product ONSPEC CFSM_READER2	0xa006	Simple Tech/Datafab CF+SM Reader
product ONSPEC MDSM_B_READER	0xa103	MDSM-B reader
product ONSPEC CFSM_COMBO	0xa109	USB to CF + SM Combo (LC1)
product ONSPEC UCF100	0xa400	FlashLink UCF-100 CompactFlash Reader
product ONSPEC2 IMAGEMATE_SDDR55	0xa103	ImageMate SDDR55

/* Option products */
product OPTION VODAFONEMC3G	0x5000	Vodafone Mobile Connect 3G datacard
product OPTION GT3G	0x6000	GlobeTrotter 3G datacard
product OPTION GT3GQUAD	0x6300	GlobeTrotter 3G QUAD datacard
product OPTION GT3GPLUS	0x6600	GlobeTrotter 3G+ datacard
product OPTION GTICON322	0xd033	GlobeTrotter Icon322 storage
product OPTION GTMAX36	0x6701	GlobeTrotter Max 3.6 Modem
product OPTION GTMAX72	0x6711	GlobeTrotter Max 7.2 HSDPA
product OPTION GTHSDPA	0x6971	GlobeTrotter HSDPA
product OPTION GTMAXHSUPA	0x7001	GlobeTrotter HSUPA
product OPTION GTMAXHSUPAE	0x6901	GlobeTrotter HSUPA PCIe
product OPTION GTMAX380HSUPAE	0x7211	GlobeTrotter 380HSUPA PCIe
product OPTION GT3G_1	0x6050	3G modem
product OPTION GT3G_2	0x6100	3G modem
product OPTION GT3G_3	0x6150	3G modem
product OPTION GT3G_4	0x6200	3G modem
product OPTION GT3G_5	0x6250	3G modem
product OPTION GT3G_6	0x6350	3G modem
product OPTION E6500	0x6500	3G modem
product OPTION E6501	0x6501	3G modem
product OPTION E6601	0x6601	3G modem
product OPTION E6721	0x6721	3G modem
product OPTION E6741	0x6741	3G modem
product OPTION E6761	0x6761	3G modem
product OPTION E6800	0x6800	3G modem
product OPTION E7021	0x7021	3G modem
product OPTION E7041	0x7041	3G modem
product OPTION E7061	0x7061	3G modem
product OPTION E7100	0x7100	3G modem
product OPTION GTM380	0x7201	3G modem
product OPTION GE40X	0x7601	Globetrotter HSUPA
product OPTION GSICON72	0x6911	GlobeSurfer iCON
product OPTION GSICONHSUPA	0x7251	Globetrotter HSUPA
product OPTION ICON401	0x7401	GlobeSurfer iCON 401
product OPTION GTHSUPA	0x7011	Globetrotter HSUPA
product OPTION GMT382	0x7501	Globetrotter HSUPA
product OPTION GE40X_1	0x7301	Globetrotter HSUPA
product OPTION GE40X_2	0x7361	Globetrotter HSUPA
product OPTION GE40X_3	0x7381	Globetrotter HSUPA
product OPTION GTM661W	0x9000	GTM661W
product OPTION ICONEDGE	0xc031	GlobeSurfer iCON EDGE
product OPTION MODHSXPA	0xd013	Globetrotter HSUPA
product OPTION ICON321	0xd031	Globetrotter HSUPA
product OPTION ICON505	0xd055	Globetrotter iCON 505
product OPTION ICON452	0x7901	Globetrotter iCON 452

/* Optoelectronics Co., Ltd. products */
product OPTO BARCODE	0x0001	Barcode Reader
product OPTO OPTICONCODE	0x0009	Opticon Code Reader
product OPTO BARCODE_1	0xa002	Barcode Reader
product OPTO CRD7734	0xc000	USB Cradle CRD-7734-RU
product OPTO CRD7734_1	0xc001	USB Cradle CRD-7734-RU

/* OvisLink products */
product OVISLINK RT3072	0x3072	RT3072

/* OQO */
product OQO WIFI01	0x0002	model 01 WiFi interface
product OQO BT01	0x0003	model 01 Bluetooth interface
product OQO ETHER01PLUS	0x7720	model 01+ Ethernet
product OQO ETHER01	0x8150	model 01 Ethernet interface

/* Ours Technology Inc. */
product OTI DKU5	0x6858	DKU-5 Serial

/* Owen.ru products */
product OWEN AC4	0x0004	AC4 USB-RS485 converter

/* OWL products */
product OWL CM_160	0xca05	OWL CM-160 power monitor

/* Palm Computing, Inc. products */
product PALM SERIAL	0x0080	USB Serial
product PALM M500	0x0001	Palm m500
product PALM M505	0x0002	Palm m505
product PALM M515	0x0003	Palm m515
product PALM I705	0x0020	Palm i705
product PALM TUNGSTEN_Z	0x0031	Palm Tungsten Z
product PALM M125	0x0040	Palm m125
product PALM M130	0x0050	Palm m130
product PALM TUNGSTEN_T	0x0060	Palm Tungsten T
product PALM ZIRE31	0x0061	Palm Zire 31
product PALM ZIRE	0x0070	Palm Zire

/* Panasonic products */
product PANASONIC LS120CAM	0x0901	LS-120 Camera
product PANASONIC KXL840AN	0x0d01	CD-R Drive KXL-840AN
product PANASONIC KXLRW32AN	0x0d09	CD-R Drive KXL-RW32AN
product PANASONIC KXLCB20AN	0x0d0a	CD-R Drive KXL-CB20AN
product PANASONIC KXLCB35AN	0x0d0e	DVD-ROM & CD-R/RW
product PANASONIC SDCAAE	0x1b00	MultiMediaCard
product PANASONIC TYTP50P6S	0x3900	TY-TP50P6-S 50in Touch Panel

/* Papouch products */
product PAPOUCH AD4USB	0x8003	FTDI compatible adapter
product PAPOUCH AP485	0x0101	FTDI compatible adapter
product PAPOUCH AP485_2	0x0104	FTDI compatible adapter
product PAPOUCH DRAK5	0x0700	FTDI compatible adapter
product PAPOUCH DRAK6	0x1000	FTDI compatible adapter
product PAPOUCH GMSR	0x8005	FTDI compatible adapter
product PAPOUCH GMUX	0x8004	FTDI compatible adapter
product PAPOUCH IRAMP	0x0500	FTDI compatible adapter
product PAPOUCH LEC	0x0300	FTDI compatible adapter
product PAPOUCH MU	0x8001	FTDI compatible adapter
product PAPOUCH QUIDO10X1	0x0b00	FTDI compatible adapter
product PAPOUCH QUIDO2X16	0x0e00	FTDI compatible adapter
product PAPOUCH QUIDO2X2	0x0a00	FTDI compatible adapter
product PAPOUCH QUIDO30X3	0x0c00	FTDI compatible adapter
product PAPOUCH QUIDO3X32	0x0f00	FTDI compatible adapter
product PAPOUCH QUIDO4X4	0x0900	FTDI compatible adapter
product PAPOUCH QUIDO60X3	0x0d00	FTDI compatible adapter
product PAPOUCH QUIDO8X8	0x0800	FTDI compatible adapter
product PAPOUCH SB232	0x0301	FTDI compatible adapter
product PAPOUCH SB422	0x0102	FTDI compatible adapter
product PAPOUCH SB422_2	0x0105	FTDI compatible adapter
product PAPOUCH SB485	0x0100	FTDI compatible adapter
product PAPOUCH SB485C	0x0107	FTDI compatible adapter
product PAPOUCH SB485S	0x0106	FTDI compatible adapter
product PAPOUCH SB485_2	0x0103	FTDI compatible adapter
product PAPOUCH SIMUKEY	0x8002	FTDI compatible adapter
product PAPOUCH TMU	0x0400	FTDI compatible adapter
product PAPOUCH UPSUSB	0x8000	FTDI compatible adapter

/* PARA Industrial products */
product PARA RT3070	0x8888	RT3070

/* Simtec Electronics products */
product SIMTEC ENTROPYKEY	0x0001	Entropy Key

/* Pegatron products */
product PEGATRON RT2870	0x0002	RT2870
product PEGATRON RT3070	0x000c	RT3070
product PEGATRON RT3070_2	0x000e	RT3070
product PEGATRON RT3070_3	0x0010	RT3070

/* Peracom products */
product PERACOM SERIAL1	0x0001	Serial
product PERACOM ENET	0x0002	Ethernet
product PERACOM ENET3	0x0003	At Home Ethernet
product PERACOM ENET2	0x0005	Ethernet

/* Peraso Technologies, Inc. products */
product PERASO PRS4001	0x4001	PRS4001 WLAN

/* Philips products */
product PHILIPS DSS350	0x0101	DSS 350 Digital Speaker System
product PHILIPS DSS	0x0104	DSS XXX Digital Speaker System
product PHILIPS HUB	0x0201	hub
product PHILIPS PCA646VC	0x0303	PCA646VC PC Camera
product PHILIPS PCVC680K	0x0308	PCVC680K Vesta Pro PC Camera
product PHILIPS SPC900NC	0x0329	SPC 900NC CCD PC Camera
product PHILIPS DSS150	0x0471	DSS 150 Digital Speaker System
product PHILIPS ACE1001	0x066a	AKTAKOM ACE-1001 cable
product PHILIPS SPE3030CC	0x083a	USB 2.0 External Disk
product PHILIPS SNU5600	0x1236	SNU5600
product PHILIPS UM10016	0x1552	ISP 1581 Hi-Speed USB MPEG2 Encoder Reference Kit
product PHILIPS DIVAUSB	0x1801	DIVA USB mp3 player
product PHILIPS RT2870	0x200f	RT2870

/* Philips Semiconductor products */
product PHILIPSSEMI HUB1122	0x1122	HUB

/* Megatec */
product MEGATEC UPS	0x5161	Phoenixtec protocol based UPS

/* P.I. Engineering products */
product PIENGINEERING PS2USB	0x020b	PS2 to Mac USB Adapter

/* Planex Communications products */
product PLANEX GW_US11H	0x14ea	GW-US11H WLAN
product PLANEX2 RTL8188CUS	0x1201	RTL8188CUS
product PLANEX2 GW_US11S	0x3220	GW-US11S WLAN
product PLANEX2 GW_US54GXS	0x5303	GW-US54GXS WLAN
product PLANEX2 GW_US300	0x5304	GW-US300
product PLANEX2 RTL8188CU_1	0xab2a	RTL8188CU
product PLANEX2 RTL8188CU_2	0xed17	RTL8188CU
product PLANEX2 RTL8188CU_3	0x4902	RTL8188CU
product PLANEX2 RTL8188CU_4	0xab2e	RTL8188CU
product PLANEX2 RTL8192CU	0xab2b	RTL8192CU
product PLANEX2 GWUS54HP	0xab01	GW-US54HP
product PLANEX2 GWUS300MINIS	0xab24	GW-US300MiniS
product PLANEX2 RT3070	0xab25	RT3070
product PLANEX2 MZKUE150N	0xab2f	MZK-UE150N
product PLANEX2 GW900D	0xab30	GW-900D
product PLANEX2 GWUS54MINI2	0xab50	GW-US54Mini2
product PLANEX2 GWUS54SG	0xc002	GW-US54SG
product PLANEX2 GWUS54GZL	0xc007	GW-US54GZL
product PLANEX2 GWUS54GD	0xed01	GW-US54GD
product PLANEX2 GWUSMM	0xed02	GW-USMM
product PLANEX2 RT2870	0xed06	RT2870
product PLANEX2 GWUSMICRON	0xed14	GW-USMicroN
product PLANEX2 GWUSVALUEEZ	0xed17	GW-USValue-EZ
product PLANEX3 GWUS54GZ	0xab10	GW-US54GZ
product PLANEX3 GU1000T	0xab11	GU-1000T
product PLANEX3 GWUS54MINI	0xab13	GW-US54Mini
product PLANEX2 GWUSNANO	0xab28	GW-USNano

/* Plextor Corp. */
product PLEXTOR 40_12_40U	0x0011	PlexWriter 40/12/40U

/* PLX products */
product PLX TESTBOARD	0x9060	test board
product PLX CA42	0xac70	CA-42

/* PNY products */
product PNY ATTACHE2	0x0010	USB 2.0 Flash Drive

/* PortGear products */
product PORTGEAR EA8	0x0008	Ethernet
product PORTGEAR EA9	0x0009	Ethernet

/* Portsmith products */
product PORTSMITH EEA	0x3003	Express Ethernet

/* Posiflex products */
product POSIFLEX PP7000	0x0300	FTDI compatible adapter

/* Primax products */
product PRIMAX G2X300	0x0300	G2-200 scanner
product PRIMAX G2E300	0x0301	G2E-300 scanner
product PRIMAX G2300	0x0302	G2-300 scanner
product PRIMAX G2E3002	0x0303	G2E-300 scanner
product PRIMAX 9600	0x0340	Colorado USB 9600 scanner
product PRIMAX 600U	0x0341	Colorado 600u scanner
product PRIMAX 6200	0x0345	Visioneer 6200 scanner
product PRIMAX 19200	0x0360	Colorado USB 19200 scanner
product PRIMAX 1200U	0x0361	Colorado 1200u scanner
product PRIMAX G600	0x0380	G2-600 scanner
product PRIMAX 636I	0x0381	ReadyScan 636i
product PRIMAX G2600	0x0382	G2-600 scanner
product PRIMAX G2E600	0x0383	G2E-600 scanner
product PRIMAX COMFORT	0x4d01	Comfort
product PRIMAX MOUSEINABOX	0x4d02	Mouse-in-a-Box
product PRIMAX PCGAUMS1	0x4d04	Sony PCGA-UMS1
product PRIMAX HP_RH304AA	0x4d17	HP RH304AA mouse

/* Prolific products */
product PROLIFIC PL2301	0x0000	PL2301 Host-Host interface
product PROLIFIC PL2302	0x0001	PL2302 Host-Host interface
product PROLIFIC MOTOROLA	0x0307	Motorola Cable
product PROLIFIC RSAQ2	0x04bb	PL2303 Serial (IODATA USB-RSAQ2)
product PROLIFIC ALLTRONIX_GPRS	0x0609	Alltronix ACM003U00 modem
product PROLIFIC ALDIGA_AL11U	0x0611	AlDiga AL-11U modem
product PROLIFIC MICROMAX_610U	0x0612	Micromax 610U
product PROLIFIC DCU11	0x1234	DCU-11 Phone Cable
product PROLIFIC UIC_MSR206	0x206a	UIC MSR206 Card Reader
product PROLIFIC PL2303	0x2303	PL2303 Serial (ATEN/IOGEAR UC232A)
product PROLIFIC PL2305	0x2305	Parallel printer
product PROLIFIC ATAPI4	0x2307	ATAPI-4 Controller
product PROLIFIC PL2501	0x2501	PL2501 Host-Host interface
product PROLIFIC PL2506	0x2506	PL2506 USB to IDE Bridge
product PROLIFIC PL27A1	0x27a1	PL27A1 USB 3.0 Host-Host interface
product PROLIFIC HCR331	0x331a	HCR331 Hybrid Card Reader
product PROLIFIC PHAROS	0xaaa0	Prolific Pharos
product PROLIFIC RSAQ3	0xaaa2	PL2303 Serial Adapter (IODATA USB-RSAQ3)
product PROLIFIC2 PL2303	0x2303	PL2303 Serial Adapter

/* Putercom products */
product PUTERCOM UPA100	0x047e	USB-1284 BRIDGE

/* Qcom products */
product QCOM RT2573	0x6196	RT2573
product QCOM RT2573_2	0x6229	RT2573
product QCOM RT2573_3	0x6238	RT2573
product QCOM RT2870	0x6259	RT2870

/* QI-hardware */
product QIHARDWARE JTAGSERIAL	0x0713	FTDI compatible adapter

/* Qisda products */
product QISDA H21_1	0x4512	3G modem
product QISDA H21_2	0x4523	3G modem
product QISDA H20_1	0x4515	3G modem
product QISDA H20_2	0x4519	3G modem

/* Qualcomm products */
product QUALCOMM CDMA_MSM	0x6000	CDMA Technologies MSM phone
product QUALCOMM NTT_L02C_MODEM	0x618f	NTT DOCOMO L-02C
product QUALCOMM NTT_L02C_STORAGE	0x61dd	NTT DOCOMO L-02C
product QUALCOMM2 MF330	0x6613	MF330
product QUALCOMM2 RWT_FCT	0x3100	RWT FCT-CDMA 2000 1xRTT modem
product QUALCOMM2 CDMA_MSM	0x3196	CDMA Technologies MSM modem
product QUALCOMM2 AC8700	0x6000	AC8700
product QUALCOMM2 VW110L	0x1000	Vertex Wireless 110L modem
product QUALCOMM2 SIM5218	0x9000	SIM5218
product QUALCOMM2 WM620	0x9002	Neoway WM620
product QUALCOMM2 GOBI2000_QDL	0x9204	Qualcomm Gobi 2000 QDL
product QUALCOMM2 GOBI2000	0x9205	Qualcomm Gobi 2000 modem
product QUALCOMM2 VT80N	0x6500	Venus VT80N
product QUALCOMM3 VFAST2	0x9909	Venus Fast2 modem
product QUALCOMMINC CDMA_MSM	0x0001	CDMA Technologies MSM modem
product QUALCOMMINC E0002	0x0002	3G modem
product QUALCOMMINC E0003	0x0003	3G modem
product QUALCOMMINC E0004	0x0004	3G modem
product QUALCOMMINC E0005	0x0005	3G modem
product QUALCOMMINC E0006	0x0006	3G modem
product QUALCOMMINC E0007	0x0007	3G modem
product QUALCOMMINC E0008	0x0008	3G modem
product QUALCOMMINC E0009	0x0009	3G modem
product QUALCOMMINC E000A	0x000a	3G modem
product QUALCOMMINC E000B	0x000b	3G modem
product QUALCOMMINC E000C	0x000c	3G modem
product QUALCOMMINC E000D	0x000d	3G modem
product QUALCOMMINC E000E	0x000e	3G modem
product QUALCOMMINC E000F	0x000f	3G modem
product QUALCOMMINC E0010	0x0010	3G modem
product QUALCOMMINC E0011	0x0011	3G modem
product QUALCOMMINC E0012	0x0012	3G modem
product QUALCOMMINC E0013	0x0013	3G modem
product QUALCOMMINC E0014	0x0014	3G modem
product QUALCOMMINC MF628	0x0015	3G modem
product QUALCOMMINC MF633R	0x0016	ZTE WCDMA modem
product QUALCOMMINC E0017	0x0017	3G modem
product QUALCOMMINC E0018	0x0018	3G modem
product QUALCOMMINC E0019	0x0019	3G modem
product QUALCOMMINC E0020	0x0020	3G modem
product QUALCOMMINC E0021	0x0021	3G modem
product QUALCOMMINC E0022	0x0022	3G modem
product QUALCOMMINC E0023	0x0023	3G modem
product QUALCOMMINC E0024	0x0024	3G modem
product QUALCOMMINC E0025	0x0025	3G modem
product QUALCOMMINC E0026	0x0026	3G modem
product QUALCOMMINC E0027	0x0027	3G modem
product QUALCOMMINC E0028	0x0028	3G modem
product QUALCOMMINC E0029	0x0029	3G modem
product QUALCOMMINC E0030	0x0030	3G modem
product QUALCOMMINC MF626	0x0031	3G modem
product QUALCOMMINC E0032	0x0032	3G modem
product QUALCOMMINC E0033	0x0033	3G modem
product QUALCOMMINC E0037	0x0037	3G modem
product QUALCOMMINC E0039	0x0039	3G modem
product QUALCOMMINC E0042	0x0042	3G modem
product QUALCOMMINC E0043	0x0043	3G modem
product QUALCOMMINC E0048	0x0048	3G modem
product QUALCOMMINC E0049	0x0049	3G modem
product QUALCOMMINC E0051	0x0051	3G modem
product QUALCOMMINC E0052	0x0052	3G modem
product QUALCOMMINC ZTE_STOR2	0x0053	USB ZTE Storage
product QUALCOMMINC E0054	0x0054	3G modem
product QUALCOMMINC E0055	0x0055	3G modem
product QUALCOMMINC E0057	0x0057	3G modem
product QUALCOMMINC E0058	0x0058	3G modem
product QUALCOMMINC E0059	0x0059	3G modem
product QUALCOMMINC E0060	0x0060	3G modem
product QUALCOMMINC E0061	0x0061	3G modem
product QUALCOMMINC E0062	0x0062	3G modem
product QUALCOMMINC E0063	0x0063	3G modem
product QUALCOMMINC E0064	0x0064	3G modem
product QUALCOMMINC E0066	0x0066	3G modem
product QUALCOMMINC E0069	0x0069	3G modem
product QUALCOMMINC E0070	0x0070	3G modem
product QUALCOMMINC E0073	0x0073	3G modem
product QUALCOMMINC E0076	0x0076	3G modem
product QUALCOMMINC E0078	0x0078	3G modem
product QUALCOMMINC E0082	0x0082	3G modem
product QUALCOMMINC E0086	0x0086	3G modem
product QUALCOMMINC MF112	0x0103	3G modem
product QUALCOMMINC SURFSTICK	0x0117	1&1 Surf Stick
product QUALCOMMINC K3772_Z_INIT	0x1179	K3772-Z Initial
product QUALCOMMINC K3772_Z	0x1181	K3772-Z
product QUALCOMMINC ZTE_MF730M	0x1420	3G modem
product QUALCOMMINC MF195E_INIT	0x1514	MF195E initial
product QUALCOMMINC MF195E	0x1516	MF195E
product QUALCOMMINC ZTE_STOR	0x2000	USB ZTE Storage
product QUALCOMMINC E2002	0x2002	3G modem
product QUALCOMMINC E2003	0x2003	3G modem
product QUALCOMMINC AC682	0xffdd	CDMA 1xEVDO USB modem
product QUALCOMMINC AC682_INIT	0xffde	CDMA 1xEVDO USB modem (initial)
product QUALCOMMINC AC8710	0xfff1	3G modem
product QUALCOMMINC AC2726	0xfff5	3G modem
product QUALCOMMINC AC8700	0xfffe	CDMA 1xEVDO USB modem

/* Quanta products */
product QUANTA RW6815_1	0x00ce	HP iPAQ rw6815
product QUANTA RT3070	0x0304	RT3070
product QUANTA Q101_STOR	0x1000	USB Q101 Storage
product QUANTA Q101	0xea02	HSDPA modem
product QUANTA Q111	0xea03	HSDPA modem
product QUANTA GLX	0xea04	HSDPA modem
product QUANTA GKE	0xea05	HSDPA modem
product QUANTA GLE	0xea06	HSDPA modem
product QUANTA RW6815R	0xf003	HP iPAQ rw6815 RNDIS

/* Quectel products */
product QUECTEL EC25	0x0125	LTE modem

/* Quickshot products */
product QUICKSHOT STRIKEPAD	0x6238	USB StrikePad

/* Qtronix products */
product QTRONIX 980N	0x2011	Scorpion-980N keyboard

/* Radio Shack */
product RADIOSHACK USBCABLE	0x4026	USB to Serial Cable

/* Rainbow Technologies products */
product RAINBOW IKEY2000	0x1200	i-Key 2000

/* Ralink Technology products */
product RALINK RT2570	0x1706	RT2500USB Wireless Adapter
product RALINK RT2070	0x2070	RT2070
product RALINK RT2570_2	0x2570	RT2500USB Wireless Adapter
product RALINK RT2573	0x2573	RT2501USB Wireless Adapter
product RALINK RT2671	0x2671	RT2601USB Wireless Adapter
product RALINK RT2770	0x2770	RT2770
product RALINK RT2870	0x2870	RT2870
product RALINK RT_STOR	0x2878	USB Storage
product RALINK RT3070	0x3070	RT3070
product RALINK RT3071	0x3071	RT3071
product RALINK RT3072	0x3072	RT3072
product RALINK RT3370	0x3370	RT3370
product RALINK RT3572	0x3572	RT3572
product RALINK RT3573	0x3573	RT3573
product RALINK RT5370	0x5370	RT5370
product RALINK RT5572	0x5572	RT5572
product RALINK RT8070	0x8070	RT8070
product RALINK RT2570_3	0x9020	RT2500USB Wireless Adapter
product RALINK RT2573_2	0x9021	RT2501USB Wireless Adapter

/* RATOC Systems products */
product RATOC REXUSB60	0xb000	USB serial adapter REX-USB60
product RATOC REXUSB60F	0xb020	USB serial adapter REX-USB60F

/* Realtek products */
/* Green House and CompUSA OEM this part */
product REALTEK DUMMY	0x0000	Dummy product
product REALTEK USB20CRW	0x0158	USB20CRW Card Reader
product REALTEK RTL8188ETV	0x0179	RTL8188ETV
product REALTEK RTL8188CTV	0x018a	RTL8188CTV
product REALTEK RTL8188RU_2	0x317f	RTL8188RU
product REALTEK USBKR100	0x8150	USBKR100 USB Ethernet
product REALTEK RTL8152	0x8152	RTL8152 USB Ethernet
product REALTEK RTL8153	0x8153	RTL8153 USB Ethernet
product REALTEK RTL8188CE_0	0x8170	RTL8188CE
product REALTEK RTL8171	0x8171	RTL8171
product REALTEK RTL8172	0x8172	RTL8172
product REALTEK RTL8173	0x8173	RTL8173
product REALTEK RTL8174	0x8174	RTL8174
product REALTEK RTL8188CU_0	0x8176	RTL8188CU
product REALTEK RTL8191CU	0x8177	RTL8191CU
product REALTEK RTL8192CU	0x8178	RTL8192CU
product REALTEK RTL8188EU	0x8179	RTL8188EU
product REALTEK RTL8188CU_1	0x817a	RTL8188CU
product REALTEK RTL8188CU_2	0x817b	RTL8188CU
product REALTEK RTL8192CE	0x817c	RTL8192CE
product REALTEK RTL8188RU_1	0x817d	RTL8188RU
product REALTEK RTL8188CE_1	0x817e	RTL8188CE
product REALTEK RTL8188RU_3	0x817f	RTL8188RU
product REALTEK RTL8187	0x8187	RTL8187 Wireless Adapter
product REALTEK RTL8187B_0	0x8189	RTL8187B Wireless Adapter
product REALTEK RTL8188CUS	0x818a	RTL8188CUS
product REALTEK RTL8192EU	0x818b	RTL8192EU
product REALTEK RTL8188CU_3	0x8191	RTL8188CU
product REALTEK RTL8196EU	0x8196	RTL8196EU
product REALTEK RTL8187B_1	0x8197	RTL8187B Wireless Adapter
product REALTEK RTL8187B_2	0x8198	RTL8187B Wireless Adapter
product REALTEK RTL8712	0x8712	RTL8712
product REALTEK RTL8713	0x8713	RTL8713
product REALTEK RTL8188CU_COMBO	0x8754	RTL8188CU
product REALTEK RTL8723BU	0xb720	RTL8723BU
product REALTEK RTL8192SU	0xc512	RTL8192SU
product REALTEK RTL8812AU	0x8812	RTL8812AU Wireless Adapter
RTL8812AU 0x8812 RTL8812AU Wireless Adapter /* RedOctane products */ product REDOCTANE DUMMY 0x0000 Dummy product product REDOCTANE GHMIDI 0x474b GH MIDI INTERFACE /* Renesas products */ product RENESAS RX610 0x0053 RX610 RX-Stick /* Ricoh products */ product RICOH VGPVCC2 0x1830 VGP-VCC2 Camera product RICOH VGPVCC3 0x1832 VGP-VCC3 Camera product RICOH VGPVCC2_2 0x1833 VGP-VCC2 Camera product RICOH VGPVCC2_3 0x1834 VGP-VCC2 Camera product RICOH VGPVCC7 0x183a VGP-VCC7 Camera product RICOH VGPVCC8 0x183b VGP-VCC8 Camera /* Reiner-SCT products */ product REINERSCT CYBERJACK_ECOM 0x0100 e-com cyberJack /* Roland products */ product ROLAND UA100 0x0000 UA-100 Audio I/F product ROLAND UM4 0x0002 UM-4 MIDI I/F product ROLAND SC8850 0x0003 SC-8850 MIDI Synth product ROLAND U8 0x0004 U-8 Audio I/F product ROLAND UM2 0x0005 UM-2 MIDI I/F product ROLAND SC8820 0x0007 SC-8820 MIDI Synth product ROLAND PC300 0x0008 PC-300 MIDI Keyboard product ROLAND UM1 0x0009 UM-1 MIDI I/F product ROLAND SK500 0x000b SK-500 MIDI Keyboard product ROLAND SCD70 0x000c SC-D70 MIDI Synth product ROLAND UM880N 0x0014 EDIROL UM-880 MIDI I/F (native) product ROLAND UM880G 0x0015 EDIROL UM-880 MIDI I/F (generic) product ROLAND SD90 0x0016 SD-90 MIDI Synth product ROLAND UM550 0x0023 UM-550 MIDI I/F product ROLAND SD20 0x0027 SD-20 MIDI Synth product ROLAND SD80 0x0029 SD-80 MIDI Synth product ROLAND UA700 0x002b UA-700 Audio I/F product ROLAND PCR300 0x0033 EDIROL PCR-300 MIDI I/F /* Rockfire products */ product ROCKFIRE GAMEPAD 0x2033 gamepad 203USB /* RATOC Systems products */ product RATOC REXUSB60 0xb000 REX-USB60 product RATOC REXUSB60F 0xb020 REX-USB60F /* RT system products */ product RTSYSTEMS CT29B 0x9e54 FTDI compatible adapter product RTSYSTEMS SERIAL_VX7 0x9e52 FTDI compatible adapter /* Sagem products */ product SAGEM USBSERIAL 0x0027 USB-Serial Controller product SAGEM XG760A 0x004a XG-760A product SAGEM XG76NA 0x0062 XG-76NA /* Samsung products */ product SAMSUNG WIS09ABGN 0x2018 WIS09ABGN Wireless LAN adapter product SAMSUNG ML6060 0x3008 ML-6060 laser printer product SAMSUNG YP_U2 0x5050 YP-U2 MP3 Player product SAMSUNG YP_U4 0x5092 YP-U4 MP3 Player product SAMSUNG I500 0x6601 I500 Palm USB Phone product SAMSUNG I330 0x8001 I330 phone cradle product SAMSUNG2 RT2870_1 0x2018 RT2870 /* Samsung Techwin products */ product SAMSUNG_TECHWIN DIGIMAX_410 0x000a Digimax 410 /* SanDisk products */ product SANDISK SDDR05A 0x0001 ImageMate SDDR-05a product SANDISK SDDR31 0x0002 ImageMate SDDR-31 product SANDISK SDDR05 0x0005 ImageMate SDDR-05 product SANDISK SDDR12 0x0100 ImageMate SDDR-12 product SANDISK SDDR09 0x0200 ImageMate SDDR-09 product SANDISK SDDR75 0x0810 ImageMate SDDR-75 product SANDISK SDCZ2_128 0x7100 Cruzer Mini 128MB product SANDISK SDCZ2_256 0x7104 Cruzer Mini 256MB product SANDISK SDCZ4_128 0x7112 Cruzer Micro 128MB product SANDISK SDCZ4_256 0x7113 Cruzer Micro 256MB product SANDISK IMAGEMATE_SDDR289 0xb6ba ImageMate SDDR-289 /* Sanwa Electric Instrument Co., Ltd. 
products */ product SANWA KB_USB2 0x0701 KB-USB2 multimeter cable /* Sanyo Electric products */ product SANYO SCP4900 0x0701 Sanyo SCP-4900 USB Phone /* ScanLogic products */ product SCANLOGIC SL11R 0x0002 SL11R IDE Adapter product SCANLOGIC 336CX 0x0300 Phantom 336CX - C3 scanner /* Schweitzer Engineering Laboratories products */ product SEL C662 0x0001 C662 Cable /* Sealevel products */ product SEALEVEL 2101 0x2101 FTDI compatible adapter product SEALEVEL 2102 0x2102 FTDI compatible adapter product SEALEVEL 2103 0x2103 FTDI compatible adapter product SEALEVEL 2104 0x2104 FTDI compatible adapter product SEALEVEL 2106 0x9020 FTDI compatible adapter product SEALEVEL 2201_1 0x2211 FTDI compatible adapter product SEALEVEL 2201_2 0x2221 FTDI compatible adapter product SEALEVEL 2202_1 0x2212 FTDI compatible adapter product SEALEVEL 2202_2 0x2222 FTDI compatible adapter product SEALEVEL 2203_1 0x2213 FTDI compatible adapter product SEALEVEL 2203_2 0x2223 FTDI compatible adapter product SEALEVEL 2401_1 0x2411 FTDI compatible adapter product SEALEVEL 2401_2 0x2421 FTDI compatible adapter product SEALEVEL 2401_3 0x2431 FTDI compatible adapter product SEALEVEL 2401_4 0x2441 FTDI compatible adapter product SEALEVEL 2402_1 0x2412 FTDI compatible adapter product SEALEVEL 2402_2 0x2422 FTDI compatible adapter product SEALEVEL 2402_3 0x2432 FTDI compatible adapter product SEALEVEL 2402_4 0x2442 FTDI compatible adapter product SEALEVEL 2403_1 0x2413 FTDI compatible adapter product SEALEVEL 2403_2 0x2423 FTDI compatible adapter product SEALEVEL 2403_3 0x2433 FTDI compatible adapter product SEALEVEL 2403_4 0x2443 FTDI compatible adapter product SEALEVEL 2801_1 0x2811 FTDI compatible adapter product SEALEVEL 2801_2 0x2821 FTDI compatible adapter product SEALEVEL 2801_3 0x2831 FTDI compatible adapter product SEALEVEL 2801_4 0x2841 FTDI compatible adapter product SEALEVEL 2801_5 0x2851 FTDI compatible adapter product SEALEVEL 2801_6 0x2861 FTDI compatible adapter product SEALEVEL 2801_7 0x2871 FTDI compatible adapter product SEALEVEL 2801_8 0x2881 FTDI compatible adapter product SEALEVEL 2802_1 0x2812 FTDI compatible adapter product SEALEVEL 2802_2 0x2822 FTDI compatible adapter product SEALEVEL 2802_3 0x2832 FTDI compatible adapter product SEALEVEL 2802_4 0x2842 FTDI compatible adapter product SEALEVEL 2802_5 0x2852 FTDI compatible adapter product SEALEVEL 2802_6 0x2862 FTDI compatible adapter product SEALEVEL 2802_7 0x2872 FTDI compatible adapter product SEALEVEL 2802_8 0x2882 FTDI compatible adapter product SEALEVEL 2803_1 0x2813 FTDI compatible adapter product SEALEVEL 2803_2 0x2823 FTDI compatible adapter product SEALEVEL 2803_3 0x2833 FTDI compatible adapter product SEALEVEL 2803_4 0x2843 FTDI compatible adapter product SEALEVEL 2803_5 0x2853 FTDI compatible adapter product SEALEVEL 2803_6 0x2863 FTDI compatible adapter product SEALEVEL 2803_7 0x2873 FTDI compatible adapter product SEALEVEL 2803_8 0x2883 FTDI compatible adapter /* Senao products */ product SENAO EUB1200AC 0x0100 EnGenius EUB1200AC product SENAO RT2870_3 0x0605 RT2870 product SENAO RT2870_4 0x0615 RT2870 product SENAO NUB8301 0x2000 NUB-8301 product SENAO RT2870_1 0x9701 RT2870 product SENAO RT2870_2 0x9702 RT2870 product SENAO RT3070 0x9703 RT3070 product SENAO RT3071 0x9705 RT3071 product SENAO RT3072_1 0x9706 RT3072 product SENAO RT3072_2 0x9707 RT3072 product SENAO RT3072_3 0x9708 RT3072 product SENAO RT3072_4 0x9709 RT3072 product SENAO RT3072_5 0x9801 RT3072 product SENAO RTL8192SU_1 0x9603 RTL8192SU product SENAO RTL8192SU_2 0x9605 
RTL8192SU /* ShanTou products */ product SHANTOU ST268 0x0268 ST268 product SHANTOU DM9601 0x9601 DM 9601 product SHANTOU ADM8515 0x8515 ADM8515 /* Shark products */ product SHARK PA 0x0400 Pocket Adapter /* Sharp products */ product SHARP SL5500 0x8004 Zaurus SL-5500 PDA product SHARP SLA300 0x8005 Zaurus SL-A300 PDA product SHARP SL5600 0x8006 Zaurus SL-5600 PDA product SHARP SLC700 0x8007 Zaurus SL-C700 PDA product SHARP SLC750 0x9031 Zaurus SL-C750 PDA product SHARP WZERO3ES 0x9123 W-ZERO3 ES Smartphone product SHARP WZERO3ADES 0x91ac Advanced W-ZERO3 ES Smartphone product SHARP WILLCOM03 0x9242 WILLCOM03 /* Shuttle Technology products */ product SHUTTLE EUSB 0x0001 E-USB Bridge product SHUTTLE EUSCSI 0x0002 eUSCSI Bridge product SHUTTLE SDDR09 0x0003 ImageMate SDDR09 product SHUTTLE EUSBCFSM 0x0005 eUSB SmartMedia / CompactFlash Adapter product SHUTTLE ZIOMMC 0x0006 eUSB MultiMediaCard Adapter product SHUTTLE HIFD 0x0007 Sony Hifd product SHUTTLE EUSBATAPI 0x0009 eUSB ATA/ATAPI Adapter product SHUTTLE CF 0x000a eUSB CompactFlash Adapter product SHUTTLE EUSCSI_B 0x000b eUSCSI Bridge product SHUTTLE EUSCSI_C 0x000c eUSCSI Bridge product SHUTTLE CDRW 0x0101 CD-RW Device product SHUTTLE EUSBORCA 0x0325 eUSB ORCA Quad Reader /* Siemens products */ product SIEMENS SPEEDSTREAM 0x1001 SpeedStream product SIEMENS SPEEDSTREAM22 0x1022 SpeedStream 1022 product SIEMENS2 WLL013 0x001b WLL013 product SIEMENS2 ES75 0x0034 GSM module MC35 product SIEMENS2 WL54G 0x3c06 54g USB Network Adapter product SIEMENS3 SX1 0x0001 SX1 product SIEMENS3 X65 0x0003 X65 product SIEMENS3 X75 0x0004 X75 product SIEMENS3 EF81 0x0005 EF81 /* Sierra Wireless products */ product SIERRA EM5625 0x0017 EM5625 product SIERRA MC5720_2 0x0018 MC5720 product SIERRA MC5725 0x0020 MC5725 product SIERRA AIRCARD580 0x0112 Sierra Wireless AirCard 580 product SIERRA AIRCARD595 0x0019 Sierra Wireless AirCard 595 product SIERRA AC595U 0x0120 Sierra Wireless AirCard 595U product SIERRA AC597E 0x0021 Sierra Wireless AirCard 597E product SIERRA EM5725 0x0022 EM5725 product SIERRA C597 0x0023 Sierra Wireless Compass 597 product SIERRA MC5727 0x0024 MC5727 product SIERRA T598 0x0025 T598 product SIERRA T11 0x0026 T11 product SIERRA AC402 0x0027 AC402 product SIERRA MC5728 0x0028 MC5728 product SIERRA E0029 0x0029 E0029 product SIERRA AIRCARD580 0x0112 Sierra Wireless AirCard 580 product SIERRA AC595U 0x0120 Sierra Wireless AirCard 595U product SIERRA MC5720 0x0218 MC5720 Wireless Modem product SIERRA MINI5725 0x0220 Sierra Wireless miniPCI 5275 product SIERRA MC5727_2 0x0224 MC5727 product SIERRA MC8755_2 0x6802 MC8755 product SIERRA MC8765 0x6803 MC8765 product SIERRA MC8755 0x6804 MC8755 product SIERRA MC8765_2 0x6805 MC8765 product SIERRA MC8755_4 0x6808 MC8755 product SIERRA MC8765_3 0x6809 MC8765 product SIERRA AC875U 0x6812 AC875U HSDPA USB Modem product SIERRA MC8755_3 0x6813 MC8755 HSDPA product SIERRA MC8775_2 0x6815 MC8775 product SIERRA MC8775 0x6816 MC8775 product SIERRA AC875 0x6820 Sierra Wireless AirCard 875 product SIERRA AC875U_2 0x6821 AC875U product SIERRA AC875E 0x6822 AC875E product SIERRA MC8780 0x6832 MC8780 product SIERRA MC8781 0x6833 MC8781 product SIERRA MC8780_2 0x6834 MC8780 product SIERRA MC8781_2 0x6835 MC8781 product SIERRA MC8780_3 0x6838 MC8780 product SIERRA MC8781_3 0x6839 MC8781 product SIERRA MC8785 0x683A MC8785 product SIERRA MC8785_2 0x683B MC8785 product SIERRA MC8790 0x683C MC8790 product SIERRA MC8791 0x683D MC8791 product SIERRA MC8792 0x683E MC8792 product SIERRA AC880 0x6850 Sierra Wireless 
AirCard 880 product SIERRA AC881 0x6851 Sierra Wireless AirCard 881 product SIERRA AC880E 0x6852 Sierra Wireless AirCard 880E product SIERRA AC881E 0x6853 Sierra Wireless AirCard 881E product SIERRA AC880U 0x6855 Sierra Wireless AirCard 880U product SIERRA AC881U 0x6856 Sierra Wireless AirCard 881U product SIERRA AC885E 0x6859 AC885E product SIERRA AC885E_2 0x685A AC885E product SIERRA AC885U 0x6880 Sierra Wireless AirCard 885U product SIERRA C888 0x6890 C888 product SIERRA C22 0x6891 C22 product SIERRA E6892 0x6892 E6892 product SIERRA E6893 0x6893 E6893 product SIERRA MC8700 0x68A3 MC8700 product SIERRA MC7354 0x68C0 MC7354 product SIERRA MC7355 0x9041 MC7355 product SIERRA MC7430 0x9071 Sierra Wireless MC7430 Qualcomm Snapdragon X7 LTE-A product SIERRA AC313U 0x68aa Sierra Wireless AirCard 313U product SIERRA TRUINSTALL 0x0fff Aircard Tru Installer /* Sigmatel products */ product SIGMATEL WBT_3052 0x4200 WBT-3052 IrDA/USB Bridge product SIGMATEL I_BEAD100 0x8008 i-Bead 100 MP3 Player /* SIIG products */ /* Also: Omnidirectional Control Technology products */ product SIIG DIGIFILMREADER 0x0004 DigiFilm-Combo Reader product SIIG WINTERREADER 0x0330 WINTERREADER Reader product SIIG2 DK201 0x0103 FTDI compatible adapter product SIIG2 USBTOETHER 0x0109 USB TO Ethernet product SIIG2 US2308 0x0421 Serial /* Silicom products */ product SILICOM U2E 0x0001 U2E product SILICOM GPE 0x0002 Psion Gold Port Ethernet /* SI Labs */ product SILABS VSTABI 0x0f91 VStabi Controller product SILABS ARKHAM_DS101_M 0x1101 Arkham DS101 Monitor product SILABS ARKHAM_DS101_A 0x1601 Arkham DS101 Adapter product SILABS BSM7DUSB 0x800a SPORTident BSM7-D USB product SILABS POLOLU 0x803b Pololu Serial product SILABS CYGNAL_DEBUG 0x8044 Cygnal Debug Adapter product SILABS SB_PARAMOUNT_ME 0x8043 Software Bisque Paramount ME product SILABS SAEL 0x8053 SA-EL USB product SILABS GSM2228 0x8054 Enfora GSM2228 USB product SILABS ARGUSISP 0x8066 Argussoft ISP product SILABS IMS_USB_RS422 0x806f IMS USB-RS422 product SILABS CRUMB128 0x807a Crumb128 board product SILABS OPTRIS_MSPRO 0x80c4 Optris MSpro LT Thermometer product SILABS DEGREE 0x80ca Degree Controls Inc product SILABS TRACIENT 0x80dd Tracient RFID product SILABS TRAQMATE 0x80ed Track Systems Traqmate product SILABS SUUNTO 0x80f6 Suunto Sports Instrument product SILABS ARYGON_MIFARE 0x8115 Arygon Mifare RFID reader product SILABS BURNSIDE 0x813d Burnside Telecon Deskmobile product SILABS TAMSMASTER 0x813f Tams Master Easy Control product SILABS WMRBATT 0x814a WMR RIGblaster Plug&Play product SILABS WMRRIGBLASTER 0x814a WMR RIGblaster Plug&Play product SILABS WMRRIGTALK 0x814b WMR RIGtalk RT1 product SILABS B_G_H3000 0x8156 B&G H3000 Data Cable product SILABS HELICOM 0x815e Helicomm IP-Link 1220-DVM product SILABS HAMLINKUSB 0x815f Timewave HamLinkUSB product SILABS AVIT_USB_TTL 0x818b AVIT Research USB-TTL product SILABS MJS_TOSLINK 0x819f MJS USB-TOSLINK product SILABS WAVIT 0x81a6 ThinkOptics WavIt product SILABS MULTIPLEX_RC 0x81a9 Multiplex RC adapter product SILABS MSD_DASHHAWK 0x81ac MSD DashHawk product SILABS INSYS_MODEM 0x81ad INSYS Modem product SILABS LIPOWSKY_JTAG 0x81c8 Lipowsky Baby-JTAG product SILABS LIPOWSKY_LIN 0x81e2 Lipowsky Baby-LIN product SILABS AEROCOMM 0x81e7 Aerocomm Radio product SILABS ZEPHYR_BIO 0x81e8 Zephyr Bioharness product SILABS EMS_C1007 0x81f2 EMS C1007 HF RFID controller product SILABS LIPOWSKY_HARP 0x8218 Lipowsky HARP-1 product SILABS C2_EDGE_MODEM 0x822b Commander 2 EDGE(GSM) Modem product SILABS CYGNAL_GPS 0x826b Cygnal 
Fasttrax GPS product SILABS TELEGESIS_ETRX2 0x8293 Telegesis ETRX2USB product SILABS PROCYON_AVS 0x82f9 Procyon AVS product SILABS MC35PU 0x8341 MC35pu product SILABS CYGNAL 0x8382 Cygnal product SILABS AMBER_AMB2560 0x83a8 Amber Wireless AMB2560 product SILABS DEKTEK_DTAPLUS 0x83d8 DekTec DTA Plus VHF/UHF Booster product SILABS KYOCERA_GPS 0x8411 Kyocera GPS product SILABS IRZ_SG10 0x8418 IRZ SG-10 GSM/GPRS Modem product SILABS BEI_VCP 0x846e BEI USB Sensor (VCP) product SILABS BALLUFF_RFID 0x8477 Balluff RFID reader product SILABS AC_SERV_IBUS 0x85ea AC-Services IBUS Interface product SILABS AC_SERV_CIS 0x85eb AC-Services CIS-IBUS product SILABS V_PREON32 0x85f8 Virtenio Preon32 product SILABS AC_SERV_CAN 0x8664 AC-Services CAN Interface product SILABS AC_SERV_OBD 0x8665 AC-Services OBD Interface product SILABS MMB_ZIGBEE 0x88a4 MMB Networks ZigBee product SILABS INGENI_ZIGBEE 0x88a5 Planet Innovation Ingeni ZigBee product SILABS HUBZ 0x8a2a HubZ dual ZigBee and Z-Wave product SILABS CP2102 0xea60 SILABS USB UART product SILABS CP210X_2 0xea61 CP210x Serial product SILABS CP210X_3 0xea70 CP210x Serial product SILABS CP210X_4 0xea80 CP210x Serial product SILABS INFINITY_MIC 0xea71 Infinity GPS-MIC-1 Radio Monophone product SILABS USBSCOPE50 0xf001 USBscope50 product SILABS USBWAVE12 0xf002 USBwave12 product SILABS USBPULSE100 0xf003 USBpulse100 product SILABS USBCOUNT50 0xf004 USBcount50 product SILABS2 DCU11CLONE 0xaa26 DCU-11 clone product SILABS3 GPRS_MODEM 0xea61 GPRS Modem product SILABS4 100EU_MODEM 0xea61 GPRS Modem 100EU /* Silicon Portals Inc. */ product SILICONPORTALS YAPPH_NF 0x0200 YAP Phone (no firmware) product SILICONPORTALS YAPPHONE 0x0201 YAP Phone /* Sirius Technologies products */ product SIRIUS ROADSTER 0x0001 NetComm Roadster II 56 USB /* Sitecom products */ product SITECOM LN029 0x182d USB 2.0 Ethernet product SITECOM SERIAL 0x2068 USB to serial cable (v2) product SITECOM2 WL022 0x182d WL-022 /* Sitecom Europe products */ product SITECOMEU RT2870_1 0x0017 RT2870 product SITECOMEU WL168V1 0x000d WL-168 v1 product SITECOMEU LN030 0x0021 MCS7830 product SITECOMEU WL168V4 0x0028 WL-168 v4 product SITECOMEU RT2870_2 0x002b RT2870 product SITECOMEU RT2870_3 0x002c RT2870 product SITECOMEU RT2870_4 0x002d RT2870 product SITECOMEU RT2770 0x0039 RT2770 product SITECOMEU RT3070_2 0x003b RT3070 product SITECOMEU RT3070_3 0x003c RT3070 product SITECOMEU RT3070_4 0x003d RT3070 product SITECOMEU RT3070 0x003e RT3070 product SITECOMEU WL608 0x003f WL-608 product SITECOMEU RT3071 0x0040 RT3071 product SITECOMEU RT3072_1 0x0041 RT3072 product SITECOMEU RT3072_2 0x0042 RT3072 product SITECOMEU WL353 0x0045 WL-353 product SITECOMEU RT3072_3 0x0047 RT3072 product SITECOMEU RT3072_4 0x0048 RT3072 product SITECOMEU RT3072_5 0x004a RT3072 product SITECOMEU WL349V1 0x004b WL-349 v1 product SITECOMEU RT3072_6 0x004d RT3072 product SITECOMEU RTL8188CU_1 0x0052 RTL8188CU product SITECOMEU RTL8188CU_2 0x005c RTL8188CU product SITECOMEU RTL8192CU 0x0061 RTL8192CU product SITECOMEU LN032 0x0072 LN-032 product SITECOMEU WLA7100 0x0074 WLA-7100 product SITECOMEU LN031 0x0056 LN-031 product SITECOMEU LN028 0x061c LN-028 product SITECOMEU WL113 0x9071 WL-113 product SITECOMEU ZD1211B 0x9075 ZD1211B product SITECOMEU WL172 0x90ac WL-172 product SITECOMEU WL113R2 0x9712 WL-113 rev 2 /* Skanhex Technology products */ product SKANHEX MD_7425 0x410a MD 7425 Camera product SKANHEX SX_520Z 0x5200 SX 520z Camera /* Smart Technologies products */ product SMART PL2303 0x2303 Serial adapter /* SmartBridges 
products */ product SMARTBRIDGES SMARTLINK 0x0001 SmartLink USB Ethernet product SMARTBRIDGES SMARTNIC 0x0003 smartNIC 2 PnP Ethernet /* SMC products */ product SMC 2102USB 0x0100 10Mbps Ethernet product SMC 2202USB 0x0200 10/100 Ethernet product SMC 2206USB 0x0201 EZ Connect USB Ethernet product SMC 2862WG 0xee13 EZ Connect Wireless Adapter product SMC2 2020HUB 0x2020 USB Hub product SMC2 2514HUB 0x2514 USB Hub product SMC3 2662WUSB 0xa002 2662W-AR Wireless product SMC2 LAN7800_ETH 0x7800 USB/Ethernet product SMC2 LAN7801_ETH 0x7801 USB/Ethernet product SMC2 LAN7850_ETH 0x7850 USB/Ethernet product SMC2 LAN9500_ETH 0x9500 USB/Ethernet product SMC2 LAN9505_ETH 0x9505 USB/Ethernet product SMC2 LAN9530_ETH 0x9530 USB/Ethernet product SMC2 LAN9730_ETH 0x9730 USB/Ethernet product SMC2 LAN9500_SAL10 0x9900 USB/Ethernet product SMC2 LAN9505_SAL10 0x9901 USB/Ethernet product SMC2 LAN9500A_SAL10 0x9902 USB/Ethernet product SMC2 LAN9505A_SAL10 0x9903 USB/Ethernet product SMC2 LAN9514_SAL10 0x9904 USB/Ethernet product SMC2 LAN9500A_HAL 0x9905 USB/Ethernet product SMC2 LAN9505A_HAL 0x9906 USB/Ethernet product SMC2 LAN9500_ETH_2 0x9907 USB/Ethernet product SMC2 LAN9500A_ETH_2 0x9908 USB/Ethernet product SMC2 LAN9514_ETH_2 0x9909 USB/Ethernet product SMC2 LAN9500A_ETH 0x9e00 USB/Ethernet product SMC2 LAN9505A_ETH 0x9e01 USB/Ethernet product SMC2 LAN89530_ETH 0x9e08 USB/Ethernet product SMC2 LAN9514_ETH 0xec00 USB/Ethernet /* SOHOware products */ product SOHOWARE NUB100 0x9100 10/100 USB Ethernet product SOHOWARE NUB110 0x9110 10/100 USB Ethernet /* SOLID YEAR products */ product SOLIDYEAR KEYBOARD 0x2101 Solid Year USB keyboard /* SONY products */ product SONY DSC 0x0010 DSC cameras product SONY MS_NW_MS7 0x0025 Memorystick NW-MS7 product SONY PORTABLE_HDD_V2 0x002b Portable USB Harddrive V2 product SONY MSACUS1 0x002d Memorystick MSAC-US1 product SONY HANDYCAM 0x002e Handycam product SONY MSC 0x0032 MSC memory stick slot product SONY CLIE_35 0x0038 Sony Clie v3.5 product SONY MS_PEG_N760C 0x0058 PEG N760c Memorystick product SONY CLIE_40 0x0066 Sony Clie v4.0 product SONY MS_MSC_U03 0x0069 Memorystick MSC-U03 product SONY CLIE_40_MS 0x006d Sony Clie v4.0 Memory Stick slot product SONY CLIE_S360 0x0095 Sony Clie s360 product SONY CLIE_41_MS 0x0099 Sony Clie v4.1 Memory Stick slot product SONY CLIE_41 0x009a Sony Clie v4.1 product SONY CLIE_NX60 0x00da Sony Clie nx60 product SONY CLIE_TH55 0x0144 Sony Clie th55 product SONY CLIE_TJ37 0x0169 Sony Clie tj37 product SONY RF_RECEIVER 0x01db Sony RF mouse/kbd Receiver VGP-WRC1 product SONY QN3 0x0437 Sony QN3 CMD-Jxx phone cable /* Sony Ericsson products */ product SONYERICSSON DCU10 0x0528 DCU-10 Phone Data Cable product SONYERICSSON DATAPILOT 0x2003 Datapilot Phone Cable /* SOURCENEXT products */ product SOURCENEXT KEIKAI8 0x039f KeikaiDenwa 8 product SOURCENEXT KEIKAI8_CHG 0x012e KeikaiDenwa 8 with charger /* SparkLAN products */ product SPARKLAN RT2573 0x0004 RT2573 product SPARKLAN RT2870_1 0x0006 RT2870 product SPARKLAN RT3070 0x0010 RT3070 /* Soundgraph products */ product SOUNDGRAPH IMON_VFD 0x0044 Antec Veris Elite VFD Panel, Knob, and Remote product SOUNDGRAPH SSTONE_LC16 0xffdc Silverstone LC16 VFD Panel, Knob, and Remote /* Speed Dragon Multimedia products */ product SPEEDDRAGON MS3303H 0x110b MS3303H Serial /* Sphairon Access Systems GmbH products */ product SPHAIRON UB801R 0x0110 UB801R /* Stelera Wireless products */ product STELERA ZEROCD 0x1000 Zerocd Installer product STELERA C105 0x1002 Stelera/Bandrish C105 USB product STELERA E1003 0x1003 
3G modem product STELERA E1004 0x1004 3G modem product STELERA E1005 0x1005 3G modem product STELERA E1006 0x1006 3G modem product STELERA E1007 0x1007 3G modem product STELERA E1008 0x1008 3G modem product STELERA E1009 0x1009 3G modem product STELERA E100A 0x100a 3G modem product STELERA E100B 0x100b 3G modem product STELERA E100C 0x100c 3G modem product STELERA E100D 0x100d 3G modem product STELERA E100E 0x100e 3G modem product STELERA E100F 0x100f 3G modem product STELERA E1010 0x1010 3G modem product STELERA E1011 0x1011 3G modem product STELERA E1012 0x1012 3G modem /* STMicroelectronics products */ product STMICRO BIOCPU 0x2016 Biometric Coprocessor product STMICRO COMMUNICATOR 0x7554 USB Communicator product STMICRO ST72682 0xfada USB 2.0 Flash drive controller /* STSN products */ product STSN STSN0001 0x0001 Internet Access Device /* SUN Corporation products */ product SUNTAC DS96L 0x0003 SUNTAC U-Cable type D2 product SUNTAC PS64P1 0x0005 SUNTAC U-Cable type P1 product SUNTAC VS10U 0x0009 SUNTAC Slipper U product SUNTAC IS96U 0x000a SUNTAC Ir-Trinity product SUNTAC AS64LX 0x000b SUNTAC U-Cable type A3 product SUNTAC AS144L4 0x0011 SUNTAC U-Cable type A4 /* Sun Microsystems products */ product SUN KEYBOARD_TYPE_6 0x0005 Type 6 USB keyboard product SUN KEYBOARD_TYPE_7 0x00a2 Type 7 USB keyboard /* XXX The above is a North American PC style keyboard possibly */ product SUN MOUSE 0x0100 Type 6 USB mouse product SUN KBD_HUB 0x100e Kbd Hub /* Sunplus Innovation Technology Inc. products */ product SUNPLUS USBMOUSE 0x0007 USB Optical Mouse /* Super Top products */ product SUPERTOP IDE 0x6600 USB-IDE product SUPERTOP FLASHDRIVE 0x121c extrememory Snippy /* Syntech products */ product SYNTECH CPT8001C 0x0001 CPT-8001C Barcode scanner product SYNTECH CYPHERLAB100 0x1000 CipherLab USB Barcode Scanner /* Teclast products */ product TECLAST TLC300 0x3203 USB Media Player /* Testo products */ product TESTO USB_INTERFACE 0x0001 FTDI compatible adapter /* TexTech products */ product TEXTECH DUMMY 0x0000 Dummy product product TEXTECH U2M_1 0x0101 Textech USB MIDI cable product TEXTECH U2M_2 0x1806 Textech USB MIDI cable /* The Mobility Lab products */ product TML USB_SERIAL 0x0064 FTDI compatible adapter /* Thurlby Thandar Instrument products */ product TTI QL355P 0x03e8 FTDI compatible adapter /* Supra products */ product DIAMOND2 SUPRAEXPRESS56K 0x07da Supra Express 56K modem product DIAMOND2 SUPRA2890 0x0b4a SupraMax 2890 56K Modem product DIAMOND2 RIO600USB 0x5001 Rio 600 USB product DIAMOND2 RIO800USB 0x5002 Rio 800 USB /* Surecom Technology products */ product SURECOM EP9001G2A 0x11f2 EP-9001-G rev 2A product SURECOM RT2570 0x11f3 RT2570 product SURECOM RT2573 0x31f3 RT2573 /* Sweex products */ product SWEEX ZD1211 0x1809 ZD1211 product SWEEX2 LW153 0x0153 LW153 product SWEEX2 LW154 0x0154 LW154 product SWEEX2 LW303 0x0302 LW303 product SWEEX2 LW313 0x0313 LW313 /* System TALKS, Inc. 
*/ product SYSTEMTALKS SGCX2UL 0x1920 SGC-X2UL /* Tapwave products */ product TAPWAVE ZODIAC 0x0100 Zodiac /* Taugagreining products */ product TAUGA CAMERAMATE 0x0005 CameraMate (DPCM_USB) /* TCTMobile products */ product TCTMOBILE X060S 0x0000 X060S 3G modem product TCTMOBILE X080S 0xf000 X080S 3G modem /* TDK products */ product TDK UPA9664 0x0115 USB-PDC Adapter UPA9664 product TDK UCA1464 0x0116 USB-cdmaOne Adapter UCA1464 product TDK UHA6400 0x0117 USB-PHS Adapter UHA6400 product TDK UPA6400 0x0118 USB-PHS Adapter UPA6400 product TDK BT_DONGLE 0x0309 Bluetooth USB dongle /* TEAC products */ product TEAC FD05PUB 0x0000 FD-05PUB floppy /* Tekram Technology products */ product TEKRAM QUICKWLAN 0x1630 QuickWLAN product TEKRAM ZD1211_1 0x5630 ZD1211 product TEKRAM ZD1211_2 0x6630 ZD1211 /* Telex Communications products */ product TELEX MIC1 0x0001 Enhanced USB Microphone /* Telit products */ product TELIT UC864E 0x1003 UC864E 3G modem product TELIT UC864G 0x1004 UC864G 3G modem /* Ten X Technology, Inc. */ product TENX UAUDIO0 0xf211 USB audio headset /* ThingM products */ product THINGM BLINK1 0x01ed USB notification light /* Texas Instruments products */ product TI UTUSB41 0x1446 UT-USB41 hub product TI TUSB2046 0x2046 TUSB2046 hub product TI USB3410 0x3410 TI USB 3410 Modem product TI USB5052 0x5052 TI USB 5250 Modem product TI FRI2 0x5053 TI Fish River Island II product TI USB5052_EEPROM 0x505a TI USB 5250 Modem w/Eeprom product TI USB5052_FW 0x505f TI USB 5250 Modem w/Firmware running product TI USB5152 0x5152 TI USB 5152 Modem product TI EZ430 0xf430 TI ez430 development tool /* Thrustmaster products */ product THRUST FUSION_PAD 0xa0a3 Fusion Digital Gamepad /* TLayTech products */ product TLAYTECH TEU800 0x1682 TEU800 3G modem /* Topre Corporation products */ product TOPRE HHKB 0x0100 HHKB Professional /* Toshiba Corporation products */ product TOSHIBA POCKETPC_E740 0x0706 PocketPC e740 product TOSHIBA RT3070 0x0a07 RT3070 product TOSHIBA G450 0x0d45 G450 modem product TOSHIBA HSDPA 0x1302 G450 modem product TOSHIBA TRANSMEMORY 0x6545 USB ThumbDrive /* TP-Link products */ product TPLINK T4U 0x0101 Archer T4U product TPLINK WN822NV4 0x0108 TL-WN822N v4 product TPLINK WN823NV2 0x0109 TL-WN823N v2 product TPLINK T4UV2 0x010d Archer T4U ver 2 product TPLINK T4UHV2 0x010e Archer T4UH ver 2 product TPLINK RTL8153 0x0601 RTL8153 USB 10/100/1000 LAN /* Trek Technology products */ product TREK THUMBDRIVE 0x1111 ThumbDrive product TREK MEMKEY 0x8888 IBM USB Memory Key product TREK THUMBDRIVE_8MB 0x9988 ThumbDrive_8MB /* TRENDnet products */ product TRENDNET RTL8192CU 0x624d RTL8192CU product TRENDNET TEW646UBH 0x646b TEW-646UBH product TRENDNET RTL8188CU 0x648b RTL8188CU product TRENDNET TEW805UB 0x805b TEW-805UB /* Tripp-Lite products */ product TRIPPLITE U209 0x2008 Serial /* Trumpion products */ product TRUMPION T33520 0x1001 T33520 USB Flash Card Controller product TRUMPION C3310 0x1100 Comotron C3310 MP3 player product TRUMPION MP3 0x1200 MP3 player /* TwinMOS */ product TWINMOS G240 0xa006 G240 product TWINMOS MDIV 0x1325 Memory Disk IV /* Ubiquam products */ product UBIQUAM UALL 0x3100 CDMA 1xRTT USB Modem (U-100/105/200/300/520) /* Ultima products */ product ULTIMA 1200UBPLUS 0x4002 1200 UB Plus scanner /* UMAX products */ product UMAX ASTRA1236U 0x0002 Astra 1236U Scanner product UMAX ASTRA1220U 0x0010 Astra 1220U Scanner product UMAX ASTRA2000U 0x0030 Astra 2000U Scanner product UMAX ASTRA2100U 0x0130 Astra 2100U Scanner product UMAX ASTRA2200U 0x0230 Astra 2200U Scanner product UMAX
ASTRA3400 0x0060 Astra 3400 Scanner /* U-MEDIA Communications products */ product UMEDIA TEW444UBEU 0x3006 TEW-444UB EU product UMEDIA TEW444UBEU_NF 0x3007 TEW-444UB EU (no firmware) product UMEDIA TEW429UB_A 0x300a TEW-429UB_A product UMEDIA TEW429UB 0x300b TEW-429UB product UMEDIA TEW429UBC1 0x300d TEW-429UB C1 product UMEDIA RT2870_1 0x300e RT2870 product UMEDIA ALL0298V2 0x3204 ALL0298 v2 product UMEDIA AR5523_2 0x3205 AR5523 product UMEDIA AR5523_2_NF 0x3206 AR5523 (no firmware) /* Universal Access products */ product UNIACCESS PANACHE 0x0101 Panache Surf USB ISDN Adapter /* Unknown products */ product UNKNOWN4 NF_RIC 0x0001 FTDI compatible adapter /* USI products */ product USI MC60 0x10c5 MC60 Serial /* U.S. Robotics products */ product USR USR5422 0x0118 USR5422 WLAN product USR USR5423 0x0121 USR5423 WLAN /* VIA Technologies products */ product VIA USB2IDEBRIDGE 0x6204 USB 2.0 IDE Bridge /* VIA Labs */ product VIALABS USB30SATABRIDGE 0x0700 USB 3.0 SATA Bridge /* Vaisala products */ product VAISALA CABLE 0x0200 USB Interface cable /* Vertex products */ product VERTEX VW110L 0x0100 Vertex VW110L modem /* VidzMedia products */ product VIDZMEDIA MONSTERTV 0x4fb1 MonsterTV P2H /* Vision products */ product VISION VC6452V002 0x0002 CPiA Camera /* Visioneer products */ product VISIONEER 7600 0x0211 OneTouch 7600 product VISIONEER 5300 0x0221 OneTouch 5300 product VISIONEER 3000 0x0224 Scanport 3000 product VISIONEER 6100 0x0231 OneTouch 6100 product VISIONEER 6200 0x0311 OneTouch 6200 product VISIONEER 8100 0x0321 OneTouch 8100 product VISIONEER 8600 0x0331 OneTouch 8600 /* Vivitar products */ product VIVITAR 35XX 0x0003 Vivicam 35Xx /* VTech products */ product VTECH RT2570 0x3012 RT2570 product VTECH ZD1211B 0x3014 ZD1211B /* Wacom products */ product WACOM CT0405U 0x0000 CT-0405-U Tablet product WACOM GRAPHIRE 0x0010 Graphire product WACOM GRAPHIRE3_4X5 0x0013 Graphire 3 4x5 product WACOM INTUOSA5 0x0021 Intuos A5 product WACOM GD0912U 0x0022 Intuos 9x12 Graphics Tablet /* WAGO Kontakttechnik GmbH products */ product WAGO SERVICECABLE 0x07a6 USB Service Cable 750-923 /* WaveSense products */ product WAVESENSE JAZZ 0xaaaa Jazz blood glucose meter /* WCH products */ product WCH CH341SER 0x5523 CH341/CH340 USB-Serial Bridge product WCH2 DUMMY 0x0000 Dummy product product WCH2 CH341SER_2 0x5523 CH341/CH340 USB-Serial Bridge product WCH2 CH341SER 0x7523 CH341/CH340 USB-Serial Bridge product WCH2 U2M 0x752d CH345 USB2.0-MIDI /* West Mountain Radio products */ product WESTMOUNTAIN RIGBLASTER_ADVANTAGE 0x0003 RIGblaster Advantage /* Western Digital products */ product WESTERN COMBO 0x0200 Firewire USB Combo product WESTERN EXTHDD 0x0400 External HDD product WESTERN HUB 0x0500 USB HUB product WESTERN MYBOOK 0x0901 MyBook External HDD product WESTERN MYPASSPORT_00 0x0704 MyPassport External HDD product WESTERN MYPASSPORT_11 0x0741 MyPassport External HDD product WESTERN MYPASSPORT_01 0x0746 MyPassport External HDD product WESTERN MYPASSPORT_02 0x0748 MyPassport External HDD product WESTERN MYPASSPORT_03 0x074A MyPassport External HDD product WESTERN MYPASSPORT_04 0x074C MyPassport External HDD product WESTERN MYPASSPORT_05 0x074E MyPassport External HDD product WESTERN MYPASSPORT_06 0x07A6 MyPassport External HDD product WESTERN MYPASSPORT_07 0x07A8 MyPassport External HDD product WESTERN MYPASSPORT_08 0x07AA MyPassport External HDD product WESTERN MYPASSPORT_09 0x07AC MyPassport External HDD product WESTERN MYPASSPORT_10 0x07AE MyPassport External HDD product WESTERN MYPASSPORTES_00 0x070A
MyPassport Essential External HDD product WESTERN MYPASSPORTES_01 0x071A MyPassport Essential External HDD product WESTERN MYPASSPORTES_02 0x0730 MyPassport Essential External HDD product WESTERN MYPASSPORTES_03 0x0732 MyPassport Essential External HDD product WESTERN MYPASSPORTES_04 0x0740 MyPassport Essential External HDD product WESTERN MYPASSPORTES_05 0x0742 MyPassport Essential External HDD product WESTERN MYPASSPORTES_06 0x0750 MyPassport Essential External HDD product WESTERN MYPASSPORTES_07 0x0752 MyPassport Essential External HDD product WESTERN MYPASSPORTES_08 0x07A0 MyPassport Essential External HDD product WESTERN MYPASSPORTES_09 0x07A2 MyPassport Essential External HDD /* WeTelecom products */ product WETELECOM WM_D200 0x6801 WM-D200 /* WIENER Plein & Baus GmbH products */ product WIENERPLEINBAUS PL512 0x0010 PL512 PSU product WIENERPLEINBAUS RCM 0x0011 RCM Remote Control product WIENERPLEINBAUS MPOD 0x0012 MPOD PSU product WIENERPLEINBAUS CML 0x0015 CML Data Logger /* Winbond Electronics */ product WINBOND UH104 0x5518 4-port USB Hub /* WinMaxGroup products */ product WINMAXGROUP FLASH64MC 0x6660 USB Flash Disk 64M-C /* Wistron NeWeb products */ product WISTRONNEWEB WNC0600 0x0326 WNC-0600USB product WISTRONNEWEB UR045G 0x0427 PrismGT USB 2.0 WLAN product WISTRONNEWEB UR055G 0x0711 UR055G product WISTRONNEWEB O8494 0x0804 ORiNOCO 802.11n product WISTRONNEWEB AR5523_1 0x0826 AR5523 product WISTRONNEWEB AR5523_1_NF 0x0827 AR5523 (no firmware) product WISTRONNEWEB AR5523_2 0x082a AR5523 product WISTRONNEWEB AR5523_2_NF 0x0829 AR5523 (no firmware) /* Xerox products */ product XEROX WCM15 0xffef WorkCenter M15 /* Xirlink products */ product XIRLINK PCCAM 0x8080 IBM PC Camera /* Xyratex products */ product XYRATEX PRISM_GT_1 0x2000 PrismGT USB 2.0 WLAN product XYRATEX PRISM_GT_2 0x2002 PrismGT USB 2.0 WLAN /* Yamaha products */ product YAMAHA UX256 0x1000 UX256 MIDI I/F product YAMAHA MU1000 0x1001 MU1000 MIDI Synth. product YAMAHA MU2000 0x1002 MU2000 MIDI Synth. product YAMAHA MU500 0x1003 MU500 MIDI Synth. product YAMAHA UW500 0x1004 UW500 USB Audio I/F product YAMAHA MOTIF6 0x1005 MOTIF6 MIDI Synth. Workstation product YAMAHA MOTIF7 0x1006 MOTIF7 MIDI Synth. Workstation product YAMAHA MOTIF8 0x1007 MOTIF8 MIDI Synth. Workstation product YAMAHA UX96 0x1008 UX96 MIDI I/F product YAMAHA UX16 0x1009 UX16 MIDI I/F product YAMAHA S08 0x100e S08 MIDI Keyboard product YAMAHA CLP150 0x100f CLP-150 digital piano product YAMAHA CLP170 0x1010 CLP-170 digital piano product YAMAHA RPU200 0x3104 RP-U200 product YAMAHA RTA54I 0x4000 NetVolante RTA54i Broadband&ISDN Router product YAMAHA RTW65B 0x4001 NetVolante RTW65b Broadband Wireless Router product YAMAHA RTW65I 0x4002 NetVolante RTW65i Broadband&ISDN Wireless Router product YAMAHA RTA55I 0x4004 NetVolante RTA55i Broadband VoIP Router /* Yano products */ product YANO U640MO 0x0101 U640MO-03 product YANO FW800HD 0x05fc METALWEAR-HDD /* Y.C. Cable products */ product YCCABLE PL2303 0x0fba PL2303 Serial /* Y-E Data products */ product YEDATA FLASHBUSTERU 0x0000 Flashbuster-U /* Yiso Wireless Co.
products */ product YISO C893 0xc893 CDMA 2000 1xEVDO PC Card /* Z-Com products */ product ZCOM M4Y750 0x0001 M4Y-750 product ZCOM XI725 0x0002 XI-725/726 product ZCOM XI735 0x0005 XI-735 product ZCOM XG703A 0x0008 PrismGT USB 2.0 WLAN product ZCOM ZD1211 0x0011 ZD1211 product ZCOM AR5523 0x0012 AR5523 product ZCOM AR5523_NF 0x0013 AR5523 driver (no firmware) product ZCOM XM142 0x0015 XM-142 product ZCOM ZD1211B 0x001a ZD1211B product ZCOM RT2870_1 0x0022 RT2870 product ZCOM UB81 0x0023 UB81 product ZCOM RT2870_2 0x0025 RT2870 product ZCOM UB82 0x0026 UB82 /* Zeevo, Inc. products */ product ZEEVO BLUETOOTH 0x07d0 BT-500 Bluetooth USB Adapter /* Zinwell products */ product ZINWELL RT2570 0x0260 RT2570 product ZINWELL RT2870_1 0x0280 RT2870 product ZINWELL RT2870_2 0x0282 RT2870 product ZINWELL RT3072_1 0x0283 RT3072 product ZINWELL RT3072_2 0x0284 RT3072 product ZINWELL RT3070 0x5257 RT3070 /* Zoom Telephonics, Inc. products */ product ZOOM 2986L 0x9700 2986L Fax modem product ZOOM 3095 0x3095 3095 USB Fax modem /* Zoran Microelectronics products */ product ZORAN EX20DSC 0x4343 Digital Camera EX-20 DSC /* ZTE products */ product ZTE MF622 0x0001 MF622 modem product ZTE MF628 0x0015 MF628 modem product ZTE MF626 0x0031 MF626 modem product ZTE MF820D_INSTALLER 0x0166 MF820D CD product ZTE MF820D 0x0167 MF820D modem product ZTE INSTALLER 0x2000 UMTS CD product ZTE MC2718 0xffe8 MC2718 modem product ZTE AC8700 0xfffe CDMA 1xEVDO USB modem /* Zydas Technology Corporation products */ product ZYDAS ZD1201 0x1201 ZD1201 product ZYDAS ZD1211 0x1211 ZD1211 WLAN abg product ZYDAS ZD1211B 0x1215 ZD1211B product ZYDAS ZD1221 0x1221 ZD1221 product ZYDAS ALL0298 0xa211 ALL0298 product ZYDAS ZD1211B_2 0xb215 ZD1211B /* ZyXEL Communication Co. products */ product ZYXEL OMNI56K 0x1500 Omni 56K Plus product ZYXEL 980N 0x2011 Scorpion-980N keyboard product ZYXEL ZYAIRG220 0x3401 ZyAIR G-220 product ZYXEL G200V2 0x3407 G-200 v2 product ZYXEL AG225H 0x3409 AG-225H product ZYXEL M202 0x340a M-202 product ZYXEL G270S 0x340c G-270S product ZYXEL G220V2 0x340f G-220 v2 product ZYXEL G202 0x3410 G-202 product ZYXEL RT2573 0x3415 RT2573 product ZYXEL RT2870_1 0x3416 RT2870 product ZYXEL NWD271N 0x3417 NWD-271N product ZYXEL NWD211AN 0x3418 NWD-211AN product ZYXEL RT2870_2 0x341a RT2870 product ZYXEL RT3070 0x341e NWD2105 product ZYXEL RTL8192CU 0x341f RTL8192CU product ZYXEL NWD2705 0x3421 NWD2705 product ZYXEL NWD6605 0x3426 ND6605 product ZYXEL PRESTIGE 0x401a Prestige Index: projects/pnfs-planb-server/sys/fs/nfs/nfs_commonkrpc.c =================================================================== --- projects/pnfs-planb-server/sys/fs/nfs/nfs_commonkrpc.c (revision 334966) +++ projects/pnfs-planb-server/sys/fs/nfs/nfs_commonkrpc.c (revision 334967) @@ -1,1417 +1,1418 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Socket operations for use by nfs */ #include "opt_kgssapi.h" #include "opt_nfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfscl_nfs234_start_probe; dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfscl_nfs234_done_probe; /* * Registered probes by RPC type. */ uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; #endif NFSSTATESPINLOCK; NFSREQSPINLOCK; NFSDLOCKMUTEX; NFSCLSTATEMUTEX; extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern int nfscl_ticks; extern void (*ncl_call_invalcaches)(struct vnode *); extern int nfs_numnfscbd; extern int nfscl_debuglevel; SVCPOOL *nfscbd_pool; static int nfsrv_gsscallbackson = 0; static int nfs_bufpackets = 4; static int nfs_reconnects; static int nfs3_jukebox_delay = 10; static int nfs_skip_wcc_data_onerr = 1; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "Buffer reservation size 2 < x < 64"); SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0, "Number of times the nfs client has had to reconnect"); SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0, "Number of seconds to delay a retry after receiving EJUKEBOX"); SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0, "Disable weak cache consistency checking when server returns an error"); static void nfs_down(struct nfsmount *, struct thread *, const char *, int, int); static void nfs_up(struct nfsmount *, struct thread *, const char *, int, int); static int nfs_msg(struct thread *, const char *, const char *, int); struct nfs_cached_auth { int ca_refs; /* refcount, including 1 from the cache */ uid_t ca_uid; /* uid that corresponds to this auth */ AUTH *ca_auth; /* RPC auth handle */ }; static int nfsv2_procid[NFS_V3NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, 
NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. * Which arguments are set to NULL indicates what kind of call it is. * cred == NULL --> a call to connect to a pNFS DS * nmp == NULL --> indicates an upcall to userland or an NFSv4.0 callback */ int newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp, struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) { int rcvreserve, sndreserve; int pktscale, pktscalesav; struct sockaddr *saddr; struct ucred *origcred; CLIENT *client; struct netconfig *nconf; struct socket *so; int one = 1, retries, error = 0; struct thread *td = curthread; SVCXPRT *xprt; struct timeval timo; /* * We need to establish the socket using the credentials of * the mountpoint. Some parts of this process (such as * sobind() and soconnect()) will use the current thread's * credential instead of the socket credential. To work * around this, temporarily change the current thread's * credential to that of the mountpoint. * * XXX: It would be better to explicitly pass the correct * credential to sobind() and soconnect(). */ origcred = td->td_ucred; /* * Use the credential in nr_cred, if not NULL. */ if (nrp->nr_cred != NULL) td->td_ucred = nrp->nr_cred; else td->td_ucred = cred; saddr = nrp->nr_nam; if (saddr->sa_family == AF_INET) if (nrp->nr_sotype == SOCK_DGRAM) nconf = getnetconfigent("udp"); else nconf = getnetconfigent("tcp"); else if (saddr->sa_family == AF_LOCAL) nconf = getnetconfigent("local"); else if (nrp->nr_sotype == SOCK_DGRAM) nconf = getnetconfigent("udp6"); else nconf = getnetconfigent("tcp6"); pktscale = nfs_bufpackets; if (pktscale < 2) pktscale = 2; if (pktscale > 64) pktscale = 64; pktscalesav = pktscale; /* * soreserve() can fail if sb_max is too small, so shrink pktscale * and try again if there is an error. * Print a log message suggesting increasing sb_max. * Creating a socket and doing this is necessary since, if the * reservation sizes are too large and will make soreserve() fail, * the connection will work until a large send is attempted and * then it will loop in the krpc code.
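 *
 * (Editorial sketch, not part of the original source: with the
 * default nfs_bufpackets of 4 on a TCP mount, the first pass asks
 * soreserve() for roughly (NFS_MAXBSIZE + NFS_MAXXDR + 4) * 4 bytes
 * in each direction; every failure decrements pktscale by one, so
 * the attempts run at pktscale 4, 3 and then 2, and only after the
 * floor of 2 also fails is the error returned to the caller.)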
*/ so = NULL; saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *); error = socreate(saddr->sa_family, &so, nrp->nr_sotype, nrp->nr_soproto, td->td_ucred, td); if (error) { td->td_ucred = origcred; goto out; } do { if (error != 0 && pktscale > 2) { if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM && pktscale == pktscalesav) printf("Consider increasing kern.ipc.maxsockbuf\n"); pktscale--; } if (nrp->nr_sotype == SOCK_DGRAM) { if (nmp != NULL) { sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * pktscale; rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } else { if (nrp->nr_sotype != SOCK_STREAM) panic("nfscon sotype"); if (nmp != NULL) { sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } error = soreserve(so, sndreserve, rcvreserve); if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM && pktscale <= 2) printf("Must increase kern.ipc.maxsockbuf or reduce" " rsize, wsize\n"); } while (error != 0 && pktscale > 2); soclose(so); if (error) { td->td_ucred = origcred; goto out; } client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog, nrp->nr_vers, sndreserve, rcvreserve); CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq"); if (nmp != NULL) { if ((nmp->nm_flag & NFSMNT_INT)) CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one); if ((nmp->nm_flag & NFSMNT_RESVPORT)) CLNT_CONTROL(client, CLSET_PRIVPORT, &one); if (NFSHASSOFT(nmp)) { if (nmp->nm_sotype == SOCK_DGRAM) /* * For UDP, the large timeout for a reconnect * will be set to "nm_retry * nm_timeo / 2", so * we only want to do 2 reconnect timeout * retries. */ retries = 2; else retries = nmp->nm_retry; } else retries = INT_MAX; if (NFSHASNFSV4N(nmp)) { if (cred != NULL) { /* * Make sure the nfscbd_pool doesn't get * destroyed while doing this. */ NFSD_LOCK(); if (nfs_numnfscbd > 0) { nfs_numnfscbd++; NFSD_UNLOCK(); xprt = svc_vc_create_backchannel( nfscbd_pool); CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt); NFSD_LOCK(); nfs_numnfscbd--; if (nfs_numnfscbd == 0) wakeup(&nfs_numnfscbd); } NFSD_UNLOCK(); } else { /* * cred == NULL for a DS connect. * For connects to a DS, set a retry limit * so that failed DSs will be detected. * This is ok for NFSv4.1, since a DS does * not maintain open/lock state and is the * only case where using a "soft" mount is * recommended for NFSv4. */ retries = 2; } } } else { /* * Three cases: * - Null RPC callback to client * - Non-Null RPC callback to client, wait a little longer * - upcalls to nfsuserd and gssd (clp == NULL) */ if (callback_retry_mult == 0) { retries = NFSV4_UPCALLRETRY; CLNT_CONTROL(client, CLSET_PRIVPORT, &one); } else { retries = NFSV4_CALLBACKRETRY * callback_retry_mult; } } CLNT_CONTROL(client, CLSET_RETRIES, &retries); if (nmp != NULL) { /* * For UDP, there are 2 timeouts: * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer * that does a retransmit of an RPC request using the same * socket and xid. This is what you normally want to do, * since NFS servers depend on "same xid" for their * Duplicate Request Cache. * - timeout specified in CLNT_CALL_MBUF(), which specifies when * retransmits on the same socket should fail and a fresh * socket created. Each of these timeouts counts as one * CLSET_RETRIES as set above. * Set the initial retransmit timeout for UDP. This timeout * doesn't exist for TCP and the following call just fails, * which is ok. 
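 *
 * (Worked example, editorial, with assumed values: if nm_timeo were
 * 25 ticks and NFS_HZ were 10, the two assignments below would give
 * timo.tv_sec = 25 / 10 = 2 and timo.tv_usec = (25 % 10) * 1000000
 * / 10 = 500000, i.e. an initial retransmit timeout of 2.5 seconds.)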
*/ timo.tv_sec = nmp->nm_timeo / NFS_HZ; timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ; CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo); } mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { mtx_unlock(&nrp->nr_mtx); /* * Someone else already connected. */ CLNT_RELEASE(client); } else { nrp->nr_client = client; /* * Protocols that do not require connections may be optionally * left unconnected for servers that reply from a port other * than NFS_PORT. */ if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) { mtx_unlock(&nrp->nr_mtx); CLNT_CONTROL(client, CLSET_CONNECT, &one); } else mtx_unlock(&nrp->nr_mtx); } /* Restore current thread's credentials. */ td->td_ucred = origcred; out: NFSEXITCODE(error); return (error); } /* * NFS disconnect. Clean up and unlink. */ void newnfs_disconnect(struct nfssockreq *nrp) { CLIENT *client; mtx_lock(&nrp->nr_mtx); if (nrp->nr_client != NULL) { client = nrp->nr_client; nrp->nr_client = NULL; mtx_unlock(&nrp->nr_mtx); rpc_gss_secpurge_call(client); CLNT_CLOSE(client); CLNT_RELEASE(client); } else { mtx_unlock(&nrp->nr_mtx); } } static AUTH * nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, char *srv_principal, gss_OID mech_oid, struct ucred *cred) { rpc_gss_service_t svc; AUTH *auth; switch (secflavour) { case RPCSEC_GSS_KRB5: case RPCSEC_GSS_KRB5I: case RPCSEC_GSS_KRB5P: if (!mech_oid) { if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) return (NULL); } if (secflavour == RPCSEC_GSS_KRB5) svc = rpc_gss_svc_none; else if (secflavour == RPCSEC_GSS_KRB5I) svc = rpc_gss_svc_integrity; else svc = rpc_gss_svc_privacy; if (clnt_principal == NULL) auth = rpc_gss_secfind_call(nrp->nr_client, cred, srv_principal, mech_oid, svc); else { auth = rpc_gss_seccreate_call(nrp->nr_client, cred, clnt_principal, srv_principal, "kerberosv5", svc, NULL, NULL, NULL); return (auth); } if (auth != NULL) return (auth); /* fallthrough */ case AUTH_SYS: default: return (authunix_create(cred)); } } /* * Callback from the RPC code to generate up/down notifications. 
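 *
 * (Editorial summary of the handler below: FEEDBACK_REXMIT2 and
 * FEEDBACK_RECONNECT report the server via nfs_down() as
 * "not responding", rate limited to one message per nm_tprintf_delay
 * interval; FEEDBACK_OK clears the condition through nfs_up() with
 * "is alive again".)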
*/ struct nfs_feedback_arg { struct nfsmount *nf_mount; int nf_lastmsg; /* last tprintf */ int nf_tprintfmsg; struct thread *nf_td; }; static void nfs_feedback(int type, int proc, void *arg) { struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg; struct nfsmount *nmp = nf->nf_mount; time_t now; switch (type) { case FEEDBACK_REXMIT2: case FEEDBACK_RECONNECT: now = NFSD_MONOSEC; if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) { nfs_down(nmp, nf->nf_td, "not responding", 0, NFSSTA_TIMEO); nf->nf_tprintfmsg = TRUE; nf->nf_lastmsg = now; } break; case FEEDBACK_OK: nfs_up(nf->nf_mount, nf->nf_td, "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg); break; } } /* * newnfs_request - goes something like this * - does the rpc by calling the krpc layer * - break down rpc header and return with nfs reply * nb: always frees up nd_mreq mbuf list */ int newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp, struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers, u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep) { uint32_t retseq, retval, slotseq, *tl; time_t waituntil; int i = 0, j = 0, opcnt, set_sigset = 0, slot; int error = 0, usegssname = 0, secflavour = AUTH_SYS, trycnt; int freeslot, maxslot, reterr, slotpos, timeo; u_int16_t procnum; u_int trylater_delay = 1; struct nfs_feedback_arg nf; struct timeval timo; AUTH *auth; struct rpc_callextra ext; enum clnt_stat stat; struct nfsreq *rep = NULL; char *srv_principal = NULL, *clnt_principal = NULL; sigset_t oldset; struct ucred *authcred; struct nfsclsession *sep; uint8_t sessionid[NFSX_V4SESSIONID]; sep = dssep; if (xidp != NULL) *xidp = 0; /* Reject requests while attempting a forced unmount. */ if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) { m_freem(nd->nd_mreq); return (ESTALE); } /* * Set authcred, which is used to acquire RPC credentials to * the cred argument, by default. The crhold() should not be * necessary, but will ensure that some future code change * doesn't result in the credential being free'd prematurely. */ authcred = crhold(cred); /* For client side interruptible mounts, mask off the signals. */ if (nmp != NULL && td != NULL && NFSHASINT(nmp)) { newnfs_set_sigmask(td, &oldset); set_sigset = 1; } /* * XXX if not already connected call nfs_connect now. Longer * term, change nfs_mount to call nfs_connect unconditionally * and let clnt_reconnect_create handle reconnects. */ if (nrp->nr_client == NULL) newnfs_connect(nmp, nrp, cred, td, 0); /* * For a client side mount, nmp is != NULL and clp == NULL. For * server calls (callbacks or upcalls), nmp == NULL. */ if (clp != NULL) { NFSLOCKSTATE(); if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) { secflavour = RPCSEC_GSS_KRB5; if (nd->nd_procnum != NFSPROC_NULL) { if (clp->lc_flags & LCL_GSSINTEGRITY) secflavour = RPCSEC_GSS_KRB5I; else if (clp->lc_flags & LCL_GSSPRIVACY) secflavour = RPCSEC_GSS_KRB5P; } } NFSUNLOCKSTATE(); } else if (nmp != NULL && NFSHASKERB(nmp) && nd->nd_procnum != NFSPROC_NULL) { if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0) nd->nd_flag |= ND_USEGSSNAME; if ((nd->nd_flag & ND_USEGSSNAME) != 0) { /* * If there is a client side host based credential, * use that, otherwise use the system uid, if set. * The system uid is in the nmp->nm_sockreq.nr_cred * credentials. 
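 *
 * (Editorial sketch of the selection that follows, mirroring the
 * code rather than extending it:
 *	nm_krbnamelen > 0   -> usegssname = 1, principal = nm_krbname
 *	nm_uid != (uid_t)-1 -> authcred = crhold(nm_sockreq.nr_cred)
 *	otherwise           -> keep the caller-supplied cred.)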
*/ if (nmp->nm_krbnamelen > 0) { usegssname = 1; clnt_principal = nmp->nm_krbname; } else if (nmp->nm_uid != (uid_t)-1) { KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } } else if (nmp->nm_krbnamelen == 0 && nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) { /* * If there is no host based principal name and * the system uid is set and this is root, use the * system uid, since root won't have user * credentials in a credentials cache file. * The system uid is in the nmp->nm_sockreq.nr_cred * credentials. */ KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } if (NFSHASINTEGRITY(nmp)) secflavour = RPCSEC_GSS_KRB5I; else if (NFSHASPRIVACY(nmp)) secflavour = RPCSEC_GSS_KRB5P; else secflavour = RPCSEC_GSS_KRB5; srv_principal = NFSMNT_SRVKRBNAME(nmp); } else if (nmp != NULL && !NFSHASKERB(nmp) && nd->nd_procnum != NFSPROC_NULL && (nd->nd_flag & ND_USEGSSNAME) != 0) { /* * Use the uid that did the mount when the RPC is doing * NFSv4 system operations, as indicated by the * ND_USEGSSNAME flag, for the AUTH_SYS case. * The credentials in nm_sockreq.nr_cred were used for the * mount. */ KASSERT(nmp->nm_sockreq.nr_cred != NULL, ("newnfs_request: NULL nr_cred")); crfree(authcred); authcred = crhold(nmp->nm_sockreq.nr_cred); } if (nmp != NULL) { bzero(&nf, sizeof(struct nfs_feedback_arg)); nf.nf_mount = nmp; nf.nf_td = td; nf.nf_lastmsg = NFSD_MONOSEC - ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); } if (nd->nd_procnum == NFSPROC_NULL) auth = authnone_create(); else if (usegssname) { /* * For this case, the authenticator is held in the * nfssockreq structure, so don't release the reference count * held on it. --> Don't AUTH_DESTROY() it in this function. */ if (nrp->nr_auth == NULL) nrp->nr_auth = nfs_getauth(nrp, secflavour, clnt_principal, srv_principal, NULL, authcred); else rpc_gss_refresh_auth_call(nrp->nr_auth); auth = nrp->nr_auth; } else auth = nfs_getauth(nrp, secflavour, NULL, srv_principal, NULL, authcred); crfree(authcred); if (auth == NULL) { m_freem(nd->nd_mreq); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (EACCES); } bzero(&ext, sizeof(ext)); ext.rc_auth = auth; if (nmp != NULL) { ext.rc_feedback = nfs_feedback; ext.rc_feedback_arg = &nf; } procnum = nd->nd_procnum; if ((nd->nd_flag & ND_NFSV4) && nd->nd_procnum != NFSPROC_NULL && nd->nd_procnum != NFSV4PROC_CBCOMPOUND) procnum = NFSV4PROC_COMPOUND; if (nmp != NULL) { NFSINCRGLOBAL(nfsstatsv1.rpcrequests); /* Map the procnum to the old NFSv2 one, as required. */ if ((nd->nd_flag & ND_NFSV2) != 0) { if (nd->nd_procnum < NFS_V3NPROCS) procnum = nfsv2_procid[nd->nd_procnum]; else procnum = NFSV2PROC_NOOP; } /* * Now only used for the R_DONTRECOVER case, but until that is * supported within the krpc code, I need to keep a queue of * outstanding RPCs for nfsv4 client requests. 
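 *
 * (Editorial note: only NFSv4 COMPOUND requests allocate a struct
 * nfsreq below; it is chained onto nfsd_reqq with TAILQ_INSERT_TAIL()
 * before the RPC is issued, taken off with TAILQ_REMOVE() once the
 * call returns, and freed before newnfs_request() exits.  NFSv2/3
 * requests leave all retransmit state to the krpc layer.)
 */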
*/ if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) rep = malloc(sizeof(struct nfsreq), M_NFSDREQ, M_WAITOK); #ifdef KDTRACE_HOOKS if (dtrace_nfscl_nfs234_start_probe != NULL) { uint32_t probe_id; int probe_procnum; if (nd->nd_flag & ND_NFSV4) { probe_id = nfscl_nfs4_start_probes[nd->nd_procnum]; probe_procnum = nd->nd_procnum; } else if (nd->nd_flag & ND_NFSV3) { probe_id = nfscl_nfs3_start_probes[procnum]; probe_procnum = procnum; } else { probe_id = nfscl_nfs2_start_probes[nd->nd_procnum]; probe_procnum = procnum; } if (probe_id != 0) (dtrace_nfscl_nfs234_start_probe) (probe_id, vp, nd->nd_mreq, cred, probe_procnum); } #endif } trycnt = 0; freeslot = -1; /* Set to slot that needs to be free'd */ tryagain: slot = -1; /* Slot that needs a sequence# increment. */ /* * This timeout specifies when a new socket should be created, * along with new xid values. For UDP, this should be done * infrequently, since retransmits of RPC requests should normally * use the same xid. */ if (nmp == NULL) { timo.tv_usec = 0; if (clp == NULL) timo.tv_sec = NFSV4_UPCALLTIMEO; else timo.tv_sec = NFSV4_CALLBACKTIMEO; } else { if (nrp->nr_sotype != SOCK_DGRAM) { timo.tv_usec = 0; if ((nmp->nm_flag & NFSMNT_NFSV4)) timo.tv_sec = INT_MAX; else timo.tv_sec = NFS_TCPTIMEO; } else { if (NFSHASSOFT(nmp)) { /* * CLSET_RETRIES is set to 2, so this should be * half of the total timeout required. */ timeo = nmp->nm_retry * nmp->nm_timeo / 2; if (timeo < 1) timeo = 1; timo.tv_sec = timeo / NFS_HZ; timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ; } else { /* For UDP hard mounts, use a large value. */ timo.tv_sec = NFS_MAXTIMEO / NFS_HZ; timo.tv_usec = 0; } } if (rep != NULL) { rep->r_flags = 0; rep->r_nmp = nmp; /* * Chain request into list of outstanding requests. */ NFSLOCKREQ(); TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain); NFSUNLOCKREQ(); } } nd->nd_mrep = NULL; if (clp != NULL && sep != NULL) stat = clnt_bck_call(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt); else stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo); NFSCL_DEBUG(2, "clnt call=%d\n", stat); if (rep != NULL) { /* * RPC done, unlink the request. */ NFSLOCKREQ(); TAILQ_REMOVE(&nfsd_reqq, rep, r_chain); NFSUNLOCKREQ(); } /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (stat == RPC_SUCCESS) { error = 0; } else if (stat == RPC_TIMEDOUT) { NFSINCRGLOBAL(nfsstatsv1.rpctimeouts); error = ETIMEDOUT; } else if (stat == RPC_VERSMISMATCH) { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EOPNOTSUPP; } else if (stat == RPC_PROGVERSMISMATCH) { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EPROTONOSUPPORT; } else if (stat == RPC_INTR) { error = EINTR; } else if (stat == RPC_CANTSEND || stat == RPC_CANTRECV || stat == RPC_SYSTEMERROR) { if ((nd->nd_flag & ND_NFSV41) != 0 && nmp != NULL && nd->nd_procnum != NFSPROC_NULL) { /* * The nfsess_defunct field is protected by * the NFSLOCKMNT()/nm_mtx lock and not the * nfsess_mtx lock to simplify its handling, * for the MDS session. This lock is also * sufficient for nfsess_sessionid, since it * never changes in the structure. */ NFSLOCKCLSTATE(); NFSLOCKMNT(nmp); /* The session must be marked defunct. */ if (dssep == NULL) { /* * This is either an MDS proxy operation or * a client mount with "soft,retrans=N" options. * Mark the MDS session defunct and initiate * recovery, as required. 
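 *
 * (Editorial sketch of the recovery kick below: when the reply's
 * session id still matches the MDS session, nfsess_defunct is set,
 * NFSCLFLAGS_RECOVER is or'd into the clientid flags and
 * wakeup(nmp->nm_clp) prods the state thread so that a new session
 * can be created.)
 */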
*/ NFSCL_DEBUG(1, "Failed soft proxy RPC\n"); sep = NFSMNT_MDSSESSION(nmp); if (bcmp(sep->nfsess_sessionid, nd->nd_sequence, NFSX_V4SESSIONID) == 0) { /* Initiate recovery. */ sep->nfsess_defunct = 1; NFSCL_DEBUG(1, "Marked defunct\n"); if (nmp->nm_clp != NULL) { nmp->nm_clp->nfsc_flags |= NFSCLFLAGS_RECOVER; wakeup(nmp->nm_clp); } } } else { /* * This is a client side DS RPC. Just mark * the session defunct. A subsequent LayoutGet * should get a new session. */ NFSCL_DEBUG(1, "Failed client DS RPC\n"); if (bcmp(dssep->nfsess_sessionid, nd->nd_sequence, NFSX_V4SESSIONID) == 0) { /* Mark it defunct. */ dssep->nfsess_defunct = 1; NFSCL_DEBUG(1, "Marked defunct\n"); } } NFSUNLOCKMNT(nmp); NFSUNLOCKCLSTATE(); } NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = ENXIO; } else { NFSINCRGLOBAL(nfsstatsv1.rpcinvalid); error = EACCES; } if (error) { m_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (error); } KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n")); /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ newnfs_realign(&nd->nd_mrep, M_WAITOK); nd->nd_md = nd->nd_mrep; nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t); nd->nd_repstat = 0; if (nd->nd_procnum != NFSPROC_NULL && nd->nd_procnum != NFSV4PROC_CBNULL) { /* If sep == NULL, set it to the default in nmp. */ if (sep == NULL && nmp != NULL) sep = nfsmnt_mdssession(nmp); /* * and now the actual NFS xdr. */ NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl); if (nd->nd_repstat >= 10000) NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum, (int)nd->nd_repstat); /* * Get rid of the tag, return count and SEQUENCE result for * NFSv4. */ if ((nd->nd_flag & ND_NFSV4) != 0) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); opcnt = fxdr_unsigned(int, *tl++); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); if (j >= 10000) NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j); /* * If the first op is Sequence, free up the slot. */ if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) || (clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0)) NFSCL_DEBUG(1, "failed seq=%d\n", j); - if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) || - (clp != NULL && i == NFSV4OP_CBSEQUENCE && j == 0) - ) { + if (((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) || + (clp != NULL && i == NFSV4OP_CBSEQUENCE && + j == 0)) && sep != NULL) { if (i == NFSV4OP_SEQUENCE) NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); else NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); mtx_lock(&sep->nfsess_mtx); if (bcmp(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID) == 0) { tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; retseq = fxdr_unsigned(uint32_t, *tl++); slot = fxdr_unsigned(int, *tl++); freeslot = slot; if (retseq != sep->nfsess_slotseq[slot]) printf("retseq diff 0x%x\n", retseq); retval = fxdr_unsigned(uint32_t, *++tl); if ((retval + 1) < sep->nfsess_foreslots ) sep->nfsess_foreslots = (retval + 1); else if ((retval + 1) > sep->nfsess_foreslots) sep->nfsess_foreslots = (retval < 64) ? (retval + 1) : 64; } mtx_unlock(&sep->nfsess_mtx); /* Grab the op and status for the next one. 
*/ if (opcnt > 1) { NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); } } } if (nd->nd_repstat != 0) { if (nd->nd_repstat == NFSERR_BADSESSION && - nmp != NULL && dssep == NULL) { + nmp != NULL && dssep == NULL && + (nd->nd_flag & ND_NFSV41) != 0) { /* * If this is a client side MDS RPC, mark * the MDS session defunct and initiate * recovery, as required. * The nfsess_defunct field is protected by * the NFSLOCKMNT()/nm_mtx lock and not the * nfsess_mtx lock to simplify its handling, * for the MDS session. This lock is also * sufficient for nfsess_sessionid, since it * never changes in the structure. */ NFSCL_DEBUG(1, "Got badsession\n"); NFSLOCKCLSTATE(); NFSLOCKMNT(nmp); sep = NFSMNT_MDSSESSION(nmp); if (bcmp(sep->nfsess_sessionid, nd->nd_sequence, NFSX_V4SESSIONID) == 0) { /* Initiate recovery. */ sep->nfsess_defunct = 1; NFSCL_DEBUG(1, "Marked defunct\n"); if (nmp->nm_clp != NULL) { nmp->nm_clp->nfsc_flags |= NFSCLFLAGS_RECOVER; wakeup(nmp->nm_clp); } } NFSUNLOCKCLSTATE(); /* * Sleep for up to 1sec waiting for a new * session. */ mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO, "nfsbadsess", hz); /* * Get the session again, in case a new one * has been created during the sleep. */ sep = NFSMNT_MDSSESSION(nmp); NFSUNLOCKMNT(nmp); if ((nd->nd_flag & ND_LOOPBADSESS) != 0) { reterr = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq, sessionid); if (reterr == 0) { /* Fill in new session info. */ NFSCL_DEBUG(1, "Filling in new sequence\n"); tl = nd->nd_sequence; bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl = txdr_unsigned(maxslot); } if (reterr == NFSERR_BADSESSION || reterr == 0) { NFSCL_DEBUG(1, "Badsession looping\n"); m_freem(nd->nd_mrep); nd->nd_mrep = NULL; goto tryagain; } nd->nd_repstat = reterr; NFSCL_DEBUG(1, "Got err=%d\n", reterr); } } if (((nd->nd_repstat == NFSERR_DELAY || nd->nd_repstat == NFSERR_GRACE) && (nd->nd_flag & ND_NFSV4) && nd->nd_procnum != NFSPROC_DELEGRETURN && nd->nd_procnum != NFSPROC_SETATTR && nd->nd_procnum != NFSPROC_READ && nd->nd_procnum != NFSPROC_READDS && nd->nd_procnum != NFSPROC_WRITE && nd->nd_procnum != NFSPROC_WRITEDS && nd->nd_procnum != NFSPROC_OPEN && nd->nd_procnum != NFSPROC_CREATE && nd->nd_procnum != NFSPROC_OPENCONFIRM && nd->nd_procnum != NFSPROC_OPENDOWNGRADE && nd->nd_procnum != NFSPROC_CLOSE && nd->nd_procnum != NFSPROC_LOCK && nd->nd_procnum != NFSPROC_LOCKU) || (nd->nd_repstat == NFSERR_DELAY && (nd->nd_flag & ND_NFSV4) == 0) || nd->nd_repstat == NFSERR_RESOURCE) { if (trylater_delay > NFS_TRYLATERDEL) trylater_delay = NFS_TRYLATERDEL; waituntil = NFSD_MONOSEC + trylater_delay; while (NFSD_MONOSEC < waituntil) (void) nfs_catnap(PZERO, 0, "nfstry"); trylater_delay *= 2; if (slot != -1) { mtx_lock(&sep->nfsess_mtx); sep->nfsess_slotseq[slot]++; *nd->nd_slotseq = txdr_unsigned( sep->nfsess_slotseq[slot]); mtx_unlock(&sep->nfsess_mtx); } m_freem(nd->nd_mrep); nd->nd_mrep = NULL; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. * (vp != NULL implies a client side call) */ if (nd->nd_repstat == ESTALE && vp != NULL) { cache_purge(vp); if (ncl_call_invalcaches != NULL) (*ncl_call_invalcaches)(vp); } } if ((nd->nd_flag & ND_NFSV4) != 0) { /* Free the slot, as required. */ if (freeslot != -1) nfsv4_freeslot(sep, freeslot); /* * If this op is Putfh, throw its results away. 
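 *
 * The test below can be read as this sketch, where seqid_op() and
 * seqid_exempt_err() are hypothetical helpers standing in for the
 * explicit lists in the code:
 *
 *	if (op == NFSV4OP_PUTFH && status == 0 &&
 *	    seqid_op(next_op) &&
 *	    (next_status == 0 || !seqid_exempt_err(next_status)))
 *		nd->nd_flag |= ND_INCRSEQID;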
*/ if (j >= 10000) NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j); if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) { NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl++); j = fxdr_unsigned(int, *tl); if (j >= 10000) NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i, j); /* * All Compounds that do an Op that must * be in sequence consist of NFSV4OP_PUTFH * followed by one of these. As such, we * can determine if the seqid# should be * incremented, here. */ if ((i == NFSV4OP_OPEN || i == NFSV4OP_OPENCONFIRM || i == NFSV4OP_OPENDOWNGRADE || i == NFSV4OP_CLOSE || i == NFSV4OP_LOCK || i == NFSV4OP_LOCKU) && (j == 0 || (j != NFSERR_STALECLIENTID && j != NFSERR_STALESTATEID && j != NFSERR_BADSTATEID && j != NFSERR_BADSEQID && j != NFSERR_BADXDR && j != NFSERR_RESOURCE && j != NFSERR_NOFILEHANDLE))) nd->nd_flag |= ND_INCRSEQID; } /* * If this op's status is non-zero, mark * that there is no more data to process. * The exception is Setattr, which always has xdr * when it has failed. */ if (j != 0 && i != NFSV4OP_SETATTR) nd->nd_flag |= ND_NOMOREDATA; /* * If R_DONTRECOVER is set, replace the stale error * reply, so that recovery isn't initiated. */ if ((nd->nd_repstat == NFSERR_STALECLIENTID || nd->nd_repstat == NFSERR_BADSESSION || nd->nd_repstat == NFSERR_STALESTATEID) && rep != NULL && (rep->r_flags & R_DONTRECOVER)) nd->nd_repstat = NFSERR_STALEDONTRECOVER; } } #ifdef KDTRACE_HOOKS if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { uint32_t probe_id; int probe_procnum; if (nd->nd_flag & ND_NFSV4) { probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; probe_procnum = nd->nd_procnum; } else if (nd->nd_flag & ND_NFSV3) { probe_id = nfscl_nfs3_done_probes[procnum]; probe_procnum = procnum; } else { probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; probe_procnum = procnum; } if (probe_id != 0) (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, nd->nd_mreq, cred, probe_procnum, 0); } #endif m_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (0); nfsmout: mbuf_freem(nd->nd_mrep); mbuf_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); if (rep != NULL) free(rep, M_NFSDREQ); if (set_sigset) newnfs_restore_sigmask(td, &oldset); return (error); } /* * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and * wait for all requests to complete. This is used by forced unmounts * to terminate any outstanding RPCs. */ int newnfs_nmcancelreqs(struct nfsmount *nmp) { struct nfsclds *dsp; struct __rpc_client *cl; if (nmp->nm_sockreq.nr_client != NULL) CLNT_CLOSE(nmp->nm_sockreq.nr_client); lookformore: NFSLOCKMNT(nmp); TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { NFSLOCKDS(dsp); if (dsp != TAILQ_FIRST(&nmp->nm_sess) && (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 && dsp->nfsclds_sockp != NULL && dsp->nfsclds_sockp->nr_client != NULL) { dsp->nfsclds_flags |= NFSCLDS_CLOSED; cl = dsp->nfsclds_sockp->nr_client; NFSUNLOCKDS(dsp); NFSUNLOCKMNT(nmp); CLNT_CLOSE(cl); goto lookformore; } NFSUNLOCKDS(dsp); } NFSUNLOCKMNT(nmp); return (0); } /* * Any signal that can interrupt an NFS operation in an intr mount * should be added to this set. SIGSTOP and SIGKILL cannot be masked. */ int newnfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGQUIT }; /* * Check to see if one of the signals in our subset is pending on * the process (in an intr mount). 
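 *
 * These checks back the sigmask-swapping sleep wrappers defined
 * below; a hypothetical interruptible wait on an "intr" mount
 * would look like:
 *
 *	error = newnfs_msleep(td, &nmp->nm_state, &nmp->nm_mtx,
 *	    PZERO | PCATCH, "nfsw", hz);
 *
 * (The wait channel and mutex are placeholders; PCATCH is what
 * triggers the temporary mask swap.)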
*/ static int nfs_sig_pending(sigset_t set) { int i; for (i = 0 ; i < nitems(newnfs_sig_set); i++) if (SIGISMEMBER(set, newnfs_sig_set[i])) return (1); return (0); } /* * The set/restore sigmask functions are used to (temporarily) overwrite * the thread td_sigmask during an RPC call (for example). These are also * used in other places in the NFS client that might tsleep(). */ void newnfs_set_sigmask(struct thread *td, sigset_t *oldset) { sigset_t newset; int i; struct proc *p; SIGFILLSET(newset); if (td == NULL) td = curthread; /* XXX */ p = td->td_proc; /* Remove the NFS set of signals from newset */ PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); for (i = 0 ; i < nitems(newnfs_sig_set); i++) { /* * But make sure we leave the ones already masked * by the process, ie. remove the signal from the * temporary signalmask only if it wasn't already * in p_sigmask. */ if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) && !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i])) SIGDELSET(newset, newnfs_sig_set[i]); } mtx_unlock(&p->p_sigacts->ps_mtx); kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, SIGPROCMASK_PROC_LOCKED); PROC_UNLOCK(p); } void newnfs_restore_sigmask(struct thread *td, sigset_t *set) { if (td == NULL) td = curthread; /* XXX */ kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0); } /* * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the * old one after msleep() returns. */ int newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo) { sigset_t oldset; int error; if ((priority & PCATCH) == 0) return msleep(ident, mtx, priority, wmesg, timo); if (td == NULL) td = curthread; /* XXX */ newnfs_set_sigmask(td, &oldset); error = msleep(ident, mtx, priority, wmesg, timo); newnfs_restore_sigmask(td, &oldset); return (error); } /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int newnfs_sigintr(struct nfsmount *nmp, struct thread *td) { struct proc *p; sigset_t tmpset; /* Terminate all requests while attempting a forced unmount. */ if (NFSCL_FORCEDISM(nmp->nm_mountp)) return (EIO); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (td == NULL) return (0); p = td->td_proc; PROC_LOCK(p); tmpset = p->p_siglist; SIGSETOR(tmpset, td->td_siglist); SIGSETNAND(tmpset, td->td_sigmask); mtx_lock(&p->p_sigacts->ps_mtx); SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore); mtx_unlock(&p->p_sigacts->ps_mtx); if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) && nfs_sig_pending(tmpset)) { PROC_UNLOCK(p); return (EINTR); } PROC_UNLOCK(p); return (0); } static int nfs_msg(struct thread *td, const char *server, const char *msg, int error) { struct proc *p; p = td ? 
td->td_proc : NULL; if (error) { tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, msg, error); } else { tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); } return (0); } static void nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg, int error, int flags) { if (nmp == NULL) return; mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state |= NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 0); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state |= NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 0); } else mtx_unlock(&nmp->nm_mtx); nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); } static void nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg, int flags, int tprintfmsg) { if (nmp == NULL) return; if (tprintfmsg) { nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); } mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state &= ~NFSSTA_TIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESP, 1); } else mtx_unlock(&nmp->nm_mtx); mtx_lock(&nmp->nm_mtx); if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 1); } else mtx_unlock(&nmp->nm_mtx); } Index: projects/pnfs-planb-server/sys/fs/nfsclient/nfsmount.h =================================================================== --- projects/pnfs-planb-server/sys/fs/nfsclient/nfsmount.h (revision 334966) +++ projects/pnfs-planb-server/sys/fs/nfsclient/nfsmount.h (revision 334967) @@ -1,148 +1,150 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFSCLIENT_NFSMOUNT_H_ #define _NFSCLIENT_NFSMOUNT_H_ #include /* * Mount structure. * One allocated on every NFS mount. * Holds NFS specific information for mount. */ struct nfsmount { struct nfsmount_common nm_com; /* Common fields for nlm */ uint32_t nm_privflag; /* Private flags */ int nm_numgrps; /* Max. size of groupslist */ u_char nm_fh[NFSX_FHMAX]; /* File handle of root dir */ int nm_fhsize; /* Size of root file handle */ struct nfssockreq nm_sockreq; /* Socket Info */ int nm_timeouts; /* Request timeouts */ int nm_rsize; /* Max size of read rpc */ int nm_wsize; /* Max size of write rpc */ int nm_readdirsize; /* Size of a readdir rpc */ int nm_readahead; /* Num. of blocks to readahead */ int nm_wcommitsize; /* Max size of commit for write */ int nm_acdirmin; /* Directory attr cache min lifetime */ int nm_acdirmax; /* Directory attr cache max lifetime */ int nm_acregmin; /* Reg file attr cache min lifetime */ int nm_acregmax; /* Reg file attr cache max lifetime */ u_char nm_verf[NFSX_VERF]; /* write verifier */ TAILQ_HEAD(, buf) nm_bufq; /* async io buffer queue */ short nm_bufqlen; /* number of buffers in queue */ short nm_bufqwant; /* process wants to add to the queue */ int nm_bufqiods; /* number of iods processing queue */ u_int64_t nm_maxfilesize; /* maximum file size */ int nm_tprintf_initial_delay; /* initial delay */ int nm_tprintf_delay; /* interval for messages */ int nm_nametimeo; /* timeout for +ve entries (sec) */ int nm_negnametimeo; /* timeout for -ve entries (sec) */ /* Newnfs additions */ TAILQ_HEAD(, nfsclds) nm_sess; /* Session(s) for NFSv4.1. */ struct nfsclclient *nm_clp; uid_t nm_uid; /* Uid for SetClientID etc. */ u_int64_t nm_clval; /* identifies which clientid */ u_int64_t nm_fsid[2]; /* NFSv4 fsid */ int nm_minorvers; /* Minor version # for NFSv4 */ u_int16_t nm_krbnamelen; /* Krb5 host principal, if any */ u_int16_t nm_dirpathlen; /* and mount dirpath, for V4 */ u_int16_t nm_srvkrbnamelen; /* and the server's target name */ u_char nm_name[1]; /* malloc'd actual len of krbname + dirpath */ }; #define nm_nam nm_sockreq.nr_nam #define nm_sotype nm_sockreq.nr_sotype #define nm_so nm_sockreq.nr_so #define nm_soflags nm_sockreq.nr_soflags #define nm_soproto nm_sockreq.nr_soproto #define nm_client nm_sockreq.nr_client #define nm_krbname nm_name #define nm_mtx nm_com.nmcom_mtx #define nm_flag nm_com.nmcom_flag #define nm_state nm_com.nmcom_state #define nm_mountp nm_com.nmcom_mountp #define nm_timeo nm_com.nmcom_timeo #define nm_retry nm_com.nmcom_retry #define nm_hostname nm_com.nmcom_hostname #define nm_getinfo nm_com.nmcom_getinfo #define nm_vinvalbuf nm_com.nmcom_vinvalbuf /* Private flags. */ #define NFSMNTP_FORCEDISM 0x00000001 #define NFSMNTP_CANCELRPCS 0x00000002 #define NFSMNT_DIRPATH(m) (&((m)->nm_name[(m)->nm_krbnamelen + 1])) #define NFSMNT_SRVKRBNAME(m) \ (&((m)->nm_name[(m)->nm_krbnamelen + (m)->nm_dirpathlen + 2])) #if defined(_KERNEL) /* * Convert mount ptr to nfsmount ptr. 
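 *
 * Typical use in a VFS entry point (sketch):
 *
 *	struct nfsmount *nmp = VFSTONFS(mp);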
*/ #define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data)) /* * Get a pointer to the MDS session, which is always the first element * in the list. * This macro can only be safely used when the NFSLOCKMNT() lock is held. * The inline function can be used when the lock isn't held. */ #define NFSMNT_MDSSESSION(m) (&(TAILQ_FIRST(&((m)->nm_sess))->nfsclds_sess)) static __inline struct nfsclsession * nfsmnt_mdssession(struct nfsmount *nmp) { struct nfsclsession *tsep; + tsep = NULL; mtx_lock(&nmp->nm_mtx); - tsep = NFSMNT_MDSSESSION(nmp); + if (TAILQ_FIRST(&nmp->nm_sess) != NULL) + tsep = NFSMNT_MDSSESSION(nmp); mtx_unlock(&nmp->nm_mtx); return (tsep); } #ifndef NFS_DEFAULT_NAMETIMEO #define NFS_DEFAULT_NAMETIMEO 60 #endif #ifndef NFS_DEFAULT_NEGNAMETIMEO #define NFS_DEFAULT_NEGNAMETIMEO 60 #endif #endif /* _KERNEL */ #endif /* _NFSCLIENT_NFSMOUNT_H_ */ Index: projects/pnfs-planb-server/sys/i386/i386/vm86bios.s =================================================================== --- projects/pnfs-planb-server/sys/i386/i386/vm86bios.s (revision 334966) +++ projects/pnfs-planb-server/sys/i386/i386/vm86bios.s (revision 334967) @@ -1,169 +1,170 @@ /*- * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include /* miscellaneous asm macros */ #include #include "assym.inc" #define SCR_NEWPTD PCB_ESI /* readability macros */ #define SCR_VMFRAME PCB_EBP /* see vm86.c for explanation */ #define SCR_STACK PCB_ESP #define SCR_PGTABLE PCB_EBX #define SCR_ARGFRAME PCB_EIP #define SCR_TSS0 PCB_VM86 #define SCR_TSS1 (PCB_VM86+4) .data ALIGN_DATA .globl vm86pcb vm86pcb: .long 0 .text /* * vm86_bioscall(struct trapframe_vm86 *vm86) */ ENTRY(vm86_bioscall) movl vm86pcb,%edx /* scratch data area */ movl 4(%esp),%eax movl %eax,SCR_ARGFRAME(%edx) /* save argument pointer */ pushl %ebx pushl %ebp pushl %esi pushl %edi pushl %gs movl PCPU(CURTHREAD),%ecx cmpl %ecx,PCPU(FPCURTHREAD) /* do we need to save fp? 
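 *
 * In C terms, roughly (a sketch, not compiled here):
 *
 *	if (PCPU_GET(fpcurthread) == curthread)
 *		npxsave(curthread->td_pcb->pcb_save);
 *
 * i.e. only when this thread currently owns the FPU does its
 * state need to be written back before the vm86 switch.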
*/ jne 1f pushl %edx movl TD_PCB(%ecx),%ecx pushl PCB_SAVEFPU(%ecx) - call npxsave + movl $npxsave,%eax + call *%eax addl $4,%esp popl %edx /* recover our pcb */ 1: movl SCR_VMFRAME(%edx),%ebx /* target frame location */ movl %ebx,%edi /* destination */ movl SCR_ARGFRAME(%edx),%esi /* source (set on entry) */ movl $VM86_FRAMESIZE/4,%ecx /* sizeof(struct vm86frame)/4 */ cld rep movsl /* copy frame to new stack */ movl PCPU(CURPCB),%eax pushl %eax /* save curpcb */ movl %edx,PCPU(CURPCB) /* set curpcb to vm86pcb */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl 0(%ebx),%eax movl %eax,SCR_TSS0(%edx) /* save first word */ movl 4(%ebx),%eax andl $~0x200, %eax /* flip 386BSY -> 386TSS */ movl %eax,SCR_TSS1(%edx) /* save second word */ movl PCB_EXT(%edx),%edi /* vm86 tssd entry */ movl 0(%edi),%eax movl %eax,0(%ebx) movl 4(%edi),%eax movl %eax,4(%ebx) movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si movl %cr3,%eax pushl %eax /* save address space */ movl IdlePTD,%ecx /* va (and pa) of Idle PTD */ movl %ecx,%ebx movl 0(%ebx),%eax pushl %eax /* old ptde != 0 when booting */ pushl %ebx /* keep for reuse */ movl %esp,SCR_STACK(%edx) /* save current stack location */ movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */ movl %eax,0(%ebx) /* ... install as PTD entry 0 */ #if defined(PAE) || defined(PAE_TABLES) movl IdlePDPT,%ecx #endif movl %ecx,%cr3 /* new page tables */ movl SCR_VMFRAME(%edx),%esp /* switch to new stack */ pushl %esp movl $vm86_prepcall, %eax call *%eax /* finish setup */ add $4, %esp /* * Return via doreti */ MEXITCOUNT jmp doreti /* * vm86_biosret(struct trapframe_vm86 *vm86) */ ENTRY(vm86_biosret) movl vm86pcb,%edx /* data area */ movl 4(%esp),%esi /* source */ movl SCR_ARGFRAME(%edx),%edi /* destination */ movl $VM86_FRAMESIZE/4,%ecx /* size */ cld rep movsl /* copy frame to original frame */ movl SCR_STACK(%edx),%esp /* back to old stack */ popl %ebx /* saved va of Idle PTD */ popl %eax movl %eax,0(%ebx) /* restore old pte */ popl %eax movl %eax,%cr3 /* install old page table */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl SCR_TSS0(%edx),%eax movl %eax,0(%ebx) /* restore first word */ movl SCR_TSS1(%edx),%eax movl %eax,4(%ebx) /* restore second word */ movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si popl PCPU(CURPCB) /* restore curpcb/curproc */ movl SCR_ARGFRAME(%edx),%edx /* original stack frame */ movl TF_TRAPNO(%edx),%eax /* return (trapno) */ popl %gs popl %edi popl %esi popl %ebp popl %ebx ret /* back to our normal program */ Index: projects/pnfs-planb-server/sys/kern/makesyscalls.sh =================================================================== --- projects/pnfs-planb-server/sys/kern/makesyscalls.sh (revision 334966) +++ projects/pnfs-planb-server/sys/kern/makesyscalls.sh (revision 334967) @@ -1,738 +1,738 @@ #! 
/bin/sh - # @(#)makesyscalls.sh 8.1 (Berkeley) 6/10/93 # $FreeBSD$ set -e # name of compat options: compat=COMPAT_43 compat4=COMPAT_FREEBSD4 compat6=COMPAT_FREEBSD6 compat7=COMPAT_FREEBSD7 compat10=COMPAT_FREEBSD10 compat11=COMPAT_FREEBSD11 # output files: sysnames="syscalls.c" sysproto="../sys/sysproto.h" sysproto_h=_SYS_SYSPROTO_H_ syshdr="../sys/syscall.h" sysmk="../sys/syscall.mk" syssw="init_sysent.c" syscallprefix="SYS_" switchname="sysent" namesname="syscallnames" systrace="systrace_args.c" # tmp files: sysaue="sysent.aue.$$" sysdcl="sysent.dcl.$$" syscompat="sysent.compat.$$" syscompatdcl="sysent.compatdcl.$$" syscompat4="sysent.compat4.$$" syscompat4dcl="sysent.compat4dcl.$$" syscompat6="sysent.compat6.$$" syscompat6dcl="sysent.compat6dcl.$$" syscompat7="sysent.compat7.$$" syscompat7dcl="sysent.compat7dcl.$$" syscompat10="sysent.compat10.$$" syscompat10dcl="sysent.compat10dcl.$$" syscompat11="sysent.compat11.$$" syscompat11dcl="sysent.compat11dcl.$$" sysent="sysent.switch.$$" sysinc="sysinc.switch.$$" sysarg="sysarg.switch.$$" sysprotoend="sysprotoend.$$" systracetmp="systrace.$$" systraceret="systraceret.$$" if [ -r capabilities.conf ]; then - capenabled=`cat capabilities.conf | grep -v "^#" | grep -v "^$"` + capenabled=`egrep -v '^#|^$' capabilities.conf` capenabled=`echo $capenabled | sed 's/ /,/g'` else capenabled="" fi trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret" 0 touch $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret case $# in 0) echo "usage: $0 input-file " 1>&2 exit 1 ;; esac if [ -n "$2" ]; then . $2 fi sed -e ' :join /\\$/{a\ N s/\\\n// b join } 2,${ /^#/!s/\([{}()*,]\)/ \1 /g } ' < $1 | awk " BEGIN { sysaue = \"$sysaue\" sysdcl = \"$sysdcl\" sysproto = \"$sysproto\" sysprotoend = \"$sysprotoend\" sysproto_h = \"$sysproto_h\" syscompat = \"$syscompat\" syscompatdcl = \"$syscompatdcl\" syscompat4 = \"$syscompat4\" syscompat4dcl = \"$syscompat4dcl\" syscompat6 = \"$syscompat6\" syscompat6dcl = \"$syscompat6dcl\" syscompat7 = \"$syscompat7\" syscompat7dcl = \"$syscompat7dcl\" syscompat10 = \"$syscompat10\" syscompat10dcl = \"$syscompat10dcl\" syscompat11 = \"$syscompat11\" syscompat11dcl = \"$syscompat11dcl\" sysent = \"$sysent\" syssw = \"$syssw\" sysinc = \"$sysinc\" sysarg = \"$sysarg\" sysnames = \"$sysnames\" syshdr = \"$syshdr\" sysmk = \"$sysmk\" systrace = \"$systrace\" systracetmp = \"$systracetmp\" systraceret = \"$systraceret\" compat = \"$compat\" compat4 = \"$compat4\" compat6 = \"$compat6\" compat7 = \"$compat7\" compat10 = \"$compat10\" compat11 = \"$compat11\" syscallprefix = \"$syscallprefix\" switchname = \"$switchname\" namesname = \"$namesname\" infile = \"$1\" capenabled_string = \"$capenabled\" "' split(capenabled_string, capenabled, ","); printf "\n/* The casts are bogus but will do for now. 
*/\n" > sysent printf "struct sysent %s[] = {\n",switchname > sysent printf "/*\n * System call switch table.\n *\n" > syssw printf " * DO NOT EDIT-- this file is automatically generated.\n" > syssw printf " * $%s$\n", "FreeBSD" > syssw printf " */\n\n" > syssw printf "/*\n * System call prototypes.\n *\n" > sysarg printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysarg printf " * $%s$\n", "FreeBSD" > sysarg printf " */\n\n" > sysarg printf "#ifndef %s\n", sysproto_h > sysarg printf "#define\t%s\n\n", sysproto_h > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n" > sysarg printf "#include \n\n" > sysarg printf "#include \n\n" > sysarg printf "struct proc;\n\n" > sysarg printf "struct thread;\n\n" > sysarg printf "#define\tPAD_(t)\t(sizeof(register_t) <= sizeof(t) ? \\\n" > sysarg printf "\t\t0 : sizeof(register_t) - sizeof(t))\n\n" > sysarg printf "#if BYTE_ORDER == LITTLE_ENDIAN\n"> sysarg printf "#define\tPADL_(t)\t0\n" > sysarg printf "#define\tPADR_(t)\tPAD_(t)\n" > sysarg printf "#else\n" > sysarg printf "#define\tPADL_(t)\tPAD_(t)\n" > sysarg printf "#define\tPADR_(t)\t0\n" > sysarg printf "#endif\n\n" > sysarg printf "\n#ifdef %s\n\n", compat > syscompat printf "\n#ifdef %s\n\n", compat4 > syscompat4 printf "\n#ifdef %s\n\n", compat6 > syscompat6 printf "\n#ifdef %s\n\n", compat7 > syscompat7 printf "\n#ifdef %s\n\n", compat10 > syscompat10 printf "\n#ifdef %s\n\n", compat11 > syscompat11 printf "/*\n * System call names.\n *\n" > sysnames printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames printf " * $%s$\n", "FreeBSD" > sysnames printf " */\n\n" > sysnames printf "const char *%s[] = {\n", namesname > sysnames printf "/*\n * System call numbers.\n *\n" > syshdr printf " * DO NOT EDIT-- this file is automatically generated.\n" > syshdr printf " * $%s$\n", "FreeBSD" > syshdr printf " */\n\n" > syshdr printf "# FreeBSD system call object files.\n" > sysmk printf "# DO NOT EDIT-- this file is automatically generated.\n" > sysmk printf "# $%s$\n", "FreeBSD" > sysmk printf "MIASM = " > sysmk printf "/*\n * System call argument to DTrace register array converstion.\n *\n" > systrace printf " * DO NOT EDIT-- this file is automatically generated.\n" > systrace printf " * $%s$\n", "FreeBSD" > systrace printf " * This file is part of the DTrace syscall provider.\n */\n\n" > systrace printf "static void\nsystrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)\n{\n" > systrace printf "\tint64_t *iarg = (int64_t *) uarg;\n" > systrace printf "\tswitch (sysnum) {\n" > systrace printf "static void\nsystrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systracetmp printf "\tswitch (sysnum) {\n" > systracetmp printf "static void\nsystrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systraceret printf "\tswitch (sysnum) {\n" > systraceret } NR == 1 { next } NF == 0 || $1 ~ /^;/ { next } $1 ~ /^#[ ]*include/ { print > sysinc next } $1 ~ /^#[ ]*if/ { print > sysent print > sysdcl print > sysarg print > syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret savesyscall = syscall next } $1 ~ /^#[ ]*else/ { print > sysent print > sysdcl print > sysarg print > 
syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret syscall = savesyscall next } $1 ~ /^#/ { print > sysent print > sysdcl print > sysarg print > syscompat print > syscompat4 print > syscompat6 print > syscompat7 print > syscompat10 print > syscompat11 print > sysnames print > systrace print > systracetmp print > systraceret next } # Returns true if the type "name" is the first flag in the type field function type(name, flags, n) { n = split($3, flags, /\|/) return (n > 0 && flags[1] == name) } # Returns true if the flag "name" is set in the type field function flag(name, flags, i, n) { n = split($3, flags, /\|/) for (i = 1; i <= n; i++) if (flags[i] == name) return 1 return 0 } { n = split($1, syscall_range, /-/) if (n == 1) { syscall_range[2] = syscall_range[1] } else if (n == 2) { if (!type("UNIMPL")) { printf "%s: line %d: range permitted only with UNIMPL\n", infile, NR exit 1 } } else { printf "%s: line %d: invalid syscall number or range %s\n", infile, NR, $1 exit 1 } } syscall != syscall_range[1] { printf "%s: line %d: syscall number out of sync at %d\n", infile, NR, syscall printf "line is:\n" print exit 1 } function align_sysent_comment(column) { printf("\t") > sysent column = column + 8 - column % 8 while (column < 56) { printf("\t") > sysent column = column + 8 } } function parserr(was, wanted) { printf "%s: line %d: unexpected %s (expected %s)\n", infile, NR, was, wanted exit 1 } function parseline() { f=4 # toss number, type, audit event argc= 0; argssize = "0" thr_flag = "SY_THR_STATIC" if (flag("NOTSTATIC")) { thr_flag = "SY_THR_ABSENT" } if ($NF != "}") { funcalias=$(NF-2) argalias=$(NF-1) rettype=$NF end=NF-3 } else { funcalias="" argalias="" rettype="int" end=NF } if (flag("NODEF")) { auditev="AUE_NULL" funcname=$4 argssize = "AS(" $6 ")" return } if ($f != "{") parserr($f, "{") f++ if ($end != "}") parserr($end, "}") end-- if ($end != ";") parserr($end, ";") end-- if ($end != ")") parserr($end, ")") end-- syscallret=$f f++ funcname=$f # # We now know the func name, so define a flags field for it. # Do this before any other processing as we may return early # from it. # for (cap in capenabled) { if (funcname == capenabled[cap]) { flags = "SYF_CAPENABLED"; break; } } if (funcalias == "") funcalias = funcname if (argalias == "") { argalias = funcname "_args" if (flag("COMPAT")) argalias = "o" argalias if (flag("COMPAT4")) argalias = "freebsd4_" argalias if (flag("COMPAT6")) argalias = "freebsd6_" argalias if (flag("COMPAT7")) argalias = "freebsd7_" argalias if (flag("COMPAT10")) argalias = "freebsd10_" argalias if (flag("COMPAT11")) argalias = "freebsd11_" argalias } f++ if ($f != "(") parserr($f, ")") f++ if (f == end) { if ($f != "void") parserr($f, "argument definition") return } while (f <= end) { argc++ argtype[argc]="" oldf="" while (f < end && $(f+1) != ",") { if (argtype[argc] != "" && oldf != "*") argtype[argc] = argtype[argc]" "; argtype[argc] = argtype[argc]$f; oldf = $f; f++ } if (argtype[argc] == "") parserr($f, "argument definition") # The parser adds space around parens. # Remove it from annotations. 
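	# For example, a hypothetical annotated parameter that
	# arrives here as
	#	_In_z_ const char * path
	# leaves the two annotation gsubs below as just
	#	const char * path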
gsub(/ \( /, "(", argtype[argc]); gsub(/ \)/, ")", argtype[argc]); #remove annotations gsub(/_In[^ ]*[_)] /, "", argtype[argc]); gsub(/_Out[^ ]*[_)] /, "", argtype[argc]); argname[argc]=$f; f += 2; # skip name, and any comma } if (argc != 0) argssize = "AS(" argalias ")" } { comment = $4 if (NF < 7) for (i = 5; i <= NF; i++) comment = comment " " $i } # # The AUE_ audit event identifier. # { auditev = $2; } # # The flags, if any. # { flags = "0"; } type("STD") || type("NODEF") || type("NOARGS") || type("NOPROTO") \ || type("NOSTD") { parseline() printf("\t/* %s */\n\tcase %d: {\n", funcname, syscall) > systrace printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systracetmp printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systraceret if (argc > 0) { printf("\t\tswitch(ndx) {\n") > systracetmp printf("\t\tstruct %s *p = params;\n", argalias) > systrace for (i = 1; i <= argc; i++) { arg = argtype[i] sub("__restrict$", "", arg) if (index(arg, "*") > 0) printf("\t\tcase %d:\n\t\t\tp = \"userland %s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp else printf("\t\tcase %d:\n\t\t\tp = \"%s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp if (index(arg, "*") > 0 || arg == "caddr_t") printf("\t\tuarg[%d] = (intptr_t) p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else if (arg == "union l_semun") printf("\t\tuarg[%d] = p->%s.buf; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else if (substr(arg, 1, 1) == "u" || arg == "size_t") printf("\t\tuarg[%d] = p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace else printf("\t\tiarg[%d] = p->%s; /* %s */\n", \ i - 1, \ argname[i], arg) > systrace } printf("\t\tdefault:\n\t\t\tbreak;\n\t\t};\n") > systracetmp printf("\t\tif (ndx == 0 || ndx == 1)\n") > systraceret printf("\t\t\tp = \"%s\";\n", syscallret) > systraceret printf("\t\tbreak;\n") > systraceret } printf("\t\t*n_args = %d;\n\t\tbreak;\n\t}\n", argc) > systrace printf("\t\tbreak;\n") > systracetmp if (argc != 0 && !flag("NOARGS") && !flag("NOPROTO") && \ !flag("NODEF")) { printf("struct %s {\n", argalias) > sysarg for (i = 1; i <= argc; i++) printf("\tchar %s_l_[PADL_(%s)]; " \ "%s %s; char %s_r_[PADR_(%s)];\n", argname[i], argtype[i], argtype[i], argname[i], argname[i], argtype[i]) > sysarg printf("};\n") > sysarg } else if (!flag("NOARGS") && !flag("NOPROTO") && !flag("NODEF")) printf("struct %s {\n\tregister_t dummy;\n};\n", argalias) > sysarg if (!flag("NOPROTO") && !flag("NODEF")) { if (funcname == "nosys" || funcname == "lkmnosys" || funcname == "sysarch" || funcname ~ /^freebsd/ || funcname ~ /^linux/ || funcname ~ /^ibcs2/ || funcname ~ /^xenix/ || funcname ~ /^cloudabi/) { printf("%s\t%s(struct thread *, struct %s *)", rettype, funcname, argalias) > sysdcl } else { printf("%s\tsys_%s(struct thread *, struct %s *)", rettype, funcname, argalias) > sysdcl } printf(";\n") > sysdcl printf("#define\t%sAUE_%s\t%s\n", syscallprefix, funcalias, auditev) > sysaue } printf("\t{ %s, (sy_call_t *)", argssize) > sysent column = 8 + 2 + length(argssize) + 15 if (flag("NOSTD")) { printf("lkmressys, AUE_NULL, NULL, 0, 0, %s, SY_THR_ABSENT },", flags) > sysent column = column + length("lkmressys") + length("AUE_NULL") + 3 } else { if (funcname == "nosys" || funcname == "sysarch" || funcname == "lkmnosys" || funcname ~ /^freebsd/ || funcname ~ /^linux/ || funcname ~ /^ibcs2/ || funcname ~ /^xenix/ || funcname ~ /^cloudabi/) { printf("%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent column = column + length(funcname) + length(auditev) + length(flags) + 
3 } else { printf("sys_%s, %s, NULL, 0, 0, %s, %s },", funcname, auditev, flags, thr_flag) > sysent column = column + length(funcname) + length(auditev) + length(flags) + 3 + 4 } } align_sysent_comment(column) printf("/* %d = %s */\n", syscall, funcalias) > sysent printf("\t\"%s\",\t\t\t/* %d = %s */\n", funcalias, syscall, funcalias) > sysnames if (!flag("NODEF")) { printf("#define\t%s%s\t%d\n", syscallprefix, funcalias, syscall) > syshdr printf(" \\\n\t%s.o", funcalias) > sysmk } syscall++ next } type("COMPAT") || type("COMPAT4") || type("COMPAT6") || \ type("COMPAT7") || type("COMPAT10") || type("COMPAT11") { if (flag("COMPAT")) { ncompat++ out = syscompat outdcl = syscompatdcl wrap = "compat" prefix = "o" descr = "old" } else if (flag("COMPAT4")) { ncompat4++ out = syscompat4 outdcl = syscompat4dcl wrap = "compat4" prefix = "freebsd4_" descr = "freebsd4" } else if (flag("COMPAT6")) { ncompat6++ out = syscompat6 outdcl = syscompat6dcl wrap = "compat6" prefix = "freebsd6_" descr = "freebsd6" } else if (flag("COMPAT7")) { ncompat7++ out = syscompat7 outdcl = syscompat7dcl wrap = "compat7" prefix = "freebsd7_" descr = "freebsd7" } else if (flag("COMPAT10")) { ncompat10++ out = syscompat10 outdcl = syscompat10dcl wrap = "compat10" prefix = "freebsd10_" descr = "freebsd10" } else if (flag("COMPAT11")) { ncompat11++ out = syscompat11 outdcl = syscompat11dcl wrap = "compat11" prefix = "freebsd11_" descr = "freebsd11" } parseline() if (argc != 0 && !flag("NOARGS") && !flag("NOPROTO") && \ !flag("NODEF")) { printf("struct %s {\n", argalias) > out for (i = 1; i <= argc; i++) printf("\tchar %s_l_[PADL_(%s)]; %s %s; " \ "char %s_r_[PADR_(%s)];\n", argname[i], argtype[i], argtype[i], argname[i], argname[i], argtype[i]) > out printf("};\n") > out } else if (!flag("NOARGS") && !flag("NOPROTO") && !flag("NODEF")) printf("struct %s {\n\tregister_t dummy;\n};\n", argalias) > sysarg if (!flag("NOPROTO") && !flag("NODEF")) { printf("%s\t%s%s(struct thread *, struct %s *);\n", rettype, prefix, funcname, argalias) > outdcl printf("#define\t%sAUE_%s%s\t%s\n", syscallprefix, prefix, funcname, auditev) > sysaue } if (flag("NOSTD")) { printf("\t{ %s, (sy_call_t *)%s, %s, NULL, 0, 0, 0, SY_THR_ABSENT },", "0", "lkmressys", "AUE_NULL") > sysent align_sysent_comment(8 + 2 + length("0") + 15 + \ length("lkmressys") + length("AUE_NULL") + 3) } else { printf("\t{ %s(%s,%s), %s, NULL, 0, 0, %s, %s },", wrap, argssize, funcname, auditev, flags, thr_flag) > sysent align_sysent_comment(8 + 9 + length(argssize) + 1 + \ length(funcname) + length(auditev) + \ length(flags) + 4) } printf("/* %d = %s %s */\n", syscall, descr, funcalias) > sysent printf("\t\"%s.%s\",\t\t/* %d = %s %s */\n", wrap, funcalias, syscall, descr, funcalias) > sysnames # Do not provide freebsdN_* symbols in libc for < FreeBSD 7 if (flag("COMPAT") || flag("COMPAT4") || flag("COMPAT6")) { printf("\t\t\t\t/* %d is %s %s */\n", syscall, descr, funcalias) > syshdr } else if (!flag("NODEF")) { printf("#define\t%s%s%s\t%d\n", syscallprefix, prefix, funcalias, syscall) > syshdr printf(" \\\n\t%s%s.o", prefix, funcalias) > sysmk } syscall++ next } type("OBSOL") { printf("\t{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT },") > sysent align_sysent_comment(34) printf("/* %d = obsolete %s */\n", syscall, comment) > sysent printf("\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n", $4, syscall, comment) > sysnames printf("\t\t\t\t/* %d is obsolete %s */\n", syscall, comment) > syshdr syscall++ next } type("UNIMPL") { while (syscall <= syscall_range[2]) { 
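		# Every syscall number in an UNIMPL range gets its
		# own nosys slot; e.g. a (hypothetical) master line
		#	100-102	AUE_NULL	UNIMPL	reserved
		# would emit three identical sysent entries here.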
printf("\t{ 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT },\t\t\t/* %d = %s */\n", syscall, comment) > sysent printf("\t\"#%d\",\t\t\t/* %d = %s */\n", syscall, syscall, comment) > sysnames syscall++ } next } { printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $3 exit 1 } END { printf "\n#define AS(name) (sizeof(struct name) / sizeof(register_t))\n" > sysinc if (ncompat != 0) { printf "\n#ifdef %s\n", compat > sysinc printf "#define compat(n, name) n, (sy_call_t *)__CONCAT(o,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat4 != 0) { printf "\n#ifdef %s\n", compat4 > sysinc printf "#define compat4(n, name) n, (sy_call_t *)__CONCAT(freebsd4_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat4(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat6 != 0) { printf "\n#ifdef %s\n", compat6 > sysinc printf "#define compat6(n, name) n, (sy_call_t *)__CONCAT(freebsd6_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat6(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat7 != 0) { printf "\n#ifdef %s\n", compat7 > sysinc printf "#define compat7(n, name) n, (sy_call_t *)__CONCAT(freebsd7_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat7(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat10 != 0) { printf "\n#ifdef %s\n", compat10 > sysinc printf "#define compat10(n, name) n, (sy_call_t *)__CONCAT(freebsd10_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat10(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } if (ncompat11 != 0) { printf "\n#ifdef %s\n", compat11 > sysinc printf "#define compat11(n, name) n, (sy_call_t *)__CONCAT(freebsd11_,name)\n" > sysinc printf "#else\n" > sysinc printf "#define compat11(n, name) 0, (sy_call_t *)nosys\n" > sysinc printf "#endif\n" > sysinc } printf("\n#endif /* %s */\n\n", compat) > syscompatdcl printf("\n#endif /* %s */\n\n", compat4) > syscompat4dcl printf("\n#endif /* %s */\n\n", compat6) > syscompat6dcl printf("\n#endif /* %s */\n\n", compat7) > syscompat7dcl printf("\n#endif /* %s */\n\n", compat10) > syscompat10dcl printf("\n#endif /* %s */\n\n", compat11) > syscompat11dcl printf("\n#undef PAD_\n") > sysprotoend printf("#undef PADL_\n") > sysprotoend printf("#undef PADR_\n") > sysprotoend printf("\n#endif /* !%s */\n", sysproto_h) > sysprotoend printf("\n") > sysmk printf("};\n") > sysent printf("};\n") > sysnames printf("#define\t%sMAXSYSCALL\t%d\n", syscallprefix, syscall) \ > syshdr printf "\tdefault:\n\t\t*n_args = 0;\n\t\tbreak;\n\t};\n}\n" > systrace printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systracetmp printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systraceret } ' cat $sysinc $sysent >> $syssw cat $sysarg $sysdcl \ $syscompat $syscompatdcl \ $syscompat4 $syscompat4dcl \ $syscompat6 $syscompat6dcl \ $syscompat7 $syscompat7dcl \ $syscompat10 $syscompat10dcl \ $syscompat11 $syscompat11dcl \ $sysaue $sysprotoend > $sysproto cat $systracetmp >> $systrace cat $systraceret >> $systrace Index: projects/pnfs-planb-server/sys/kern/uipc_socket.c =================================================================== --- projects/pnfs-planb-server/sys/kern/uipc_socket.c (revision 334966) +++ projects/pnfs-planb-server/sys/kern/uipc_socket.c (revision 334967) @@ 
-1,4141 +1,4140 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. * Copyright (c) 2004 The FreeBSD Foundation * Copyright (c) 2004-2008 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 */ /* * Comments on the socket life cycle: * * soalloc() sets of socket layer state for a socket, called only by * socreate() and sonewconn(). Socket layer private. * * sodealloc() tears down socket layer state for a socket, called only by * sofree() and sonewconn(). Socket layer private. * * pru_attach() associates protocol layer state with an allocated socket; * called only once, may fail, aborting socket allocation. This is called * from socreate() and sonewconn(). Socket layer private. * * pru_detach() disassociates protocol layer state from an attached socket, * and will be called exactly once for sockets in which pru_attach() has * been successfully called. If pru_attach() returned an error, * pru_detach() will not be called. Socket layer private. * * pru_abort() and pru_close() notify the protocol layer that the last * consumer of a socket is starting to tear down the socket, and that the * protocol should terminate the connection. Historically, pru_abort() also * detached protocol state from the socket state, but this is no longer the * case. * * socreate() creates a socket and attaches protocol state. This is a public * interface that may be used by socket layer consumers to create new * sockets. * * sonewconn() creates a socket and attaches protocol state. This is a * public interface that may be used by protocols to create new sockets when * a new connection is received and will be available for accept() on a * listen socket. * * soclose() destroys a socket after possibly waiting for it to disconnect. * This is a public interface that socket consumers should use to close and * release a socket when done with it. 
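 *
 * A minimal consumer of these two public interfaces might look
 * like the following sketch (error handling trimmed, not an
 * excerpt from the tree):
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(PF_INET, &so, SOCK_STREAM, IPPROTO_TCP,
 *	    td->td_ucred, td);
 *	if (error == 0) {
 *		... use the socket ...
 *		soclose(so);
 *	}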
* * soabort() destroys a socket without waiting for it to disconnect (used * only for incoming connections that are already partially or fully * connected). This is used internally by the socket layer when clearing * listen socket queues (due to overflow or close on the listen socket), but * is also a public interface protocols may use to abort connections in * their incomplete listen queues should they no longer be required. Sockets * placed in completed connection listen queues should not be aborted for * reasons described in the comment above the soclose() implementation. This * is not a general purpose close routine, and except in the specific * circumstances described here, should not be used. * * sofree() will free a socket and its protocol state if all references on * the socket have been released, and is the public interface to attempt to * free a socket when a reference is removed. This is a socket layer private * interface. * * NOTE: In addition to socreate() and soclose(), which provide a single * socket reference to the consumer to be managed as required, there are two * calls to explicitly manage socket references, soref(), and sorele(). * Currently, these are generally required only when transitioning a socket * from a listen queue to a file descriptor, in order to prevent garbage * collection of the socket at an untimely moment. For a number of reasons, * these interfaces are not preferred, and should be avoided. * * NOTE: With regard to VNETs the general rule is that callers do not set * curvnet. Exceptions to this rule include soabort(), sodisconnect(), * sofree() (and with that sorele(), sotryfree()), as well as sonewconn() * and sorflush(), which are usually called from a pre-set VNET context. * sopoll() currently does not need a VNET context to be set. 
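 *
 * For the exceptions listed above, the pattern is simply (sketch):
 *
 *	CURVNET_SET(so->so_vnet);
 *	... call into the protocol ...
 *	CURVNET_RESTORE();
 *
 * which is also what socreate() does around pru_attach() below.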
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include /* for struct knote */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #include #endif static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags); static void so_rdknl_lock(void *); static void so_rdknl_unlock(void *); static void so_rdknl_assert_locked(void *); static void so_rdknl_assert_unlocked(void *); static void so_wrknl_lock(void *); static void so_wrknl_unlock(void *); static void so_wrknl_assert_locked(void *); static void so_wrknl_assert_unlocked(void *); static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static int filt_soempty(struct knote *kn, long hint); static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id); fo_kqfilter_t soo_kqfilter; static struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_soread, }; static struct filterops sowrite_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; static struct filterops soempty_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_soempty, }; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); #define VNET_SO_ASSERT(so) \ VNET_ASSERT(curvnet != NULL, \ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]); #define V_socket_hhh VNET(socket_hhh) /* * Limit on the number of connections in the listen queue waiting * for accept(2). * NB: The original sysctl somaxconn is still available but hidden * to prevent confusion about the actual purpose of this number. */ static u_int somaxconn = SOMAXCONN; static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS) { int error; int val; val = somaxconn; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr ) return (error); /* * The purpose of the UINT_MAX / 3 limit, is so that the formula * 3 * so_qlimit / 2 * below, will not overflow. */ if (val < 1 || val > UINT_MAX / 3) return (EINVAL); somaxconn = val; return (0); } SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size"); SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size (compat)"); static int numopensockets; SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, &numopensockets, 0, "Number of open sockets"); /* * accept_mtx locks down per-socket fields relating to accept queues. See * socketvar.h for an annotation of the protected fields of struct socket. */ struct mtx accept_mtx; MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF); /* * so_global_mtx protects so_gencnt, numopensockets, and the per-socket * so_gencnt field. 
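 *
 * Updates therefore always happen with it held, as in this sketch
 * of the pattern soalloc() and sodealloc() use below:
 *
 *	mtx_lock(&so_global_mtx);
 *	so->so_gencnt = ++so_gencnt;
 *	++numopensockets;
 *	mtx_unlock(&so_global_mtx);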
*/ static struct mtx so_global_mtx; MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); /* * General IPC sysctl name space, used by sockets and a variety of other IPC * types. */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* * Initialize the socket subsystem and set up the socket * memory allocator. */ static uma_zone_t socket_zone; int maxsockets; static void socket_zone_change(void *tag) { maxsockets = uma_zone_set_max(socket_zone, maxsockets); } static void socket_hhook_register(int subtype) { if (hhook_head_register(HHOOK_TYPE_SOCKET, subtype, &V_socket_hhh[subtype], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register hook\n", __func__); } static void socket_hhook_deregister(int subtype) { if (hhook_head_deregister(V_socket_hhh[subtype]) != 0) printf("%s: WARNING: unable to deregister hook\n", __func__); } static void socket_init(void *tag) { socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); maxsockets = uma_zone_set_max(socket_zone, maxsockets); uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, EVENTHANDLER_PRI_FIRST); } SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL); static void socket_vnet_init(const void *unused __unused) { int i; /* We expect a contiguous range */ for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_register(i); } VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_init, NULL); static void socket_vnet_uninit(const void *unused __unused) { int i; for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_deregister(i); } VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_uninit, NULL); /* * Initialise maxsockets. This SYSINIT must be run after * tunable_mbinit(). */ static void init_maxsockets(void *ignored) { TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); maxsockets = imax(maxsockets, maxfiles); } SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); /* * Sysctl to get and set the maximum global sockets limit. Notify protocols * of the change so that they can update their dependent limits as required. */ static int sysctl_maxsockets(SYSCTL_HANDLER_ARGS) { int error, newmaxsockets; newmaxsockets = maxsockets; error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); if (error == 0 && req->newptr) { if (newmaxsockets > maxsockets && newmaxsockets <= maxfiles) { maxsockets = newmaxsockets; EVENTHANDLER_INVOKE(maxsockets_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW, &maxsockets, 0, sysctl_maxsockets, "IU", "Maximum number of sockets available"); /* * Socket operation routines. These routines are called by the routines in * sys_socket.c or from a system process, and implement the semantics of * socket operations by switching out to the protocol specific routines. */ /* * Get a socket structure from our zone, and initialize it. Note that it * would probably be better to allocate socket and PCB at the same time, but * I'm not convinced that all the protocols can be easily modified to do * this. * * soalloc() returns a socket with a ref count of 0. 
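 *
 * Callers that hand the new socket out bump the count themselves;
 * socreate(), for instance, ends with (sketch):
 *
 *	soref(so);
 *	*aso = so;
 *	return (0);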
*/ static struct socket * soalloc(struct vnet *vnet) { struct socket *so; so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO); if (so == NULL) return (NULL); #ifdef MAC if (mac_socket_init(so, M_NOWAIT) != 0) { uma_zfree(socket_zone, so); return (NULL); } #endif if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) { uma_zfree(socket_zone, so); return (NULL); } /* * The socket locking protocol allows to lock 2 sockets at a time, * however, the first one must be a listening socket. WITNESS lacks * a feature to change class of an existing lock, so we use DUPOK. */ mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK); SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); so->so_rcv.sb_sel = &so->so_rdsel; so->so_snd.sb_sel = &so->so_wrsel; sx_init(&so->so_snd.sb_sx, "so_snd_sx"); sx_init(&so->so_rcv.sb_sx, "so_rcv_sx"); TAILQ_INIT(&so->so_snd.sb_aiojobq); TAILQ_INIT(&so->so_rcv.sb_aiojobq); TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so); TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so); #ifdef VIMAGE VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet = vnet; #endif /* We shouldn't need the so_global_mtx */ if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) { /* Do we need more comprehensive error returns? */ uma_zfree(socket_zone, so); return (NULL); } mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; ++numopensockets; #ifdef VIMAGE vnet->vnet_sockcnt++; #endif mtx_unlock(&so_global_mtx); return (so); } /* * Free the storage associated with a socket at the socket layer, tear down * locks, labels, etc. All protocol state is assumed already to have been * torn down (and possibly never set up) by the caller. */ static void sodealloc(struct socket *so) { KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL")); mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; --numopensockets; /* Could be below, but faster here. */ #ifdef VIMAGE VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet->vnet_sockcnt--; #endif mtx_unlock(&so_global_mtx); #ifdef MAC mac_socket_destroy(so); #endif hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE); crfree(so->so_cred); khelp_destroy_osd(&so->osd); if (SOLISTENING(so)) { if (so->sol_accept_filter != NULL) accept_filt_setopt(so, NULL); } else { if (so->so_rcv.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); if (so->so_snd.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); sx_destroy(&so->so_snd.sb_sx); sx_destroy(&so->so_rcv.sb_sx); SOCKBUF_LOCK_DESTROY(&so->so_snd); SOCKBUF_LOCK_DESTROY(&so->so_rcv); } mtx_destroy(&so->so_lock); uma_zfree(socket_zone, so); } /* * socreate returns a socket with a ref count of 1. The socket should be * closed with soclose(). */ int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td) { struct protosw *prp; struct socket *so; int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == NULL) { /* No support for domain. */ if (pffinddomain(dom) == NULL) return (EAFNOSUPPORT); /* No support for socket type. 
 */
		if (proto == 0 && type != 0)
			return (EPROTOTYPE);
		return (EPROTONOSUPPORT);
	}

	if (prp->pr_usrreqs->pru_attach == NULL ||
	    prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
		return (EPROTONOSUPPORT);

	if (prison_check_af(cred, prp->pr_domain->dom_family) != 0)
		return (EPROTONOSUPPORT);

	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(CRED_TO_VNET(cred));
	if (so == NULL)
		return (ENOBUFS);

	so->so_type = type;
	so->so_cred = crhold(cred);
	if ((prp->pr_domain->dom_family == PF_INET) ||
	    (prp->pr_domain->dom_family == PF_INET6) ||
	    (prp->pr_domain->dom_family == PF_ROUTE))
		so->so_fibnum = td->td_proc->p_fibnum;
	else
		so->so_fibnum = 0;
	so->so_proto = prp;
#ifdef MAC
	mac_socket_create(cred, so);
#endif
	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
	    so_rdknl_assert_locked, so_rdknl_assert_unlocked);
	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
	    so_wrknl_assert_locked, so_wrknl_assert_unlocked);
	/*
	 * Auto-sizing of socket buffers is managed by the protocols and
	 * the appropriate flags must be set in the pru_attach function.
	 */
	CURVNET_SET(so->so_vnet);
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
	CURVNET_RESTORE();
	if (error) {
		sodealloc(so);
		return (error);
	}
	soref(so);
	*aso = so;
	return (0);
}

#ifdef REGRESSION
static int regression_sonewconn_earlytest = 1;
SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
    &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
#endif

/*
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn is called.  If the connection is possible (subject
 * to space constraints, etc.) then we allocate a new structure, properly
 * linked into the data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 *
 * Note: the ref count on the socket is 0 on return.
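 *
 * As a worked example of the overflow check below: a listener set up with
 * listen(fd, 128) (assuming 128 <= somaxconn) gets sol_qlimit = 128, so new
 * connections start being dropped once sol_qlen exceeds 3 * 128 / 2 = 192.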
*/ struct socket * sonewconn(struct socket *head, int connstatus) { static struct timeval lastover; static struct timeval overinterval = { 60, 0 }; static int overcount; struct socket *so; u_int over; SOLISTEN_LOCK(head); over = (head->sol_qlen > 3 * head->sol_qlimit / 2); SOLISTEN_UNLOCK(head); #ifdef REGRESSION if (regression_sonewconn_earlytest && over) { #else if (over) { #endif overcount++; if (ratecheck(&lastover, &overinterval)) { log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: " "%i already in queue awaiting acceptance " "(%d occurrences)\n", __func__, head->so_pcb, head->sol_qlen, overcount); overcount = 0; } return (NULL); } VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL", __func__, head)); so = soalloc(head->so_vnet); if (so == NULL) { log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " "limit reached or out of memory\n", __func__, head->so_pcb); return (NULL); } so->so_listen = head; so->so_type = head->so_type; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_fibnum = head->so_fibnum; so->so_proto = head->so_proto; so->so_cred = crhold(head->so_cred); #ifdef MAC mac_socket_newconn(head, so); #endif knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock, so_rdknl_assert_locked, so_rdknl_assert_unlocked); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_locked, so_wrknl_assert_unlocked); VNET_SO_ASSERT(head); if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", __func__, head->so_pcb); return (NULL); } if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->sol_sbrcv_lowat; so->so_snd.sb_lowat = head->sol_sbsnd_lowat; so->so_rcv.sb_timeo = head->sol_sbrcv_timeo; so->so_snd.sb_timeo = head->sol_sbsnd_timeo; so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE; so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE; SOLISTEN_LOCK(head); if (head->sol_accept_filter != NULL) connstatus = 0; so->so_state |= connstatus; so->so_options = head->so_options & ~SO_ACCEPTCONN; soref(head); /* A socket on (in)complete queue refs head. */ if (connstatus) { TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list); so->so_qstate = SQ_COMP; head->sol_qlen++; solisten_wakeup(head); /* unlocks */ } else { /* * Keep removing sockets from the head until there's room for * us to insert on the tail. In pre-locking revisions, this * was a simple if(), but as we could be racing with other * threads and soabort() requires dropping locks, we must * loop waiting for the condition to be true. */ while (head->sol_incqlen > head->sol_qlimit) { struct socket *sp; sp = TAILQ_FIRST(&head->sol_incomp); TAILQ_REMOVE(&head->sol_incomp, sp, so_list); head->sol_incqlen--; SOCK_LOCK(sp); sp->so_qstate = SQ_NONE; sp->so_listen = NULL; SOCK_UNLOCK(sp); sorele(head); /* does SOLISTEN_UNLOCK, head stays */ soabort(sp); SOLISTEN_LOCK(head); } TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list); so->so_qstate = SQ_INCOMP; head->sol_incqlen++; SOLISTEN_UNLOCK(head); } return (so); } #ifdef SCTP /* * Socket part of sctp_peeloff(). Detach a new socket from an * association. The new socket is returned with a reference. 
*/ struct socket * sopeeloff(struct socket *head) { struct socket *so; VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", __func__, __LINE__, head)); so = soalloc(head->so_vnet); if (so == NULL) { log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " "limit reached or out of memory\n", __func__, head->so_pcb); return (NULL); } so->so_type = head->so_type; so->so_options = head->so_options; so->so_linger = head->so_linger; so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED; so->so_fibnum = head->so_fibnum; so->so_proto = head->so_proto; so->so_cred = crhold(head->so_cred); #ifdef MAC mac_socket_newconn(head, so); #endif knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock, so_rdknl_assert_locked, so_rdknl_assert_unlocked); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_locked, so_wrknl_assert_unlocked); VNET_SO_ASSERT(head); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", __func__, head->so_pcb); return (NULL); } if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; so->so_snd.sb_lowat = head->so_snd.sb_lowat; so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; so->so_snd.sb_timeo = head->so_snd.sb_timeo; so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; soref(so); return (so); } #endif /* SCTP */ int sobind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td); CURVNET_RESTORE(); return (error); } int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_bindat)(fd, so, nam, td); CURVNET_RESTORE(); return (error); } /* * solisten() transitions a socket from a non-listening state to a listening * state, but can also be used to update the listen queue depth on an * existing listen socket. The protocol will call back into the sockets * layer using solisten_proto_check() and solisten_proto() to check and set * socket-layer listen state. Call backs are used so that the protocol can * acquire both protocol and socket layer locks in whatever order is required * by the protocol. * * Protocol implementors are advised to hold the socket lock across the * socket-layer test and set to avoid races at the socket layer. */ int solisten(struct socket *so, int backlog, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td); CURVNET_RESTORE(); return (error); } int solisten_proto_check(struct socket *so) { SOCK_LOCK_ASSERT(so); if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) return (EINVAL); return (0); } void solisten_proto(struct socket *so, int backlog) { int sbrcv_lowat, sbsnd_lowat; u_int sbrcv_hiwat, sbsnd_hiwat; short sbrcv_flags, sbsnd_flags; sbintime_t sbrcv_timeo, sbsnd_timeo; SOCK_LOCK_ASSERT(so); if (SOLISTENING(so)) goto listening; /* * Change this socket to listening state. 
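 *
 * We get here from a protocol's pru_listen method; a rough sketch of the
 * calling convention, loosely modeled on the TCP usrreq (details vary by
 * protocol, so treat this as illustrative only):
 *
 *	SOCK_LOCK(so);
 *	error = solisten_proto_check(so);
 *	if (error == 0) {
 *		... protocol-level setup, e.g. binding a local port ...
 *		solisten_proto(so, backlog);
 *	}
 *	SOCK_UNLOCK(so);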
*/ sbrcv_lowat = so->so_rcv.sb_lowat; sbsnd_lowat = so->so_snd.sb_lowat; sbrcv_hiwat = so->so_rcv.sb_hiwat; sbsnd_hiwat = so->so_snd.sb_hiwat; sbrcv_flags = so->so_rcv.sb_flags; sbsnd_flags = so->so_snd.sb_flags; sbrcv_timeo = so->so_rcv.sb_timeo; sbsnd_timeo = so->so_snd.sb_timeo; sbdestroy(&so->so_snd, so); sbdestroy(&so->so_rcv, so); sx_destroy(&so->so_snd.sb_sx); sx_destroy(&so->so_rcv.sb_sx); SOCKBUF_LOCK_DESTROY(&so->so_snd); SOCKBUF_LOCK_DESTROY(&so->so_rcv); #ifdef INVARIANTS bzero(&so->so_rcv, sizeof(struct socket) - offsetof(struct socket, so_rcv)); #endif so->sol_sbrcv_lowat = sbrcv_lowat; so->sol_sbsnd_lowat = sbsnd_lowat; so->sol_sbrcv_hiwat = sbrcv_hiwat; so->sol_sbsnd_hiwat = sbsnd_hiwat; so->sol_sbrcv_flags = sbrcv_flags; so->sol_sbsnd_flags = sbsnd_flags; so->sol_sbrcv_timeo = sbrcv_timeo; so->sol_sbsnd_timeo = sbsnd_timeo; so->sol_qlen = so->sol_incqlen = 0; TAILQ_INIT(&so->sol_incomp); TAILQ_INIT(&so->sol_comp); so->sol_accept_filter = NULL; so->sol_accept_filter_arg = NULL; so->sol_accept_filter_str = NULL; so->sol_upcall = NULL; so->sol_upcallarg = NULL; so->so_options |= SO_ACCEPTCONN; listening: if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->sol_qlimit = backlog; } /* * Wakeup listeners/subsystems once we have a complete connection. * Enters with lock, returns unlocked. */ void solisten_wakeup(struct socket *sol) { if (sol->sol_upcall != NULL) (void )sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT); else { selwakeuppri(&sol->so_rdsel, PSOCK); KNOTE_LOCKED(&sol->so_rdsel.si_note, 0); } SOLISTEN_UNLOCK(sol); wakeup_one(&sol->sol_comp); } /* * Return single connection off a listening socket queue. Main consumer of * the function is kern_accept4(). Some modules, that do their own accept * management also use the function. * * Listening socket must be locked on entry and is returned unlocked on * return. * The flags argument is set of accept4(2) flags and ACCEPT4_INHERIT. */ int solisten_dequeue(struct socket *head, struct socket **ret, int flags) { struct socket *so; int error; SOLISTEN_LOCK_ASSERT(head); while (!(head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp) && head->so_error == 0) { error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH, "accept", 0); if (error != 0) { SOLISTEN_UNLOCK(head); return (error); } } if (head->so_error) { error = head->so_error; head->so_error = 0; SOLISTEN_UNLOCK(head); return (error); } if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) { SOLISTEN_UNLOCK(head); return (EWOULDBLOCK); } so = TAILQ_FIRST(&head->sol_comp); SOCK_LOCK(so); KASSERT(so->so_qstate == SQ_COMP, ("%s: so %p not SQ_COMP", __func__, so)); soref(so); head->sol_qlen--; so->so_qstate = SQ_NONE; so->so_listen = NULL; TAILQ_REMOVE(&head->sol_comp, so, so_list); if (flags & ACCEPT4_INHERIT) so->so_state |= (head->so_state & SS_NBIO); else so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; SOCK_UNLOCK(so); sorele(head); *ret = so; return (0); } /* * Evaluate the reference count and named references on a socket; if no * references remain, free it. This should be called whenever a reference is * released, such as in sorele(), but also when named reference flags are * cleared in socket or protocol code. * * sofree() will free the socket if: * * - There are no outstanding file descriptor references or related consumers * (so_count == 0). * * - The socket has been closed by user space, if ever open (SS_NOFDREF). * * - The protocol does not have an outstanding strong reference on the socket * (SS_PROTOREF). 
 *
 * - The socket is not in a completed connection queue, where a process has
 *   been notified that it is present.  If it were removed, the user process
 *   may block in accept() despite select() saying the socket was ready.
 */
void
sofree(struct socket *so)
{
	struct protosw *pr = so->so_proto;

	SOCK_LOCK_ASSERT(so);

	if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
	    (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) {
		SOCK_UNLOCK(so);
		return;
	}

	if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
		struct socket *sol;

		sol = so->so_listen;
		KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));

		/*
		 * To solve race between close of a listening socket and
		 * a socket on its incomplete queue, we need to lock both.
		 * The order is first listening socket, then regular.
		 * Since we have neither SS_NOFDREF nor SS_PROTOREF, this
		 * function and the listening socket are the only pointers
		 * to so.  To preserve so and sol, we reference both and then
		 * relock.
		 * After relock the socket may not move to so_comp since it
		 * doesn't have PCB already, but it may be removed from
		 * so_incomp.  If that happens, we share responsibility for
		 * freeing the socket, but soclose() has already removed
		 * it from queue.
		 */
		soref(sol);
		soref(so);
		SOCK_UNLOCK(so);
		SOLISTEN_LOCK(sol);
		SOCK_LOCK(so);
		if (so->so_qstate == SQ_INCOMP) {
			KASSERT(so->so_listen == sol,
			    ("%s: so %p migrated out of sol %p",
			    __func__, so, sol));
			TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
			sol->sol_incqlen--;
			/* This is guaranteed not to be the last. */
			refcount_release(&sol->so_count);
			so->so_qstate = SQ_NONE;
			so->so_listen = NULL;
		} else
			KASSERT(so->so_listen == NULL,
			    ("%s: so %p not on (in)comp with so_listen",
			    __func__, so));
		sorele(sol);
		KASSERT(so->so_count == 1,
		    ("%s: so %p count %u", __func__, so, so->so_count));
		so->so_count = 0;
	}
	if (SOLISTENING(so))
		so->so_error = ECONNABORTED;
	SOCK_UNLOCK(so);

	VNET_SO_ASSERT(so);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
		(*pr->pr_domain->dom_dispose)(so);
	if (pr->pr_usrreqs->pru_detach != NULL)
		(*pr->pr_usrreqs->pru_detach)(so);

	/*
	 * From this point on, we assume that no other references to this
	 * socket exist anywhere else in the stack.  Therefore, no locks need
	 * to be acquired or held.
	 *
	 * We used to do a lot of socket buffer and socket locking here, as
	 * well as invoke sorflush() and perform wakeups.  The direct calls to
	 * dom_dispose() and sbrelease_internal() are an inlining of what was
	 * necessary from sorflush().
	 *
	 * Notice that the socket buffer and kqueue state are torn down
	 * before calling pru_detach.  This means that protocols should not
	 * assume they can perform socket wakeups, etc, in their detach code.
	 */
	if (!SOLISTENING(so)) {
		sbdestroy(&so->so_snd, so);
		sbdestroy(&so->so_rcv, so);
	}
	seldrain(&so->so_rdsel);
	seldrain(&so->so_wrsel);
	knlist_destroy(&so->so_rdsel.si_note);
	knlist_destroy(&so->so_wrsel.si_note);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.  Initiate disconnect
 * if connected.  Free socket when disconnect complete.
 *
 * This function will sorele() the socket.  Note that soclose() may be called
 * prior to the ref count reaching zero.  The actual socket structure will
 * not be freed until the ref count reaches zero.
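 *
 * A condensed reference-count timeline for an ordinary socket, under the
 * conventions documented above:
 *
 *	socreate()	so_count == 1
 *	soclose()	sets SS_NOFDREF, then calls sorele()
 *	sofree()	runs once so_count reaches 0 and no named
 *			references (SS_PROTOREF, queue state) remain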
*/ int soclose(struct socket *so) { struct accept_queue lqueue; bool listening; int error = 0; KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter")); CURVNET_SET(so->so_vnet); funsetown(&so->so_sigio); if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) { if (error == ENOTCONN) error = 0; goto drop; } } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep(&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger * hz); if (error) break; } } } drop: if (so->so_proto->pr_usrreqs->pru_close != NULL) (*so->so_proto->pr_usrreqs->pru_close)(so); if (so->so_dtor != NULL) so->so_dtor(so); SOCK_LOCK(so); if ((listening = (so->so_options & SO_ACCEPTCONN))) { struct socket *sp; TAILQ_INIT(&lqueue); TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list); TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list); so->sol_qlen = so->sol_incqlen = 0; TAILQ_FOREACH(sp, &lqueue, so_list) { SOCK_LOCK(sp); sp->so_qstate = SQ_NONE; sp->so_listen = NULL; SOCK_UNLOCK(sp); /* Guaranteed not to be the last. */ refcount_release(&so->so_count); } } KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; sorele(so); if (listening) { struct socket *sp; TAILQ_FOREACH(sp, &lqueue, so_list) { SOCK_LOCK(sp); if (sp->so_count == 0) { SOCK_UNLOCK(sp); soabort(sp); } else /* sp is now in sofree() */ SOCK_UNLOCK(sp); } } CURVNET_RESTORE(); return (error); } /* * soabort() is used to abruptly tear down a connection, such as when a * resource limit is reached (listen queue depth exceeded), or if a listen * socket is closed while there are sockets waiting to be accepted. * * This interface is tricky, because it is called on an unreferenced socket, * and must be called only by a thread that has actually removed the socket * from the listen queue it was on, or races with other threads are risked. * * This interface will call into the protocol code, so must not be called * with any socket locks held. Protocols do call it while holding their own * recursible protocol mutexes, but this is something that should be subject * to review in the future. */ void soabort(struct socket *so) { /* * In as much as is possible, assert that no references to this * socket are held. This is not quite the same as asserting that the * current thread is responsible for arranging for no references, but * is as close as we can get for now. 
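 *
 * A condensed caller pattern, as in the sonewconn() overflow path above:
 * unlink the socket from the listen queue, drop the queue's reference,
 * release the socket locks and only then abort:
 *
 *	TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
 *	head->sol_incqlen--;
 *	sp->so_qstate = SQ_NONE;
 *	sp->so_listen = NULL;
 *	sorele(head);		(drops SOLISTEN_LOCK)
 *	soabort(sp);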
*/ KASSERT(so->so_count == 0, ("soabort: so_count")); KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF")); KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE")); VNET_SO_ASSERT(so); if (so->so_proto->pr_usrreqs->pru_abort != NULL) (*so->so_proto->pr_usrreqs->pru_abort)(so); SOCK_LOCK(so); sofree(so); } int soaccept(struct socket *so, struct sockaddr **nam) { int error; SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); so->so_state &= ~SS_NOFDREF; SOCK_UNLOCK(so); CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); CURVNET_RESTORE(); return (error); } int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (soconnectat(AT_FDCWD, so, nam, td)); } int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); CURVNET_SET(so->so_vnet); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. This allows * user to disconnect by connecting to, e.g., a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) { error = EISCONN; } else { /* * Prevent accumulated error from previous connection from * biting us. */ so->so_error = 0; if (fd == AT_FDCWD) { error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); } else { error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd, so, nam, td); } } CURVNET_RESTORE(); return (error); } int soconnect2(struct socket *so1, struct socket *so2) { int error; CURVNET_SET(so1->so_vnet); error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); CURVNET_RESTORE(); return (error); } int sodisconnect(struct socket *so) { int error; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) return (EALREADY); VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); return (error); } #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM")); KASSERT(so->so_proto->pr_flags & PR_ATOMIC, ("sosend_dgram: !PR_ATOMIC")); if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. */ if (resid < 0) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0; if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection-based * socket if it supports implied connect. Return ENOTCONN if * not connected and no address is supplied. 
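 *
 * The userland-visible consequence, for illustration: on an unconnected
 * UDP socket sendto(s, buf, len, 0, sa, salen) succeeds, while
 * send(s, buf, len, 0) fails with EDESTADDRREQ; a connection-required
 * protocol without implied connect fails with ENOTCONN instead.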
*/ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto out; } } else if (addr == NULL) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; SOCKBUF_UNLOCK(&so->so_snd); goto out; } } /* * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a * problem and need fixing. */ space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; space -= clen; SOCKBUF_UNLOCK(&so->so_snd); if (resid > space) { error = EMSGSIZE; goto out; } if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf chain. * If no data is to be copied in, a single empty mbuf * is returned. */ top = m_uiotombuf(uio, M_WAITOK, space, max_hdr, (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0))); if (top == NULL) { error = EFAULT; /* only possible error */ goto out; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } KASSERT(resid == 0, ("sosend_dgram: resid != 0")); /* * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock * than with. */ if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously done could be out * of date. We could have received a reset packet in an interrupt or * maybe we slept while doing page faults in uiomove() etc. We could * probably recheck again inside the locking protection here, but * there are probably other places that this also happens. We must * rethink this. */ VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands this flag and * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ (flags & MSG_MORETOCOME) || (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } clen = 0; control = NULL; top = NULL; out: if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } /* * Send on a socket. If send must go all at once and message is larger than * send buffering, then hard error. Lock against other senders. If must go * all at once and not enough room now, then inform user that this would * block and do nothing. Otherwise, if nonblocking, send as much as * possible. The data to be sent is described by "uio" if nonzero, otherwise * by the mbuf chain "top" (which must be null if uio is not). Data provided * in mbuf chain must be small enough to send all at once. * * Returns nonzero on error, timeout or signal; callers must check for short * counts if EINTR/ERESTART are returned. Data and control buffers are freed * on return. */ int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. 
On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; restart: do { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto release; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto release; } } else if (addr == NULL) { SOCKBUF_UNLOCK(&so->so_snd); if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; goto release; } } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) { SOCKBUF_UNLOCK(&so->so_snd); error = EMSGSIZE; goto release; } if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) { SOCKBUF_UNLOCK(&so->so_snd); error = EWOULDBLOCK; goto release; } error = sbwait(&so->so_snd); SOCKBUF_UNLOCK(&so->so_snd); if (error) goto release; goto restart; } SOCKBUF_UNLOCK(&so->so_snd); space -= clen; do { if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf * chain. If resid is 0, which can happen * only if we have control to send, then * a single empty mbuf is returned. This * is a workaround to prevent protocol send * methods to panic. */ top = m_uiotombuf(uio, M_WAITOK, space, (atomic ? max_hdr : 0), (atomic ? M_PKTHDR : 0) | ((flags & MSG_EOR) ? M_EOR : 0)); if (top == NULL) { error = EFAULT; /* only possible error */ goto release; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously * done could be out of date. We could have received * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We * could probably recheck again inside the locking * protection here, but there are probably other * places that this also happens. We must rethink * this. */ VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands * this flag and nothing left to send then use * PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME. */ (flags & MSG_MORETOCOME) || (resid > 0 && space > 0) ? 
PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } clen = 0; control = NULL; top = NULL; if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { int error; CURVNET_SET(so->so_vnet); if (!SOLISTENING(so)) error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top, control, flags, td); else { m_freem(top); m_freem(control); error = ENOTCONN; } CURVNET_RESTORE(); return (error); } /* * The part of soreceive() that implements reading non-inline out-of-band * data from a socket. For more complete comments, see soreceive(), from * which this code originated. * * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is * unable to return an mbuf chain to the caller. */ static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) { struct protosw *pr = so->so_proto; struct mbuf *m; int error; KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); VNET_SO_ASSERT(so); m = m_get(M_WAITOK, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, void *), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m != NULL) m_freem(m); return (error); } /* * Following replacement or removal of the first mbuf on the first mbuf chain * of a socket buffer, push necessary state changes back into the socket * buffer so that other consumers see the values consistently. 'nextrecord' * is the callers locally stored value of the original value of * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes. * NOTE: 'nextrecord' may be NULL. */ static __inline void sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) { SOCKBUF_LOCK_ASSERT(sb); /* * First, update for the new value of nextrecord. If necessary, make * it the first record. */ if (sb->sb_mb != NULL) sb->sb_mb->m_nextpkt = nextrecord; else sb->sb_mb = nextrecord; /* * Now update any dependent socket buffer fields to reflect the new * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the * addition of a second clause that takes care of the case where * sb_mb has been updated, but remains the last record. */ if (sb->sb_mb == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (sb->sb_mb->m_nextpkt == NULL) sb->sb_lastrecord = sb->sb_mb; } /* * Implement receive operations on a socket. We depend on the way that * records are added to the sockbuf by sbappend. In particular, each record * (mbufs linked through m_next) must begin with an address if the protocol * so specifies, followed by an optional mbuf or mbufs containing ancillary * data, and then zero or more mbufs of data. In order to allow parallelism * between network receive and copying to user space, as well as avoid * sleeping with a mutex held, we release the socket buffer mutex during the * user space copy. Although the sockbuf is locked, new data may still be * appended, and thus we must maintain consistency of the sockbuf during that * time. * * The caller may receive the data as a single mbuf chain by supplying an * mbuf **mp0 for use in returning the chain. The uio is then used only for * the count in uio_resid. 
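 *
 * Schematically, one record for a PR_ADDR protocol such as UDP looks like
 * (m_next chains mbufs within a record, m_nextpkt chains records):
 *
 *	MT_SONAME -> MT_CONTROL ... -> MT_DATA -> MT_DATA ...
 *	    |
 *	    | m_nextpkt
 *	    v
 *	next record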
*/ int soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, **mp; int flags, error, offset; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; ssize_t orig_resid = uio->uio_resid; mp = mp0; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp != NULL) *mp = NULL; if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) && uio->uio_resid) { VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, 0); } error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) return (error); restart: SOCKBUF_LOCK(&so->so_rcv); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more (subject * to any timeout) if: * 1. the current count is less than the low water mark, or * 2. MSG_DONTWAIT is not set */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && sbavail(&so->so_rcv) < uio->uio_resid) && sbavail(&so->so_rcv) < so->so_rcv.sb_lowat && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != NULL || !sbavail(&so->so_rcv), ("receive: m == %p sbavail == %u", m, sbavail(&so->so_rcv))); if (so->so_error) { if (m != NULL) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); goto release; } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { if (m == NULL) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } else goto dontblock; } for (; m != NULL; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { SOCKBUF_UNLOCK(&so->so_rcv); error = ENOTCONN; goto release; } if (uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); error = EWOULDBLOCK; goto release; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (error) goto release; goto restart; } dontblock: /* * From this point onward, we maintain 'nextrecord' as a cache of the * pointer to the next record in the socket buffer. We must keep the * various socket buffer pointers and local stack versions of the * pointers in sync, pushing out modifications before dropping the * socket buffer mutex, and re-reading them when picking it up. * * Otherwise, we will race with the network stack appending new data * or records onto the socket buffer by using inconsistent/stale * versions of the field, possibly resulting in socket buffer * corruption. * * By holding the high-level sblock(), we prevent simultaneous * readers from pulling off the front of the socket buffer. 
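 *
 * In shorthand, the invariant re-established around every unlock/relock
 * cycle below is:
 *
 *	m == so->so_rcv.sb_mb && nextrecord == m->m_nextpkt
 *
 * with sockbuf_pushsync() repairing the buffer-side pointers whenever the
 * head mbuf is consumed or replaced.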
*/ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb")); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); orig_resid = 0; if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; sockbuf_pushsync(&so->so_rcv, nextrecord); } } /* * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. If MSG_PEEK, we * just copy the data; if !MSG_PEEK, we call into the protocol to * perform externalization (or freeing if controlp == NULL). */ if (m != NULL && m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; do { if (flags & MSG_PEEK) { if (controlp != NULL) { *controlp = m_copym(m, 0, m->m_len, M_NOWAIT); controlp = &(*controlp)->m_next; } m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = so->so_rcv.sb_mb; } } while (m != NULL && m->m_type == MT_CONTROL); if ((flags & MSG_PEEK) == 0) sockbuf_pushsync(&so->so_rcv, nextrecord); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); SOCKBUF_LOCK(&so->so_rcv); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { orig_resid = 0; while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } if (m != NULL) nextrecord = so->so_rcv.sb_mb->m_nextpkt; else nextrecord = so->so_rcv.sb_mb; orig_resid = 0; } if (m != NULL) { if ((flags & MSG_PEEK) == 0) { KASSERT(m->m_nextpkt == nextrecord, ("soreceive: post-control, nextrecord !sync")); if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_mb == m, ("soreceive: post-control, sb_mb!=m")); KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive: post-control, lastrecord!=m")); } } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } else { if ((flags & MSG_PEEK) == 0) { KASSERT(so->so_rcv.sb_mb == nextrecord, ("soreceive: sb_mb != nextrecord")); if (so->so_rcv.sb_mb == NULL) { KASSERT(so->so_rcv.sb_lastrecord == NULL, ("soreceive: sb_lastercord != NULL")); } } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * Now continue to read any data mbufs off of the head of the socket * buffer until the read request is satisfied. Note that 'type' is * used to store the type of any mbuf reads that have happened so far * such that soreceive() can stop reading if the type changes, which * causes soreceive() to return only one of regular data and inline * out-of-band data in a single socket receive operation. */ moff = 0; offset = 0; while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0 && error == 0) { /* * If the type of mbuf has changed since the last mbuf * examined ('type'), end the receive operation. 
*/ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) { if (type != m->m_type) break; } else if (type == MT_OOBDATA) break; else KASSERT(m->m_type == MT_DATA, ("m->m_type == %d", m->m_type)); so->so_rcv.sb_state &= ~SBS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. Otherwise copy * them out via the uio, then free. Sockbuf must be * consistent here (points to current mbuf, it points to next * record) when we drop priority; we must note any additions * to the sockbuf when we block interrupts again. */ if (mp == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); error = uiomove(mtod(m, char *) + moff, (int)len, uio); SOCKBUF_LOCK(&so->so_rcv); if (error) { /* * The MT_SONAME mbuf has already been removed * from the record, so it is necessary to * remove the data mbufs, if any, to preserve * the invariant in the case of PR_ADDR that * requires MT_SONAME mbufs at the head of * each record. */ if (pr->pr_flags & PR_ATOMIC && ((flags & MSG_PEEK) == 0)) (void)sbdroprecord_locked(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } else uio->uio_resid -= len; SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp != NULL) { m->m_nextpkt = NULL; *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } sockbuf_pushsync(&so->so_rcv, nextrecord); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); } } else { if (flags & MSG_PEEK) moff += len; else { if (mp != NULL) { if (flags & MSG_DONTWAIT) { *mp = m_copym(m, 0, len, M_NOWAIT); if (*mp == NULL) { /* * m_copym() couldn't * allocate an mbuf. * Adjust uio_resid back * (it was adjusted * down by len bytes, * which we didn't end * up "copying" over). */ uio->uio_resid += len; break; } } else { SOCKBUF_UNLOCK(&so->so_rcv); *mp = m_copym(m, 0, len, M_WAITOK); SOCKBUF_LOCK(&so->so_rcv); } } sbcut_locked(&so->so_rcv, len); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_rcv.sb_state |= SBS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), we * must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return with a * short count but without error. Keep sockbuf locked * against other readers. */ while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && !sosendallatonce(so) && nextrecord == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE) break; /* * Notify the protocol that some data has been * drained before blocking. */ if (pr->pr_flags & PR_WANTRCVD) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * We could receive some data while was notifying * the protocol. Skip blocking in this case. 
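 *
 * This loop is what backs the userland MSG_WAITALL contract; for
 * illustration, a call such as
 *
 *	n = recv(s, buf, sizeof(buf), MSG_WAITALL);
 *
 * returns short only on error, signal, timeout or a closed connection;
 * otherwise we keep sleeping here until the full request is queued.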
*/ if (so->so_rcv.sb_mb == NULL) { error = sbwait(&so->so_rcv); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } m = so->so_rcv.sb_mb; if (m != NULL) nextrecord = m->m_nextpkt; } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m != NULL && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord_locked(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == NULL) { /* * First part is an inline SB_EMPTY_FIXUP(). Second * part makes sure sb_lastrecord is up-to-date if * there is still data in the socket buffer. */ so->so_rcv.sb_mb = nextrecord; if (so->so_rcv.sb_mb == NULL) { so->so_rcv.sb_mbtail = NULL; so->so_rcv.sb_lastrecord = NULL; } else if (nextrecord->m_nextpkt == NULL) so->so_rcv.sb_lastrecord = nextrecord; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * If soreceive() is being done from the socket callback, * then don't need to generate ACK to peer to update window, * since ACK will be generated on return to TCP. */ if (!(flags & MSG_SOCALLBCK) && (pr->pr_flags & PR_WANTRCVD)) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto restart; } SOCKBUF_UNLOCK(&so->so_rcv); if (flagsp != NULL) *flagsp |= flags; release: sbunlock(&so->so_rcv); return (error); } /* * Optimized version of soreceive() for stream (TCP) sockets. - * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled. */ int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int len = 0, error = 0, flags, oresid; struct sockbuf *sb; struct mbuf *m, *n = NULL; /* We only do stream sockets. */ if (so->so_type != SOCK_STREAM) return (EINVAL); if (psa != NULL) *psa = NULL; - if (controlp != NULL) - return (EINVAL); if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; + if (controlp != NULL) + *controlp = NULL; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp0 != NULL) *mp0 = NULL; sb = &so->so_rcv; /* Prevent other readers from entering the socket. */ error = sblock(sb, SBLOCKWAIT(flags)); if (error) goto out; SOCKBUF_LOCK(sb); /* Easy one, no space to copyout anything. */ if (uio->uio_resid == 0) { error = EINVAL; goto out; } oresid = uio->uio_resid; /* We will never ever get anything unless we are or were connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { error = ENOTCONN; goto out; } restart: SOCKBUF_LOCK_ASSERT(&so->so_rcv); /* Abort if socket has reported problems. */ if (so->so_error) { if (sbavail(sb) > 0) goto deliver; if (oresid > uio->uio_resid) goto out; error = so->so_error; if (!(flags & MSG_PEEK)) so->so_error = 0; goto out; } /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { if (sbavail(sb) > 0) goto deliver; else goto out; } /* Socket buffer is empty and we shall not block. */ if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } /* Socket buffer got some data that we shall deliver now. 
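 *
 * In shorthand: without MSG_WAITALL we deliver as soon as the caller is
 * unwilling to block or enough data is queued, where "enough" means
 * sb_lowat, the full request, or a filled buffer.  sb_lowat is the knob
 * userland turns with, e.g.:
 *
 *	int lowat = 512;
 *	setsockopt(s, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat));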
*/ if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || sbavail(sb) >= sb->sb_lowat || sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat)) goto deliver; /* * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. */ error = sbwait(sb); if (error) goto out; goto restart; deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); /* Statistics. */ if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { if (*mp0 == NULL) *mp0 = sb->sb_mb; else m_cat(*mp0, sb->sb_mb); for (m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p not available", __func__, m)); len -= m->m_len; uio->uio_resid -= m->m_len; sbfree(sb, m); n = m; } n->m_next = NULL; sb->sb_mb = m; sb->sb_lastrecord = sb->sb_mb; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); } /* Copy the remainder. */ if (len > 0) { KASSERT(sb->sb_mb != NULL, ("%s: len > 0 && sb->sb_mb empty", __func__)); m = m_copym(sb->sb_mb, 0, len, M_NOWAIT); if (m == NULL) len = 0; /* Don't flush data from sockbuf. */ else uio->uio_resid -= len; if (*mp0 != NULL) m_cat(*mp0, m); else *mp0 = m; if (*mp0 == NULL) { error = ENOBUFS; goto out; } } } else { /* NB: Must unlock socket buffer as uiomove may sleep. */ SOCKBUF_UNLOCK(sb); error = m_mbuftouio(uio, sb->sb_mb, len); SOCKBUF_LOCK(sb); if (error) goto out; } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); /* * Remove the delivered data from the socket buffer unless we * were only peeking. */ if (!(flags & MSG_PEEK)) { if (len > 0) sbdrop_locked(sb, len); /* Notify protocol that we drained some data. */ if ((so->so_proto->pr_flags & PR_WANTRCVD) && (((flags & MSG_WAITALL) && uio->uio_resid > 0) || !(flags & MSG_SOCALLBCK))) { SOCKBUF_UNLOCK(sb); VNET_SO_ASSERT(so); (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(sb); } } /* * For MSG_WAITALL we may have to loop again and wait for * more data to come in. */ if ((flags & MSG_WAITALL) && uio->uio_resid > 0) goto restart; out: SOCKBUF_LOCK_ASSERT(sb); SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); SOCKBUF_UNLOCK(sb); sbunlock(sb); return (error); } /* * Optimized version of soreceive() for simple datagram cases from userspace. * Unlike in the stream case, we're able to drop a datagram if copyout() * fails, and because we handle datagrams atomically, we don't need to use a * sleep lock to prevent I/O interlacing. */ int soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, *m2; int flags, error; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; /* * For any complicated cases, fall back to the full * soreceive_generic(). */ if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB)) return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); /* * Enforce restrictions on use. 
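 *
 * The assertions below restate the fast path's assumptions.  A datagram
 * protocol opts in through its pr_usrreqs; sketched schematically with an
 * illustrative name and an abridged field list:
 *
 *	struct pr_usrreqs foo_usrreqs = {
 *		...
 *		.pru_soreceive = soreceive_dgram,
 *	};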
*/ KASSERT((pr->pr_flags & PR_WANTRCVD) == 0, ("soreceive_dgram: wantrcvd")); KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic")); KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0, ("soreceive_dgram: SBS_RCVATMARK")); KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0, ("soreceive_dgram: P_CONNREQUIRED")); /* * Loop blocking while waiting for a datagram. */ SOCKBUF_LOCK(&so->so_rcv); while ((m = so->so_rcv.sb_mb) == NULL) { KASSERT(sbavail(&so->so_rcv) == 0, ("soreceive_dgram: sb_mb NULL but sbavail %u", sbavail(&so->so_rcv))); if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); return (error); } if (so->so_rcv.sb_state & SBS_CANTRCVMORE || uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); return (0); } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); return (EWOULDBLOCK); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); return (error); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive_dgram: lastrecord != m")); } KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord, ("soreceive_dgram: m_nextpkt != nextrecord")); /* * Pull 'm' and its chain off the front of the packet queue. */ so->so_rcv.sb_mb = NULL; sockbuf_pushsync(&so->so_rcv, nextrecord); /* * Walk 'm's chain and free that many bytes from the socket buffer. */ for (m2 = m; m2 != NULL; m2 = m2->m_next) sbfree(&so->so_rcv, m2); /* * Do a few last checks before we let go of the lock. */ SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); m = m_free(m); } if (m == NULL) { /* XXXRW: Can this happen? */ return (0); } /* * Packet to copyout() is now in 'm' and it is disconnected from the * queue. * * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. We call into the * protocol to perform externalization (or freeing if controlp == * NULL). In some cases there can be only MT_CONTROL mbufs without * MT_DATA mbufs. 
*/ if (m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; do { m2 = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = m2; } while (m != NULL && m->m_type == MT_CONTROL); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } } KASSERT(m == NULL || m->m_type == MT_DATA, ("soreceive_dgram: !data")); while (m != NULL && uio->uio_resid > 0) { len = uio->uio_resid; if (len > m->m_len) len = m->m_len; error = uiomove(mtod(m, char *), (int)len, uio); if (error) { m_freem(m); return (error); } if (len == m->m_len) m = m_free(m); else { m->m_data += len; m->m_len -= len; } } if (m != NULL) { flags |= MSG_TRUNC; m_freem(m); } if (flagsp != NULL) *flagsp |= flags; return (0); } int soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int error; CURVNET_SET(so->so_vnet); if (!SOLISTENING(so)) error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, controlp, flagsp)); else error = ENOTCONN; CURVNET_RESTORE(); return (error); } int soshutdown(struct socket *so, int how) { struct protosw *pr = so->so_proto; int error, soerror_enotconn; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) return (EINVAL); soerror_enotconn = 0; if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) { /* * POSIX mandates us to return ENOTCONN when shutdown(2) is * invoked on a datagram sockets, however historically we would * actually tear socket down. This is known to be leveraged by * some applications to unblock process waiting in recvXXX(2) * by other process that it shares that socket with. Try to meet * both backward-compatibility and POSIX requirements by forcing * ENOTCONN but still asking protocol to perform pru_shutdown(). */ if (so->so_type != SOCK_DGRAM) return (ENOTCONN); soerror_enotconn = 1; } CURVNET_SET(so->so_vnet); if (pr->pr_usrreqs->pru_flush != NULL) (*pr->pr_usrreqs->pru_flush)(so, how); if (how != SHUT_WR) sorflush(so); if (how != SHUT_RD) { error = (*pr->pr_usrreqs->pru_shutdown)(so); wakeup(&so->so_timeo); CURVNET_RESTORE(); return ((error == 0 && soerror_enotconn) ? ENOTCONN : error); } wakeup(&so->so_timeo); CURVNET_RESTORE(); return (soerror_enotconn ? ENOTCONN : 0); } void sorflush(struct socket *so) { struct sockbuf *sb = &so->so_rcv; struct protosw *pr = so->so_proto; struct socket aso; VNET_SO_ASSERT(so); /* * In order to avoid calling dom_dispose with the socket buffer mutex * held, and in order to generally avoid holding the lock for a long * time, we make a copy of the socket buffer and clear the original * (except locks, state). The new socket buffer copy won't have * initialized locks so we can only call routines that won't use or * assert those locks. * * Dislodge threads currently blocked in receive and wait to acquire * a lock against other simultaneous readers before clearing the * socket buffer. Don't let our acquire be interrupted by a signal * despite any existing socket disposition on interruptable waiting. */ socantrcvmore(so); (void) sblock(sb, SBL_WAIT | SBL_NOINTR); /* * Invalidate/clear most of the sockbuf structure, but leave selinfo * and mutex data unchanged. 
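 *
 * The copy-and-clear below leans on struct sockbuf's layout: everything
 * from the sb_startzero marker onward may be copied and zeroed wholesale,
 * while the mutex and selinfo live in front of it, i.e. both operations
 * cover the span
 *
 *	sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)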
*/ SOCKBUF_LOCK(sb); bzero(&aso, sizeof(aso)); aso.so_pcb = so->so_pcb; bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); bzero(&sb->sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); SOCKBUF_UNLOCK(sb); sbunlock(sb); /* * Dispose of special rights and flush the copied socket. Don't call * any unsafe routines (that rely on locks being initialized) on aso. */ if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(&aso); sbrelease_internal(&aso.so_rcv, so); } /* * Wrapper for Socket established helper hook. * Parameters: socket, context of the hook point, hook id. */ static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id) { struct socket_hhook_data hhook_data = { .so = so, .hctx = hctx, .m = NULL, .status = 0 }; CURVNET_SET(so->so_vnet); HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd); CURVNET_RESTORE(); /* Ugly but needed, since hhooks return void for now */ return (hhook_data.status); } /* * Perhaps this routine, and sooptcopyout(), below, ought to come in an * additional variant to handle the case where the option value needs to be * some kind of integer, but not a specific size. In addition to their use * here, these functions are also called by the protocol-level pr_ctloutput() * routines. */ int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) { size_t valsize; /* * If the user gives us more than we wanted, we ignore it, but if we * don't get the minimum length the caller wants, we return EINVAL. * On success, sopt->sopt_valsize is set to however much we actually * retrieved. */ if ((valsize = sopt->sopt_valsize) < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; if (sopt->sopt_td != NULL) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(sopt->sopt_val, buf, valsize); return (0); } /* * Kernel version of setsockopt(2). 
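 *
 * As an illustrative (hypothetical) in-kernel use, a consumer that owns
 * a socket could enable keep-alives with:
 *
 *	int one = 1;
 *
 *	error = so_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &one,
 *	    sizeof(one));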
* * XXX: optlen is size_t, not socklen_t */ int so_setsockopt(struct socket *so, int level, int optname, void *optval, size_t optlen) { struct sockopt sopt; sopt.sopt_level = level; sopt.sopt_name = optname; sopt.sopt_dir = SOPT_SET; sopt.sopt_val = optval; sopt.sopt_valsize = optlen; sopt.sopt_td = NULL; return (sosetopt(so, &sopt)); } int sosetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; sbintime_t val; uint32_t val32; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) { error = (*so->so_proto->pr_ctloutput)(so, sopt); CURVNET_RESTORE(); return (error); } error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = accept_filt_setopt(so, sopt); if (error) goto bad; break; case SO_LINGER: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; SOCK_LOCK(so); so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; SOCK_UNLOCK(so); break; case SO_DEBUG: case SO_KEEPALIVE: case SO_DONTROUTE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_REUSEPORT_LB: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: case SO_NO_DDP: case SO_NO_OFFLOAD: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; SOCK_LOCK(so); if (optval) so->so_options |= sopt->sopt_name; else so->so_options &= ~sopt->sopt_name; SOCK_UNLOCK(so); break; case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } if (((so->so_proto->pr_domain->dom_family == PF_INET) || (so->so_proto->pr_domain->dom_family == PF_INET6) || (so->so_proto->pr_domain->dom_family == PF_ROUTE))) so->so_fibnum = optval; else so->so_fibnum = 0; break; case SO_USER_COOKIE: error = sooptcopyin(sopt, &val32, sizeof val32, sizeof val32); if (error) goto bad; so->so_user_cookie = val32; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; /* * Values < 1 make no sense for any of these options, * so disallow them. 
*/ if (optval < 1) { error = EINVAL; goto bad; } error = sbsetopt(so, sopt->sopt_name, optval); break; case SO_SNDTIMEO: case SO_RCVTIMEO: #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; error = sooptcopyin(sopt, &tv32, sizeof tv32, sizeof tv32); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); } else #endif error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv); if (error) goto bad; if (tv.tv_sec < 0 || tv.tv_usec < 0 || tv.tv_usec >= 1000000) { error = EDOM; goto bad; } if (tv.tv_sec > INT32_MAX) val = SBT_MAX; else val = tvtosbt(tv); switch (sopt->sopt_name) { case SO_SNDTIMEO: so->so_snd.sb_timeo = val; break; case SO_RCVTIMEO: so->so_rcv.sb_timeo = val; break; } break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof extmac, sizeof extmac); if (error) goto bad; error = mac_setsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); #else error = EOPNOTSUPP; #endif break; case SO_TS_CLOCK: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval < 0 || optval > SO_TS_CLOCK_MAX) { error = EINVAL; goto bad; } so->so_ts_clock = optval; break; case SO_MAX_PACING_RATE: error = sooptcopyin(sopt, &val32, sizeof(val32), sizeof(val32)); if (error) goto bad; so->so_max_pacing_rate = val32; break; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto->pr_ctloutput != NULL) (void)(*so->so_proto->pr_ctloutput)(so, sopt); } bad: CURVNET_RESTORE(); return (error); } /* * Helper routine for getsockopt. */ int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) { int error; size_t valsize; error = 0; /* * Documented get behavior is that we always return a value, possibly * truncated to fit in the user's buffer. Traditional behavior is * that we always tell the user precisely how much we copied, rather * than something useful like the total amount we had available for * her. Note that this interface is not idempotent; the entire * answer must be generated ahead of time. 
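 *
 * A sketch (illustrative only) of how a protocol-level pr_ctloutput()
 * handler typically pairs sooptcopyin() and sooptcopyout();
 * t_some_option is a hypothetical per-connection field:
 *
 *	case SOPT_SET:
 *		error = sooptcopyin(sopt, &optval, sizeof(optval),
 *		    sizeof(optval));
 *		if (error == 0)
 *			tp->t_some_option = optval;
 *		break;
 *	case SOPT_GET:
 *		optval = tp->t_some_option;
 *		error = sooptcopyout(sopt, &optval, sizeof(optval));
 *		break;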
*/ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != NULL) { if (sopt->sopt_td != NULL) error = copyout(buf, sopt->sopt_val, valsize); else bcopy(buf, sopt->sopt_val, valsize); } return (error); } int sogetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) error = (*so->so_proto->pr_ctloutput)(so, sopt); else error = ENOPROTOOPT; CURVNET_RESTORE(); return (error); } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = accept_filt_getopt(so, sopt); break; case SO_LINGER: SOCK_LOCK(so); l.l_onoff = so->so_options & SO_LINGER; l.l_linger = so->so_linger; SOCK_UNLOCK(so); error = sooptcopyout(sopt, &l, sizeof l); break; case SO_USELOOPBACK: case SO_DONTROUTE: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_REUSEPORT_LB: case SO_BROADCAST: case SO_OOBINLINE: case SO_ACCEPTCONN: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; case SO_TYPE: optval = so->so_type; goto integer; case SO_PROTOCOL: optval = so->so_proto->pr_protocol; goto integer; case SO_ERROR: SOCK_LOCK(so); optval = so->so_error; so->so_error = 0; SOCK_UNLOCK(so); goto integer; case SO_SNDBUF: optval = SOLISTENING(so) ? so->sol_sbsnd_hiwat : so->so_snd.sb_hiwat; goto integer; case SO_RCVBUF: optval = SOLISTENING(so) ? so->sol_sbrcv_hiwat : so->so_rcv.sb_hiwat; goto integer; case SO_SNDLOWAT: optval = SOLISTENING(so) ? so->sol_sbsnd_lowat : so->so_snd.sb_lowat; goto integer; case SO_RCVLOWAT: optval = SOLISTENING(so) ? so->sol_sbrcv_lowat : so->so_rcv.sb_lowat; goto integer; case SO_SNDTIMEO: case SO_RCVTIMEO: tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; CP(tv, tv32, tv_sec); CP(tv, tv32, tv_usec); error = sooptcopyout(sopt, &tv32, sizeof tv32); } else #endif error = sooptcopyout(sopt, &tv, sizeof tv); break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; #endif break; case SO_PEERLABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_peerlabel( sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; #endif break; case SO_LISTENQLIMIT: optval = SOLISTENING(so) ? so->sol_qlimit : 0; goto integer; case SO_LISTENQLEN: optval = SOLISTENING(so) ? so->sol_qlen : 0; goto integer; case SO_LISTENINCQLEN: optval = SOLISTENING(so) ? 
			    so->sol_incqlen : 0;
			goto integer;

		case SO_TS_CLOCK:
			optval = so->so_ts_clock;
			goto integer;

		case SO_MAX_PACING_RATE:
			optval = so->so_max_pacing_rate;
			goto integer;

		default:
			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
				error = hhook_run_socket(so, sopt,
				    HHOOK_SOCKET_OPT);
			else
				error = ENOPROTOOPT;
			break;
		}
	}
#ifdef MAC
bad:
#endif
	CURVNET_RESTORE();
	return (error);
}

int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
	if (m == NULL)
		return ENOBUFS;
	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size) {
		MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
		if (m == NULL) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK :
			    M_NOWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(m);
				m_freem(*mp);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return (0);
}

int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == NULL)
		return (0);
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_td != NULL) {
			int error;

			error = copyin(sopt->sopt_val, mtod(m, char *),
			    m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
		m = m->m_next;
	}
	if (m != NULL)
		/* the chain should have been allocated large enough at ip6_sooptmcopyin() */
		panic("ip6_sooptmcopyin");
	return (0);
}

int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;

	if (sopt->sopt_val == NULL)
		return (0);
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_td != NULL) {
			int error;

			error = copyout(mtod(m, char *), sopt->sopt_val,
			    m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
		sopt->sopt_valsize -= m->m_len;
		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* a large enough soopt buffer should have been supplied by user-land */
		m_freem(m0);
		return(EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return (0);
}

/*
 * sohasoutofband(): protocol notifies socket layer of the arrival of new
 * out-of-band data, which will then notify socket consumers.
 */
void
sohasoutofband(struct socket *so)
{

	if (so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGURG, 0);
	selwakeuppri(&so->so_rdsel, PSOCK);
}

int
sopoll(struct socket *so, int events, struct ucred *active_cred,
    struct thread *td)
{

	/*
	 * We do not need to set or assert curvnet as long as everyone uses
	 * sopoll_generic().
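	 *
	 * For reference (illustrative only), the userland side of this
	 * path is a plain poll(2) or kevent(2) call, e.g.:
	 *
	 *	struct pollfd pfd = { .fd = s, .events = POLLIN | POLLPRI };
	 *
	 *	if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN))
	 *		read from the socket;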
*/ return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred, td)); } int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { int revents; SOCK_LOCK(so); if (SOLISTENING(so)) { if (!(events & (POLLIN | POLLRDNORM))) revents = 0; else if (!TAILQ_EMPTY(&so->sol_comp)) revents = events & (POLLIN | POLLRDNORM); else { selrecord(td, &so->so_rdsel); revents = 0; } } else { revents = 0; SOCKBUF_LOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (events & (POLLIN | POLLRDNORM)) if (soreadabledata(so)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); if ((events & POLLINIGNEOF) == 0) { if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { revents |= events & (POLLIN | POLLRDNORM); if (so->so_snd.sb_state & SBS_CANTSENDMORE) revents |= POLLHUP; } } if (revents == 0) { if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { selrecord(td, &so->so_rdsel); so->so_rcv.sb_flags |= SB_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(td, &so->so_wrsel); so->so_snd.sb_flags |= SB_SEL; } } SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_snd); } SOCK_UNLOCK(so); return (revents); } int soo_kqfilter(struct file *fp, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &soread_filtops; knl = &so->so_rdsel.si_note; sb = &so->so_rcv; break; case EVFILT_WRITE: kn->kn_fop = &sowrite_filtops; knl = &so->so_wrsel.si_note; sb = &so->so_snd; break; case EVFILT_EMPTY: kn->kn_fop = &soempty_filtops; knl = &so->so_wrsel.si_note; sb = &so->so_snd; break; default: return (EINVAL); } SOCK_LOCK(so); if (SOLISTENING(so)) { knlist_add(knl, kn, 1); } else { SOCKBUF_LOCK(sb); knlist_add(knl, kn, 1); sb->sb_flags |= SB_KNOTE; SOCKBUF_UNLOCK(sb); } SOCK_UNLOCK(so); return (0); } /* * Some routines that return EOPNOTSUPP for entry points that are not * supported by a protocol. Fill in as needed. 
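 *
 * A protocol that does not implement a given entry point plugs the
 * matching stub into its struct pr_usrreqs, e.g. (foo_usrreqs is a
 * hypothetical protocol):
 *
 *	struct pr_usrreqs foo_usrreqs = {
 *		.pru_accept =	pru_accept_notsupp,
 *		.pru_rcvoob =	pru_rcvoob_notsupp,
 *		.pru_sense =	pru_sense_null,
 *		...
 *	};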
*/ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job) { return EOPNOTSUPP; } int pru_attach_notsupp(struct socket *so, int proto, struct thread *td) { return EOPNOTSUPP; } int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connect2_notsupp(struct socket *so1, struct socket *so2) { return EOPNOTSUPP; } int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { return EOPNOTSUPP; } int pru_disconnect_notsupp(struct socket *so) { return EOPNOTSUPP; } int pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) { return EOPNOTSUPP; } int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_rcvd_notsupp(struct socket *so, int flags) { return EOPNOTSUPP; } int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return EOPNOTSUPP; } int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { return EOPNOTSUPP; } int pru_ready_notsupp(struct socket *so, struct mbuf *m, int count) { return (EOPNOTSUPP); } /* * This isn't really a ``null'' operation, but it's the default one and * doesn't do anything destructive. */ int pru_sense_null(struct socket *so, struct stat *sb) { sb->st_blksize = so->so_snd.sb_hiwat; return 0; } int pru_shutdown_notsupp(struct socket *so) { return EOPNOTSUPP; } int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { return EOPNOTSUPP; } int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { return EOPNOTSUPP; } int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, struct thread *td) { return EOPNOTSUPP; } static void filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; so_rdknl_lock(so); knlist_remove(&so->so_rdsel.si_note, kn, 1); if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note)) so->so_rcv.sb_flags &= ~SB_KNOTE; so_rdknl_unlock(so); } /*ARGSUSED*/ static int filt_soread(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) { SOCK_LOCK_ASSERT(so); kn->kn_data = so->sol_qlen; return (!TAILQ_EMPTY(&so->sol_comp)); } SOCKBUF_LOCK_ASSERT(&so->so_rcv); kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl; if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_data >= kn->kn_sdata) return (1); } else if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) return (1); /* This hook returning non-zero indicates an event, not error */ return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD)); } static void filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; 
so_wrknl_lock(so); knlist_remove(&so->so_wrsel.si_note, kn, 1); if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note)) so->so_snd.sb_flags &= ~SB_KNOTE; so_wrknl_unlock(so); } /*ARGSUSED*/ static int filt_sowrite(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) return (0); SOCKBUF_LOCK_ASSERT(&so->so_snd); kn->kn_data = sbspace(&so->so_snd); hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); else if (((so->so_state & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) return (0); else if (kn->kn_sfflags & NOTE_LOWAT) return (kn->kn_data >= kn->kn_sdata); else return (kn->kn_data >= so->so_snd.sb_lowat); } static int filt_soempty(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) return (1); SOCKBUF_LOCK_ASSERT(&so->so_snd); kn->kn_data = sbused(&so->so_snd); if (kn->kn_data == 0) return (1); else return (0); } int socheckuid(struct socket *so, uid_t uid) { if (so == NULL) return (EPERM); if (so->so_cred->cr_uid != uid) return (EPERM); return (0); } /* * These functions are used by protocols to notify the socket layer (and its * consumers) of state changes in the sockets driven by protocol-side events. */ /* * Procedures to manipulate state flags of socket and do appropriate wakeups. * * Normal sequence from the active (originating) side is that * soisconnecting() is called during processing of connect() call, resulting * in an eventual call to soisconnected() if/when the connection is * established. When the connection is torn down soisdisconnecting() is * called during processing of disconnect() call, and soisdisconnected() is * called when the connection to the peer is totally severed. The semantics * of these routines are such that connectionless protocols can call * soisconnected() and soisdisconnected() only, bypassing the in-progress * calls when setting up a ``connection'' takes no time. * * From the passive side, a socket is created with two queues of sockets: * so_incomp for connections in progress and so_comp for connections already * made and awaiting user acceptance. As a protocol is preparing incoming * connections, it creates a socket structure queued on so_incomp by calling * sonewconn(). When the connection is established, soisconnected() is * called, and transfers the socket structure to so_comp, making it available * to accept(). * * If a socket is closed with sockets on either so_incomp or so_comp, these * sockets are dropped. * * If higher-level protocols are implemented in the kernel, the wakeups done * here will sometimes cause software-interrupt process scheduling. */ void soisconnecting(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; SOCK_UNLOCK(so); } void soisconnected(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (so->so_qstate == SQ_INCOMP) { struct socket *head = so->so_listen; int ret; KASSERT(head, ("%s: so %p on incomp of NULL", __func__, so)); /* * Promoting a socket from incomplete queue to complete, we * need to go through reverse order of locking. We first do * trylock, and if that doesn't succeed, we go the hard way * leaving a reference and rechecking consistency after proper * locking. 
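 *
 * In outline, the pattern is (a general sketch of the technique, not
 * code from this change):
 *
 *	if (!TRYLOCK(head)) {
 *		take a reference on head;
 *		UNLOCK(so);
 *		LOCK(head);		now in the natural lock order
 *		LOCK(so);
 *		if (so is no longer associated with head)
 *			undo and bail, the race was lost;
 *		drop the extra reference;
 *	}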
*/ if (__predict_false(SOLISTEN_TRYLOCK(head) == 0)) { soref(head); SOCK_UNLOCK(so); SOLISTEN_LOCK(head); SOCK_LOCK(so); if (__predict_false(head != so->so_listen)) { /* * The socket went off the listen queue, * should be lost race to close(2) of sol. * The socket is about to soabort(). */ SOCK_UNLOCK(so); sorele(head); return; } /* Not the last one, as so holds a ref. */ refcount_release(&head->so_count); } again: if ((so->so_options & SO_ACCEPTFILTER) == 0) { TAILQ_REMOVE(&head->sol_incomp, so, so_list); head->sol_incqlen--; TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list); head->sol_qlen++; so->so_qstate = SQ_COMP; SOCK_UNLOCK(so); solisten_wakeup(head); /* unlocks */ } else { SOCKBUF_LOCK(&so->so_rcv); soupcall_set(so, SO_RCV, head->sol_accept_filter->accf_callback, head->sol_accept_filter_arg); so->so_options &= ~SO_ACCEPTFILTER; ret = head->sol_accept_filter->accf_callback(so, head->sol_accept_filter_arg, M_NOWAIT); if (ret == SU_ISCONNECTED) { soupcall_clear(so, SO_RCV); SOCKBUF_UNLOCK(&so->so_rcv); goto again; } SOCKBUF_UNLOCK(&so->so_rcv); SOCK_UNLOCK(so); SOLISTEN_UNLOCK(head); } return; } SOCK_UNLOCK(so); wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } void soisdisconnecting(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTING; so->so_state |= SS_ISDISCONNECTING; if (!SOLISTENING(so)) { SOCKBUF_LOCK(&so->so_rcv); socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); socantsendmore_locked(so); } SOCK_UNLOCK(so); wakeup(&so->so_timeo); } void soisdisconnected(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISDISCONNECTED; if (!SOLISTENING(so)) { SOCK_UNLOCK(so); SOCKBUF_LOCK(&so->so_rcv); socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); sbdrop_locked(&so->so_snd, sbused(&so->so_snd)); socantsendmore_locked(so); } else SOCK_UNLOCK(so); wakeup(&so->so_timeo); } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * sodupsockaddr(const struct sockaddr *sa, int mflags) { struct sockaddr *sa2; sa2 = malloc(sa->sa_len, M_SONAME, mflags); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Register per-socket destructor. */ void sodtor_set(struct socket *so, so_dtor_t *func) { SOCK_LOCK_ASSERT(so); so->so_dtor = func; } /* * Register per-socket buffer upcalls. 
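 *
 * Example (illustrative; my_upcall and my_arg are hypothetical).  The
 * upcall runs with the socket buffer lock held, so it must not sleep:
 *
 *	static int
 *	my_upcall(struct socket *so, void *arg, int waitflag)
 *	{
 *		defer the real work, e.g. enqueue a task;
 *		return (SU_OK);
 *	}
 *
 *	SOCKBUF_LOCK(&so->so_rcv);
 *	soupcall_set(so, SO_RCV, my_upcall, my_arg);
 *	SOCKBUF_UNLOCK(&so->so_rcv);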
*/ void soupcall_set(struct socket *so, int which, so_upcall_t func, void *arg) { struct sockbuf *sb; KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so)); switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; default: panic("soupcall_set: bad which"); } SOCKBUF_LOCK_ASSERT(sb); sb->sb_upcall = func; sb->sb_upcallarg = arg; sb->sb_flags |= SB_UPCALL; } void soupcall_clear(struct socket *so, int which) { struct sockbuf *sb; KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so)); switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; default: panic("soupcall_clear: bad which"); } SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_upcall != NULL, ("%s: so %p no upcall to clear", __func__, so)); sb->sb_upcall = NULL; sb->sb_upcallarg = NULL; sb->sb_flags &= ~SB_UPCALL; } void solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg) { SOLISTEN_LOCK_ASSERT(so); so->sol_upcall = func; so->sol_upcallarg = arg; } static void so_rdknl_lock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_LOCK(so); else SOCKBUF_LOCK(&so->so_rcv); } static void so_rdknl_unlock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_UNLOCK(so); else SOCKBUF_UNLOCK(&so->so_rcv); } static void so_rdknl_assert_locked(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_LOCK_ASSERT(so); else SOCKBUF_LOCK_ASSERT(&so->so_rcv); } static void so_rdknl_assert_unlocked(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_UNLOCK_ASSERT(so); else SOCKBUF_UNLOCK_ASSERT(&so->so_rcv); } static void so_wrknl_lock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_LOCK(so); else SOCKBUF_LOCK(&so->so_snd); } static void so_wrknl_unlock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_UNLOCK(so); else SOCKBUF_UNLOCK(&so->so_snd); } static void so_wrknl_assert_locked(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_LOCK_ASSERT(so); else SOCKBUF_LOCK_ASSERT(&so->so_snd); } static void so_wrknl_assert_unlocked(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_UNLOCK_ASSERT(so); else SOCKBUF_UNLOCK_ASSERT(&so->so_snd); } /* * Create an external-format (``xsocket'') structure using the information in * the kernel-format socket structure pointed to by so. This is done to * reduce the spew of irrelevant information over this interface, to isolate * user code from changes in the kernel structure, and potentially to provide * information-hiding if we decide that some of this information should be * hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; xso->so_uid = so->so_cred->cr_uid; xso->so_pgid = so->so_sigio ? 
so->so_sigio->sio_pgid : 0; if (SOLISTENING(so)) { xso->so_qlen = so->sol_qlen; xso->so_incqlen = so->sol_incqlen; xso->so_qlimit = so->sol_qlimit; xso->so_oobmark = 0; bzero(&xso->so_snd, sizeof(xso->so_snd)); bzero(&xso->so_rcv, sizeof(xso->so_rcv)); } else { xso->so_state |= so->so_qstate; xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); } } struct sockbuf * so_sockbuf_rcv(struct socket *so) { return (&so->so_rcv); } struct sockbuf * so_sockbuf_snd(struct socket *so) { return (&so->so_snd); } int so_state_get(const struct socket *so) { return (so->so_state); } void so_state_set(struct socket *so, int val) { so->so_state = val; } int so_options_get(const struct socket *so) { return (so->so_options); } void so_options_set(struct socket *so, int val) { so->so_options = val; } int so_error_get(const struct socket *so) { return (so->so_error); } void so_error_set(struct socket *so, int val) { so->so_error = val; } int so_linger_get(const struct socket *so) { return (so->so_linger); } void so_linger_set(struct socket *so, int val) { so->so_linger = val; } struct protosw * so_protosw_get(const struct socket *so) { return (so->so_proto); } void so_protosw_set(struct socket *so, struct protosw *val) { so->so_proto = val; } void so_sorwakeup(struct socket *so) { sorwakeup(so); } void so_sowwakeup(struct socket *so) { sowwakeup(so); } void so_sorwakeup_locked(struct socket *so) { sorwakeup_locked(so); } void so_sowwakeup_locked(struct socket *so) { sowwakeup_locked(so); } void so_lock(struct socket *so) { SOCK_LOCK(so); } void so_unlock(struct socket *so) { SOCK_UNLOCK(so); } Index: projects/pnfs-planb-server/sys/modules/Makefile =================================================================== --- projects/pnfs-planb-server/sys/modules/Makefile (revision 334966) +++ projects/pnfs-planb-server/sys/modules/Makefile (revision 334967) @@ -1,839 +1,841 @@ # $FreeBSD$ SYSDIR?=${SRCTOP}/sys .include "${SYSDIR}/conf/kern.opts.mk" SUBDIR_PARALLEL= # Modules that include binary-only blobs of microcode should be selectable by # MK_SOURCELESS_UCODE option (see below). 
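# For example (illustrative), a make.conf(5) line such as
#
#	MODULES_OVERRIDE= ipfw dummynet
#
# limits the module build to just the named subdirectories, while the
# src.conf(5) knob WITHOUT_SOURCELESS_UCODE=yes skips the modules guarded
# by MK_SOURCELESS_UCODE below.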
.if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .else SUBDIR= \ ${_3dfx} \ ${_3dfx_linux} \ ${_aac} \ ${_aacraid} \ accf_data \ accf_dns \ accf_http \ acl_nfs4 \ acl_posix1e \ ${_acpi} \ ae \ ${_aesni} \ age \ ${_agp} \ aha \ ahci \ ${_aic} \ aic7xxx \ alc \ ale \ alq \ ${_amd_ecc_inject} \ ${_amdsbwd} \ ${_amdsmn} \ ${_amdtemp} \ amr \ ${_an} \ ${_aout} \ ${_apm} \ ${_arcmsr} \ ${_allwinner} \ ${_armv8crypto} \ ${_asmc} \ ata \ ath \ ath_dfs \ ath_hal \ ath_hal_ar5210 \ ath_hal_ar5211 \ ath_hal_ar5212 \ ath_hal_ar5416 \ ath_hal_ar9300 \ ath_main \ ath_rate \ ath_pci \ ${_autofs} \ ${_auxio} \ ${_bce} \ ${_bcm283x_clkman} \ ${_bcm283x_pwm} \ bfe \ bge \ bhnd \ ${_bxe} \ ${_bios} \ ${_bktr} \ ${_blake2} \ ${_bm} \ bnxt \ bridgestp \ bwi \ bwn \ ${_bytgpio} \ ${_chvgpio} \ cam \ ${_cardbus} \ ${_carp} \ cas \ ${_cbb} \ cc \ ${_ccp} \ cd9660 \ cd9660_iconv \ ${_ce} \ ${_cfi} \ ${_chromebook_platform} \ ${_ciss} \ cloudabi \ ${_cloudabi32} \ ${_cloudabi64} \ ${_cmx} \ ${_coff} \ ${_coretemp} \ ${_cp} \ ${_cpsw} \ ${_cpuctl} \ ${_cpufreq} \ ${_crypto} \ ${_cryptodev} \ ${_cs} \ ${_ctau} \ ctl \ ${_cxgb} \ ${_cxgbe} \ dc \ dcons \ dcons_crom \ de \ ${_dpms} \ ${_dpt} \ ${_drm} \ ${_drm2} \ dummynet \ ${_ed} \ ${_efirt} \ ${_em} \ ${_ena} \ ${_ep} \ ${_epic} \ epoch_test \ esp \ ${_et} \ evdev \ ${_ex} \ ${_exca} \ ext2fs \ fdc \ fdescfs \ ${_fe} \ ${_ffec} \ filemon \ firewire \ firmware \ fuse \ ${_fxp} \ gem \ geom \ ${_glxiic} \ ${_glxsb} \ gpio \ hifn \ hme \ ${_hpt27xx} \ ${_hptiop} \ ${_hptmv} \ ${_hptnr} \ ${_hptrr} \ hwpmc \ ${_hwpmc_mips24k} \ ${_hwpmc_mips74k} \ ${_hyperv} \ i2c \ ${_ibcore} \ ${_ibcs2} \ ${_ichwd} \ ${_ida} \ if_bridge \ if_disc \ if_edsc \ ${_if_enc} \ if_epair \ ${_if_gif} \ ${_if_gre} \ ${_if_me} \ if_lagg \ ${_if_ndis} \ ${_if_stf} \ if_tap \ if_tun \ if_vlan \ if_vxlan \ ${_iir} \ imgact_binmisc \ ${_intelspi} \ ${_io} \ ${_ioat} \ ${_ipoib} \ ${_ipdivert} \ ${_ipfilter} \ ${_ipfw} \ ipfw_nat \ ${_ipfw_nat64} \ ${_ipfw_nptv6} \ ${_ipfw_pmod} \ ${_ipmi} \ ip6_mroute_mod \ ip_mroute_mod \ ${_ips} \ ${_ipsec} \ ${_ipw} \ ${_ipwfw} \ ${_isci} \ ${_iser} \ isp \ ${_ispfw} \ ${_iwi} \ ${_iwifw} \ ${_iwm} \ ${_iwmfw} \ ${_iwn} \ ${_iwnfw} \ ${_ix} \ ${_ixv} \ ${_ixl} \ ${_ixlv} \ jme \ joy \ kbdmux \ kgssapi \ kgssapi_krb5 \ khelp \ krpc \ ksyms \ le \ lge \ libalias \ libiconv \ libmchain \ ${_linprocfs} \ ${_linsysfs} \ ${_linux} \ ${_linux_common} \ ${_linux64} \ linuxkpi \ ${_lio} \ lpt \ mac_biba \ mac_bsdextended \ mac_ifoff \ mac_lomac \ mac_mls \ mac_none \ mac_partition \ mac_portacl \ mac_seeotheruids \ mac_stub \ mac_test \ malo \ md \ mdio \ mem \ mfi \ mii \ mlx \ ${_mlx4} \ ${_mlx4ib} \ ${_mlx4en} \ ${_mlx5} \ ${_mlx5en} \ ${_mlx5ib} \ ${_mly} \ mmc \ mmcsd \ mpr \ mps \ mpt \ mqueue \ mrsas \ msdosfs \ msdosfs_iconv \ ${_mse} \ msk \ ${_mthca} \ mvs \ mwl \ ${_mwlfw} \ mxge \ my \ ${_nandfs} \ ${_nandsim} \ ${_ncr} \ ${_nctgpio} \ ${_ncv} \ ${_ndis} \ ${_netgraph} \ ${_nfe} \ nfscl \ nfscommon \ nfsd \ nfslock \ nfslockd \ nfssvc \ nge \ nmdm \ ${_nsp} \ nullfs \ ${_ntb} \ ${_nvd} \ ${_nvme} \ ${_nvram} \ oce \ ${_ocs_fc} \ otus \ ${_otusfw} \ ow \ ${_padlock} \ ${_padlock_rng} \ ${_pccard} \ ${_pcfclock} \ pcn \ ${_pf} \ ${_pflog} \ ${_pfsync} \ plip \ ${_pms} \ ppbus \ ppc \ ppi \ pps \ procfs \ proto \ pseudofs \ ${_pst} \ pty \ puc \ ${_qlxge} \ ${_qlxgb} \ ${_qlxgbe} \ ${_qlnx} \ ral \ ${_ralfw} \ ${_random_fortuna} \ ${_random_yarrow} \ ${_random_other} \ rc4 \ ${_rdma} \ ${_rdrand_rng} \ re \ rl \ rtwn \ rtwn_pci \ 
rtwn_usb \ ${_rtwnfw} \ ${_s3} \ ${_safe} \ ${_sbni} \ scc \ ${_scsi_low} \ sdhci \ ${_sdhci_acpi} \ sdhci_pci \ sem \ send \ ${_sf} \ ${_sfxge} \ sge \ ${_sgx} \ ${_sgx_linux} \ siftr \ siis \ sis \ sk \ ${_smartpqi} \ smbfs \ sn \ snp \ sound \ ${_speaker} \ spi \ ${_splash} \ ${_sppp} \ ste \ ${_stg} \ stge \ ${_sym} \ ${_syscons} \ sysvipc \ tcp \ ${_ti} \ tl \ tmpfs \ ${_toecore} \ ${_tpm} \ trm \ ${_twa} \ twe \ tws \ tx \ ${_txp} \ uart \ ubsec \ udf \ udf_iconv \ ufs \ uinput \ unionfs \ usb \ ${_vesa} \ ${_virtio} \ vge \ ${_viawd} \ videomode \ vkbd \ ${_vmm} \ ${_vmware} \ ${_vpo} \ vr \ vte \ vx \ wb \ ${_wbwd} \ ${_wi} \ wlan \ wlan_acl \ wlan_amrr \ wlan_ccmp \ wlan_rssadapt \ wlan_tkip \ wlan_wep \ wlan_xauth \ ${_wpi} \ ${_wpifw} \ ${_x86bios} \ ${_xe} \ xl \ zlib .if ${MK_AUTOFS} != "no" || defined(ALL_MODULES) _autofs= autofs .endif .if ${MK_CDDL} != "no" || defined(ALL_MODULES) .if (${MACHINE_CPUARCH} != "arm" || ${MACHINE_ARCH:Marmv[67]*} != "") && \ ${MACHINE_CPUARCH} != "mips" && \ ${MACHINE_CPUARCH} != "sparc64" SUBDIR+= dtrace .endif SUBDIR+= opensolaris .endif .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) .if exists(${SRCTOP}/sys/opencrypto) _crypto= crypto _cryptodev= cryptodev _random_fortuna=random_fortuna _random_yarrow= random_yarrow _random_other= random_other .endif .endif .if ${MK_CUSE} != "no" || defined(ALL_MODULES) SUBDIR+= cuse .endif .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _carp= carp _toecore= toecore _if_enc= if_enc _if_gif= if_gif _if_gre= if_gre _ipfw_pmod= ipfw_pmod .if ${MK_IPSEC_SUPPORT} != "no" _ipsec= ipsec .endif .endif .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _if_stf= if_stf .endif .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) _if_me= if_me _ipdivert= ipdivert _ipfw= ipfw .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) _ipfw_nat64= ipfw_nat64 .endif .endif .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) _ipfw_nptv6= ipfw_nptv6 .endif .if ${MK_IPFILTER} != "no" || defined(ALL_MODULES) _ipfilter= ipfilter .endif .if ${MK_ISCSI} != "no" || defined(ALL_MODULES) SUBDIR+= cfiscsi SUBDIR+= iscsi SUBDIR+= iscsi_initiator .endif .if ${MK_NAND} != "no" || defined(ALL_MODULES) _nandfs= nandfs _nandsim= nandsim .endif .if ${MK_NETGRAPH} != "no" || defined(ALL_MODULES) _netgraph= netgraph .endif .if (${MK_PF} != "no" && (${MK_INET_SUPPORT} != "no" || \ ${MK_INET6_SUPPORT} != "no")) || defined(ALL_MODULES) _pf= pf _pflog= pflog .if ${MK_INET_SUPPORT} != "no" _pfsync= pfsync .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" _bce= bce _fxp= fxp _ispfw= ispfw _sf= sf _ti= ti _txp= txp .if ${MACHINE_CPUARCH} != "mips" _mwlfw= mwlfw _otusfw= otusfw _ralfw= ralfw _rtwnfw= rtwnfw .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && \ ${MACHINE_ARCH} != "powerpc" && ${MACHINE_ARCH} != "powerpcspe" && \ ${MACHINE_CPUARCH} != "riscv" _cxgbe= cxgbe .endif .if ${MK_TESTS} != "no" || defined(ALL_MODULES) SUBDIR+= tests .endif .if ${MK_ZFS} != "no" || defined(ALL_MODULES) SUBDIR+= zfs .endif .if (${MACHINE_CPUARCH} == "mips" && ${MACHINE_ARCH:Mmips64} == "") _hwpmc_mips24k= hwpmc_mips24k _hwpmc_mips74k= hwpmc_mips74k .endif .if ${MACHINE_CPUARCH} != "aarch64" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && ${MACHINE_CPUARCH} != "powerpc" && \ ${MACHINE_CPUARCH} != "riscv" _syscons= syscons _vpo= vpo .endif .if ${MACHINE_CPUARCH} != "mips" # no 
BUS_SPACE_UNSPECIFIED # No barrier instruction support (specific to this driver) _sym= sym # intr_disable() is a macro, causes problems .if ${MK_SOURCELESS_UCODE} != "no" _cxgb= cxgb .endif .endif .if ${MACHINE_CPUARCH} == "aarch64" _allwinner= allwinner _armv8crypto= armv8crypto _efirt= efirt _em= em .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _agp= agp _an= an _aout= aout _bios= bios _bktr= bktr .if ${MK_SOURCELESS_UCODE} != "no" _bxe= bxe .endif _cardbus= cardbus _cbb= cbb _cpuctl= cpuctl _cpufreq= cpufreq _cs= cs _dpms= dpms _drm= drm _drm2= drm2 _ed= ed _em= em _ena= ena _ep= ep _et= et _exca= exca _fe= fe .if ${MK_OFED} != "no" || defined(ALL_MODULES) _ibcore= ibcore .endif _if_ndis= if_ndis _io= io .if ${MK_OFED} != "no" || defined(ALL_MODULES) _ipoib= ipoib _iser= iser .endif _ix= ix _ixv= ixv _linprocfs= linprocfs _linsysfs= linsysfs _linux= linux .if ${MK_SOURCELESS_UCODE} != "no" _lio= lio .endif _nctgpio= nctgpio _ndis= ndis _ocs_fc= ocs_fc _pccard= pccard .if ${MK_OFED} != "no" || defined(ALL_MODULES) _rdma= rdma .endif _safe= safe _scsi_low= scsi_low _speaker= speaker _splash= splash _sppp= sppp _vmware= vmware _wbwd= wbwd _wi= wi _xe= xe _aac= aac _aacraid= aacraid _acpi= acpi .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) +.if ${COMPILER_TYPE} != "gcc" || ${COMPILER_VERSION} > 40201 _aesni= aesni +.endif .endif _amd_ecc_inject=amd_ecc_inject _amdsbwd= amdsbwd _amdsmn= amdsmn _amdtemp= amdtemp _arcmsr= arcmsr _asmc= asmc .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _blake2= blake2 .endif _bytgpio= bytgpio _chvgpio= chvgpio _ciss= ciss _chromebook_platform= chromebook_platform _cmx= cmx _coretemp= coretemp .if ${MK_SOURCELESS_HOST} != "no" _hpt27xx= hpt27xx .endif _hptiop= hptiop .if ${MK_SOURCELESS_HOST} != "no" _hptmv= hptmv _hptnr= hptnr _hptrr= hptrr .endif _hyperv= hyperv _ichwd= ichwd _ida= ida _iir= iir _intelspi= intelspi _ipmi= ipmi _ips= ips _isci= isci _ipw= ipw _iwi= iwi _iwm= iwm _iwn= iwn .if ${MK_SOURCELESS_UCODE} != "no" _ipwfw= ipwfw _iwifw= iwifw _iwmfw= iwmfw _iwnfw= iwnfw .endif _mlx4= mlx4 _mlx5= mlx5 .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _mlx4en= mlx4en _mlx5en= mlx5en .endif .if ${MK_OFED} != "no" || defined(ALL_MODULES) _mthca= mthca _mlx4ib= mlx4ib _mlx5ib= mlx5ib .endif _mly= mly _nfe= nfe _nvd= nvd _nvme= nvme _nvram= nvram .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _padlock= padlock _padlock_rng= padlock_rng _rdrand_rng= rdrand_rng .endif _s3= s3 _sdhci_acpi= sdhci_acpi _tpm= tpm _twa= twa _vesa= vesa _viawd= viawd _virtio= virtio _wpi= wpi .if ${MK_SOURCELESS_UCODE} != "no" _wpifw= wpifw .endif _x86bios= x86bios .endif .if ${MACHINE_CPUARCH} == "amd64" _ccp= ccp _efirt= efirt _ioat= ioat _ixl= ixl _ixlv= ixlv _linux64= linux64 _linux_common= linux_common _ntb= ntb _pms= pms _qlxge= qlxge _qlxgb= qlxgb .if ${MK_SOURCELESS_UCODE} != "no" _qlxgbe= qlxgbe _qlnx= qlnx .endif _sfxge= sfxge _sgx= sgx _sgx_linux= sgx_linux _smartpqi= smartpqi .if ${MK_BHYVE} != "no" || defined(ALL_MODULES) _vmm= vmm .endif .endif .if ${MACHINE_CPUARCH} == "i386" # XXX some of these can move to the general case when de-i386'ed # XXX some of these can move now, but are untested on other architectures. 
_3dfx= 3dfx _3dfx_linux= 3dfx_linux _aic= aic _apm= apm .if ${MK_SOURCELESS_UCODE} != "no" _ce= ce .endif _coff= coff .if ${MK_SOURCELESS_UCODE} != "no" _cp= cp .endif _glxiic= glxiic _glxsb= glxsb #_ibcs2= ibcs2 _mse= mse _ncr= ncr _ncv= ncv _nsp= nsp _pcfclock= pcfclock _pst= pst _sbni= sbni _stg= stg .if ${MK_SOURCELESS_UCODE} != "no" _ctau= ctau .endif _dpt= dpt _ex= ex .endif .if ${MACHINE_CPUARCH} == "arm" _cfi= cfi _cpsw= cpsw .endif .if ${MACHINE_CPUARCH} == "powerpc" _agp= agp _an= an _bm= bm _cardbus= cardbus _cbb= cbb _cfi= cfi _cpufreq= cpufreq _drm= drm _exca= exca _ffec= ffec _nvd= nvd _nvme= nvme _pccard= pccard _wi= wi .endif .if ${MACHINE_ARCH} == "powerpc64" _drm2= drm2 .endif .if ${MACHINE_ARCH} == "powerpc64" || ${MACHINE_ARCH} == "powerpc" # Don't build powermac_nvram for powerpcspe, it's never supported. _nvram= powermac_nvram .endif .if ${MACHINE_CPUARCH} == "sparc64" _auxio= auxio _em= em _epic= epic .endif .if (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_ARCH:Marmv[67]*} != "" || ${MACHINE_CPUARCH} == "i386") _cloudabi32= cloudabi32 .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" _cloudabi64= cloudabi64 .endif .endif .if ${MACHINE_ARCH:Marmv[67]*} != "" || ${MACHINE_CPUARCH} == "aarch64" _bcm283x_clkman= bcm283x_clkman _bcm283x_pwm= bcm283x_pwm .endif SUBDIR+=${MODULES_EXTRA} .for reject in ${WITHOUT_MODULES} SUBDIR:= ${SUBDIR:N${reject}} .endfor # Calling kldxref(8) for each module is expensive. .if !defined(NO_XREF) .MAKEFLAGS+= -DNO_XREF afterinstall: .PHONY @if type kldxref >/dev/null 2>&1; then \ ${ECHO} kldxref ${DESTDIR}${KMODDIR}; \ kldxref ${DESTDIR}${KMODDIR}; \ fi .endif .include "${SYSDIR}/conf/config.mk" SUBDIR:= ${SUBDIR:u:O} .include Index: projects/pnfs-planb-server/sys/netinet/tcp_hpts.c =================================================================== --- projects/pnfs-planb-server/sys/netinet/tcp_hpts.c (revision 334966) +++ projects/pnfs-planb-server/sys/netinet/tcp_hpts.c (revision 334967) @@ -1,1962 +1,1963 @@ /*- * Copyright (c) 2016-2018 Netflix Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" /** * Some notes about usage. 
 *
 * The tcp_hpts system is designed to provide a high precision timer
 * system for tcp.  Its main purpose is to provide a mechanism for
 * pacing packets out onto the wire.  It can be used in two ways
 * by a given TCP stack (and those two methods can be used
 * simultaneously).
 *
 * First, and probably the main thing it's used for by Rack and BBR, it
 * can be used to call tcp_output() of a transport stack at some time in
 * the future.  The normal way this is done is that tcp_output() of the
 * stack schedules itself to be called again by calling
 * tcp_hpts_insert(tcpcb, slot).  The slot is the time from now that the
 * stack wants to be called, but it must be converted to tcp_hpts's
 * notion of slot.  This is done with one of the macros HPTS_MS_TO_SLOTS
 * or HPTS_USEC_TO_SLOTS.  So a typical call from the tcp_output()
 * routine might look like:
 *
 *	tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(550));
 *
 * The above would schedule tcp_output() to be called in 550
 * microseconds.  Note that if using this mechanism the stack will want
 * to add, near its top, a check to prevent unwanted calls (from user
 * land or the arrival of incoming ACKs).  So it would add something
 * like:
 *
 *	if (inp->inp_in_hpts)
 *		return;
 *
 * to prevent output processing until the time allotted has gone by.
 * Of course this is a bare-bones example and the stack will probably
 * have more considerations than just the above.
 *
 * Now the tcp_hpts system will call tcp_output in one of two forms.
 * It will first check to see if the stack has defined a
 * tfb_tcp_output_wtime() function; if so, that is the routine it will
 * call.  If that function is not defined, it will call the
 * tfb_tcp_output() function.  The only difference between these two
 * calls is that the former passes the time in to the function so the
 * function does not have to access the time (which tcp_hpts already
 * has).  What these functions do is of course totally up to the
 * individual tcp stack.
 *
 * The second facility (actually two facilities) that the tcp_hpts
 * system provides is the ability to either abort a connection (later)
 * or process input on a connection.  Why would you want to do this?
 * To keep processor locality.
 *
 * So in order to use the input redirection facility, the stack changes
 * its tcp_do_segment() routine to, instead of processing the data
 * itself, call the function:
 *
 *	tcp_queue_pkt_to_input()
 *
 * You will note that the arguments to this function look a lot like
 * tcp_do_segment()'s arguments.  This function will assure that the
 * tcp_hpts system calls the function tfb_tcp_hpts_do_segment() from the
 * correct CPU.  Note that multiple calls can get pushed into the
 * tcp_hpts system; this will be indicated by the next-to-last argument
 * to tfb_tcp_hpts_do_segment() (nxt_pkt).  If nxt_pkt is a 1 then
 * another packet is coming.  If nxt_pkt is a 0 then this is the last
 * call that the tcp_hpts system has available for the tcp stack.
 *
 * The other point of the input system is to be able to safely drop a
 * tcp connection without worrying about the recursive locking that may
 * be occurring on the INP_WLOCK.  So if a stack wants to drop a
 * connection it calls:
 *
 *	tcp_set_inp_to_drop(tp, ETIMEDOUT)
 *
 * to schedule the tcp_hpts system to call
 *
 *	tcp_drop(tp, drop_reason)
 *
 * at a future point.  This is quite handy to prevent locking
 * issues when dropping connections.
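 *
 * As an illustrative (hypothetical) sketch, a stack opting in to both
 * facilities would populate its function block along these lines;
 * foo_stack and the foo_* handlers are placeholders:
 *
 *	struct tcp_function_block foo_stack = {
 *		...
 *		.tfb_tcp_output = foo_output,
 *		.tfb_tcp_output_wtime = foo_output_wtime,
 *		.tfb_tcp_hpts_do_segment = foo_do_segment_hpts,
 *		...
 *	};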
* */ #include #include #include #include #include #include #include #include #include /* for proc0 declaration */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES /* for logging */ #include #include #include #include #include /* required for icmp_var.h */ #include /* for ICMP_BANDLIM */ #include #include #include #include #define TCPOUTFLAGS #include #include #include #include #include #include #include #include #ifdef tcpdebug #include #endif /* tcpdebug */ #ifdef tcp_offload #include #endif #ifdef ipsec #include #include #endif /* ipsec */ #include "opt_rss.h" MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts"); #ifdef RSS static int tcp_bind_threads = 1; #else static int tcp_bind_threads = 0; #endif TUNABLE_INT("net.inet.tcp.bind_hptss", &tcp_bind_threads); static uint32_t tcp_hpts_logging_size = DEFAULT_HPTS_LOG; TUNABLE_INT("net.inet.tcp.hpts_logging_sz", &tcp_hpts_logging_size); static struct tcp_hptsi tcp_pace; static int tcp_hptsi_lock_inpinfo(struct inpcb *inp, struct tcpcb **tp); static void tcp_wakehpts(struct tcp_hpts_entry *p); static void tcp_wakeinput(struct tcp_hpts_entry *p); static void tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv); static void tcp_hptsi(struct tcp_hpts_entry *hpts, struct timeval *ctick); static void tcp_hpts_thread(void *ctx); static void tcp_init_hptsi(void *st); int32_t tcp_min_hptsi_time = DEFAULT_MIN_SLEEP; static int32_t tcp_hpts_callout_skip_swi = 0; SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts, CTLFLAG_RW, 0, "TCP Hpts controls"); #define timersub(tvp, uvp, vvp) \ do { \ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ if ((vvp)->tv_usec < 0) { \ (vvp)->tv_sec--; \ (vvp)->tv_usec += 1000000; \ } \ } while (0) static int32_t logging_on = 0; static int32_t hpts_sleep_max = (NUM_OF_HPTSI_SLOTS - 2); static int32_t tcp_hpts_precision = 120; SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, precision, CTLFLAG_RW, &tcp_hpts_precision, 120, "Value for PRE() precision of callout"); SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, logging, CTLFLAG_RW, &logging_on, 0, "Turn on logging if compiled in"); counter_u64_t hpts_loops; SYSCTL_COUNTER_U64(_net_inet_tcp_hpts, OID_AUTO, loops, CTLFLAG_RD, &hpts_loops, "Number of times hpts had to loop to catch up"); counter_u64_t back_tosleep; SYSCTL_COUNTER_U64(_net_inet_tcp_hpts, OID_AUTO, no_tcbsfound, CTLFLAG_RD, &back_tosleep, "Number of times hpts found no tcbs"); static int32_t in_newts_every_tcb = 0; SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, in_tsperpcb, CTLFLAG_RW, &in_newts_every_tcb, 0, "Do we have a new cts every tcb we process for input"); static int32_t in_ts_percision = 0; SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, in_tspercision, CTLFLAG_RW, &in_ts_percision, 0, "Do we use percise timestamp for clients on input"); static int32_t out_newts_every_tcb = 0; SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, out_tsperpcb, CTLFLAG_RW, &out_newts_every_tcb, 0, "Do we have a new cts every tcb we process for output"); static int32_t out_ts_percision = 0; SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, out_tspercision, CTLFLAG_RW, &out_ts_percision, 0, "Do we use a percise timestamp for every output cts"); SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, maxsleep, CTLFLAG_RW, &hpts_sleep_max, 0, "The maximum time the hpts will sleep <1 - 254>"); SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, minsleep, CTLFLAG_RW, &tcp_min_hptsi_time, 0, "The minimum time the hpts must sleep before processing more 
slots"); SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, skip_swi, CTLFLAG_RW, &tcp_hpts_callout_skip_swi, 0, "Do we have the callout call directly to the hpts?"); static void __tcp_hpts_log_it(struct tcp_hpts_entry *hpts, struct inpcb *inp, int event, uint32_t slot, uint32_t ticknow, int32_t line) { struct hpts_log *pl; HPTS_MTX_ASSERT(hpts); if (hpts->p_log == NULL) return; pl = &hpts->p_log[hpts->p_log_at]; hpts->p_log_at++; if (hpts->p_log_at >= hpts->p_logsize) { hpts->p_log_at = 0; hpts->p_log_wrapped = 1; } pl->inp = inp; if (inp) { pl->t_paceslot = inp->inp_hptsslot; pl->t_hptsreq = inp->inp_hpts_request; pl->p_onhpts = inp->inp_in_hpts; pl->p_oninput = inp->inp_in_input; } else { pl->t_paceslot = 0; pl->t_hptsreq = 0; pl->p_onhpts = 0; pl->p_oninput = 0; } pl->is_notempty = 1; pl->event = event; pl->line = line; pl->cts = tcp_get_usecs(NULL); pl->p_curtick = hpts->p_curtick; pl->p_prevtick = hpts->p_prevtick; pl->p_on_queue_cnt = hpts->p_on_queue_cnt; pl->ticknow = ticknow; pl->slot_req = slot; pl->p_nxt_slot = hpts->p_nxt_slot; pl->p_cur_slot = hpts->p_cur_slot; pl->p_hpts_sleep_time = hpts->p_hpts_sleep_time; pl->p_flags = (hpts->p_cpu & 0x7f); pl->p_flags <<= 7; pl->p_flags |= (hpts->p_num & 0x7f); pl->p_flags <<= 2; if (hpts->p_hpts_active) { pl->p_flags |= HPTS_HPTS_ACTIVE; } } #define tcp_hpts_log_it(a, b, c, d, e) __tcp_hpts_log_it(a, b, c, d, e, __LINE__) static void hpts_timeout_swi(void *arg) { struct tcp_hpts_entry *hpts; hpts = (struct tcp_hpts_entry *)arg; swi_sched(hpts->ie_cookie, 0); } static void hpts_timeout_dir(void *arg) { tcp_hpts_thread(arg); } static inline void hpts_sane_pace_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp, struct hptsh *head, int clear) { #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx) == 0) { /* We don't own the mutex? */ panic("%s: hpts:%p inp:%p no hpts mutex", __FUNCTION__, hpts, inp); } if (hpts->p_cpu != inp->inp_hpts_cpu) { /* It is not the right cpu/mutex? */ panic("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp); } if (inp->inp_in_hpts == 0) { /* We are not on the hpts? */ panic("%s: hpts:%p inp:%p not on the hpts?", __FUNCTION__, hpts, inp); } if (TAILQ_EMPTY(head) && (hpts->p_on_queue_cnt != 0)) { /* We should not be empty with a queue count */ panic("%s hpts:%p hpts bucket empty but cnt:%d", __FUNCTION__, hpts, hpts->p_on_queue_cnt); } #endif TAILQ_REMOVE(head, inp, inp_hpts); hpts->p_on_queue_cnt--; if (hpts->p_on_queue_cnt < 0) { /* Count should not go negative .. */ #ifdef INVARIANTS panic("Hpts goes negative inp:%p hpts:%p", inp, hpts); #endif hpts->p_on_queue_cnt = 0; } if (clear) { inp->inp_hpts_request = 0; inp->inp_in_hpts = 0; } } static inline void hpts_sane_pace_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, struct hptsh *head, int line, int noref) { #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx) == 0) { /* We don't own the mutex? */ panic("%s: hpts:%p inp:%p no hpts mutex", __FUNCTION__, hpts, inp); } if (hpts->p_cpu != inp->inp_hpts_cpu) { /* It is not the right cpu/mutex? */ panic("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp); } if ((noref == 0) && (inp->inp_in_hpts == 1)) { /* We are already on the hpts? 
*/ panic("%s: hpts:%p inp:%p already on the hpts?", __FUNCTION__, hpts, inp); } #endif TAILQ_INSERT_TAIL(head, inp, inp_hpts); inp->inp_in_hpts = 1; hpts->p_on_queue_cnt++; if (noref == 0) { in_pcbref(inp); } } static inline void hpts_sane_input_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp, int clear) { #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx) == 0) { /* We don't own the mutex? */ panic("%s: hpts:%p inp:%p no hpts mutex", __FUNCTION__, hpts, inp); } if (hpts->p_cpu != inp->inp_input_cpu) { /* It is not the right cpu/mutex? */ panic("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp); } if (inp->inp_in_input == 0) { /* We are not on the input hpts? */ panic("%s: hpts:%p inp:%p not on the input hpts?", __FUNCTION__, hpts, inp); } #endif TAILQ_REMOVE(&hpts->p_input, inp, inp_input); hpts->p_on_inqueue_cnt--; if (hpts->p_on_inqueue_cnt < 0) { #ifdef INVARIANTS panic("Hpts in goes negative inp:%p hpts:%p", inp, hpts); #endif hpts->p_on_inqueue_cnt = 0; } #ifdef INVARIANTS if (TAILQ_EMPTY(&hpts->p_input) && (hpts->p_on_inqueue_cnt != 0)) { /* We should not be empty with a queue count */ panic("%s hpts:%p in_hpts input empty but cnt:%d", __FUNCTION__, hpts, hpts->p_on_inqueue_cnt); } #endif if (clear) inp->inp_in_input = 0; } static inline void hpts_sane_input_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, int line) { #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx) == 0) { /* We don't own the mutex? */ panic("%s: hpts:%p inp:%p no hpts mutex", __FUNCTION__, hpts, inp); } if (hpts->p_cpu != inp->inp_input_cpu) { /* It is not the right cpu/mutex? */ panic("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp); } if (inp->inp_in_input == 1) { /* We are already on the input hpts? */ panic("%s: hpts:%p inp:%p already on the input hpts?", __FUNCTION__, hpts, inp); } #endif TAILQ_INSERT_TAIL(&hpts->p_input, inp, inp_input); inp->inp_in_input = 1; hpts->p_on_inqueue_cnt++; in_pcbref(inp); } static int sysctl_tcp_hpts_log(SYSCTL_HANDLER_ARGS) { struct tcp_hpts_entry *hpts; size_t sz; int32_t logging_was, i; int32_t error = 0; /* * HACK: Turn off logging so no locks are required this really needs * a memory barrier :) */ logging_was = logging_on; logging_on = 0; if (!req->oldptr) { /* How much? */ sz = 0; for (i = 0; i < tcp_pace.rp_num_hptss; i++) { hpts = tcp_pace.rp_ent[i]; if (hpts->p_log == NULL) continue; sz += (sizeof(struct hpts_log) * hpts->p_logsize); } error = SYSCTL_OUT(req, 0, sz); } else { for (i = 0; i < tcp_pace.rp_num_hptss; i++) { hpts = tcp_pace.rp_ent[i]; if (hpts->p_log == NULL) continue; if (hpts->p_log_wrapped) sz = (sizeof(struct hpts_log) * hpts->p_logsize); else sz = (sizeof(struct hpts_log) * hpts->p_log_at); error = SYSCTL_OUT(req, hpts->p_log, sz); } } logging_on = logging_was; return error; } SYSCTL_PROC(_net_inet_tcp_hpts, OID_AUTO, log, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_tcp_hpts_log, "A", "tcp hptsi log"); /* * Try to get the INP_INFO lock. * * This function always succeeds in getting the lock. It will clear * *tpp and return (1) if something critical changed while the inpcb * was unlocked. Otherwise, it will leave *tpp unchanged and return (0). * * This function relies on the fact that the hpts always holds a * reference on the inpcb while the segment is on the hptsi wheel and * in the input queue. * */ static int tcp_hptsi_lock_inpinfo(struct inpcb *inp, struct tcpcb **tpp) { struct tcp_function_block *tfb; struct tcpcb *tp; void *ptr; /* Try the easy way. 
*/ if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) return (0); /* * OK, let's try the hard way. We'll save the function pointer block * to make sure that doesn't change while we aren't holding the * lock. */ tp = *tpp; tfb = tp->t_fb; ptr = tp->t_fb_ptr; INP_WUNLOCK(inp); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); /* If the session went away, return an error. */ if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) || (inp->inp_flags2 & INP_FREED)) { *tpp = NULL; return (1); } /* * If the function block or stack-specific data block changed, * report an error. */ tp = intotcpcb(inp); if ((tp->t_fb != tfb) && (tp->t_fb_ptr != ptr)) { *tpp = NULL; return (1); } return (0); } static void tcp_wakehpts(struct tcp_hpts_entry *hpts) { HPTS_MTX_ASSERT(hpts); swi_sched(hpts->ie_cookie, 0); if (hpts->p_hpts_active == 2) { /* Rare sleeping on a ENOBUF */ wakeup_one(hpts); } } static void tcp_wakeinput(struct tcp_hpts_entry *hpts) { HPTS_MTX_ASSERT(hpts); swi_sched(hpts->ie_cookie, 0); if (hpts->p_hpts_active == 2) { /* Rare sleeping on a ENOBUF */ wakeup_one(hpts); } } struct tcp_hpts_entry * tcp_cur_hpts(struct inpcb *inp) { int32_t hpts_num; struct tcp_hpts_entry *hpts; hpts_num = inp->inp_hpts_cpu; hpts = tcp_pace.rp_ent[hpts_num]; return (hpts); } struct tcp_hpts_entry * tcp_hpts_lock(struct inpcb *inp) { struct tcp_hpts_entry *hpts; int32_t hpts_num; again: hpts_num = inp->inp_hpts_cpu; hpts = tcp_pace.rp_ent[hpts_num]; #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx)) { panic("Hpts:%p owns mtx prior-to lock line:%d", hpts, __LINE__); } #endif mtx_lock(&hpts->p_mtx); if (hpts_num != inp->inp_hpts_cpu) { mtx_unlock(&hpts->p_mtx); goto again; } return (hpts); } struct tcp_hpts_entry * tcp_input_lock(struct inpcb *inp) { struct tcp_hpts_entry *hpts; int32_t hpts_num; again: hpts_num = inp->inp_input_cpu; hpts = tcp_pace.rp_ent[hpts_num]; #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx)) { panic("Hpts:%p owns mtx prior-to lock line:%d", hpts, __LINE__); } #endif mtx_lock(&hpts->p_mtx); if (hpts_num != inp->inp_input_cpu) { mtx_unlock(&hpts->p_mtx); goto again; } return (hpts); } static void tcp_remove_hpts_ref(struct inpcb *inp, struct tcp_hpts_entry *hpts, int line) { int32_t add_freed; if (inp->inp_flags2 & INP_FREED) { /* * Need to play a special trick so that in_pcbrele_wlocked * does not return 1 when it really should have returned 0. */ add_freed = 1; inp->inp_flags2 &= ~INP_FREED; } else { add_freed = 0; } #ifndef INP_REF_DEBUG if (in_pcbrele_wlocked(inp)) { /* * This should not happen. We have the inpcb referred to by * the main socket (why we are called) and the hpts. It * should always return 0. */ panic("inpcb:%p release ret 1", inp); } #else if (__in_pcbrele_wlocked(inp, line)) { /* * This should not happen. We have the inpcb referred to by * the main socket (why we are called) and the hpts. It * should always return 0. 
*/ panic("inpcb:%p release ret 1", inp); } #endif if (add_freed) { inp->inp_flags2 |= INP_FREED; } } static void tcp_hpts_remove_locked_output(struct tcp_hpts_entry *hpts, struct inpcb *inp, int32_t flags, int32_t line) { if (inp->inp_in_hpts) { hpts_sane_pace_remove(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], 1); tcp_remove_hpts_ref(inp, hpts, line); } } static void tcp_hpts_remove_locked_input(struct tcp_hpts_entry *hpts, struct inpcb *inp, int32_t flags, int32_t line) { HPTS_MTX_ASSERT(hpts); if (inp->inp_in_input) { hpts_sane_input_remove(hpts, inp, 1); tcp_remove_hpts_ref(inp, hpts, line); } } /* * Called normally with the INP_LOCKED but it * does not matter, the hpts lock is the key * but the lock order allows us to hold the * INP lock and then get the hpts lock. * * Valid values in the flags are * HPTS_REMOVE_OUTPUT - remove from the output of the hpts. * HPTS_REMOVE_INPUT - remove from the input of the hpts. * Note that you can or both values together and get two * actions. */ void __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line) { struct tcp_hpts_entry *hpts; INP_WLOCK_ASSERT(inp); if (flags & HPTS_REMOVE_OUTPUT) { hpts = tcp_hpts_lock(inp); tcp_hpts_remove_locked_output(hpts, inp, flags, line); mtx_unlock(&hpts->p_mtx); } if (flags & HPTS_REMOVE_INPUT) { hpts = tcp_input_lock(inp); tcp_hpts_remove_locked_input(hpts, inp, flags, line); mtx_unlock(&hpts->p_mtx); } } static inline int hpts_tick(struct tcp_hpts_entry *hpts, int32_t plus) { return ((hpts->p_prevtick + plus) % NUM_OF_HPTSI_SLOTS); } static int tcp_queue_to_hpts_immediate_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line, int32_t noref) { int32_t need_wake = 0; uint32_t ticknow = 0; HPTS_MTX_ASSERT(hpts); if (inp->inp_in_hpts == 0) { /* Ok we need to set it on the hpts in the current slot */ if (hpts->p_hpts_active == 0) { /* A sleeping hpts we want in next slot to run */ if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_INSERT_SLEEPER, 0, hpts_tick(hpts, 1)); } inp->inp_hptsslot = hpts_tick(hpts, 1); inp->inp_hpts_request = 0; if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_SLEEP_BEFORE, 1, ticknow); } need_wake = 1; } else if ((void *)inp == hpts->p_inp) { /* * We can't allow you to go into the same slot we * are in. We must put you out. */ inp->inp_hptsslot = hpts->p_nxt_slot; } else inp->inp_hptsslot = hpts->p_cur_slot; hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, noref); inp->inp_hpts_request = 0; if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_IMMEDIATE, 0, 0); } if (need_wake) { /* * Activate the hpts if it is sleeping and its * timeout is not 1. 
*/ if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_WAKEUP_HPTS, 0, ticknow); } hpts->p_direct_wake = 1; tcp_wakehpts(hpts); } } return (need_wake); } int __tcp_queue_to_hpts_immediate(struct inpcb *inp, int32_t line) { int32_t ret; struct tcp_hpts_entry *hpts; INP_WLOCK_ASSERT(inp); hpts = tcp_hpts_lock(inp); ret = tcp_queue_to_hpts_immediate_locked(inp, hpts, line, 0); mtx_unlock(&hpts->p_mtx); return (ret); } static void tcp_hpts_insert_locked(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t slot, uint32_t cts, int32_t line, struct hpts_diag *diag, int32_t noref) { int32_t need_new_to = 0; int32_t need_wakeup = 0; uint32_t largest_slot; uint32_t ticknow = 0; uint32_t slot_calc; HPTS_MTX_ASSERT(hpts); if (diag) { memset(diag, 0, sizeof(struct hpts_diag)); diag->p_hpts_active = hpts->p_hpts_active; diag->p_nxt_slot = hpts->p_nxt_slot; diag->p_cur_slot = hpts->p_cur_slot; diag->slot_req = slot; } if ((inp->inp_in_hpts == 0) || noref) { inp->inp_hpts_request = slot; if (slot == 0) { /* Immediate */ tcp_queue_to_hpts_immediate_locked(inp, hpts, line, noref); return; } if (hpts->p_hpts_active) { /* * Its slot - 1 since nxt_slot is the next tick that * will go off since the hpts is awake */ if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_INSERT_NORMAL, slot, 0); } /* * We want to make sure that we don't place a inp in * the range of p_cur_slot <-> p_nxt_slot. If we * take from p_nxt_slot to the end, plus p_cur_slot * and then take away 2, we will know how many is * the max slots we can use. */ if (hpts->p_nxt_slot > hpts->p_cur_slot) { /* * Non-wrap case nxt_slot <-> cur_slot we * don't want to land in. So the diff gives * us what is taken away from the number of * slots. */ largest_slot = NUM_OF_HPTSI_SLOTS - (hpts->p_nxt_slot - hpts->p_cur_slot); } else if (hpts->p_nxt_slot == hpts->p_cur_slot) { largest_slot = NUM_OF_HPTSI_SLOTS - 2; } else { /* * Wrap case so the diff gives us the number * of slots that we can land in. */ largest_slot = hpts->p_cur_slot - hpts->p_nxt_slot; } /* * We take away two so we never have a problem (20 * usec's) out of 1024000 usecs */ largest_slot -= 2; if (inp->inp_hpts_request > largest_slot) { /* * Restrict max jump of slots and remember * leftover */ slot = largest_slot; inp->inp_hpts_request -= largest_slot; } else { /* This one will run when we hit it */ inp->inp_hpts_request = 0; } if (hpts->p_nxt_slot == hpts->p_cur_slot) slot_calc = (hpts->p_nxt_slot + slot) % NUM_OF_HPTSI_SLOTS; else slot_calc = (hpts->p_nxt_slot + slot - 1) % NUM_OF_HPTSI_SLOTS; if (slot_calc == hpts->p_cur_slot) { #ifdef INVARIANTS /* TSNH */ panic("Hpts:%p impossible slot calculation slot_calc:%u slot:%u largest:%u\n", hpts, slot_calc, slot, largest_slot); #endif if (slot_calc) slot_calc--; else slot_calc = NUM_OF_HPTSI_SLOTS - 1; } inp->inp_hptsslot = slot_calc; if (diag) { diag->inp_hptsslot = inp->inp_hptsslot; } } else { /* * The hpts is sleeping, we need to figure out where * it will wake up at and if we need to reschedule * its time-out. */ uint32_t have_slept, yet_to_sleep; uint32_t slot_now; struct timeval tv; ticknow = tcp_gethptstick(&tv); slot_now = ticknow % NUM_OF_HPTSI_SLOTS; /* * The user wants to be inserted at (slot_now + * slot) % NUM_OF_HPTSI_SLOTS, so lets set that up. 
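 * A worked example of the wrap-around (a sketch; the slot count and
 * the 10 usec tick are purely illustrative): on a 1024-slot wheel
 * with slot_now == 1020, a request of 10 slots lands the inp in slot
 * (1020 + 10) % 1024 == 6, i.e. roughly 100 usec from now after the
 * wheel wraps through slot 0.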
*/ largest_slot = NUM_OF_HPTSI_SLOTS - 2; if (inp->inp_hpts_request > largest_slot) { /* Adjust the residual in inp_hpts_request */ slot = largest_slot; inp->inp_hpts_request -= largest_slot; } else { /* No residual it all fits */ inp->inp_hpts_request = 0; } inp->inp_hptsslot = (slot_now + slot) % NUM_OF_HPTSI_SLOTS; if (diag) { diag->slot_now = slot_now; diag->inp_hptsslot = inp->inp_hptsslot; diag->p_on_min_sleep = hpts->p_on_min_sleep; } if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_INSERT_SLEEPER, slot, ticknow); } /* Now do we need to restart the hpts's timer? */ if (TSTMP_GT(ticknow, hpts->p_curtick)) have_slept = ticknow - hpts->p_curtick; else have_slept = 0; if (have_slept < hpts->p_hpts_sleep_time) { /* This should be what happens */ yet_to_sleep = hpts->p_hpts_sleep_time - have_slept; } else { /* We are over-due */ yet_to_sleep = 0; need_wakeup = 1; } if (diag) { diag->have_slept = have_slept; diag->yet_to_sleep = yet_to_sleep; diag->hpts_sleep_time = hpts->p_hpts_sleep_time; } if ((hpts->p_on_min_sleep == 0) && (yet_to_sleep > slot)) { /* * We need to reschedule the hptss time-out. */ hpts->p_hpts_sleep_time = slot; need_new_to = slot * HPTS_TICKS_PER_USEC; } } hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, noref); if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_INSERTED, slot, ticknow); } /* * Now how far is the hpts sleeping to? if active is 1, its * up and ticking we do nothing, otherwise we may need to * reschedule its callout if need_new_to is set from above. */ if (need_wakeup) { if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_RESCHEDULE, 1, 0); } hpts->p_direct_wake = 1; tcp_wakehpts(hpts); if (diag) { diag->need_new_to = 0; diag->co_ret = 0xffff0000; } } else if (need_new_to) { int32_t co_ret; struct timeval tv; sbintime_t sb; tv.tv_sec = 0; tv.tv_usec = 0; while (need_new_to > HPTS_USEC_IN_SEC) { tv.tv_sec++; need_new_to -= HPTS_USEC_IN_SEC; } tv.tv_usec = need_new_to; sb = tvtosbt(tv); if (tcp_hpts_callout_skip_swi == 0) { co_ret = callout_reset_sbt_on(&hpts->co, sb, 0, hpts_timeout_swi, hpts, hpts->p_cpu, (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); } else { co_ret = callout_reset_sbt_on(&hpts->co, sb, 0, hpts_timeout_dir, hpts, hpts->p_cpu, C_PREL(tcp_hpts_precision)); } if (diag) { diag->need_new_to = need_new_to; diag->co_ret = co_ret; } } } else { #ifdef INVARIANTS panic("Hpts:%p tp:%p already on hpts and add?", hpts, inp); #endif } } uint32_t tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag){ struct tcp_hpts_entry *hpts; uint32_t slot_on, cts; struct timeval tv; /* * We now return the next-slot the hpts will be on, beyond its * current run (if up) or where it was when it stopped if it is * sleeping. */ INP_WLOCK_ASSERT(inp); hpts = tcp_hpts_lock(inp); if (in_ts_percision) microuptime(&tv); else getmicrouptime(&tv); cts = tcp_tv_to_usectick(&tv); tcp_hpts_insert_locked(hpts, inp, slot, cts, line, diag, 0); slot_on = hpts->p_nxt_slot; mtx_unlock(&hpts->p_mtx); return (slot_on); } uint32_t __tcp_hpts_insert(struct inpcb *inp, uint32_t slot, int32_t line){ return (tcp_hpts_insert_diag(inp, slot, line, NULL)); } int __tcp_queue_to_input_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line) { int32_t retval = 0; HPTS_MTX_ASSERT(hpts); if (inp->inp_in_input == 0) { /* Ok we need to set it on the hpts in the current slot */ hpts_sane_input_insert(hpts, inp, line); retval = 1; if (hpts->p_hpts_active == 0) { /* * Activate the hpts if it is sleeping. 
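 * The return values used by this function form a small enumeration:
 * 1 == newly queued on the input list, 2 == newly queued and we had
 * to wake a sleeping hpts, 4 == already queued but the hpts was
 * asleep so we only issued a wakeup.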
*/ if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_WAKEUP_INPUT, 0, 0); } retval = 2; hpts->p_direct_wake = 1; tcp_wakeinput(hpts); } } else if (hpts->p_hpts_active == 0) { retval = 4; hpts->p_direct_wake = 1; tcp_wakeinput(hpts); } return (retval); } void tcp_queue_pkt_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked) { /* Setup packet for input first */ INP_WLOCK_ASSERT(tp->t_inpcb); m->m_pkthdr.pace_thoff = (uint16_t) ((caddr_t)th - mtod(m, caddr_t)); m->m_pkthdr.pace_tlen = (uint16_t) tlen; m->m_pkthdr.pace_drphdrlen = drop_hdrlen; m->m_pkthdr.pace_tos = iptos; m->m_pkthdr.pace_lock = (uint8_t) ti_locked; if (tp->t_in_pkt == NULL) { tp->t_in_pkt = m; tp->t_tail_pkt = m; } else { tp->t_tail_pkt->m_nextpkt = m; tp->t_tail_pkt = m; } } int32_t __tcp_queue_to_input(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, int32_t tlen, int32_t drop_hdrlen, uint8_t iptos, uint8_t ti_locked, int32_t line){ struct tcp_hpts_entry *hpts; int32_t ret; tcp_queue_pkt_to_input(tp, m, th, tlen, drop_hdrlen, iptos, ti_locked); hpts = tcp_input_lock(tp->t_inpcb); ret = __tcp_queue_to_input_locked(tp->t_inpcb, hpts, line); mtx_unlock(&hpts->p_mtx); return (ret); } void __tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason, int32_t line) { struct tcp_hpts_entry *hpts; struct tcpcb *tp; tp = intotcpcb(inp); hpts = tcp_input_lock(tp->t_inpcb); if (inp->inp_in_input == 0) { /* Ok we need to set it on the hpts in the current slot */ hpts_sane_input_insert(hpts, inp, line); if (hpts->p_hpts_active == 0) { /* * Activate the hpts if it is sleeping. */ hpts->p_direct_wake = 1; tcp_wakeinput(hpts); } } else if (hpts->p_hpts_active == 0) { hpts->p_direct_wake = 1; tcp_wakeinput(hpts); } inp->inp_hpts_drop_reas = reason; mtx_unlock(&hpts->p_mtx); } static uint16_t hpts_random_cpu(struct inpcb *inp){ /* * No flow type set distribute the load randomly. */ uint16_t cpuid; uint32_t ran; /* * If one has been set use it i.e. we want both in and out on the * same hpts. */ if (inp->inp_input_cpu_set) { return (inp->inp_input_cpu); } else if (inp->inp_hpts_cpu_set) { return (inp->inp_hpts_cpu); } /* Nothing set use a random number */ ran = arc4random(); cpuid = (ran & 0xffff) % mp_ncpus; return (cpuid); } static uint16_t hpts_cpuid(struct inpcb *inp){ uint16_t cpuid; /* * If one has been set use it i.e. we want both in and out on the * same hpts. */ if (inp->inp_input_cpu_set) { return (inp->inp_input_cpu); } else if (inp->inp_hpts_cpu_set) { return (inp->inp_hpts_cpu); } /* If one is set the other must be the same */ #ifdef RSS cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); if (cpuid == NETISR_CPUID_NONE) return (hpts_random_cpu(inp)); else return (cpuid); #else /* * We don't have a flowid -> cpuid mapping, so cheat and just map * unknown cpuids to curcpu. Not the best, but apparently better * than defaulting to swi 0. */ if (inp->inp_flowtype != M_HASHTYPE_NONE) { cpuid = inp->inp_flowid % mp_ncpus; return (cpuid); } cpuid = hpts_random_cpu(inp); return (cpuid); #endif } /* * Do NOT try to optimize the processing of inp's * by first pulling off all the inp's into a temporary * list (e.g. TAILQ_CONCAT). If you do that the subtle * interactions of switching CPU's will kill because of * problems in the linked list manipulation. Basically * you would switch cpu's with the hpts mutex locked * but then while you were processing one of the inp's * some other one that you switch will get a new * packet on the different CPU. 
It will insert it * on the new hpts's input list. Creating a temporary * link in the inp will not fix it either, since * the other hpts will be doing the same thing and * you will both end up using the temporary link. * * You will die in an ASSERT for tailq corruption if you * run INVARIANTS or you will die horribly without * INVARIANTS in some unknown way with a corrupt linked * list. */ static void tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv) { struct mbuf *m, *n; struct tcpcb *tp; struct inpcb *inp; uint16_t drop_reason; int16_t set_cpu; uint32_t did_prefetch = 0; int32_t ti_locked = TI_UNLOCKED; HPTS_MTX_ASSERT(hpts); while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) { HPTS_MTX_ASSERT(hpts); hpts_sane_input_remove(hpts, inp, 0); if (inp->inp_input_cpu_set == 0) { set_cpu = 1; } else { set_cpu = 0; } hpts->p_inp = inp; drop_reason = inp->inp_hpts_drop_reas; inp->inp_in_input = 0; mtx_unlock(&hpts->p_mtx); if (drop_reason) { INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; } else { ti_locked = TI_UNLOCKED; } INP_WLOCK(inp); if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) || (inp->inp_flags2 & INP_FREED)) { out: hpts->p_inp = NULL; if (ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); } if (in_pcbrele_wlocked(inp) == 0) { INP_WUNLOCK(inp); } ti_locked = TI_UNLOCKED; mtx_lock(&hpts->p_mtx); continue; } tp = intotcpcb(inp); if ((tp == NULL) || (tp->t_inpcb == NULL)) { goto out; } if (drop_reason) { /* This tcb is being destroyed for drop_reason */ m = tp->t_in_pkt; if (m) n = m->m_nextpkt; else n = NULL; tp->t_in_pkt = NULL; while (m) { m_freem(m); m = n; if (m) n = m->m_nextpkt; } tp = tcp_drop(tp, drop_reason); INP_INFO_RUNLOCK(&V_tcbinfo); if (tp == NULL) { INP_WLOCK(inp); } if (in_pcbrele_wlocked(inp) == 0) INP_WUNLOCK(inp); mtx_lock(&hpts->p_mtx); continue; } if (set_cpu) { /* * Setup so the next time we will move to the right * CPU. This should be a rare event. It will * sometimes happen when we are the client side * (usually not the server). Somehow tcp_output() * gets called before the tcp_do_segment() sets the * initial state. This means the r_cpu and r_hpts_cpu * are 0. We get on the hpts, and then tcp_input() * gets called setting up the r_cpu to the correct * value. The hpts goes off and sees the mis-match.
* We simply correct it here and the CPU will switch * to the new hpts nextime the tcb gets added to the * the hpts (not this time) :-) */ tcp_set_hpts(inp); } CURVNET_SET(tp->t_vnet); m = tp->t_in_pkt; n = NULL; if (m != NULL && (m->m_pkthdr.pace_lock == TI_RLOCKED || tp->t_state != TCPS_ESTABLISHED)) { ti_locked = TI_RLOCKED; if (tcp_hptsi_lock_inpinfo(inp, &tp)) { CURVNET_RESTORE(); goto out; } m = tp->t_in_pkt; } if (in_newts_every_tcb) { if (in_ts_percision) microuptime(tv); else getmicrouptime(tv); } if (tp->t_fb_ptr != NULL) { kern_prefetch(tp->t_fb_ptr, &did_prefetch); did_prefetch = 1; } /* Any input work to do, if so do it first */ if ((m != NULL) && (m == tp->t_in_pkt)) { struct tcphdr *th; int32_t tlen, drop_hdrlen, nxt_pkt; uint8_t iptos; n = m->m_nextpkt; tp->t_in_pkt = tp->t_tail_pkt = NULL; while (m) { th = (struct tcphdr *)(mtod(m, caddr_t)+m->m_pkthdr.pace_thoff); tlen = m->m_pkthdr.pace_tlen; drop_hdrlen = m->m_pkthdr.pace_drphdrlen; iptos = m->m_pkthdr.pace_tos; m->m_nextpkt = NULL; if (n) nxt_pkt = 1; else nxt_pkt = 0; inp->inp_input_calls = 1; if (tp->t_fb->tfb_tcp_hpts_do_segment) { /* Use the hpts specific do_segment */ (*tp->t_fb->tfb_tcp_hpts_do_segment) (m, th, inp->inp_socket, tp, drop_hdrlen, tlen, iptos, ti_locked, nxt_pkt, tv); } else { /* Use the default do_segment */ (*tp->t_fb->tfb_tcp_do_segment) (m, th, inp->inp_socket, tp, drop_hdrlen, tlen, iptos, ti_locked); } /* * Do segment returns unlocked we need the * lock again but we also need some kasserts * here. */ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_UNLOCK_ASSERT(inp); m = n; if (m) n = m->m_nextpkt; if (m != NULL && m->m_pkthdr.pace_lock == TI_RLOCKED) { INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; } else ti_locked = TI_UNLOCKED; INP_WLOCK(inp); /* * Since we have an opening here we must * re-check if the tcb went away while we * were getting the lock(s). */ if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) || (inp->inp_flags2 & INP_FREED)) { out_free: while (m) { m_freem(m); m = n; if (m) n = m->m_nextpkt; } CURVNET_RESTORE(); goto out; } /* * Now that we hold the INP lock, check if * we need to upgrade our lock. 
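 * The rule mirrored here (a summary of the checks above, not new
 * policy): segments for an ESTABLISHED connection can be processed
 * under the inpcb lock alone, while any other state may transition
 * or tear the tcb down and therefore needs the INP_INFO read lock
 * first, taken via tcp_hptsi_lock_inpinfo() since we already hold
 * the inpcb lock.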
*/ if (ti_locked == TI_UNLOCKED && (tp->t_state != TCPS_ESTABLISHED)) { ti_locked = TI_RLOCKED; if (tcp_hptsi_lock_inpinfo(inp, &tp)) goto out_free; } } /** end while(m) */ } /** end if ((m != NULL) && (m == tp->t_in_pkt)) */ if (in_pcbrele_wlocked(inp) == 0) INP_WUNLOCK(inp); if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_UNLOCK_ASSERT(inp); ti_locked = TI_UNLOCKED; mtx_lock(&hpts->p_mtx); hpts->p_inp = NULL; CURVNET_RESTORE(); } } static int tcp_hpts_est_run(struct tcp_hpts_entry *hpts) { int32_t ticks_to_run; if (hpts->p_prevtick && (SEQ_GT(hpts->p_curtick, hpts->p_prevtick))) { ticks_to_run = hpts->p_curtick - hpts->p_prevtick; if (ticks_to_run >= (NUM_OF_HPTSI_SLOTS - 1)) { ticks_to_run = NUM_OF_HPTSI_SLOTS - 2; } } else { if (hpts->p_prevtick == hpts->p_curtick) { /* This happens when we get woken up right away */ return (-1); } ticks_to_run = 1; } /* Set in where we will be when we catch up */ hpts->p_nxt_slot = (hpts->p_cur_slot + ticks_to_run) % NUM_OF_HPTSI_SLOTS; if (hpts->p_nxt_slot == hpts->p_cur_slot) { panic("Impossible math -- hpts:%p p_nxt_slot:%d p_cur_slot:%d ticks_to_run:%d", hpts, hpts->p_nxt_slot, hpts->p_cur_slot, ticks_to_run); } return (ticks_to_run); } static void tcp_hptsi(struct tcp_hpts_entry *hpts, struct timeval *ctick) { struct tcpcb *tp; struct inpcb *inp = NULL, *ninp; struct timeval tv; int32_t ticks_to_run, i, error, tick_now, interum_tick; int32_t paced_cnt = 0; int32_t did_prefetch = 0; int32_t prefetch_ninp = 0; int32_t prefetch_tp = 0; uint32_t cts; int16_t set_cpu; HPTS_MTX_ASSERT(hpts); hpts->p_curtick = tcp_tv_to_hptstick(ctick); cts = tcp_tv_to_usectick(ctick); memcpy(&tv, ctick, sizeof(struct timeval)); hpts->p_cur_slot = hpts_tick(hpts, 1); /* Figure out if we had missed ticks */ again: HPTS_MTX_ASSERT(hpts); ticks_to_run = tcp_hpts_est_run(hpts); if (!TAILQ_EMPTY(&hpts->p_input)) { tcp_input_data(hpts, &tv); } #ifdef INVARIANTS if (TAILQ_EMPTY(&hpts->p_input) && (hpts->p_on_inqueue_cnt != 0)) { panic("tp:%p in_hpts input empty but cnt:%d", hpts, hpts->p_on_inqueue_cnt); } #endif HPTS_MTX_ASSERT(hpts); /* Reset the ticks to run and time if we need too */ interum_tick = tcp_gethptstick(&tv); if (interum_tick != hpts->p_curtick) { /* Save off the new time we execute to */ *ctick = tv; hpts->p_curtick = interum_tick; cts = tcp_tv_to_usectick(&tv); hpts->p_cur_slot = hpts_tick(hpts, 1); ticks_to_run = tcp_hpts_est_run(hpts); } if (ticks_to_run == -1) { goto no_run; } if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_SETTORUN, ticks_to_run, 0); } if (hpts->p_on_queue_cnt == 0) { goto no_one; } HPTS_MTX_ASSERT(hpts); for (i = 0; i < ticks_to_run; i++) { /* * Calculate our delay, if there are no extra ticks there * was not any */ hpts->p_delayed_by = (ticks_to_run - (i + 1)) * HPTS_TICKS_PER_USEC; HPTS_MTX_ASSERT(hpts); while ((inp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_cur_slot])) != NULL) { /* For debugging */ if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_HPTSI, ticks_to_run, i); } hpts->p_inp = inp; paced_cnt++; if (hpts->p_cur_slot != inp->inp_hptsslot) { panic("Hpts:%p inp:%p slot mis-aligned %u vs %u", hpts, inp, hpts->p_cur_slot, inp->inp_hptsslot); } /* Now pull it */ if (inp->inp_hpts_cpu_set == 0) { set_cpu = 1; } else { set_cpu = 0; } hpts_sane_pace_remove(hpts, inp, &hpts->p_hptss[hpts->p_cur_slot], 0); if ((ninp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_cur_slot])) != NULL) { /* We prefetch the next inp if possible */ kern_prefetch(ninp, &prefetch_ninp); prefetch_ninp = 1; } if 
(inp->inp_hpts_request) { /* * This guy is deferred out further in time * then our wheel had on it. Push him back * on the wheel. */ int32_t remaining_slots; remaining_slots = ticks_to_run - (i + 1); if (inp->inp_hpts_request > remaining_slots) { /* * Keep INVARIANTS happy by clearing * the flag */ tcp_hpts_insert_locked(hpts, inp, inp->inp_hpts_request, cts, __LINE__, NULL, 1); hpts->p_inp = NULL; continue; } inp->inp_hpts_request = 0; } /* * We clear the hpts flag here after dealing with * remaining slots. This way anyone looking with the * TCB lock will see its on the hpts until just * before we unlock. */ inp->inp_in_hpts = 0; mtx_unlock(&hpts->p_mtx); INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) { mtx_lock(&hpts->p_mtx); if (logging_on) tcp_hpts_log_it(hpts, hpts->p_inp, HPTSLOG_INP_DONE, 0, 1); hpts->p_inp = NULL; continue; } if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { out_now: #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx)) { panic("Hpts:%p owns mtx prior-to lock line:%d", hpts, __LINE__); } #endif INP_WUNLOCK(inp); mtx_lock(&hpts->p_mtx); if (logging_on) tcp_hpts_log_it(hpts, hpts->p_inp, HPTSLOG_INP_DONE, 0, 3); hpts->p_inp = NULL; continue; } tp = intotcpcb(inp); if ((tp == NULL) || (tp->t_inpcb == NULL)) { goto out_now; } if (set_cpu) { /* * Setup so the next time we will move to * the right CPU. This should be a rare * event. It will sometimes happens when we * are the client side (usually not the * server). Somehow tcp_output() gets called * before the tcp_do_segment() sets the * intial state. This means the r_cpu and * r_hpts_cpu is 0. We get on the hpts, and * then tcp_input() gets called setting up * the r_cpu to the correct value. The hpts * goes off and sees the mis-match. We * simply correct it here and the CPU will * switch to the new hpts nextime the tcb * gets added to the the hpts (not this one) * :-) */ tcp_set_hpts(inp); } if (out_newts_every_tcb) { struct timeval sv; if (out_ts_percision) microuptime(&sv); else getmicrouptime(&sv); cts = tcp_tv_to_usectick(&sv); } CURVNET_SET(tp->t_vnet); /* * There is a hole here, we get the refcnt on the * inp so it will still be preserved but to make * sure we can get the INP we need to hold the p_mtx * above while we pull out the tp/inp, as long as * fini gets the lock first we are assured of having * a sane INP we can lock and test. */ #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx)) { panic("Hpts:%p owns mtx before tcp-output:%d", hpts, __LINE__); } #endif if (tp->t_fb_ptr != NULL) { kern_prefetch(tp->t_fb_ptr, &did_prefetch); did_prefetch = 1; } inp->inp_hpts_calls = 1; if (tp->t_fb->tfb_tcp_output_wtime != NULL) { error = (*tp->t_fb->tfb_tcp_output_wtime) (tp, &tv); } else { error = tp->t_fb->tfb_tcp_output(tp); } if (ninp && ninp->inp_ppcb) { /* * If we have a nxt inp, see if we can * prefetch its ppcb. Note this may seem * "risky" since we have no locks (other * than the previous inp) and there no * assurance that ninp was not pulled while * we were processing inp and freed. If this * occured it could mean that either: * * a) Its NULL (which is fine we won't go * here) b) Its valid (which is cool we * will prefetch it) c) The inp got * freed back to the slab which was * reallocated. Then the piece of memory was * re-used and something else (not an * address) is in inp_ppcb. If that occurs * we don't crash, but take a TLB shootdown * performance hit (same as if it was NULL * and we tried to pre-fetch it). * * Considering that the likelyhood of is * quite rare we will take a risk on doing * this. 
If performance drops after testing * we can always take this out. NB: the * kern_prefetch on amd64 actually has * protection against a bad address now via * the DMAP_() tests. This will prevent the * TLB hit, and instead if occurs just * cause us to load cache with a useless * address (to us). */ kern_prefetch(ninp->inp_ppcb, &prefetch_tp); prefetch_tp = 1; } INP_WUNLOCK(inp); INP_UNLOCK_ASSERT(inp); CURVNET_RESTORE(); #ifdef INVARIANTS if (mtx_owned(&hpts->p_mtx)) { panic("Hpts:%p owns mtx prior-to lock line:%d", hpts, __LINE__); } #endif mtx_lock(&hpts->p_mtx); if (logging_on) tcp_hpts_log_it(hpts, hpts->p_inp, HPTSLOG_INP_DONE, 0, 4); hpts->p_inp = NULL; } HPTS_MTX_ASSERT(hpts); hpts->p_inp = NULL; hpts->p_cur_slot++; if (hpts->p_cur_slot >= NUM_OF_HPTSI_SLOTS) { hpts->p_cur_slot = 0; } } no_one: HPTS_MTX_ASSERT(hpts); hpts->p_prevtick = hpts->p_curtick; hpts->p_delayed_by = 0; /* * Check to see if we took an excess amount of time and need to run * more ticks (if we did not hit eno-bufs). */ /* Re-run any input that may be there */ (void)tcp_gethptstick(&tv); if (!TAILQ_EMPTY(&hpts->p_input)) { tcp_input_data(hpts, &tv); } #ifdef INVARIANTS if (TAILQ_EMPTY(&hpts->p_input) && (hpts->p_on_inqueue_cnt != 0)) { panic("tp:%p in_hpts input empty but cnt:%d", hpts, hpts->p_on_inqueue_cnt); } #endif tick_now = tcp_gethptstick(&tv); if (SEQ_GT(tick_now, hpts->p_prevtick)) { struct timeval res; /* Did we really spend a full tick or more in here? */ timersub(&tv, ctick, &res); if (res.tv_sec || (res.tv_usec >= HPTS_TICKS_PER_USEC)) { counter_u64_add(hpts_loops, 1); if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_TOLONG, (uint32_t) res.tv_usec, tick_now); } *ctick = res; hpts->p_curtick = tick_now; goto again; } } no_run: { uint32_t t = 0, i, fnd = 0; if (hpts->p_on_queue_cnt) { /* * Find next slot that is occupied and use that to * be the sleep time. 
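 * A worked trace (sketch): with p_nxt_slot == 10 and the first
 * occupied slot at 13, the loop below checks slots 10, 11 and 12,
 * finds slot 13 non-empty on its fourth pass and sets
 * p_hpts_sleep_time to 4 ticks; if nothing is found at all we fall
 * back to the all-but-2-slots maximum below.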
*/ for (i = 1, t = hpts->p_nxt_slot; i < NUM_OF_HPTSI_SLOTS; i++) { if (TAILQ_EMPTY(&hpts->p_hptss[t]) == 0) { fnd = 1; break; } t = (t + 1) % NUM_OF_HPTSI_SLOTS; } if (fnd) { hpts->p_hpts_sleep_time = i; } else { counter_u64_add(back_tosleep, 1); #ifdef INVARIANTS panic("Hpts:%p cnt:%d but non found", hpts, hpts->p_on_queue_cnt); #endif hpts->p_on_queue_cnt = 0; goto non_found; } t++; } else { /* No one on the wheel sleep for all but 2 slots */ non_found: if (hpts_sleep_max == 0) hpts_sleep_max = 1; hpts->p_hpts_sleep_time = min((NUM_OF_HPTSI_SLOTS - 2), hpts_sleep_max); t = 0; } if (logging_on) { tcp_hpts_log_it(hpts, inp, HPTSLOG_SLEEPSET, t, (hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC)); } } } void __tcp_set_hpts(struct inpcb *inp, int32_t line) { struct tcp_hpts_entry *hpts; INP_WLOCK_ASSERT(inp); hpts = tcp_hpts_lock(inp); if ((inp->inp_in_hpts == 0) && (inp->inp_hpts_cpu_set == 0)) { inp->inp_hpts_cpu = hpts_cpuid(inp); inp->inp_hpts_cpu_set = 1; } mtx_unlock(&hpts->p_mtx); hpts = tcp_input_lock(inp); if ((inp->inp_input_cpu_set == 0) && (inp->inp_in_input == 0)) { inp->inp_input_cpu = hpts_cpuid(inp); inp->inp_input_cpu_set = 1; } mtx_unlock(&hpts->p_mtx); } uint16_t tcp_hpts_delayedby(struct inpcb *inp){ return (tcp_pace.rp_ent[inp->inp_hpts_cpu]->p_delayed_by); } static void tcp_hpts_thread(void *ctx) { struct tcp_hpts_entry *hpts; struct timeval tv; sbintime_t sb; hpts = (struct tcp_hpts_entry *)ctx; mtx_lock(&hpts->p_mtx); if (hpts->p_direct_wake) { /* Signaled by input */ if (logging_on) tcp_hpts_log_it(hpts, NULL, HPTSLOG_AWAKE, 1, 1); callout_stop(&hpts->co); } else { /* Timed out */ if (callout_pending(&hpts->co) || !callout_active(&hpts->co)) { if (logging_on) tcp_hpts_log_it(hpts, NULL, HPTSLOG_AWAKE, 2, 2); mtx_unlock(&hpts->p_mtx); return; } callout_deactivate(&hpts->co); if (logging_on) tcp_hpts_log_it(hpts, NULL, HPTSLOG_AWAKE, 3, 3); } hpts->p_hpts_active = 1; (void)tcp_gethptstick(&tv); tcp_hptsi(hpts, &tv); HPTS_MTX_ASSERT(hpts); tv.tv_sec = 0; tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC; if (tcp_min_hptsi_time && (tv.tv_usec < tcp_min_hptsi_time)) { tv.tv_usec = tcp_min_hptsi_time; hpts->p_on_min_sleep = 1; } else { /* Clear the min sleep flag */ hpts->p_on_min_sleep = 0; } hpts->p_hpts_active = 0; sb = tvtosbt(tv); if (tcp_hpts_callout_skip_swi == 0) { callout_reset_sbt_on(&hpts->co, sb, 0, hpts_timeout_swi, hpts, hpts->p_cpu, (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); } else { callout_reset_sbt_on(&hpts->co, sb, 0, hpts_timeout_dir, hpts, hpts->p_cpu, C_PREL(tcp_hpts_precision)); } hpts->p_direct_wake = 0; mtx_unlock(&hpts->p_mtx); } #undef timersub static void tcp_init_hptsi(void *st) { int32_t i, j, error, bound = 0, created = 0; size_t sz, asz; struct timeval tv; sbintime_t sb; struct tcp_hpts_entry *hpts; char unit[16]; uint32_t ncpus = mp_ncpus ? mp_ncpus : MAXCPU; tcp_pace.rp_proc = NULL; tcp_pace.rp_num_hptss = ncpus; hpts_loops = counter_u64_alloc(M_WAITOK); back_tosleep = counter_u64_alloc(M_WAITOK); sz = (tcp_pace.rp_num_hptss * sizeof(struct tcp_hpts_entry *)); tcp_pace.rp_ent = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO); asz = sizeof(struct hptsh) * NUM_OF_HPTSI_SLOTS; for (i = 0; i < tcp_pace.rp_num_hptss; i++) { tcp_pace.rp_ent[i] = malloc(sizeof(struct tcp_hpts_entry), M_TCPHPTS, M_WAITOK | M_ZERO); tcp_pace.rp_ent[i]->p_hptss = malloc(asz, M_TCPHPTS, M_WAITOK); hpts = tcp_pace.rp_ent[i]; /* * Init all the hpts structures that are not specifically * zero'd by the allocations. 
Also let's attach them to the * appropriate sysctl block as well. */ mtx_init(&hpts->p_mtx, "tcp_hpts_lck", "hpts", MTX_DEF | MTX_DUPOK); TAILQ_INIT(&hpts->p_input); for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) { TAILQ_INIT(&hpts->p_hptss[j]); } sysctl_ctx_init(&hpts->hpts_ctx); sprintf(unit, "%d", i); hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx, SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts), OID_AUTO, unit, CTLFLAG_RW, 0, ""); SYSCTL_ADD_INT(&hpts->hpts_ctx, SYSCTL_CHILDREN(hpts->hpts_root), OID_AUTO, "in_qcnt", CTLFLAG_RD, &hpts->p_on_inqueue_cnt, 0, "Count TCB's awaiting input processing"); SYSCTL_ADD_INT(&hpts->hpts_ctx, SYSCTL_CHILDREN(hpts->hpts_root), OID_AUTO, "out_qcnt", CTLFLAG_RD, &hpts->p_on_queue_cnt, 0, "Count TCB's awaiting output processing"); SYSCTL_ADD_UINT(&hpts->hpts_ctx, SYSCTL_CHILDREN(hpts->hpts_root), OID_AUTO, "active", CTLFLAG_RD, &hpts->p_hpts_active, 0, "Is the hpts active"); SYSCTL_ADD_UINT(&hpts->hpts_ctx, SYSCTL_CHILDREN(hpts->hpts_root), OID_AUTO, "curslot", CTLFLAG_RD, &hpts->p_cur_slot, 0, "What the current slot is if active"); SYSCTL_ADD_UINT(&hpts->hpts_ctx, SYSCTL_CHILDREN(hpts->hpts_root), OID_AUTO, "curtick", CTLFLAG_RD, &hpts->p_curtick, 0, "What the current tick is if active"); SYSCTL_ADD_UINT(&hpts->hpts_ctx, SYSCTL_CHILDREN(hpts->hpts_root), OID_AUTO, "logsize", CTLFLAG_RD, &hpts->p_logsize, 0, "Hpts logging buffer size"); hpts->p_hpts_sleep_time = NUM_OF_HPTSI_SLOTS - 2; hpts->p_num = i; hpts->p_prevtick = hpts->p_curtick = tcp_gethptstick(&tv); hpts->p_prevtick -= 1; hpts->p_prevtick %= NUM_OF_HPTSI_SLOTS; hpts->p_cpu = 0xffff; hpts->p_nxt_slot = 1; hpts->p_logsize = tcp_hpts_logging_size; if (hpts->p_logsize) { sz = (sizeof(struct hpts_log) * hpts->p_logsize); hpts->p_log = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO); } callout_init(&hpts->co, 1); } /* * Now let's start ithreads to handle the hptss. */ CPU_FOREACH(i) { hpts = tcp_pace.rp_ent[i]; hpts->p_cpu = i; error = swi_add(&hpts->ie, "hpts", tcp_hpts_thread, (void *)hpts, SWI_NET, INTR_MPSAFE, &hpts->ie_cookie); if (error) { panic("Can't add hpts:%p i:%d err:%d", hpts, i, error); } created++; if (tcp_bind_threads) { if (intr_event_bind(hpts->ie, i) == 0) bound++; } tv.tv_sec = 0; tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC; sb = tvtosbt(tv); if (tcp_hpts_callout_skip_swi == 0) { callout_reset_sbt_on(&hpts->co, sb, 0, hpts_timeout_swi, hpts, hpts->p_cpu, (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); } else { callout_reset_sbt_on(&hpts->co, sb, 0, hpts_timeout_dir, hpts, hpts->p_cpu, C_PREL(tcp_hpts_precision)); } } printf("TCP Hpts created %d swi interrupt threads and bound %d\n", created, bound); return; } SYSINIT(tcphptsi, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, tcp_init_hptsi, NULL); +MODULE_VERSION(tcphpts, 1); Index: projects/pnfs-planb-server/sys/netinet/tcp_stacks/rack.c =================================================================== --- projects/pnfs-planb-server/sys/netinet/tcp_stacks/rack.c (revision 334966) +++ projects/pnfs-planb-server/sys/netinet/tcp_stacks/rack.c (revision 334967) @@ -1,9164 +1,9161 @@ /*- * Copyright (c) 2016-2018 * Netflix Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #ifdef TCP_HHOOK #include #endif #include #include #include #include #include #include /* for proc0 declaration */ #include #include #include #include #ifdef NETFLIX_STATS #include #endif #include #include #include #include #include #include #include #include #define TCPSTATES /* for logging */ #include #include #include #include #include /* required for icmp_var.h */ #include /* for ICMP_BANDLIM */ #include #include #include #include #define TCPOUTFLAGS #include #include #include #include #include #include #include #include #include #ifdef NETFLIX_CWV #include #endif #include #ifdef TCPDEBUG #include #endif /* TCPDEBUG */ #ifdef TCP_OFFLOAD #include #endif #ifdef INET6 #include #endif #include #if defined(IPSEC) || defined(IPSEC_SUPPORT) #include #include #endif /* IPSEC */ #include #include #include #ifdef MAC #include #endif #include "sack_filter.h" #include "tcp_rack.h" #include "rack_bbr_common.h" uma_zone_t rack_zone; uma_zone_t rack_pcb_zone; #ifndef TICKS2SBT #define TICKS2SBT(__t) (tick_sbt * ((sbintime_t)(__t))) #endif struct sysctl_ctx_list rack_sysctl_ctx; struct sysctl_oid *rack_sysctl_root; -#ifndef TCPHPTS -fatal error missing option TCPHSTS in the build; -#endif - #define CUM_ACKED 1 #define SACKED 2 /* * The RACK module incorporates a number of * TCP ideas that have been put out into the IETF * over the last few years: * - Matt Mathis's Rate Halving which slowly drops * the congestion window so that the ack clock can * be maintained during a recovery. * - Yuchung Cheng's RACK TCP (for which its named) that * will stop us using the number of dup acks and instead * use time as the gage of when we retransmit. * - Reorder Detection of RFC4737 and the Tail-Loss probe draft * of Dukkipati et.al. * RACK depends on SACK, so if an endpoint arrives that * cannot do SACK the state machine below will shuttle the * connection back to using the "default" TCP stack that is * in FreeBSD. * * To implement RACK the original TCP stack was first decomposed * into a functional state machine with individual states * for each of the possible TCP connection states. The do_segement * functions role in life is to mandate the connection supports SACK * initially and then assure that the RACK state matches the conenction * state before calling the states do_segment function. 
Each * state is simplified due to the fact that the original do_segment * has been decomposed and we *know* what state we are in (no * switches on the state) and all tests for SACK are gone. This * greatly simplifies what each state does. * * TCP output is also over-written with a new version since it * must maintain the new rack scoreboard. * */ static int32_t rack_precache = 1; static int32_t rack_tlp_thresh = 1; static int32_t rack_reorder_thresh = 2; static int32_t rack_reorder_fade = 60000; /* 0 - never fade, def 60,000 * - 60 seconds */ static int32_t rack_pkt_delay = 1; static int32_t rack_inc_var = 0;/* For TLP */ static int32_t rack_reduce_largest_on_idle = 0; static int32_t rack_min_pace_time = 0; static int32_t rack_min_pace_time_seg_req=6; static int32_t rack_early_recovery = 1; static int32_t rack_early_recovery_max_seg = 6; static int32_t rack_send_a_lot_in_prr = 1; static int32_t rack_min_to = 1; /* Number of ms minimum timeout */ static int32_t rack_tlp_in_recovery = 1; /* Can we do TLP in recovery? */ static int32_t rack_verbose_logging = 0; static int32_t rack_ignore_data_after_close = 1; /* * Currently regular tcp has a rto_min of 30ms * the backoff goes 12 times so that ends up * being a total of 122.850 seconds before a * connection is killed. */ static int32_t rack_tlp_min = 10; static int32_t rack_rto_min = 30; /* 30ms same as main freebsd */ static int32_t rack_rto_max = 30000; /* 30 seconds */ static const int32_t rack_free_cache = 2; static int32_t rack_hptsi_segments = 40; static int32_t rack_rate_sample_method = USE_RTT_LOW; static int32_t rack_pace_every_seg = 1; static int32_t rack_delayed_ack_time = 200; /* 200ms */ static int32_t rack_slot_reduction = 4; static int32_t rack_lower_cwnd_at_tlp = 0; static int32_t rack_use_proportional_reduce = 0; static int32_t rack_proportional_rate = 10; static int32_t rack_tlp_max_resend = 2; static int32_t rack_limited_retran = 0; static int32_t rack_always_send_oldest = 0; static int32_t rack_sack_block_limit = 128; static int32_t rack_use_sack_filter = 1; static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE; /* Rack specific counters */ counter_u64_t rack_badfr; counter_u64_t rack_badfr_bytes; counter_u64_t rack_rtm_prr_retran; counter_u64_t rack_rtm_prr_newdata; counter_u64_t rack_timestamp_mismatch; counter_u64_t rack_reorder_seen; counter_u64_t rack_paced_segments; counter_u64_t rack_unpaced_segments; counter_u64_t rack_saw_enobuf; counter_u64_t rack_saw_enetunreach; /* Tail loss probe counters */ counter_u64_t rack_tlp_tot; counter_u64_t rack_tlp_newdata; counter_u64_t rack_tlp_retran; counter_u64_t rack_tlp_retran_bytes; counter_u64_t rack_tlp_retran_fail; counter_u64_t rack_to_tot; counter_u64_t rack_to_arm_rack; counter_u64_t rack_to_arm_tlp; counter_u64_t rack_to_alloc; counter_u64_t rack_to_alloc_hard; counter_u64_t rack_to_alloc_emerg; counter_u64_t rack_sack_proc_all; counter_u64_t rack_sack_proc_short; counter_u64_t rack_sack_proc_restart; counter_u64_t rack_runt_sacks; counter_u64_t rack_used_tlpmethod; counter_u64_t rack_used_tlpmethod2; counter_u64_t rack_enter_tlp_calc; counter_u64_t rack_input_idle_reduces; counter_u64_t rack_tlp_does_nada; /* Temp CPU counters */ counter_u64_t rack_find_high; counter_u64_t rack_progress_drops; counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE]; counter_u64_t rack_opts_arry[RACK_OPTS_SIZE]; static void rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, int event, int line); static int rack_process_ack(struct mbuf *m, struct tcphdr *th, struct 
socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t * ti_locked, uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val); static int rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static void rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery); static struct rack_sendmap *rack_alloc(struct tcp_rack *rack); static struct rack_sendmap * rack_check_recovery_mode(struct tcpcb *tp, uint32_t tsused); static void rack_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type); static void rack_counter_destroy(void); static int rack_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp); static int32_t rack_ctor(void *mem, int32_t size, void *arg, int32_t how); static void rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, int32_t ti_locked); static void rack_dtor(void *mem, int32_t size, void *arg); static void rack_earlier_retran(struct tcpcb *tp, struct rack_sendmap *rsm, uint32_t t, uint32_t cts); static struct rack_sendmap * rack_find_high_nonack(struct tcp_rack *rack, struct rack_sendmap *rsm); static struct rack_sendmap *rack_find_lowest_rsm(struct tcp_rack *rack); static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm); static void rack_fini(struct tcpcb *tp, int32_t tcb_is_purged); static int rack_get_sockopt(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack); static int32_t rack_handoff_ok(struct tcpcb *tp); static int32_t rack_init(struct tcpcb *tp); static void rack_init_sysctls(void); static void rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th); static void rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, uint32_t seq_out, uint8_t th_flags, int32_t err, uint32_t ts, uint8_t pass, struct rack_sendmap *hintrsm); static void rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm); static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num); static int32_t rack_output(struct tcpcb *tp); static void rack_hpts_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, int32_t ti_locked, int32_t nxt_pkt, struct timeval *tv); static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts); static void rack_post_recovery(struct tcpcb *tp, struct tcphdr *th); static void rack_remxt_tmr(struct tcpcb *tp); static int rack_set_sockopt(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack); static void rack_set_state(struct tcpcb *tp, struct tcp_rack *rack); static int32_t rack_stopall(struct tcpcb *tp); static void rack_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta); static int32_t rack_timer_active(struct tcpcb *tp, uint32_t timer_type); static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line); static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type); static uint32_t rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t ts, int32_t * lenp); static void 
rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t ts); static int rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type); static int32_t tcp_addrack(module_t mod, int32_t type, void *data); static void rack_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ti_locked, int32_t * ret_val); static int rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static void rack_do_drop(struct mbuf *m, struct tcpcb *tp, int32_t * ti_locked); static void rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t * ti_locked, int32_t thflags, int32_t tlen, int32_t * ret_val); static void rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t * ti_locked, int32_t rstreason, int32_t tlen); static int rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t nxt_pkt); static int rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt); static int rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * ti_locked, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val); static int rack_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t * ti_locked); struct rack_sendmap * tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tsused); static void tcp_rack_xmit_timer(struct tcp_rack *rack, int32_t rtt); static void tcp_rack_partialack(struct tcpcb *tp, struct tcphdr *th); static int rack_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ti_locked, int32_t tlen, int32_t thflags, int32_t * ret_val); 
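/*
 * Writing a 1 to the "clear" sysctl registered at the bottom of
 * rack_init_sysctls() zeroes every RACK counter.  A usage sketch
 * (the OID path assumes rack_sysctl_root lives under
 * net.inet.tcp.rack, which is not shown in this file):
 *
 *	# sysctl net.inet.tcp.rack.clear=1
 *
 * The handler below implements the usual read-then-write protocol:
 * SYSCTL_OUT() reports the current value, and only a newly written
 * value of 1 triggers the counter_u64_zero() calls.
 */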
int32_t rack_clear_counter=0; static int sysctl_rack_clear(SYSCTL_HANDLER_ARGS) { uint32_t stat; int32_t error; error = SYSCTL_OUT(req, &rack_clear_counter, sizeof(uint32_t)); if (error || req->newptr == NULL) return error; error = SYSCTL_IN(req, &stat, sizeof(uint32_t)); if (error) return (error); if (stat == 1) { #ifdef INVARIANTS printf("Clearing RACK counters\n"); #endif counter_u64_zero(rack_badfr); counter_u64_zero(rack_badfr_bytes); counter_u64_zero(rack_rtm_prr_retran); counter_u64_zero(rack_rtm_prr_newdata); counter_u64_zero(rack_timestamp_mismatch); counter_u64_zero(rack_reorder_seen); counter_u64_zero(rack_tlp_tot); counter_u64_zero(rack_tlp_newdata); counter_u64_zero(rack_tlp_retran); counter_u64_zero(rack_tlp_retran_bytes); counter_u64_zero(rack_tlp_retran_fail); counter_u64_zero(rack_to_tot); counter_u64_zero(rack_to_arm_rack); counter_u64_zero(rack_to_arm_tlp); counter_u64_zero(rack_paced_segments); counter_u64_zero(rack_unpaced_segments); counter_u64_zero(rack_saw_enobuf); counter_u64_zero(rack_saw_enetunreach); counter_u64_zero(rack_to_alloc_hard); counter_u64_zero(rack_to_alloc_emerg); counter_u64_zero(rack_sack_proc_all); counter_u64_zero(rack_sack_proc_short); counter_u64_zero(rack_sack_proc_restart); counter_u64_zero(rack_to_alloc); counter_u64_zero(rack_find_high); counter_u64_zero(rack_runt_sacks); counter_u64_zero(rack_used_tlpmethod); counter_u64_zero(rack_used_tlpmethod2); counter_u64_zero(rack_enter_tlp_calc); counter_u64_zero(rack_progress_drops); counter_u64_zero(rack_tlp_does_nada); } rack_clear_counter = 0; return (0); } static void rack_init_sysctls() { SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rate_sample_method", CTLFLAG_RW, &rack_rate_sample_method , USE_RTT_LOW, "What method should we use for rate sampling 0=high, 1=low "); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "data_after_close", CTLFLAG_RW, &rack_ignore_data_after_close, 0, "Do we hold off sending a RST until all pending data is ack'd"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlpmethod", CTLFLAG_RW, &rack_tlp_threshold_use, TLP_USE_TWO_ONE, "What method do we do for TLP time calc 0=no-de-ack-comp, 1=ID, 2=2.1, 3=2.2"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "min_pace_time", CTLFLAG_RW, &rack_min_pace_time, 0, "Should we enforce a minimum pace time of 1ms"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "min_pace_segs", CTLFLAG_RW, &rack_min_pace_time_seg_req, 6, "How many segments have to be in the len to enforce min-pace-time"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "idle_reduce_high", CTLFLAG_RW, &rack_reduce_largest_on_idle, 0, "Should we reduce the largest cwnd seen to IW on idle reduction"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "bb_verbose", CTLFLAG_RW, &rack_verbose_logging, 0, "Should RACK black box logging be verbose"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sackfiltering", CTLFLAG_RW, &rack_use_sack_filter, 1, "Do we use sack filtering?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "delayed_ack", CTLFLAG_RW, &rack_delayed_ack_time, 200, "Delayed ack time (200ms)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlpminto", CTLFLAG_RW, &rack_tlp_min, 10, "TLP minimum timeout per the specification (10ms)"); 
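/*
 * Note on the SYSCTL_ADD_S32() calls in this list: when a variable
 * pointer is supplied (as it is for every knob here), the constant
 * value argument is ignored by sysctl_handle_32() and serves only as
 * documentation, so the static initializers above are what actually
 * set the defaults.  This is our reading of the sysctl(9) contract,
 * not something asserted elsewhere in this file.
 */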
SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "precache", CTLFLAG_RW, &rack_precache, 0, "Where should we precache the mcopy (0 is not at all)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sblklimit", CTLFLAG_RW, &rack_sack_block_limit, 128, "When do we start paying attention to small sack blocks"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "send_oldest", CTLFLAG_RW, &rack_always_send_oldest, 1, "Should we always send the oldest TLP and RACK-TLP"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rack_tlp_in_recovery", CTLFLAG_RW, &rack_tlp_in_recovery, 1, "Can we do a TLP during recovery?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rack_tlimit", CTLFLAG_RW, &rack_limited_retran, 0, "How many times can a rack timeout drive out sends"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "minrto", CTLFLAG_RW, &rack_rto_min, 0, "Minimum RTO in ms -- set with caution below 1000 due to TLP"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "maxrto", CTLFLAG_RW, &rack_rto_max, 0, "Maximum RTO in ms -- should be at least as large as min_rto"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_retry", CTLFLAG_RW, &rack_tlp_max_resend, 2, "How many times does TLP retry a single segment or multiple with no ACK"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "recovery_loss_prop", CTLFLAG_RW, &rack_use_proportional_reduce, 0, "Should we proportionally reduce cwnd based on the number of losses "); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "recovery_prop", CTLFLAG_RW, &rack_proportional_rate, 10, "What percent reduction per loss"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_cwnd_flag", CTLFLAG_RW, &rack_lower_cwnd_at_tlp, 0, "When a TLP completes a retran should we enter recovery?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hptsi_reduces", CTLFLAG_RW, &rack_slot_reduction, 4, "When setting a slot should we reduce by divisor"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hptsi_every_seg", CTLFLAG_RW, &rack_pace_every_seg, 1, "Should we pace out every segment hptsi"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hptsi_seg_max", CTLFLAG_RW, &rack_hptsi_segments, 6, "Should we pace out only a limited size of segments"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "prr_sendalot", CTLFLAG_RW, &rack_send_a_lot_in_prr, 1, "Send a lot in prr"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "minto", CTLFLAG_RW, &rack_min_to, 1, "Minimum rack timeout in milliseconds"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "earlyrecoveryseg", CTLFLAG_RW, &rack_early_recovery_max_seg, 6, "Max segments in early recovery"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "earlyrecovery", CTLFLAG_RW, &rack_early_recovery, 1, "Do we do early recovery with rack"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "reorder_thresh", CTLFLAG_RW, &rack_reorder_thresh, 2, "What factor for rack will be added when seeing reordering (shift right)"); SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rtt_tlp_thresh", CTLFLAG_RW, &rack_tlp_thresh, 1, "what divisor for TLP rtt/retran will be added (1=rtt, 2=1/2 rtt etc)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "reorder_fade", CTLFLAG_RW, &rack_reorder_fade, 0, "Does reorder detection fade, if so how many ms (0 means never)"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "pktdelay", CTLFLAG_RW, &rack_pkt_delay, 1, "Extra RACK time (in ms) besides reordering thresh"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "inc_var", CTLFLAG_RW, &rack_inc_var, 0, "Should rack add to the TLP timer the variance in rtt calculation"); rack_badfr = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "badfr", CTLFLAG_RD, &rack_badfr, "Total number of bad FRs"); rack_badfr_bytes = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "badfr_bytes", CTLFLAG_RD, &rack_badfr_bytes, "Total number of bad FRs"); rack_rtm_prr_retran = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "prrsndret", CTLFLAG_RD, &rack_rtm_prr_retran, "Total number of prr based retransmits"); rack_rtm_prr_newdata = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "prrsndnew", CTLFLAG_RD, &rack_rtm_prr_newdata, "Total number of prr based new transmits"); rack_timestamp_mismatch = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tsnf", CTLFLAG_RD, &rack_timestamp_mismatch, "Total number of timestamps that we could not find the reported ts"); rack_find_high = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "findhigh", CTLFLAG_RD, &rack_find_high, "Total number of FIN causing find-high"); rack_reorder_seen = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "reordering", CTLFLAG_RD, &rack_reorder_seen, "Total number of times we added delay due to reordering"); rack_tlp_tot = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_to_total", CTLFLAG_RD, &rack_tlp_tot, "Total number of tail loss probe expirations"); rack_tlp_newdata = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_new", CTLFLAG_RD, &rack_tlp_newdata, "Total number of tail loss probe sending new data"); rack_tlp_retran = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_retran", CTLFLAG_RD, &rack_tlp_retran, "Total number of tail loss probe sending retransmitted data"); rack_tlp_retran_bytes = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_retran_bytes", CTLFLAG_RD, &rack_tlp_retran_bytes, "Total bytes of tail loss probe sending retransmitted data"); rack_tlp_retran_fail = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_retran_fail", CTLFLAG_RD, &rack_tlp_retran_fail, "Total number of tail loss probe sending retransmitted data that failed (wait for t3)"); rack_to_tot = 
counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "rack_to_tot", CTLFLAG_RD, &rack_to_tot, "Total number of times the rack timeout expired"); rack_to_arm_rack = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "arm_rack", CTLFLAG_RD, &rack_to_arm_rack, "Total number of times the rack timer was armed"); rack_to_arm_tlp = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "arm_tlp", CTLFLAG_RD, &rack_to_arm_tlp, "Total number of times the tlp timer was armed"); rack_paced_segments = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "paced", CTLFLAG_RD, &rack_paced_segments, "Total number of times a segment send caused hptsi"); rack_unpaced_segments = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "unpaced", CTLFLAG_RD, &rack_unpaced_segments, "Total number of times a segment did not cause hptsi"); rack_saw_enobuf = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "saw_enobufs", CTLFLAG_RD, &rack_saw_enobuf, "Total number of times we saw an ENOBUF"); rack_saw_enetunreach = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "saw_enetunreach", CTLFLAG_RD, &rack_saw_enetunreach, "Total number of times we saw an ENETUNREACH"); rack_to_alloc = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "allocs", CTLFLAG_RD, &rack_to_alloc, "Total allocations of tracking structures"); rack_to_alloc_hard = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "allochard", CTLFLAG_RD, &rack_to_alloc_hard, "Total allocations done with sleeping the hard way"); rack_to_alloc_emerg = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "allocemerg", CTLFLAG_RD, &rack_to_alloc_emerg, "Total allocations done from emergency cache"); rack_sack_proc_all = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sack_long", CTLFLAG_RD, &rack_sack_proc_all, "Total times we had to walk whole list for sack processing"); rack_sack_proc_restart = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sack_restart", CTLFLAG_RD, &rack_sack_proc_restart, "Total times we had to walk whole list due to a restart"); rack_sack_proc_short = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "sack_short", CTLFLAG_RD, &rack_sack_proc_short, "Total times we took shortcut for sack processing"); rack_enter_tlp_calc = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_calc_entered", CTLFLAG_RD, &rack_enter_tlp_calc, "Total times we called calc-tlp"); rack_used_tlpmethod = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hit_tlp_method", CTLFLAG_RD, &rack_used_tlpmethod, "Total number of times we hit TLP method 1"); rack_used_tlpmethod2 = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "hit_tlp_method2", CTLFLAG_RD, &rack_used_tlpmethod2, "Total number of times we hit TLP method 2"); rack_runt_sacks = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "runtsacks", CTLFLAG_RD, &rack_runt_sacks, "Total number of runt sacks"); rack_progress_drops = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "prog_drops", CTLFLAG_RD, &rack_progress_drops, "Total number of progress drops"); rack_input_idle_reduces = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "idle_reduce_oninput", CTLFLAG_RD, &rack_input_idle_reduces, "Total number of idle reductions on input"); rack_tlp_does_nada = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "tlp_nada", CTLFLAG_RD, &rack_tlp_does_nada, "Total number of nada tlp calls"); COUNTER_ARRAY_ALLOC(rack_out_size, TCP_MSS_ACCT_SIZE, M_WAITOK); SYSCTL_ADD_COUNTER_U64_ARRAY(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "outsize", CTLFLAG_RD, rack_out_size, TCP_MSS_ACCT_SIZE, "MSS send sizes"); COUNTER_ARRAY_ALLOC(rack_opts_arry, RACK_OPTS_SIZE, M_WAITOK); SYSCTL_ADD_COUNTER_U64_ARRAY(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "opts", CTLFLAG_RD, rack_opts_arry, RACK_OPTS_SIZE, "RACK Option Stats"); SYSCTL_ADD_PROC(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, "clear", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, &rack_clear_counter, 0, sysctl_rack_clear, "IU", "Clear counters"); }
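Every statistic registered above follows the same three-step counter(9) pattern: allocate a per-CPU counter with counter_u64_alloc(), export it read-only with SYSCTL_ADD_COUNTER_U64(), and bump it on the hot path with counter_u64_add(). A minimal sketch of that pattern follows; the names example_cnt, example_register and example_event are illustrative only and are not part of this file:

#include <sys/param.h>
#include <sys/counter.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

static counter_u64_t example_cnt;

static void
example_register(struct sysctl_ctx_list *ctx, struct sysctl_oid *root)
{
	/* Allocate the per-CPU counter; M_WAITOK may sleep. */
	example_cnt = counter_u64_alloc(M_WAITOK);
	/* Export it read-only under the given sysctl root. */
	SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(root), OID_AUTO,
	    "example", CTLFLAG_RD, &example_cnt, "Example event count");
}

static inline void
example_event(void)
{
	/* Lock-free per-CPU increment, cheap enough for the hot path. */
	counter_u64_add(example_cnt, 1);
}

Registered against the rack context and root used above, such an entry would appear alongside the other rack counters under the stack's sysctl tree.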
static inline int32_t rack_progress_timeout_check(struct tcpcb *tp) { if (tp->t_maxunacktime && tp->t_acktime && TSTMP_GT(ticks, tp->t_acktime)) { if ((ticks - tp->t_acktime) >= tp->t_maxunacktime) { /* * There is an assumption that the caller * will drop the connection so we will * increment the counters here. */ struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; counter_u64_add(rack_progress_drops, 1); #ifdef NETFLIX_STATS TCPSTAT_INC(tcps_progdrops); #endif rack_log_progress_event(rack, tp, ticks, PROGRESS_DROP, __LINE__); return (1); } } return (0); } static void rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot, uint8_t which) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.flex1 = TICKS_2_MSEC(rack->rc_tp->t_srtt >> TCP_RTT_SHIFT); log.u_bbr.flex2 = to; log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex4 = slot; log.u_bbr.flex5 = rack->rc_inp->inp_hptsslot; log.u_bbr.flex6 = rack->rc_tp->t_rxtcur; log.u_bbr.flex8 = which; log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_TIMERSTAR, 0, 0, &log, false); } } static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex8 = to_num; log.u_bbr.flex1 = rack->r_ctl.rc_rack_min_rtt; log.u_bbr.flex2 = rack->rc_rack_rtt; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_RTO, 0, 0, &log, false); } } static void rack_log_rtt_upd(struct tcpcb *tp, struct tcp_rack *rack, int32_t t, uint32_t o_srtt, uint32_t o_var) { if (tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = t; log.u_bbr.flex2 = o_srtt; log.u_bbr.flex3 = o_var; log.u_bbr.flex4 = rack->r_ctl.rack_rs.rs_rtt_lowest; log.u_bbr.flex5 = rack->r_ctl.rack_rs.rs_rtt_highest; log.u_bbr.flex6 = rack->r_ctl.rack_rs.rs_rtt_cnt; log.u_bbr.rttProp = rack->r_ctl.rack_rs.rs_rtt_tot; log.u_bbr.flex8 = rack->r_ctl.rc_rate_sample_method; TCP_LOG_EVENT(tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_BBRRTT, 0, 0, &log, false); } } static void rack_log_rtt_sample(struct tcp_rack *rack, uint32_t rtt) { /* * Log the rtt sample we are * applying to the srtt algorithm in * useconds.
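 * For example, a 12 ms sample arrives here in milliseconds and is
 * logged as flex1 = 12000, since the value is scaled to microseconds
 * below.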
*/ if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; struct timeval tv; /* Convert our ms to a microsecond */ log.u_bbr.flex1 = rtt * 1000; log.u_bbr.timeStamp = tcp_get_usecs(&tv); TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, TCP_LOG_RTT, 0, 0, &log, false, &tv); } } static inline void rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, int event, int line) { if (rack_verbose_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = line; log.u_bbr.flex2 = tick; log.u_bbr.flex3 = tp->t_maxunacktime; log.u_bbr.flex4 = tp->t_acktime; log.u_bbr.flex8 = event; TCP_LOG_EVENT(tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_PROGRESS, 0, 0, &log, false); } } static void rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_t cts) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = slot; log.u_bbr.flex7 = (0x0000ffff & rack->r_ctl.rc_hpts_flags); log.u_bbr.flex8 = rack->rc_in_persist; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_BBRSND, 0, 0, &log, false); } } static void rack_log_doseg_done(struct tcp_rack *rack, uint32_t cts, int32_t nxt_pkt, int32_t did_out, int way_out) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; log.u_bbr.flex1 = did_out; log.u_bbr.flex2 = nxt_pkt; log.u_bbr.flex3 = way_out; log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex7 = rack->r_wanted_output; log.u_bbr.flex8 = rack->rc_in_persist; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_DOSEG_DONE, 0, 0, &log, false); } } static void rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t slot, uint8_t hpts_calling) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = slot; log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex7 = hpts_calling; log.u_bbr.flex8 = rack->rc_in_persist; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_JUSTRET, 0, tlen, &log, false); } } static void rack_log_to_cancel(struct tcp_rack *rack, int32_t hpts_removed, int line) { if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; log.u_bbr.flex1 = line; log.u_bbr.flex2 = 0; log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex4 = 0; log.u_bbr.flex6 = rack->rc_tp->t_rxtcur; log.u_bbr.flex8 = hpts_removed; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_TIMERCANC, 0, 0, &log, false); } } static void rack_log_to_processing(struct tcp_rack *rack, uint32_t cts, int32_t ret, int32_t timers) { if (rack->rc_tp->t_logstate 
!= TCP_LOG_STATE_OFF) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.flex1 = timers; log.u_bbr.flex2 = ret; log.u_bbr.flex3 = rack->r_ctl.rc_timer_exp; log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex5 = cts; TCP_LOG_EVENT(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, BBR_LOG_TO_PROCESS, 0, 0, &log, false); } } static void rack_counter_destroy() { counter_u64_free(rack_badfr); counter_u64_free(rack_badfr_bytes); counter_u64_free(rack_rtm_prr_retran); counter_u64_free(rack_rtm_prr_newdata); counter_u64_free(rack_timestamp_mismatch); counter_u64_free(rack_reorder_seen); counter_u64_free(rack_tlp_tot); counter_u64_free(rack_tlp_newdata); counter_u64_free(rack_tlp_retran); counter_u64_free(rack_tlp_retran_bytes); counter_u64_free(rack_tlp_retran_fail); counter_u64_free(rack_to_tot); counter_u64_free(rack_to_arm_rack); counter_u64_free(rack_to_arm_tlp); counter_u64_free(rack_paced_segments); counter_u64_free(rack_unpaced_segments); counter_u64_free(rack_saw_enobuf); counter_u64_free(rack_saw_enetunreach); counter_u64_free(rack_to_alloc_hard); counter_u64_free(rack_to_alloc_emerg); counter_u64_free(rack_sack_proc_all); counter_u64_free(rack_sack_proc_short); counter_u64_free(rack_sack_proc_restart); counter_u64_free(rack_to_alloc); counter_u64_free(rack_find_high); counter_u64_free(rack_runt_sacks); counter_u64_free(rack_enter_tlp_calc); counter_u64_free(rack_used_tlpmethod); counter_u64_free(rack_used_tlpmethod2); counter_u64_free(rack_progress_drops); counter_u64_free(rack_input_idle_reduces); counter_u64_free(rack_tlp_does_nada); COUNTER_ARRAY_FREE(rack_out_size, TCP_MSS_ACCT_SIZE); COUNTER_ARRAY_FREE(rack_opts_arry, RACK_OPTS_SIZE); } static struct rack_sendmap * rack_alloc(struct tcp_rack *rack) { struct rack_sendmap *rsm; counter_u64_add(rack_to_alloc, 1); rack->r_ctl.rc_num_maps_alloced++; rsm = uma_zalloc(rack_zone, M_NOWAIT); if (rsm) { return (rsm); } if (rack->rc_free_cnt) { counter_u64_add(rack_to_alloc_emerg, 1); rsm = TAILQ_FIRST(&rack->r_ctl.rc_free); TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_next); rack->rc_free_cnt--; return (rsm); } return (NULL); } static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm) { rack->r_ctl.rc_num_maps_alloced--; if (rack->r_ctl.rc_tlpsend == rsm) rack->r_ctl.rc_tlpsend = NULL; if (rack->r_ctl.rc_next == rsm) rack->r_ctl.rc_next = NULL; if (rack->r_ctl.rc_sacklast == rsm) rack->r_ctl.rc_sacklast = NULL; if (rack->rc_free_cnt < rack_free_cache) { memset(rsm, 0, sizeof(struct rack_sendmap)); TAILQ_INSERT_TAIL(&rack->r_ctl.rc_free, rsm, r_next); rack->rc_free_cnt++; return; } uma_zfree(rack_zone, rsm); } /* * CC wrapper hook functions */ static void rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery) { #ifdef NETFLIX_STATS int32_t gput; #endif #ifdef NETFLIX_CWV u_long old_cwnd = tp->snd_cwnd; #endif INP_WLOCK_ASSERT(tp->t_inpcb); tp->ccv->nsegs = nsegs; tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); if ((recovery) && (rack->r_ctl.rc_early_recovery_segs)) { uint32_t max; max = rack->r_ctl.rc_early_recovery_segs * tp->t_maxseg; if (tp->ccv->bytes_this_ack > max) { tp->ccv->bytes_this_ack = max; } } if (tp->snd_cwnd <= tp->snd_wnd) tp->ccv->flags |= CCF_CWND_LIMITED; else tp->ccv->flags &= ~CCF_CWND_LIMITED; if (type == CC_ACK) { #ifdef NETFLIX_STATS stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF, ((int32_t) tp->snd_cwnd) - tp->snd_wnd); if ((tp->t_flags & 
TF_GPUTINPROG) && SEQ_GEQ(th->th_ack, tp->gput_ack)) { gput = (((int64_t) (th->th_ack - tp->gput_seq)) << 3) / max(1, tcp_ts_getticks() - tp->gput_ts); stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT, gput); /* * XXXLAS: This is a temporary hack, and should be * chained off VOI_TCP_GPUT when stats(9) grows an * API to deal with chained VOIs. */ if (tp->t_stats_gput_prev > 0) stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_GPUT_ND, ((gput - tp->t_stats_gput_prev) * 100) / tp->t_stats_gput_prev); tp->t_flags &= ~TF_GPUTINPROG; tp->t_stats_gput_prev = gput; if (tp->t_maxpeakrate) { /* * We update t_peakrate_thr. This gives us roughly * one update per round trip time. */ tcp_update_peakrate_thr(tp); } } #endif if (tp->snd_cwnd > tp->snd_ssthresh) { tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, nsegs * V_tcp_abc_l_var * tp->t_maxseg); if (tp->t_bytes_acked >= tp->snd_cwnd) { tp->t_bytes_acked -= tp->snd_cwnd; tp->ccv->flags |= CCF_ABC_SENTAWND; } } else { tp->ccv->flags &= ~CCF_ABC_SENTAWND; tp->t_bytes_acked = 0; } } if (CC_ALGO(tp)->ack_received != NULL) { /* XXXLAS: Find a way to live without this */ tp->ccv->curack = th->th_ack; CC_ALGO(tp)->ack_received(tp->ccv, type); } #ifdef NETFLIX_STATS stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd); #endif if (rack->r_ctl.rc_rack_largest_cwnd < tp->snd_cwnd) { rack->r_ctl.rc_rack_largest_cwnd = tp->snd_cwnd; } #ifdef NETFLIX_CWV if (tp->cwv_enabled) { /* * Per RFC 7661: The behaviour in the non-validated phase is * specified as: o A sender determines whether to increase * the cwnd based upon whether it is cwnd-limited (see * Section 4.5.3): * A sender that is cwnd-limited MAY use * the standard TCP method to increase cwnd (i.e., the * standard method permits a TCP sender that fully utilises * the cwnd to increase the cwnd each time it receives an * ACK). * A sender that is not cwnd-limited MUST NOT * increase the cwnd when ACK packets are received in this * phase (i.e., needs to avoid growing the cwnd when it has * not recently sent using the current size of cwnd). */ if ((tp->snd_cwnd > old_cwnd) && (tp->cwv_cwnd_valid == 0) && (!(tp->ccv->flags & CCF_CWND_LIMITED))) { tp->snd_cwnd = old_cwnd; } /* Try to update pipeAck and NCWV state */ if (TCPS_HAVEESTABLISHED(tp->t_state) && !IN_RECOVERY(tp->t_flags)) { uint32_t data = sbavail(&(tp->t_inpcb->inp_socket->so_snd)); tcp_newcwv_update_pipeack(tp, data); } } #endif /* we enforce max peak rate if it is set. */ if (tp->t_peakrate_thr && tp->snd_cwnd > tp->t_peakrate_thr) { tp->snd_cwnd = tp->t_peakrate_thr; } } static void tcp_rack_partialack(struct tcpcb *tp, struct tcphdr *th) { struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; INP_WLOCK_ASSERT(tp->t_inpcb); if (rack->r_ctl.rc_prr_sndcnt > 0) rack->r_wanted_output++; } static void rack_post_recovery(struct tcpcb *tp, struct tcphdr *th) { struct tcp_rack *rack; INP_WLOCK_ASSERT(tp->t_inpcb); rack = (struct tcp_rack *)tp->t_fb_ptr; if (CC_ALGO(tp)->post_recovery != NULL) { tp->ccv->curack = th->th_ack; CC_ALGO(tp)->post_recovery(tp->ccv); } /* * Here we can in theory adjust cwnd to be based on the number of * losses in the window (rack->r_ctl.rc_loss_count). This is done * based on the rack_use_proportional flag. 
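 * For example, with rc_loss_count = 3 and rc_prop_rate = 10 the
 * reduction below works out to 30%, and the clamp keeps any single
 * recovery from cutting cwnd by more than 50%.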
*/ if (rack->r_ctl.rc_prop_reduce && rack->r_ctl.rc_prop_rate) { int32_t reduce; reduce = (rack->r_ctl.rc_loss_count * rack->r_ctl.rc_prop_rate); if (reduce > 50) { reduce = 50; } tp->snd_cwnd -= ((reduce * tp->snd_cwnd) / 100); } else { if (tp->snd_cwnd > tp->snd_ssthresh) { /* Drop us down to the ssthresh (1/2 cwnd at loss) */ tp->snd_cwnd = tp->snd_ssthresh; } } if (rack->r_ctl.rc_prr_sndcnt > 0) { /* Suck the next prr cnt back into cwnd */ tp->snd_cwnd += rack->r_ctl.rc_prr_sndcnt; rack->r_ctl.rc_prr_sndcnt = 0; } EXIT_RECOVERY(tp->t_flags); #ifdef NETFLIX_CWV if (tp->cwv_enabled) { if ((tp->cwv_cwnd_valid == 0) && (tp->snd_cwv.in_recovery)) tcp_newcwv_end_recovery(tp); } #endif } static void rack_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) { struct tcp_rack *rack; INP_WLOCK_ASSERT(tp->t_inpcb); rack = (struct tcp_rack *)tp->t_fb_ptr; switch (type) { case CC_NDUPACK: /* rack->r_ctl.rc_ssthresh_set = 1;*/ if (!IN_FASTRECOVERY(tp->t_flags)) { rack->r_ctl.rc_tlp_rtx_out = 0; rack->r_ctl.rc_prr_delivered = 0; rack->r_ctl.rc_prr_out = 0; rack->r_ctl.rc_loss_count = 0; rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una; tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } break; case CC_ECN: if (!IN_CONGRECOVERY(tp->t_flags)) { TCPSTAT_INC(tcps_ecn_rcwnd); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } break; case CC_RTO: tp->t_dupacks = 0; tp->t_bytes_acked = 0; EXIT_RECOVERY(tp->t_flags); tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg) * tp->t_maxseg; tp->snd_cwnd = tp->t_maxseg; break; case CC_RTO_ERR: TCPSTAT_INC(tcps_sndrexmitbad); /* RTO was unnecessary, so reset everything. */ tp->snd_cwnd = tp->snd_cwnd_prev; tp->snd_ssthresh = tp->snd_ssthresh_prev; tp->snd_recover = tp->snd_recover_prev; if (tp->t_flags & TF_WASFRECOVERY) ENTER_FASTRECOVERY(tp->t_flags); if (tp->t_flags & TF_WASCRECOVERY) ENTER_CONGRECOVERY(tp->t_flags); tp->snd_nxt = tp->snd_max; tp->t_badrxtwin = 0; break; } if (CC_ALGO(tp)->cong_signal != NULL) { if (th != NULL) tp->ccv->curack = th->th_ack; CC_ALGO(tp)->cong_signal(tp->ccv, type); } #ifdef NETFLIX_CWV if (tp->cwv_enabled) { if (tp->snd_cwv.in_recovery == 0 && IN_RECOVERY(tp->t_flags)) { tcp_newcwv_enter_recovery(tp); } if (type == CC_RTO) { tcp_newcwv_reset(tp); } } #endif } static inline void rack_cc_after_idle(struct tcpcb *tp, int reduce_largest) { uint32_t i_cwnd; INP_WLOCK_ASSERT(tp->t_inpcb); #ifdef NETFLIX_STATS TCPSTAT_INC(tcps_idle_restarts); if (tp->t_state == TCPS_ESTABLISHED) TCPSTAT_INC(tcps_idle_estrestarts); #endif if (CC_ALGO(tp)->after_idle != NULL) CC_ALGO(tp)->after_idle(tp->ccv); if (tp->snd_cwnd == 1) i_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ else if (V_tcp_initcwnd_segments) i_cwnd = min((V_tcp_initcwnd_segments * tp->t_maxseg), max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460)); else if (V_tcp_do_rfc3390) i_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380)); else { /* Per RFC5681 Section 3.1 */ if (tp->t_maxseg > 2190) i_cwnd = 2 * tp->t_maxseg; else if (tp->t_maxseg > 1095) i_cwnd = 3 * tp->t_maxseg; else i_cwnd = 4 * tp->t_maxseg; } if (reduce_largest) { /* * Do we reduce the largest cwnd to make * rack play nice on restart hptsi wise? 
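 * For example, if rc_rack_largest_cwnd grew well beyond i_cwnd before
 * the idle period, the clamp below restarts pacing from the smaller
 * i_cwnd value.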
*/ if (((struct tcp_rack *)tp->t_fb_ptr)->r_ctl.rc_rack_largest_cwnd > i_cwnd) ((struct tcp_rack *)tp->t_fb_ptr)->r_ctl.rc_rack_largest_cwnd = i_cwnd; } /* * Being idle is no different than the initial window. If the cc * clamps it down below the initial window, raise it to the initial * window. */ if (tp->snd_cwnd < i_cwnd) { tp->snd_cwnd = i_cwnd; } } /* * Indicate whether this ack should be delayed. We can delay the ack if * the following conditions are met: * - There is no delayed ack timer in progress. * - Our last ack wasn't a 0-sized window. We never want to delay * the ack that opens up a 0-sized window. * - LRO wasn't used for this segment. We make sure by checking that the * segment size is not larger than the MSS. * - Delayed acks are enabled or this is a half-synchronized T/TCP * connection. */ #define DELAY_ACK(tp, tlen) \ (((tp->t_flags & TF_RXWIN0SENT) == 0) && \ ((tp->t_flags & TF_DELACK) == 0) && \ (tlen <= tp->t_maxseg) && \ (tp->t_delayed_ack || (tp->t_flags & TF_NEEDSYN))) static inline void rack_calc_rwin(struct socket *so, struct tcpcb *tp) { int32_t win; /* * Calculate amount of space in receive window, and then do TCP * input processing. Receive window is amount of space in rcv queue, * but not less than advertised window. */ win = sbspace(&so->so_rcv); if (win < 0) win = 0; tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); } static void rack_do_drop(struct mbuf *m, struct tcpcb *tp, int32_t * ti_locked) { if (*ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); *ti_locked = TI_UNLOCKED; } /* * Drop space held by incoming segment and return. */ if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); if (m) m_freem(m); } static void rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t * ti_locked, int32_t rstreason, int32_t tlen) { if (*ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); *ti_locked = TI_UNLOCKED; } if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); INP_WUNLOCK(tp->t_inpcb); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); } /* * The value in ret_val informs the caller * if we dropped the tcb (and lock) or not. * 1 = we dropped it, 0 = the TCB is still locked * and valid. */ static void rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t * ti_locked, int32_t thflags, int32_t tlen, int32_t * ret_val) { /* * Generate an ACK dropping incoming segment if it occupies sequence * space, where the ACK reflects our state. * * We can now skip the test for the RST flag since all paths to this * code happen after packets containing RST have been dropped. * * In the SYN-RECEIVED state, don't send an ACK unless the segment * we received passes the SYN-RECEIVED ACK test. If it fails send a * RST. This breaks the loop in the "LAND" DoS attack, and also * prevents an ACK storm between two listening ports that have been * sent forged SYN segments, each with the source address of the * other.
*/ struct tcp_rack *rack; if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max))) { *ret_val = 1; rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return; } else *ret_val = 0; if (*ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); *ti_locked = TI_UNLOCKED; } rack = (struct tcp_rack *)tp->t_fb_ptr; rack->r_wanted_output++; tp->t_flags |= TF_ACKNOW; if (m) m_freem(m); } static int rack_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t * ti_locked) { /* * RFC5961 Section 3.2 * * - RST drops connection only if SEG.SEQ == RCV.NXT. - If RST is in * window, we send challenge ACK. * * Note: to take into account delayed ACKs, we should test against * last_ack_sent instead of rcv_nxt. Note 2: we handle special case * of closed window, not covered by the RFC. */ int dropped = 0; if ((SEQ_GEQ(th->th_seq, (tp->last_ack_sent - 1)) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(*ti_locked == TI_RLOCKED, ("%s: TH_RST ti_locked %d, th %p tp %p", __func__, *ti_locked, th, tp)); KASSERT(tp->t_state != TCPS_SYN_SENT, ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", __func__, th, tp)); if (V_tcp_insecure_rst || (tp->last_ack_sent == th->th_seq) || (tp->rcv_nxt == th->th_seq) || ((tp->last_ack_sent - 1) == th->th_seq)) { TCPSTAT_INC(tcps_drops); /* Drop the connection. */ switch (tp->t_state) { case TCPS_SYN_RECEIVED: so->so_error = ECONNREFUSED; goto close; case TCPS_ESTABLISHED: case TCPS_FIN_WAIT_1: case TCPS_FIN_WAIT_2: case TCPS_CLOSE_WAIT: case TCPS_CLOSING: case TCPS_LAST_ACK: so->so_error = ECONNRESET; close: tcp_state_change(tp, TCPS_CLOSED); /* FALLTHROUGH */ default: tp = tcp_close(tp); } dropped = 1; rack_do_drop(m, tp, ti_locked); } else { TCPSTAT_INC(tcps_badrst); /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; } } else { m_freem(m); } return (dropped); } /* * The value in ret_val informs the caller * if we dropped the tcb (and lock) or not. * 1 = we dropped it, 0 = the TCB is still locked * and valid. */ static void rack_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ti_locked, int32_t * ret_val) { KASSERT(*ti_locked == TI_RLOCKED, ("tcp_do_segment: TH_SYN ti_locked %d", *ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); TCPSTAT_INC(tcps_badsyn); if (V_tcp_insecure_syn && SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { tp = tcp_drop(tp, ECONNRESET); *ret_val = 1; rack_do_drop(m, tp, ti_locked); } else { /* Send challenge ACK. */ tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, tp->snd_nxt, TH_ACK); tp->last_ack_sent = tp->rcv_nxt; m = NULL; *ret_val = 0; rack_do_drop(m, NULL, ti_locked); } } /* * rack_ts_check returns 1 for you should not proceed. It places * in ret_val what should be returned 1/0 by the caller. The 1 indicates * that the TCB is unlocked and probably dropped. The 0 indicates the * TCB is still valid and locked. */ static int rack_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ti_locked, int32_t tlen, int32_t thflags, int32_t * ret_val) { /* Check to see if ts_recent is over 24 days old. */ if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { /* * Invalidate ts_recent. 
If this segment updates ts_recent, * the age will be reset later and ts_recent will get a * valid value. If it does not, setting ts_recent to zero * will at least satisfy the requirement that zero be placed * in the timestamp echo reply when ts_recent isn't valid. * The age isn't reset until we get a valid ts_recent * because we don't want out-of-order segments to be dropped * when ts_recent is old. */ tp->ts_recent = 0; } else { TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, tlen); TCPSTAT_INC(tcps_pawsdrop); *ret_val = 0; if (tlen) { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, ret_val); } else { rack_do_drop(m, NULL, ti_locked); } return (1); } return (0); } /* * rack_drop_checks returns 1 for you should not proceed. It places * in ret_val what should be returned 1/0 by the caller. The 1 indicates * that the TCB is unlocked and probably dropped. The 0 indicates the * TCB is still valid and locked. */ static int rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * ti_locked, int32_t * thf, int32_t * drop_hdrlen, int32_t * ret_val) { int32_t todrop; int32_t thflags; int32_t tlen; thflags = *thf; tlen = *tlenp; todrop = tp->rcv_nxt - th->th_seq; if (todrop > 0) { if (thflags & TH_SYN) { thflags &= ~TH_SYN; th->th_seq++; if (th->th_urp > 1) th->th_urp--; else thflags &= ~TH_URG; todrop--; } /* * Following if statement from Stevens, vol. 2, p. 960. */ if (todrop > tlen || (todrop == tlen && (thflags & TH_FIN) == 0)) { /* * Any valid FIN must be to the left of the window. * At this point the FIN must be a duplicate or out * of sequence; drop it. */ thflags &= ~TH_FIN; /* * Send an ACK to resynchronize and drop any data. * But keep on processing for RST or ACK. */ tp->t_flags |= TF_ACKNOW; todrop = tlen; TCPSTAT_INC(tcps_rcvduppack); TCPSTAT_ADD(tcps_rcvdupbyte, todrop); } else { TCPSTAT_INC(tcps_rcvpartduppack); TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); } *drop_hdrlen += todrop; /* drop from the top afterwards */ th->th_seq += todrop; tlen -= todrop; if (th->th_urp > todrop) th->th_urp -= todrop; else { thflags &= ~TH_URG; th->th_urp = 0; } } /* * If segment ends after window, drop trailing data (and PUSH and * FIN); if nothing left, just ACK. */ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); if (todrop > 0) { TCPSTAT_INC(tcps_rcvpackafterwin); if (todrop >= tlen) { TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen); /* * If window is closed can only take segments at * window edge, and have to drop data and PUSH from * incoming segments. Continue processing, but * remember to ack. Otherwise, drop segment and * ack. */ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_rcvwinprobe); } else { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, ret_val); return (1); } } else TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); m_adj(m, -todrop); tlen -= todrop; thflags &= ~(TH_PUSH | TH_FIN); } *thf = thflags; *tlenp = tlen; return (0); } static struct rack_sendmap * rack_find_lowest_rsm(struct tcp_rack *rack) { struct rack_sendmap *rsm; /* * Walk the time-order transmitted list looking for an rsm that is * not acked. This will be the one that was sent the longest time * ago that is still outstanding. 
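 * Because rc_tmap is kept in transmit-time order, the first entry
 * that is not marked RACK_ACKED is by construction the oldest
 * outstanding send, so the walk below can stop at the first match.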
*/ TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) { if (rsm->r_flags & RACK_ACKED) { continue; } goto finish; } finish: return (rsm); } static struct rack_sendmap * rack_find_high_nonack(struct tcp_rack *rack, struct rack_sendmap *rsm) { struct rack_sendmap *prsm; /* * Walk the sequence order list backward until we arrive at * the highest seq not acked. In theory when this is called it * should be the last segment (which it was not). */ counter_u64_add(rack_find_high, 1); prsm = rsm; TAILQ_FOREACH_REVERSE_FROM(prsm, &rack->r_ctl.rc_map, rack_head, r_next) { if (prsm->r_flags & (RACK_ACKED | RACK_HAS_FIN)) { continue; } return (prsm); } return (NULL); } static uint32_t rack_calc_thresh_rack(struct tcp_rack *rack, uint32_t srtt, uint32_t cts) { int32_t lro; uint32_t thresh; /* * lro is the flag we use to determine if we have seen reordering. * If it gets set we have seen reordering. The reorder logic either * works in one of two ways: * * If reorder-fade is configured, then we track the last time we saw * re-ordering occur. If we reach the point where enough time has * passed we no longer consider reordering as occurring. * * Or if reorder-fade is 0, then once we see reordering we consider * the connection to always be subject to reordering and just set lro * to 1. * * In the end if lro is non-zero we add the extra time for * reordering in. */ if (srtt == 0) srtt = 1; if (rack->r_ctl.rc_reorder_ts) { if (rack->r_ctl.rc_reorder_fade) { if (SEQ_GEQ(cts, rack->r_ctl.rc_reorder_ts)) { lro = cts - rack->r_ctl.rc_reorder_ts; if (lro == 0) { /* * No time has passed since the last * reorder, mark it as reordering. */ lro = 1; } } else { /* Negative time? */ lro = 0; } if (lro > rack->r_ctl.rc_reorder_fade) { /* Turn off reordering seen too */ rack->r_ctl.rc_reorder_ts = 0; lro = 0; } } else { /* Reordering does not fade */ lro = 1; } } else { lro = 0; } thresh = srtt + rack->r_ctl.rc_pkt_delay; if (lro) { /* It must be set, if not you get 1/4 rtt */ if (rack->r_ctl.rc_reorder_shift) thresh += (srtt >> rack->r_ctl.rc_reorder_shift); else thresh += (srtt >> 2); } else { thresh += 1; } /* We don't let the rack timeout be above a RTO */ if (thresh > TICKS_2_MSEC(rack->rc_tp->t_rxtcur)) { thresh = TICKS_2_MSEC(rack->rc_tp->t_rxtcur); } /* And we don't want it above the RTO max either */ if (thresh > rack_rto_max) { thresh = rack_rto_max; } return (thresh); } static uint32_t rack_calc_thresh_tlp(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t srtt) { struct rack_sendmap *prsm; uint32_t thresh, len; int maxseg; if (srtt == 0) srtt = 1; if (rack->r_ctl.rc_tlp_threshold) thresh = srtt + (srtt / rack->r_ctl.rc_tlp_threshold); else thresh = (srtt * 2); /* Get the previous sent packet, if any */ maxseg = tcp_maxseg(tp); counter_u64_add(rack_enter_tlp_calc, 1); len = rsm->r_end - rsm->r_start; if (rack->rack_tlp_threshold_use == TLP_USE_ID) { /* Exactly like the ID */ if (((tp->snd_max - tp->snd_una) - rack->r_ctl.rc_sacked + rack->r_ctl.rc_holes_rxt) <= maxseg) { uint32_t alt_thresh; /* * Compensate for delayed-ack with the d-ack time. */ counter_u64_add(rack_used_tlpmethod, 1); alt_thresh = srtt + (srtt / 2) + rack_delayed_ack_time; if (alt_thresh > thresh) thresh = alt_thresh; } } else if (rack->rack_tlp_threshold_use == TLP_USE_TWO_ONE) { /* 2.1 behavior */ prsm = TAILQ_PREV(rsm, rack_head, r_tnext); if (prsm && (len <= maxseg)) { /* * Two packets outstanding, thresh should be (2*srtt) + * possible inter-packet delay (if any). */
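/*
 * For example (illustrative numbers): if the previous packet was sent
 * 5 ms before this one, inter_gap below becomes 5 ms and the threshold
 * (2*srtt with the default divisor of 1) is extended by that gap.
 */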
uint32_t inter_gap = 0; int idx, nidx; counter_u64_add(rack_used_tlpmethod, 1); idx = rsm->r_rtr_cnt - 1; nidx = prsm->r_rtr_cnt - 1; if (TSTMP_GEQ(rsm->r_tim_lastsent[nidx], prsm->r_tim_lastsent[idx])) { /* Yes it was sent later (or at the same time) */ inter_gap = rsm->r_tim_lastsent[idx] - prsm->r_tim_lastsent[nidx]; } thresh += inter_gap; } else if (len <= maxseg) { /* * Possibly compensate for delayed-ack. */ uint32_t alt_thresh; counter_u64_add(rack_used_tlpmethod2, 1); alt_thresh = srtt + (srtt / 2) + rack_delayed_ack_time; if (alt_thresh > thresh) thresh = alt_thresh; } } else if (rack->rack_tlp_threshold_use == TLP_USE_TWO_TWO) { /* 2.2 behavior */ if (len <= maxseg) { uint32_t alt_thresh; /* * Compensate for delayed-ack with the d-ack time. */ counter_u64_add(rack_used_tlpmethod, 1); alt_thresh = srtt + (srtt / 2) + rack_delayed_ack_time; if (alt_thresh > thresh) thresh = alt_thresh; } } /* Not above an RTO */ if (thresh > TICKS_2_MSEC(tp->t_rxtcur)) { thresh = TICKS_2_MSEC(tp->t_rxtcur); } /* Not above a RTO max */ if (thresh > rack_rto_max) { thresh = rack_rto_max; } /* Apply user supplied min TLP */ if (thresh < rack_tlp_min) { thresh = rack_tlp_min; } return (thresh); } static struct rack_sendmap * rack_check_recovery_mode(struct tcpcb *tp, uint32_t tsused) { /* * Check to see that we don't need to fall into recovery. We will * need to do so if our oldest transmit is past the time we should * have had an ack. */ struct tcp_rack *rack; struct rack_sendmap *rsm; int32_t idx; uint32_t srtt_cur, srtt, thresh; rack = (struct tcp_rack *)tp->t_fb_ptr; if (TAILQ_EMPTY(&rack->r_ctl.rc_map)) { return (NULL); } srtt_cur = tp->t_srtt >> TCP_RTT_SHIFT; srtt = TICKS_2_MSEC(srtt_cur); if (rack->rc_rack_rtt && (srtt > rack->rc_rack_rtt)) srtt = rack->rc_rack_rtt; rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); if (rsm == NULL) return (NULL); if (rsm->r_flags & RACK_ACKED) { rsm = rack_find_lowest_rsm(rack); if (rsm == NULL) return (NULL); } idx = rsm->r_rtr_cnt - 1; thresh = rack_calc_thresh_rack(rack, srtt, tsused); if (tsused < rsm->r_tim_lastsent[idx]) { return (NULL); } if ((tsused - rsm->r_tim_lastsent[idx]) < thresh) { return (NULL); } /* Ok if we reach here we are over-due */ rack->r_ctl.rc_rsm_start = rsm->r_start; rack->r_ctl.rc_cwnd_at = tp->snd_cwnd; rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; rack_cong_signal(tp, NULL, CC_NDUPACK); return (rsm); } static uint32_t rack_get_persists_timer_val(struct tcpcb *tp, struct tcp_rack *rack) { int32_t t; int32_t tt; uint32_t ret_val; t = TICKS_2_MSEC((tp->t_srtt >> TCP_RTT_SHIFT) + ((tp->t_rttvar * 4) >> TCP_RTT_SHIFT)); TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], tcp_persmin, tcp_persmax); if (tp->t_rxtshift < TCP_MAXRXTSHIFT) tp->t_rxtshift++; rack->r_ctl.rc_hpts_flags |= PACE_TMR_PERSIT; ret_val = (uint32_t)tt; return (ret_val); } static uint32_t rack_timer_start(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { /* * Start the FR timer, we do this based on getting the first one in * the rc_tmap. Note that if it's NULL we must stop the timer. In all * events we need to stop the running timer (if it's running) before * starting the new one. */
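/*
 * For example (illustrative numbers): if the oldest outstanding
 * segment was sent 40 ms ago and the rack threshold works out to
 * 102 ms, the rack timer below is armed for 62 ms (never less than
 * rc_min_to).
 */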
uint32_t thresh, exp, to, srtt, time_since_sent; uint32_t srtt_cur; int32_t idx; int32_t is_tlp_timer = 0; struct rack_sendmap *rsm; if (rack->t_timers_stopped) { /* All timers have been stopped, none are to run */ return (0); } if (rack->rc_in_persist) { /* We can't start any timer in persists */ return (rack_get_persists_timer_val(tp, rack)); } rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); if (rsm == NULL) { /* Nothing on the send map */ activate_rxt: if (SEQ_LT(tp->snd_una, tp->snd_max) || sbavail(&(tp->t_inpcb->inp_socket->so_snd))) { rack->r_ctl.rc_hpts_flags |= PACE_TMR_RXT; to = TICKS_2_MSEC(tp->t_rxtcur); if (to == 0) to = 1; return (to); } return (0); } if (rsm->r_flags & RACK_ACKED) { rsm = rack_find_lowest_rsm(rack); if (rsm == NULL) { /* No lowest? */ goto activate_rxt; } } /* Convert from ms to usecs */ if (rsm->r_flags & RACK_SACK_PASSED) { if ((tp->t_flags & TF_SENTFIN) && ((tp->snd_max - tp->snd_una) == 1) && (rsm->r_flags & RACK_HAS_FIN)) { /* * We don't start a rack timer if all we have is a * FIN outstanding. */ goto activate_rxt; } if (tp->t_srtt) { srtt_cur = (tp->t_srtt >> TCP_RTT_SHIFT); srtt = TICKS_2_MSEC(srtt_cur); } else srtt = RACK_INITIAL_RTO; thresh = rack_calc_thresh_rack(rack, srtt, cts); idx = rsm->r_rtr_cnt - 1; exp = rsm->r_tim_lastsent[idx] + thresh; if (SEQ_GEQ(exp, cts)) { to = exp - cts; if (to < rack->r_ctl.rc_min_to) { to = rack->r_ctl.rc_min_to; } } else { to = rack->r_ctl.rc_min_to; } } else { /* Ok we need to do a TLP not RACK */ if ((rack->rc_tlp_in_progress != 0) || (rack->r_ctl.rc_tlp_rtx_out != 0)) { /* * The previous send was a TLP or a tlp_rtx is in * progress. */ goto activate_rxt; } rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_tmap, rack_sendmap, r_tnext); if (rsm == NULL) { /* We found no rsm to TLP with. */ goto activate_rxt; } if (rsm->r_flags & RACK_HAS_FIN) { /* If it's a FIN we don't do TLP */ rsm = NULL; goto activate_rxt; } idx = rsm->r_rtr_cnt - 1; if (TSTMP_GT(cts, rsm->r_tim_lastsent[idx])) time_since_sent = cts - rsm->r_tim_lastsent[idx]; else time_since_sent = 0; is_tlp_timer = 1; if (tp->t_srtt) { srtt_cur = (tp->t_srtt >> TCP_RTT_SHIFT); srtt = TICKS_2_MSEC(srtt_cur); } else srtt = RACK_INITIAL_RTO; thresh = rack_calc_thresh_tlp(tp, rack, rsm, srtt); if (thresh > time_since_sent) to = thresh - time_since_sent; else to = rack->r_ctl.rc_min_to; if (to > TCPTV_REXMTMAX) { /* * If the TLP time works out to larger than the max * RTO let's not do TLP.. just RTO. */ goto activate_rxt; } if (rsm->r_start != rack->r_ctl.rc_last_tlp_seq) { /* * The tail is no longer the last one I did a probe * on */ rack->r_ctl.rc_tlp_seg_send_cnt = 0; rack->r_ctl.rc_last_tlp_seq = rsm->r_start; } } if (is_tlp_timer == 0) { rack->r_ctl.rc_hpts_flags |= PACE_TMR_RACK; } else { if ((rack->r_ctl.rc_tlp_send_cnt > rack_tlp_max_resend) || (rack->r_ctl.rc_tlp_seg_send_cnt > rack_tlp_max_resend)) { /* * We have exceeded how many times we can retran the * current TLP timer, switch to the RTO timer. */
goto activate_rxt; } else { rack->r_ctl.rc_hpts_flags |= PACE_TMR_TLP; } } if (to == 0) to = 1; return (to); } static void rack_enter_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { if (rack->rc_in_persist == 0) { if (((tp->t_flags & TF_SENTFIN) == 0) && (tp->snd_max - tp->snd_una) >= sbavail(&rack->rc_inp->inp_socket->so_snd)) /* Must need to send more data to enter persist */ return; rack->r_ctl.rc_went_idle_time = cts; rack_timer_cancel(tp, rack, cts, __LINE__); tp->t_rxtshift = 0; rack->rc_in_persist = 1; } } static void rack_exit_persist(struct tcpcb *tp, struct tcp_rack *rack) { if (rack->rc_inp->inp_in_hpts) { tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); rack->r_ctl.rc_hpts_flags = 0; } rack->rc_in_persist = 0; rack->r_ctl.rc_went_idle_time = 0; tp->t_flags &= ~TF_FORCEDATA; tp->t_rxtshift = 0; } static void rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, int32_t line, int32_t slot, uint32_t tot_len_this_send, int32_t frm_out_sbavail) { struct inpcb *inp; uint32_t delayed_ack = 0; uint32_t hpts_timeout; uint8_t stopped; uint32_t left = 0; inp = tp->t_inpcb; if (inp->inp_in_hpts) { /* A previous call is already set up */ return; } if (tp->t_state == TCPS_CLOSED) { return; } stopped = rack->rc_tmr_stopped; if (stopped && TSTMP_GT(rack->r_ctl.rc_timer_exp, cts)) { left = rack->r_ctl.rc_timer_exp - cts; } rack->r_ctl.rc_timer_exp = 0; if (rack->rc_inp->inp_in_hpts == 0) { rack->r_ctl.rc_hpts_flags = 0; } if (slot) { /* We are hptsi too */ rack->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT; } else if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) { /* * We are still left on the hpts and when the to goes off * it will be for output. */ if (TSTMP_GT(cts, rack->r_ctl.rc_last_output_to)) slot = cts - rack->r_ctl.rc_last_output_to; else slot = 1; } if ((tp->snd_wnd == 0) && TCPS_HAVEESTABLISHED(tp->t_state)) { /* No send window.. we must enter persist */ rack_enter_persist(tp, rack, cts); } else if ((frm_out_sbavail && (frm_out_sbavail > (tp->snd_max - tp->snd_una)) && (tp->snd_wnd < tp->t_maxseg)) && TCPS_HAVEESTABLISHED(tp->t_state)) { /* * If we have no window or we can't send a segment (and have * data to send.. we cheat here and frm_out_sbavail is * passed in with the sbavail(sb) only from bbr_output) and * we are established, then we must enter persists (if not * already in persists). */ rack_enter_persist(tp, rack, cts); } hpts_timeout = rack_timer_start(tp, rack, cts); if (tp->t_flags & TF_DELACK) { delayed_ack = tcp_delacktime; rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK; } if (delayed_ack && ((hpts_timeout == 0) || (delayed_ack < hpts_timeout))) hpts_timeout = delayed_ack; else rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_DELACK; /* * If no timers are going to run and we will fall off the hptsi * wheel, we resort to a keep-alive timer if it's configured. */ if ((hpts_timeout == 0) && (slot == 0)) { if ((tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)) { /* * Ok we have no timer (persists, rack, tlp, rxt or * del-ack), we don't have segments being paced. So * all that is left is the keepalive timer. */
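/*
 * With default settings that is two hours of idle time on an
 * established connection, or 75 seconds while still in setup.
 */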
if (TCPS_HAVEESTABLISHED(tp->t_state)) { /* Get the established keep-alive time */ hpts_timeout = TP_KEEPIDLE(tp); } else { /* Get the initial setup keep-alive time */ hpts_timeout = TP_KEEPINIT(tp); } rack->r_ctl.rc_hpts_flags |= PACE_TMR_KEEP; } } if (left && (stopped & (PACE_TMR_KEEP | PACE_TMR_DELACK)) == (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK)) { /* * RACK, TLP, persists and RXT timers all are restartable * based on actions input .. i.e., we received a packet (ack * or sack) and that changes things (rw, or snd_una etc). * Thus we can restart them with a new value. For * keep-alive, delayed_ack we keep track of what was left * and restart the timer with a smaller value. */ if (left < hpts_timeout) hpts_timeout = left; } if (hpts_timeout) { /* * Hack alert for now we can't time-out over 2,147,483 * seconds (a bit more than 596 hours), which is probably ok * :). */ if (hpts_timeout > 0x7ffffffe) hpts_timeout = 0x7ffffffe; rack->r_ctl.rc_timer_exp = cts + hpts_timeout; } if (slot) { rack->r_ctl.rc_last_output_to = cts + slot; if ((hpts_timeout == 0) || (hpts_timeout > slot)) { if (rack->rc_inp->inp_in_hpts == 0) tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(slot)); rack_log_to_start(rack, cts, hpts_timeout, slot, 1); } else { /* * Arrange for the hpts to kick back in after the * t-o if the t-o does not cause a send. */ if (rack->rc_inp->inp_in_hpts == 0) tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(hpts_timeout)); rack_log_to_start(rack, cts, hpts_timeout, slot, 0); } } else if (hpts_timeout) { if (rack->rc_inp->inp_in_hpts == 0) tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(hpts_timeout)); rack_log_to_start(rack, cts, hpts_timeout, slot, 0); } else { /* No timer starting */ #ifdef INVARIANTS if (SEQ_GT(tp->snd_max, tp->snd_una)) { panic("tp:%p rack:%p tlts:%d cts:%u slot:%u pto:%u -- no timer started?", tp, rack, tot_len_this_send, cts, slot, hpts_timeout); } #endif } rack->rc_tmr_stopped = 0; if (slot) rack_log_type_bbrsnd(rack, tot_len_this_send, slot, cts); } /* * RACK Timer, here we simply do logging and housekeeping. * The normal rack_output() function will call the * appropriate thing to check if we need to do a RACK retransmit. * We return 1, saying don't proceed with rack_output only * when all timers have been stopped (destroyed PCB?). */ static int rack_timeout_rack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { /* * This timer simply provides an internal trigger to send out data. * The check_recovery_mode call will see if there are needed * retransmissions, if so we will enter fast-recovery. The output * call may or may not do the same thing depending on sysctl * settings. */ struct rack_sendmap *rsm; int32_t recovery; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) { /* It's not time yet */ return (0); } rack_log_to_event(rack, RACK_TO_FRM_RACK); recovery = IN_RECOVERY(tp->t_flags); counter_u64_add(rack_to_tot, 1); if (rack->r_state && (rack->r_state != tp->t_state)) rack_set_state(tp, rack); rsm = rack_check_recovery_mode(tp, cts); if (rsm) { uint32_t rtt; rtt = rack->rc_rack_rtt; if (rtt == 0) rtt = 1; if ((recovery == 0) && (rack->r_ctl.rc_prr_sndcnt < tp->t_maxseg)) { /* * The rack-timeout that enters us into recovery * will force out one MSS and set us up so that we * can do one more send in 2*rtt (transitioning the * rack timeout into a rack-tlp).
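 * In effect rc_prr_sndcnt is primed with one MSS here so that the
 * ensuing rack_output() call can send a single segment immediately.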
*/ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; } else if ((rack->r_ctl.rc_prr_sndcnt < tp->t_maxseg) && ((rsm->r_end - rsm->r_start) > rack->r_ctl.rc_prr_sndcnt)) { /* * When a rack timer goes, we have to send at * least one segment. They will be paced a min of 1ms * apart via the next rack timer (or further * if the rack timer dictates it). */ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; } } else { /* This is a case that should happen rarely if ever */ counter_u64_add(rack_tlp_does_nada, 1); #ifdef TCP_BLACKBOX tcp_log_dump_tp_logbuf(tp, "nada counter trips", M_NOWAIT, true); #endif rack->r_ctl.rc_resend = TAILQ_FIRST(&rack->r_ctl.rc_tmap); } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RACK; return (0); } /* * TLP Timer, here we simply setup what segment we want to * have the TLP expire on, the normal rack_output() will then * send it out. * * We return 1, saying don't proceed with rack_output only * when all timers have been stopped (destroyed PCB?). */ static int rack_timeout_tlp(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { /* * Tail Loss Probe. */ struct rack_sendmap *rsm = NULL; struct socket *so; uint32_t amm, old_prr_snd = 0; uint32_t out, avail; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) { /* It's not time yet */ return (0); } if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); return (1); } /* * A TLP timer has expired. We have been idle for 2 rtts. So we now * need to figure out how to force a full MSS segment out. */ rack_log_to_event(rack, RACK_TO_FRM_TLP); counter_u64_add(rack_tlp_tot, 1); if (rack->r_state && (rack->r_state != tp->t_state)) rack_set_state(tp, rack); so = tp->t_inpcb->inp_socket; avail = sbavail(&so->so_snd); out = tp->snd_max - tp->snd_una; rack->rc_timer_up = 1; /* * If we are in recovery we can jazz out a segment if new data is * present simply by setting rc_prr_sndcnt to a segment. */ if ((avail > out) && ((rack_always_send_oldest == 0) || (TAILQ_EMPTY(&rack->r_ctl.rc_tmap)))) { /* New data is available */ amm = avail - out; if (amm > tp->t_maxseg) { amm = tp->t_maxseg; } else if ((amm < tp->t_maxseg) && ((tp->t_flags & TF_NODELAY) == 0)) { /* not enough to fill an MTU and no-delay is off */ goto need_retran; } if (IN_RECOVERY(tp->t_flags)) { /* Unlikely */ old_prr_snd = rack->r_ctl.rc_prr_sndcnt; if (out + amm <= tp->snd_wnd) rack->r_ctl.rc_prr_sndcnt = amm; else goto need_retran; } else { /* Set the send-new override */ if (out + amm <= tp->snd_wnd) rack->r_ctl.rc_tlp_new_data = amm; else goto need_retran; } rack->r_ctl.rc_tlp_seg_send_cnt = 0; rack->r_ctl.rc_last_tlp_seq = tp->snd_max; rack->r_ctl.rc_tlpsend = NULL; counter_u64_add(rack_tlp_newdata, 1); goto send; } need_retran: /* * Ok we need to arrange the last un-acked segment to be re-sent, or * optionally the first un-acked segment. */ if (rack_always_send_oldest) rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); else { rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_map, rack_sendmap, r_next); if (rsm && (rsm->r_flags & (RACK_ACKED | RACK_HAS_FIN))) { rsm = rack_find_high_nonack(rack, rsm); } } if (rsm == NULL) { counter_u64_add(rack_tlp_does_nada, 1); #ifdef TCP_BLACKBOX tcp_log_dump_tp_logbuf(tp, "nada counter trips", M_NOWAIT, true); #endif goto out; } if ((rsm->r_end - rsm->r_start) > tp->t_maxseg) { /* * We need to split the last segment in two. */ int32_t idx; struct rack_sendmap *nrsm; nrsm = rack_alloc(rack); if (nrsm == NULL) { /* * No memory to split, we will just exit and punt * off to the RXT timer. */
counter_u64_add(rack_tlp_does_nada, 1); goto out; } nrsm->r_start = (rsm->r_end - tp->t_maxseg); nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; nrsm->r_rtr_bytes = 0; rsm->r_end = nrsm->r_start; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } rsm->r_flags &= (~RACK_HAS_FIN); rsm = nrsm; } rack->r_ctl.rc_tlpsend = rsm; rack->r_ctl.rc_tlp_rtx_out = 1; if (rsm->r_start == rack->r_ctl.rc_last_tlp_seq) { rack->r_ctl.rc_tlp_seg_send_cnt++; tp->t_rxtshift++; } else { rack->r_ctl.rc_last_tlp_seq = rsm->r_start; rack->r_ctl.rc_tlp_seg_send_cnt = 1; } send: rack->r_ctl.rc_tlp_send_cnt++; if (rack->r_ctl.rc_tlp_send_cnt > rack_tlp_max_resend) { /* * Can't [re]transmit a segment we have gotten no ACK for in * the max number of tries. We need the retransmit timer to * take over. */ restore: rack->r_ctl.rc_tlpsend = NULL; if (rsm) rsm->r_flags &= ~RACK_TLP; rack->r_ctl.rc_prr_sndcnt = old_prr_snd; counter_u64_add(rack_tlp_retran_fail, 1); goto out; } else if (rsm) { rsm->r_flags |= RACK_TLP; } if (rsm && (rsm->r_start == rack->r_ctl.rc_last_tlp_seq) && (rack->r_ctl.rc_tlp_seg_send_cnt > rack_tlp_max_resend)) { /* * We don't want to send a single segment more than the max * either. */ goto restore; } rack->r_timer_override = 1; rack->r_tlp_running = 1; rack->rc_tlp_in_progress = 1; rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_TLP; return (0); out: rack->rc_timer_up = 0; rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_TLP; return (0); } /* * Delayed ack Timer, here we simply need to setup the * ACK_NOW flag and remove the DELACK flag. From there * the output routine will send the ack out. * * We only return 1, saying don't proceed, if all timers * are stopped (destroyed PCB?). */ static int rack_timeout_delack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } rack_log_to_event(rack, RACK_TO_FRM_DELACK); tp->t_flags &= ~TF_DELACK; tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_delack); rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_DELACK; return (0); } /* * Persists timer, here we simply need to setup the * FORCE-DATA flag; the output routine will then send * the one byte probe. * * We only return 1, saying don't proceed, if all timers * are stopped (destroyed PCB?). */ static int rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { struct inpcb *inp; int32_t retval = 0; inp = tp->t_inpcb; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } if (rack->rc_in_persist == 0) return (0); if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(inp, ETIMEDOUT); return (1); } KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); /* * Persistence timer into zero window. Force a byte to be output, if * possible. */ TCPSTAT_INC(tcps_persisttimeo); /* * Hack: if the peer is dead/unreachable, we do not time out if the * window is closed. After a full backoff, drop the connection if * the idle time (no responses to probes) reaches the maximum * backoff that we would use if retransmitting.
*/ if (tp->t_rxtshift == TCP_MAXRXTSHIFT && (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { TCPSTAT_INC(tcps_persistdrop); retval = 1; tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); goto out; } if ((sbavail(&rack->rc_inp->inp_socket->so_snd) == 0) && tp->snd_una == tp->snd_max) rack_exit_persist(tp, rack); rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_PERSIT; /* * If the user has closed the socket then drop a persisting * connection after a much reduced timeout. */ if (tp->t_state > TCPS_CLOSE_WAIT && (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { retval = 1; TCPSTAT_INC(tcps_persistdrop); tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); goto out; } tp->t_flags |= TF_FORCEDATA; out: rack_log_to_event(rack, RACK_TO_FRM_PERSIST); return (retval); } /* * If a keepalive goes off, we had no other timers * happening. We always return 1 here since this * routine either drops the connection or sends * out a segment to elicit a response. */ static int rack_timeout_keepalive(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { struct tcptemp *t_template; struct inpcb *inp; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_KEEP; inp = tp->t_inpcb; rack_log_to_event(rack, RACK_TO_FRM_KEEP); /* * Keep-alive timer went off; send something or drop connection if * idle for too long. */ TCPSTAT_INC(tcps_keeptimeo); if (tp->t_state < TCPS_ESTABLISHED) goto dropit; if ((tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) goto dropit; /* * Send a packet designed to force a response if the peer is * up and reachable: either an ACK if the connection is * still alive, or an RST if the peer has closed the * connection due to timeout or reboot. Using sequence * number tp->snd_una-1 causes the transmitted zero-length * segment to lie outside the receive window; by the * protocol spec, this requires the correspondent TCP to * respond. */ TCPSTAT_INC(tcps_keepprobe); t_template = tcpip_maketemplate(inp); if (t_template) { tcp_respond(tp, t_template->tt_ipgen, &t_template->tt_t, (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0); free(t_template, M_TEMP); } } rack_start_hpts_timer(rack, tp, cts, __LINE__, 0, 0, 0); return (1); dropit: TCPSTAT_INC(tcps_keepdrops); tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); return (1); } /* * Retransmit helper function, clear up all the ack * flags and take care of important bookkeeping. */ static void rack_remxt_tmr(struct tcpcb *tp) { /* * The retransmit timer went off, all sack'd blocks must be * un-acked. */ struct rack_sendmap *rsm, *trsm = NULL; struct tcp_rack *rack; int32_t cnt = 0; rack = (struct tcp_rack *)tp->t_fb_ptr; rack_timer_cancel(tp, rack, tcp_ts_getticks(), __LINE__); rack_log_to_event(rack, RACK_TO_FRM_TMR); if (rack->r_state && (rack->r_state != tp->t_state)) rack_set_state(tp, rack); /* * Ideally we would like to be able to * mark SACK-PASS on anything not acked here. * However, if we do that we would burst out * all that data 1ms apart. This would be unwise, * so for now we will just let the normal rxt timer * and tlp timer take care of it. */
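/*
 * The walk below drops RACK_ACKED from every SACKed block and threads
 * any such block back onto rc_tmap in map order, so the retransmit
 * logic treats that data as outstanding again.
 */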
*/ TAILQ_FOREACH(rsm, &rack->r_ctl.rc_map, r_next) { if (rsm->r_flags & RACK_ACKED) { cnt++; rsm->r_sndcnt = 0; if (rsm->r_in_tmap == 0) { /* We must re-add it back to the tlist */ if (trsm == NULL) { TAILQ_INSERT_HEAD(&rack->r_ctl.rc_tmap, rsm, r_tnext); } else { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, trsm, rsm, r_tnext); } rsm->r_in_tmap = 1; trsm = rsm; } } rsm->r_flags &= ~(RACK_ACKED | RACK_SACK_PASSED | RACK_WAS_SACKPASS); } /* Clear the count (we just un-acked them) */ rack->r_ctl.rc_sacked = 0; /* Clear the tlp rtx mark */ rack->r_ctl.rc_tlp_rtx_out = 0; rack->r_ctl.rc_tlp_seg_send_cnt = 0; rack->r_ctl.rc_resend = TAILQ_FIRST(&rack->r_ctl.rc_map); /* Setup so we send one segment */ if (rack->r_ctl.rc_prr_sndcnt < tp->t_maxseg) rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; rack->r_timer_override = 1; } /* * Re-transmit timeout! If we drop the PCB we will return 1, otherwise * we will setup to retransmit the lowest seq number outstanding. */ static int rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { int32_t rexmt; struct inpcb *inp; int32_t retval = 0; inp = tp->t_inpcb; if (tp->t_timers->tt_flags & TT_STOPPED) { return (1); } if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(inp, ETIMEDOUT); return (1); } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RXT; if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->snd_una == tp->snd_max)) { /* Nothing outstanding .. nothing to do */ return (0); } /* * Retransmission timer went off. Message has not been acked within * retransmit interval. Back off to a longer retransmit interval * and retransmit one segment. */ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; TCPSTAT_INC(tcps_timeoutdrop); retval = 1; tcp_set_inp_to_drop(rack->rc_inp, (tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT)); goto out; } rack_remxt_tmr(tp); if (tp->t_state == TCPS_SYN_SENT) { /* * If the SYN was retransmitted, indicate CWND to be limited * to 1 segment in cc_conn_init(). */ tp->snd_cwnd = 1; } else if (tp->t_rxtshift == 1) { /* * first retransmit; record ssthresh and cwnd so they can be * recovered if this turns out to be a "bad" retransmit. A * retransmit is considered "bad" if an ACK for this segment * is received within RTT/2 interval; the assumption here is * that the ACK was already in flight. See "On Estimating * End-to-End Network Path Properties" by Allman and Paxson * for more details. */ tp->snd_cwnd_prev = tp->snd_cwnd; tp->snd_ssthresh_prev = tp->snd_ssthresh; tp->snd_recover_prev = tp->snd_recover; if (IN_FASTRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASFRECOVERY; else tp->t_flags &= ~TF_WASFRECOVERY; if (IN_CONGRECOVERY(tp->t_flags)) tp->t_flags |= TF_WASCRECOVERY; else tp->t_flags &= ~TF_WASCRECOVERY; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); tp->t_flags |= TF_PREVVALID; } else tp->t_flags &= ~TF_PREVVALID; TCPSTAT_INC(tcps_rexmttimeo); if ((tp->t_state == TCPS_SYN_SENT) || (tp->t_state == TCPS_SYN_RECEIVED)) rexmt = MSEC_2_TICKS(RACK_INITIAL_RTO * tcp_syn_backoff[tp->t_rxtshift]); else rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; TCPT_RANGESET(tp->t_rxtcur, rexmt, max(MSEC_2_TICKS(rack_rto_min), rexmt), MSEC_2_TICKS(rack_rto_max)); /* * We enter the path for PLMTUD if connection is established or, if * connection is FIN_WAIT_1 status, reason for the last is that if * amount of data we send is very small, we could send it in couple * of packets and process straight to FIN. In that case we won't * catch ESTABLISHED state. 
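 *
 * A sketch of the probing schedule the checks below implement (the MSS
 * figures are the usual 1500-byte-path examples from the comments, not
 * computed here):
 *
 *	t_rxtshift	2		4		>= 6
 *	action		clamp MSS	clamp again	restore saved MSS
 *
 * Each stage gets two timeouts to succeed, which is why the test
 * requires t_rxtshift % 2 == 0 before clamping further.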
*/ if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED)) || (tp->t_state == TCPS_FIN_WAIT_1))) { #ifdef INET6 int32_t isipv6; #endif /* * Idea here is that at each stage of mtu probe (usually, * 1448 -> 1188 -> 524) should be given 2 chances to recover * before further clamping down. 'tp->t_rxtshift % 2 == 0' * should take care of that. */ if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD | TF2_PLPMTU_MAXSEGSNT)) == (TF2_PLPMTU_PMTUD | TF2_PLPMTU_MAXSEGSNT)) && (tp->t_rxtshift >= 2 && tp->t_rxtshift < 6 && tp->t_rxtshift % 2 == 0)) { /* * Enter Path MTU Black-hole Detection mechanism: - * Disable Path MTU Discovery (IP "DF" bit). - * Reduce MTU to lower value than what we negotiated * with peer. */ if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) == 0) { /* Record that we may have found a black hole. */ tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE; /* Keep track of previous MSS. */ tp->t_pmtud_saved_maxseg = tp->t_maxseg; } /* * Reduce the MSS to blackhole value or to the * default in an attempt to retransmit. */ #ifdef INET6 isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0; if (isipv6 && tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) { /* Use the sysctl tuneable blackhole MSS. */ tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss; TCPSTAT_INC(tcps_pmtud_blackhole_activated); } else if (isipv6) { /* Use the default MSS. */ tp->t_maxseg = V_tcp_v6mssdflt; /* * Disable Path MTU Discovery when we switch * to minmss. */ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) { /* Use the sysctl tuneable blackhole MSS. */ tp->t_maxseg = V_tcp_pmtud_blackhole_mss; TCPSTAT_INC(tcps_pmtud_blackhole_activated); } else { /* Use the default MSS. */ tp->t_maxseg = V_tcp_mssdflt; /* * Disable Path MTU Discovery when we switch * to minmss. */ tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss); } #endif } else { /* * If further retransmissions are still unsuccessful * with a lowered MTU, maybe this isn't a blackhole * and we restore the previous MSS and blackhole * detection flags. The limit '6' is determined by * giving each probe stage (1448, 1188, 524) 2 * chances to recover. */ if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) && (tp->t_rxtshift >= 6)) { tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; TCPSTAT_INC(tcps_pmtud_blackhole_failed); } } } /* * Disable RFC1323 and SACK if we haven't got any response to our * third SYN to work-around some broken terminal servers (most of * which have hopefully been retired) that have bad VJ header * compression code which trashes TCP segments containing * unknown-to-them TCP options. */ if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) tp->t_flags &= ~(TF_REQ_SCALE | TF_REQ_TSTMP | TF_SACK_PERMIT); /* * If we backed off this far, our srtt estimate is probably bogus. * Clobber it so we'll take the next rtt measurement as our srtt; * move the current srtt into rttvar to keep the current retransmit * times until then. 
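 *
 * (A sketch of why the timer value survives the clobber: the
 * retransmit value is computed from t_srtt scaled down by
 * TCP_RTT_SHIFT plus t_rttvar, so adding t_srtt >> TCP_RTT_SHIFT into
 * t_rttvar before zeroing t_srtt leaves the computed RTO essentially
 * unchanged until the next measurement refills srtt.)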
*/ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) in6_losing(tp->t_inpcb); else #endif in_losing(tp->t_inpcb); tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); tp->t_srtt = 0; } if (rack_use_sack_filter) sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); tp->snd_recover = tp->snd_max; tp->t_flags |= TF_ACKNOW; tp->t_rtttime = 0; rack_cong_signal(tp, NULL, CC_RTO); out: return (retval); } static int rack_process_timers(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, uint8_t hpts_calling) { int32_t ret = 0; int32_t timers = (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK); if (timers == 0) { return (0); } if (tp->t_state == TCPS_LISTEN) { /* no timers on listen sockets */ if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) return (0); return (1); } if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) { uint32_t left; if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) { ret = -1; rack_log_to_processing(rack, cts, ret, 0); return (0); } if (hpts_calling == 0) { ret = -2; rack_log_to_processing(rack, cts, ret, 0); return (0); } /* * Ok our timer went off early and we are not paced false * alarm, go back to sleep. */ ret = -3; left = rack->r_ctl.rc_timer_exp - cts; tcp_hpts_insert(tp->t_inpcb, HPTS_MS_TO_SLOTS(left)); rack_log_to_processing(rack, cts, ret, left); rack->rc_last_pto_set = 0; return (1); } rack->rc_tmr_stopped = 0; rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_MASK; if (timers & PACE_TMR_DELACK) { ret = rack_timeout_delack(tp, rack, cts); } else if (timers & PACE_TMR_RACK) { ret = rack_timeout_rack(tp, rack, cts); } else if (timers & PACE_TMR_TLP) { ret = rack_timeout_tlp(tp, rack, cts); } else if (timers & PACE_TMR_RXT) { ret = rack_timeout_rxt(tp, rack, cts); } else if (timers & PACE_TMR_PERSIT) { ret = rack_timeout_persist(tp, rack, cts); } else if (timers & PACE_TMR_KEEP) { ret = rack_timeout_keepalive(tp, rack, cts); } rack_log_to_processing(rack, cts, ret, timers); return (ret); } static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line) { uint8_t hpts_removed = 0; if ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) && TSTMP_GEQ(cts, rack->r_ctl.rc_last_output_to)) { tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); hpts_removed = 1; } if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) { rack->rc_tmr_stopped = rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK; if (rack->rc_inp->inp_in_hpts && ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0)) { /* * Canceling timer's when we have no output being * paced. We also must remove ourselves from the * hpts. */ tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); hpts_removed = 1; } rack_log_to_cancel(rack, hpts_removed, line); rack->r_ctl.rc_hpts_flags &= ~(PACE_TMR_MASK); } } static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type) { return; } static int rack_stopall(struct tcpcb *tp) { struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; rack->t_timers_stopped = 1; return (0); } static void rack_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta) { return; } static int rack_timer_active(struct tcpcb *tp, uint32_t timer_type) { return (0); } static void rack_stop_all_timers(struct tcpcb *tp) { struct tcp_rack *rack; /* * Assure no timers are running. 
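 *
 * RACK runs everything from its single hpts-driven timer, so the
 * stock callout timers are suspended here rather than reprogrammed;
 * only the persist state has to be captured first so rc_in_persist
 * matches what the stack was already doing.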
 */
        if (tcp_timer_active(tp, TT_PERSIST)) {
                /* We enter in persists, set the flag appropriately */
                rack = (struct tcp_rack *)tp->t_fb_ptr;
                rack->rc_in_persist = 1;
        }
        tcp_timer_suspend(tp, TT_PERSIST);
        tcp_timer_suspend(tp, TT_REXMT);
        tcp_timer_suspend(tp, TT_KEEP);
        tcp_timer_suspend(tp, TT_DELACK);
}

static void
rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
    struct rack_sendmap *rsm, uint32_t ts)
{
        int32_t idx;

        rsm->r_rtr_cnt++;
        rsm->r_sndcnt++;
        if (rsm->r_rtr_cnt > RACK_NUM_OF_RETRANS) {
                rsm->r_rtr_cnt = RACK_NUM_OF_RETRANS;
                rsm->r_flags |= RACK_OVERMAX;
        }
        if ((rsm->r_rtr_cnt > 1) && (rack->r_tlp_running == 0)) {
                rack->r_ctl.rc_holes_rxt += (rsm->r_end - rsm->r_start);
                rsm->r_rtr_bytes += (rsm->r_end - rsm->r_start);
        }
        idx = rsm->r_rtr_cnt - 1;
        rsm->r_tim_lastsent[idx] = ts;
        if (rsm->r_flags & RACK_ACKED) {
                /* Probably MTU discovery messing with us */
                rsm->r_flags &= ~RACK_ACKED;
                rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start);
        }
        if (rsm->r_in_tmap) {
                TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext);
        }
        TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
        rsm->r_in_tmap = 1;
        if (rsm->r_flags & RACK_SACK_PASSED) {
                /* We have retransmitted due to the SACK pass */
                rsm->r_flags &= ~RACK_SACK_PASSED;
                rsm->r_flags |= RACK_WAS_SACKPASS;
        }
        /* Update memory for next rtr */
        rack->r_ctl.rc_next = TAILQ_NEXT(rsm, r_next);
}

static uint32_t
rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack,
    struct rack_sendmap *rsm, uint32_t ts, int32_t *lenp)
{
        /*
         * We (re-)transmitted starting at rsm->r_start for some length
         * (possibly less than r_end).
         */
        struct rack_sendmap *nrsm;
        uint32_t c_end;
        int32_t len;
        int32_t idx;

        len = *lenp;
        c_end = rsm->r_start + len;
        if (SEQ_GEQ(c_end, rsm->r_end)) {
                /*
                 * We retransmitted the whole piece, or more than the
                 * whole piece slopping into the next rsm.
                 */
                rack_update_rsm(tp, rack, rsm, ts);
                if (c_end == rsm->r_end) {
                        *lenp = 0;
                        return (0);
                } else {
                        int32_t act_len;

                        /* Hangs over the end, return what's left */
                        act_len = rsm->r_end - rsm->r_start;
                        *lenp = (len - act_len);
                        return (rsm->r_end);
                }
                /* We don't get out of this block. */
        }
        /*
         * Here we retransmitted less than the whole thing which means we
         * have to split this into what was transmitted and what was not.
         */
        nrsm = rack_alloc(rack);
        if (nrsm == NULL) {
                /*
                 * We can't get memory, so let's not proceed.
                 */
                *lenp = 0;
                return (0);
        }
        /*
         * So here we are going to take the original rsm and make it what
         * we retransmitted. nrsm will be the tail portion we did not
         * retransmit. For example, say the chunk was 1, 11 (10 bytes) and
         * we retransmitted 5 bytes, i.e. sequences 1 through 5. The
         * original piece shrinks to 1, 6 and the new piece will be 6, 11.
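         *
         * In r_start/r_end terms the split below has to leave the two
         * halves covering exactly the original range, e.g. [1, 11)
         * split at 6:
         *
         *	rsm:  [1, 6)	(the part we just retransmitted)
         *	nrsm: [6, 11)	(the tail we did not)
         *
         * so rsm->r_end == nrsm->r_start and no sequence space is lost
         * or double-counted.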
*/ nrsm->r_start = c_end; nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; nrsm->r_rtr_bytes = 0; rsm->r_end = c_end; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } rsm->r_flags &= (~RACK_HAS_FIN); rack_update_rsm(tp, rack, rsm, ts); *lenp = 0; return (0); } static void rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, uint32_t seq_out, uint8_t th_flags, int32_t err, uint32_t ts, uint8_t pass, struct rack_sendmap *hintrsm) { struct tcp_rack *rack; struct rack_sendmap *rsm, *nrsm; register uint32_t snd_max, snd_una; int32_t idx; /* * Add to the RACK log of packets in flight or retransmitted. If * there is a TS option we will use the TS echoed, if not we will * grab a TS. * * Retransmissions will increment the count and move the ts to its * proper place. Note that if options do not include TS's then we * won't be able to effectively use the ACK for an RTT on a retran. * * Notes about r_start and r_end. Lets consider a send starting at * sequence 1 for 10 bytes. In such an example the r_start would be * 1 (starting sequence) but the r_end would be r_start+len i.e. 11. * This means that r_end is actually the first sequence for the next * slot (11). * */ /* * If err is set what do we do XXXrrs? should we not add the thing? * -- i.e. return if err != 0 or should we pretend we sent it? -- * i.e. proceed with add ** do this for now. */ INP_WLOCK_ASSERT(tp->t_inpcb); if (err) /* * We don't log errors -- we could but snd_max does not * advance in this case either. */ return; if (th_flags & TH_RST) { /* * We don't log resets and we return immediately from * sending */ return; } rack = (struct tcp_rack *)tp->t_fb_ptr; snd_una = tp->snd_una; if (SEQ_LEQ((seq_out + len), snd_una)) { /* Are sending an old segment to induce an ack (keep-alive)? */ return; } if (SEQ_LT(seq_out, snd_una)) { /* huh? should we panic? */ uint32_t end; end = seq_out + len; seq_out = snd_una; len = end - seq_out; } snd_max = tp->snd_max; if (th_flags & (TH_SYN | TH_FIN)) { /* * The call to rack_log_output is made before bumping * snd_max. This means we can record one extra byte on a SYN * or FIN if seq_out is adding more on and a FIN is present * (and we are not resending). */ if (th_flags & TH_SYN) len++; if (th_flags & TH_FIN) len++; if (SEQ_LT(snd_max, tp->snd_nxt)) { /* * The add/update as not been done for the FIN/SYN * yet. */ snd_max = tp->snd_nxt; } } if (len == 0) { /* We don't log zero window probes */ return; } rack->r_ctl.rc_time_last_sent = ts; if (IN_RECOVERY(tp->t_flags)) { rack->r_ctl.rc_prr_out += len; } /* First question is it a retransmission? */ if (seq_out == snd_max) { again: rsm = rack_alloc(rack); if (rsm == NULL) { /* * Hmm out of memory and the tcb got destroyed while * we tried to wait. 
*/ #ifdef INVARIANTS panic("Out of memory when we should not be rack:%p", rack); #endif return; } if (th_flags & TH_FIN) { rsm->r_flags = RACK_HAS_FIN; } else { rsm->r_flags = 0; } rsm->r_tim_lastsent[0] = ts; rsm->r_rtr_cnt = 1; rsm->r_rtr_bytes = 0; rsm->r_start = seq_out; rsm->r_end = rsm->r_start + len; rsm->r_sndcnt = 0; TAILQ_INSERT_TAIL(&rack->r_ctl.rc_map, rsm, r_next); TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 1; return; } /* * If we reach here its a retransmission and we need to find it. */ more: if (hintrsm && (hintrsm->r_start == seq_out)) { rsm = hintrsm; hintrsm = NULL; } else if (rack->r_ctl.rc_next) { /* We have a hint from a previous run */ rsm = rack->r_ctl.rc_next; } else { /* No hints sorry */ rsm = NULL; } if ((rsm) && (rsm->r_start == seq_out)) { /* * We used rc_next or hintrsm to retransmit, hopefully the * likely case. */ seq_out = rack_update_entry(tp, rack, rsm, ts, &len); if (len == 0) { return; } else { goto more; } } /* Ok it was not the last pointer go through it the hard way. */ TAILQ_FOREACH(rsm, &rack->r_ctl.rc_map, r_next) { if (rsm->r_start == seq_out) { seq_out = rack_update_entry(tp, rack, rsm, ts, &len); rack->r_ctl.rc_next = TAILQ_NEXT(rsm, r_next); if (len == 0) { return; } else { continue; } } if (SEQ_GEQ(seq_out, rsm->r_start) && SEQ_LT(seq_out, rsm->r_end)) { /* Transmitted within this piece */ /* * Ok we must split off the front and then let the * update do the rest */ nrsm = rack_alloc(rack); if (nrsm == NULL) { #ifdef INVARIANTS panic("Ran out of memory that was preallocated? rack:%p", rack); #endif rack_update_rsm(tp, rack, rsm, ts); return; } /* * copy rsm to nrsm and then trim the front of rsm * to not include this part. */ nrsm->r_start = seq_out; nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; nrsm->r_rtr_bytes = 0; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } rsm->r_end = nrsm->r_start; TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } rsm->r_flags &= (~RACK_HAS_FIN); seq_out = rack_update_entry(tp, rack, nrsm, ts, &len); if (len == 0) { return; } } } /* * Hmm not found in map did they retransmit both old and on into the * new? */ if (seq_out == tp->snd_max) { goto again; } else if (SEQ_LT(seq_out, tp->snd_max)) { #ifdef INVARIANTS printf("seq_out:%u len:%d snd_una:%u snd_max:%u -- but rsm not found?\n", seq_out, len, tp->snd_una, tp->snd_max); printf("Starting Dump of all rack entries\n"); TAILQ_FOREACH(rsm, &rack->r_ctl.rc_map, r_next) { printf("rsm:%p start:%u end:%u\n", rsm, rsm->r_start, rsm->r_end); } printf("Dump complete\n"); panic("seq_out not found rack:%p tp:%p", rack, tp); #endif } else { #ifdef INVARIANTS /* * Hmm beyond sndmax? (only if we are using the new rtt-pack * flag) */ panic("seq_out:%u(%d) is beyond snd_max:%u tp:%p", seq_out, len, tp->snd_max, tp); #endif } } /* * Record one of the RTT updates from an ack into * our sample structure. 
 */
static void
tcp_rack_xmit_timer(struct tcp_rack *rack, int32_t rtt)
{
        if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY) ||
            (rack->r_ctl.rack_rs.rs_rtt_lowest > rtt)) {
                rack->r_ctl.rack_rs.rs_rtt_lowest = rtt;
        }
        if ((rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY) ||
            (rack->r_ctl.rack_rs.rs_rtt_highest < rtt)) {
                rack->r_ctl.rack_rs.rs_rtt_highest = rtt;
        }
        rack->r_ctl.rack_rs.rs_flags = RACK_RTT_VALID;
        rack->r_ctl.rack_rs.rs_rtt_tot += rtt;
        rack->r_ctl.rack_rs.rs_rtt_cnt++;
}

/*
 * Collect new round-trip time estimate
 * and update averages and current timeout.
 */
static void
tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
{
        int32_t delta;
        uint32_t o_srtt, o_var;
        int32_t rtt;

        if (rack->r_ctl.rack_rs.rs_flags & RACK_RTT_EMPTY)
                /* No valid sample */
                return;
        if (rack->r_ctl.rc_rate_sample_method == USE_RTT_LOW) {
                /* We are to use the lowest RTT seen in a single ack */
                rtt = rack->r_ctl.rack_rs.rs_rtt_lowest;
        } else if (rack->r_ctl.rc_rate_sample_method == USE_RTT_HIGH) {
                /* We are to use the highest RTT seen in a single ack */
                rtt = rack->r_ctl.rack_rs.rs_rtt_highest;
        } else if (rack->r_ctl.rc_rate_sample_method == USE_RTT_AVG) {
                /* We are to use the average RTT seen in a single ack */
                rtt = (int32_t)(rack->r_ctl.rack_rs.rs_rtt_tot /
                    (uint64_t)rack->r_ctl.rack_rs.rs_rtt_cnt);
        } else {
#ifdef INVARIANTS
                panic("Unknown rtt variant %d", rack->r_ctl.rc_rate_sample_method);
#endif
                return;
        }
        if (rtt == 0)
                rtt = 1;
        rack_log_rtt_sample(rack, rtt);
        o_srtt = tp->t_srtt;
        o_var = tp->t_rttvar;
        rack = (struct tcp_rack *)tp->t_fb_ptr;
        if (tp->t_srtt != 0) {
                /*
                 * srtt is stored as fixed point with 5 bits after the
                 * binary point (i.e., scaled by 32). The following magic
                 * is equivalent to the smoothing algorithm in rfc793 with
                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
                 * point). Adjust rtt to origin 0.
                 */
                delta = ((rtt - 1) << TCP_DELTA_SHIFT)
                    - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
                tp->t_srtt += delta;
                if (tp->t_srtt <= 0)
                        tp->t_srtt = 1;
                /*
                 * We accumulate a smoothed rtt variance (actually, a
                 * smoothed mean difference), then set the retransmit timer
                 * to smoothed rtt + 4 times the smoothed variance. rttvar
                 * is stored as fixed point with 4 bits after the binary
                 * point (scaled by 16). The following is equivalent to
                 * rfc793 smoothing with an alpha of .75 (rttvar =
                 * rttvar*3/4 + |delta| / 4). This replaces rfc793's
                 * wired-in beta.
                 */
                if (delta < 0)
                        delta = -delta;
                delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
                tp->t_rttvar += delta;
                if (tp->t_rttvar <= 0)
                        tp->t_rttvar = 1;
                if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
                        tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
        } else {
                /*
                 * No rtt measurement yet - use the unsmoothed rtt. Set the
                 * variance to half the rtt (so our first retransmit happens
                 * at 3*rtt).
                 */
                tp->t_srtt = rtt << TCP_RTT_SHIFT;
                tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
                tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
        }
        TCPSTAT_INC(tcps_rttupdated);
        rack_log_rtt_upd(tp, rack, rtt, o_srtt, o_var);
        tp->t_rttupdated++;
#ifdef NETFLIX_STATS
        stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
#endif
        tp->t_rxtshift = 0;
        /*
         * The retransmit should happen at rtt + 4 * rttvar. Because of the
         * way we do the smoothing, srtt and rttvar will each average +1/2
         * tick of bias. When we compute the retransmit timer, we want 1/2
         * tick of rounding and 1 extra tick because of +-1/2 tick
         * uncertainty in the firing of the timer. The bias will give us
         * exactly the 1.5 tick we need.
But, because the bias is * statistical, we have to test that we don't drop below the minimum * feasible timer (which is 2 ticks). */ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), max(MSEC_2_TICKS(rack_rto_min), rtt + 2), MSEC_2_TICKS(rack_rto_max)); tp->t_softerror = 0; } static void rack_earlier_retran(struct tcpcb *tp, struct rack_sendmap *rsm, uint32_t t, uint32_t cts) { /* * For this RSM, we acknowledged the data from a previous * transmission, not the last one we made. This means we did a false * retransmit. */ struct tcp_rack *rack; if (rsm->r_flags & RACK_HAS_FIN) { /* * The sending of the FIN often is multiple sent when we * have everything outstanding ack'd. We ignore this case * since its over now. */ return; } if (rsm->r_flags & RACK_TLP) { /* * We expect TLP's to have this occur. */ return; } rack = (struct tcp_rack *)tp->t_fb_ptr; /* should we undo cc changes and exit recovery? */ if (IN_RECOVERY(tp->t_flags)) { if (rack->r_ctl.rc_rsm_start == rsm->r_start) { /* * Undo what we ratched down and exit recovery if * possible */ EXIT_RECOVERY(tp->t_flags); tp->snd_recover = tp->snd_una; if (rack->r_ctl.rc_cwnd_at > tp->snd_cwnd) tp->snd_cwnd = rack->r_ctl.rc_cwnd_at; if (rack->r_ctl.rc_ssthresh_at > tp->snd_ssthresh) tp->snd_ssthresh = rack->r_ctl.rc_ssthresh_at; } } if (rsm->r_flags & RACK_WAS_SACKPASS) { /* * We retransmitted based on a sack and the earlier * retransmission ack'd it - re-ordering is occuring. */ counter_u64_add(rack_reorder_seen, 1); rack->r_ctl.rc_reorder_ts = cts; } counter_u64_add(rack_badfr, 1); counter_u64_add(rack_badfr_bytes, (rsm->r_end - rsm->r_start)); } static int rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type) { int32_t i; uint32_t t; if (rsm->r_flags & RACK_ACKED) /* Already done */ return (0); if ((rsm->r_rtr_cnt == 1) || ((ack_type == CUM_ACKED) && (to->to_flags & TOF_TS) && (to->to_tsecr) && (rsm->r_tim_lastsent[rsm->r_rtr_cnt - 1] == to->to_tsecr)) ) { /* * We will only find a matching timestamp if its cum-acked. * But if its only one retransmission its for-sure matching * :-) */ t = cts - rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]; if ((int)t <= 0) t = 1; if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; if (!rack->r_ctl.rc_rack_min_rtt || SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) { rack->r_ctl.rc_rack_min_rtt = t; if (rack->r_ctl.rc_rack_min_rtt == 0) { rack->r_ctl.rc_rack_min_rtt = 1; } } tcp_rack_xmit_timer(rack, TCP_TS_TO_TICKS(t) + 1); if ((rsm->r_flags & RACK_TLP) && (!IN_RECOVERY(tp->t_flags))) { /* Segment was a TLP and our retrans matched */ if (rack->r_ctl.rc_tlp_cwnd_reduce) { rack->r_ctl.rc_rsm_start = tp->snd_max; rack->r_ctl.rc_cwnd_at = tp->snd_cwnd; rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; rack_cong_signal(tp, NULL, CC_NDUPACK); /* * When we enter recovery we need to assure * we send one packet. */ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; } else rack->r_ctl.rc_tlp_rtx_out = 0; } if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) { /* New more recent rack_tmit_time */ rack->r_ctl.rc_rack_tmit_time = rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]; rack->rc_rack_rtt = t; } return (1); } /* * We clear the soft/rxtshift since we got an ack. * There is no assurance we will call the commit() function * so we need to clear these to avoid incorrect handling. 
*/ tp->t_rxtshift = 0; tp->t_softerror = 0; if ((to->to_flags & TOF_TS) && (ack_type == CUM_ACKED) && (to->to_tsecr) && ((rsm->r_flags & (RACK_DEFERRED | RACK_OVERMAX)) == 0)) { /* * Now which timestamp does it match? In this block the ACK * must be coming from a previous transmission. */ for (i = 0; i < rsm->r_rtr_cnt; i++) { if (rsm->r_tim_lastsent[i] == to->to_tsecr) { t = cts - rsm->r_tim_lastsent[i]; if ((int)t <= 0) t = 1; if ((i + 1) < rsm->r_rtr_cnt) { /* Likely */ rack_earlier_retran(tp, rsm, t, cts); } if (!tp->t_rttlow || tp->t_rttlow > t) tp->t_rttlow = t; if (!rack->r_ctl.rc_rack_min_rtt || SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) { rack->r_ctl.rc_rack_min_rtt = t; if (rack->r_ctl.rc_rack_min_rtt == 0) { rack->r_ctl.rc_rack_min_rtt = 1; } } /* * Note the following calls to * tcp_rack_xmit_timer() are being commented * out for now. They give us no more accuracy * and often lead to a wrong choice. We have * enough samples that have not been * retransmitted. I leave the commented out * code in here in case in the future we * decide to add it back (though I can't forsee * doing that). That way we will easily see * where they need to be placed. */ if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) { /* New more recent rack_tmit_time */ rack->r_ctl.rc_rack_tmit_time = rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)]; rack->rc_rack_rtt = t; } return (1); } } goto ts_not_found; } else { /* * Ok its a SACK block that we retransmitted. or a windows * machine without timestamps. We can tell nothing from the * time-stamp since its not there or the time the peer last * recieved a segment that moved forward its cum-ack point. */ ts_not_found: i = rsm->r_rtr_cnt - 1; t = cts - rsm->r_tim_lastsent[i]; if ((int)t <= 0) t = 1; if (rack->r_ctl.rc_rack_min_rtt && SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) { /* * We retransmitted and the ack came back in less * than the smallest rtt we have observed. We most * likey did an improper retransmit as outlined in * 4.2 Step 3 point 2 in the rack-draft. */ i = rsm->r_rtr_cnt - 2; t = cts - rsm->r_tim_lastsent[i]; rack_earlier_retran(tp, rsm, t, cts); } else if (rack->r_ctl.rc_rack_min_rtt) { /* * We retransmitted it and the retransmit did the * job. */ if (!rack->r_ctl.rc_rack_min_rtt || SEQ_LT(t, rack->r_ctl.rc_rack_min_rtt)) { rack->r_ctl.rc_rack_min_rtt = t; if (rack->r_ctl.rc_rack_min_rtt == 0) { rack->r_ctl.rc_rack_min_rtt = 1; } } if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, rsm->r_tim_lastsent[i])) { /* New more recent rack_tmit_time */ rack->r_ctl.rc_rack_tmit_time = rsm->r_tim_lastsent[i]; rack->rc_rack_rtt = t; } return (1); } } return (0); } /* * Mark the SACK_PASSED flag on all entries prior to rsm send wise. */ static void rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm) { struct rack_sendmap *nrsm; uint32_t ts; int32_t idx; idx = rsm->r_rtr_cnt - 1; ts = rsm->r_tim_lastsent[idx]; nrsm = rsm; TAILQ_FOREACH_REVERSE_FROM(nrsm, &rack->r_ctl.rc_tmap, rack_head, r_tnext) { if (nrsm == rsm) { /* Skip orginal segment he is acked */ continue; } if (nrsm->r_flags & RACK_ACKED) { /* Skip ack'd segments */ continue; } idx = nrsm->r_rtr_cnt - 1; if (ts == nrsm->r_tim_lastsent[idx]) { /* * For this case lets use seq no, if we sent in a * big block (TSO) we would have a bunch of segments * sent at the same time. * * We would only get a report if its SEQ is earlier. * If we have done multiple retransmits the times * would not be equal. 
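 *
 * Illustration (made-up sequence numbers): a TSO burst records
 * [100, 1548) and [1548, 2996) with identical r_tim_lastsent[]
 * entries. If only [1548, 2996) comes back SACKed, the send times
 * tie, so it is the SEQ_LT() check below that lets us conclude the
 * earlier block was passed over.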
*/ if (SEQ_LT(nrsm->r_start, rsm->r_start)) { nrsm->r_flags |= RACK_SACK_PASSED; nrsm->r_flags &= ~RACK_WAS_SACKPASS; } } else { /* * Here they were sent at different times, not a big * block. Since we transmitted this one later and * see it sack'd then this must also be missing (or * we would have gotten a sack block for it) */ nrsm->r_flags |= RACK_SACK_PASSED; nrsm->r_flags &= ~RACK_WAS_SACKPASS; } } } static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts) { int32_t idx; int32_t times = 0; uint32_t start, end, changed = 0; struct rack_sendmap *rsm, *nrsm; int32_t used_ref = 1; start = sack->start; end = sack->end; rsm = *prsm; if (rsm && SEQ_LT(start, rsm->r_start)) { TAILQ_FOREACH_REVERSE_FROM(rsm, &rack->r_ctl.rc_map, rack_head, r_next) { if (SEQ_GEQ(start, rsm->r_start) && SEQ_LT(start, rsm->r_end)) { goto do_rest_ofb; } } } if (rsm == NULL) { start_at_beginning: rsm = NULL; used_ref = 0; } /* First lets locate the block where this guy is */ TAILQ_FOREACH_FROM(rsm, &rack->r_ctl.rc_map, r_next) { if (SEQ_GEQ(start, rsm->r_start) && SEQ_LT(start, rsm->r_end)) { break; } } do_rest_ofb: if (rsm == NULL) { /* * This happens when we get duplicate sack blocks with the * same end. For example SACK 4: 100 SACK 3: 100 The sort * will not change there location so we would just start at * the end of the first one and get lost. */ if (tp->t_flags & TF_SENTFIN) { /* * Check to see if we have not logged the FIN that * went out. */ nrsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_map, rack_sendmap, r_next); if (nrsm && (nrsm->r_end + 1) == tp->snd_max) { /* * Ok we did not get the FIN logged. */ nrsm->r_end++; rsm = nrsm; goto do_rest_ofb; } } if (times == 1) { #ifdef INVARIANTS panic("tp:%p rack:%p sack:%p to:%p prsm:%p", tp, rack, sack, to, prsm); #else goto out; #endif } times++; counter_u64_add(rack_sack_proc_restart, 1); goto start_at_beginning; } /* Ok we have an ACK for some piece of rsm */ if (rsm->r_start != start) { /* * Need to split this in two pieces the before and after. */ nrsm = rack_alloc(rack); if (nrsm == NULL) { /* * failed XXXrrs what can we do but loose the sack * info? */ goto out; } nrsm->r_start = start; nrsm->r_rtr_bytes = 0; nrsm->r_end = rsm->r_end; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } rsm->r_end = nrsm->r_start; TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } rsm->r_flags &= (~RACK_HAS_FIN); rsm = nrsm; } if (SEQ_GEQ(end, rsm->r_end)) { /* * The end of this block is either beyond this guy or right * at this guy. */ if ((rsm->r_flags & RACK_ACKED) == 0) { rack_update_rtt(tp, rack, rsm, to, cts, SACKED); changed += (rsm->r_end - rsm->r_start); rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); rack_log_sack_passed(tp, rack, rsm); /* Is Reordering occuring? 
*/ if (rsm->r_flags & RACK_SACK_PASSED) { counter_u64_add(rack_reorder_seen, 1); rack->r_ctl.rc_reorder_ts = cts; } rsm->r_flags |= RACK_ACKED; rsm->r_flags &= ~RACK_TLP; if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 0; } } if (end == rsm->r_end) { /* This block only - done */ goto out; } /* There is more not coverend by this rsm move on */ start = rsm->r_end; nrsm = TAILQ_NEXT(rsm, r_next); rsm = nrsm; times = 0; goto do_rest_ofb; } /* Ok we need to split off this one at the tail */ nrsm = rack_alloc(rack); if (nrsm == NULL) { /* failed rrs what can we do but loose the sack info? */ goto out; } /* Clone it */ nrsm->r_start = end; nrsm->r_end = rsm->r_end; nrsm->r_rtr_bytes = 0; nrsm->r_rtr_cnt = rsm->r_rtr_cnt; nrsm->r_flags = rsm->r_flags; nrsm->r_sndcnt = rsm->r_sndcnt; for (idx = 0; idx < nrsm->r_rtr_cnt; idx++) { nrsm->r_tim_lastsent[idx] = rsm->r_tim_lastsent[idx]; } /* The sack block does not cover this guy fully */ rsm->r_flags &= (~RACK_HAS_FIN); rsm->r_end = end; TAILQ_INSERT_AFTER(&rack->r_ctl.rc_map, rsm, nrsm, r_next); if (rsm->r_in_tmap) { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, rsm, nrsm, r_tnext); nrsm->r_in_tmap = 1; } if (rsm->r_flags & RACK_ACKED) { /* Been here done that */ goto out; } rack_update_rtt(tp, rack, rsm, to, cts, SACKED); changed += (rsm->r_end - rsm->r_start); rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); rack_log_sack_passed(tp, rack, rsm); /* Is Reordering occuring? */ if (rsm->r_flags & RACK_SACK_PASSED) { counter_u64_add(rack_reorder_seen, 1); rack->r_ctl.rc_reorder_ts = cts; } rsm->r_flags |= RACK_ACKED; rsm->r_flags &= ~RACK_TLP; if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 0; } out: if (used_ref == 0) { counter_u64_add(rack_sack_proc_all, 1); } else { counter_u64_add(rack_sack_proc_short, 1); } /* Save off where we last were */ if (rsm) rack->r_ctl.rc_sacklast = TAILQ_NEXT(rsm, r_next); else rack->r_ctl.rc_sacklast = NULL; *prsm = rsm; return (changed); } static void inline rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ack) { struct rack_sendmap *tmap; tmap = NULL; while (rsm && (rsm->r_flags & RACK_ACKED)) { /* Its no longer sacked, mark it so */ rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start); #ifdef INVARIANTS if (rsm->r_in_tmap) { panic("rack:%p rsm:%p flags:0x%x in tmap?", rack, rsm, rsm->r_flags); } #endif rsm->r_flags &= ~(RACK_ACKED|RACK_SACK_PASSED|RACK_WAS_SACKPASS); /* Rebuild it into our tmap */ if (tmap == NULL) { TAILQ_INSERT_HEAD(&rack->r_ctl.rc_tmap, rsm, r_tnext); tmap = rsm; } else { TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, tmap, rsm, r_tnext); tmap = rsm; } tmap->r_in_tmap = 1; rsm = TAILQ_NEXT(rsm, r_next); } /* * Now lets possibly clear the sack filter so we start * recognizing sacks that cover this area. 
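 *
 * (If the filter kept its state across the renege, blocks the peer
 * re-advertises would look like duplicates and be dropped, and the
 * scoreboard we just tore down could never be rebuilt.)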
*/ if (rack_use_sack_filter) sack_filter_clear(&rack->r_ctl.rack_sf, th_ack); } static void rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th) { uint32_t changed, last_seq, entered_recovery = 0; struct tcp_rack *rack; struct rack_sendmap *rsm; struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1]; register uint32_t th_ack; int32_t i, j, k, num_sack_blks = 0; uint32_t cts, acked, ack_point, sack_changed = 0; INP_WLOCK_ASSERT(tp->t_inpcb); if (th->th_flags & TH_RST) { /* We don't log resets */ return; } rack = (struct tcp_rack *)tp->t_fb_ptr; cts = tcp_ts_getticks(); rsm = TAILQ_FIRST(&rack->r_ctl.rc_map); changed = 0; th_ack = th->th_ack; if (SEQ_GT(th_ack, tp->snd_una)) { rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__); tp->t_acktime = ticks; } if (rsm && SEQ_GT(th_ack, rsm->r_start)) changed = th_ack - rsm->r_start; if (changed) { /* * The ACK point is advancing to th_ack, we must drop off * the packets in the rack log and calculate any eligble * RTT's. */ rack->r_wanted_output++; more: rsm = TAILQ_FIRST(&rack->r_ctl.rc_map); if (rsm == NULL) { if ((th_ack - 1) == tp->iss) { /* * For the SYN incoming case we will not * have called tcp_output for the sending of * the SYN, so there will be no map. All * other cases should probably be a panic. */ goto proc_sack; } if (tp->t_flags & TF_SENTFIN) { /* if we send a FIN we will not hav a map */ goto proc_sack; } #ifdef INVARIANTS panic("No rack map tp:%p for th:%p state:%d rack:%p snd_una:%u snd_max:%u snd_nxt:%u chg:%d\n", tp, th, tp->t_state, rack, tp->snd_una, tp->snd_max, tp->snd_nxt, changed); #endif goto proc_sack; } if (SEQ_LT(th_ack, rsm->r_start)) { /* Huh map is missing this */ #ifdef INVARIANTS printf("Rack map starts at r_start:%u for th_ack:%u huh? ts:%d rs:%d\n", rsm->r_start, th_ack, tp->t_state, rack->r_state); #endif goto proc_sack; } rack_update_rtt(tp, rack, rsm, to, cts, CUM_ACKED); /* Now do we consume the whole thing? */ if (SEQ_GEQ(th_ack, rsm->r_end)) { /* Its all consumed. */ uint32_t left; rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; rsm->r_rtr_bytes = 0; TAILQ_REMOVE(&rack->r_ctl.rc_map, rsm, r_next); if (rsm->r_in_tmap) { TAILQ_REMOVE(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 0; } if (rack->r_ctl.rc_next == rsm) { /* scoot along the marker */ rack->r_ctl.rc_next = TAILQ_FIRST(&rack->r_ctl.rc_map); } if (rsm->r_flags & RACK_ACKED) { /* * It was acked on the scoreboard -- remove * it from total */ rack->r_ctl.rc_sacked -= (rsm->r_end - rsm->r_start); } else if (rsm->r_flags & RACK_SACK_PASSED) { /* * There are acked segments ACKED on the * scoreboard further up. We are seeing * reordering. */ counter_u64_add(rack_reorder_seen, 1); rsm->r_flags |= RACK_ACKED; rack->r_ctl.rc_reorder_ts = cts; } left = th_ack - rsm->r_end; if (rsm->r_rtr_cnt > 1) { /* * Technically we should make r_rtr_cnt be * monotonicly increasing and just mod it to * the timestamp it is replacing.. that way * we would have the last 3 retransmits. Now * rc_loss_count will be wrong if we * retransmit something more than 2 times in * recovery :( */ rack->r_ctl.rc_loss_count += (rsm->r_rtr_cnt - 1); } /* Free back to zone */ rack_free(rack, rsm); if (left) { goto more; } goto proc_sack; } if (rsm->r_flags & RACK_ACKED) { /* * It was acked on the scoreboard -- remove it from * total for the part being cum-acked. 
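 *
 * e.g. (illustrative numbers): if this rsm covers [100, 1100) and was
 * previously sacked, a cum-ack of 600 takes the 500 bytes (600 - 100)
 * back out of rc_sacked and the entry shrinks to [600, 1100), still
 * marked RACK_ACKED.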
*/ rack->r_ctl.rc_sacked -= (th_ack - rsm->r_start); } rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; rsm->r_rtr_bytes = 0; rsm->r_start = th_ack; } proc_sack: /* Check for reneging */ rsm = TAILQ_FIRST(&rack->r_ctl.rc_map); if (rsm && (rsm->r_flags & RACK_ACKED) && (th_ack == rsm->r_start)) { /* * The peer has moved snd_una up to * the edge of this send, i.e. one * that it had previously acked. The only * way that can be true if the peer threw * away data (space issues) that it had * previously sacked (else it would have * given us snd_una up to (rsm->r_end). * We need to undo the acked markings here. * * Note we have to look to make sure th_ack is * our rsm->r_start in case we get an old ack * where th_ack is behind snd_una. */ rack_peer_reneges(rack, rsm, th->th_ack); } if ((to->to_flags & TOF_SACK) == 0) { /* We are done nothing left to log */ goto out; } rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_map, rack_sendmap, r_next); if (rsm) { last_seq = rsm->r_end; } else { last_seq = tp->snd_max; } /* Sack block processing */ if (SEQ_GT(th_ack, tp->snd_una)) ack_point = th_ack; else ack_point = tp->snd_una; for (i = 0; i < to->to_nsacks; i++) { bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack)); sack.start = ntohl(sack.start); sack.end = ntohl(sack.end); if (SEQ_GT(sack.end, sack.start) && SEQ_GT(sack.start, ack_point) && SEQ_LT(sack.start, tp->snd_max) && SEQ_GT(sack.end, ack_point) && SEQ_LEQ(sack.end, tp->snd_max)) { if ((rack->r_ctl.rc_num_maps_alloced > rack_sack_block_limit) && (SEQ_LT(sack.end, last_seq)) && ((sack.end - sack.start) < (tp->t_maxseg / 8))) { /* * Not the last piece and its smaller than * 1/8th of a MSS. We ignore this. */ counter_u64_add(rack_runt_sacks, 1); continue; } sack_blocks[num_sack_blks] = sack; num_sack_blks++; #ifdef NETFLIX_STATS } else if (SEQ_LEQ(sack.start, th_ack) && SEQ_LEQ(sack.end, th_ack)) { /* * Its a D-SACK block. */ tcp_record_dsack(sack.start, sack.end); #endif } } if (num_sack_blks == 0) goto out; /* * Sort the SACK blocks so we can update the rack scoreboard with * just one pass. */ if (rack_use_sack_filter) { num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks, num_sack_blks, th->th_ack); } if (num_sack_blks < 2) { goto do_sack_work; } /* Sort the sacks */ for (i = 0; i < num_sack_blks; i++) { for (j = i + 1; j < num_sack_blks; j++) { if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { sack = sack_blocks[i]; sack_blocks[i] = sack_blocks[j]; sack_blocks[j] = sack; } } } /* * Now are any of the sack block ends the same (yes some * implememtations send these)? */ again: if (num_sack_blks > 1) { for (i = 0; i < num_sack_blks; i++) { for (j = i + 1; j < num_sack_blks; j++) { if (sack_blocks[i].end == sack_blocks[j].end) { /* * Ok these two have the same end we * want the smallest end and then * throw away the larger and start * again. 
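 *
 * Worked example (hypothetical blocks): given (300, 500) and
 * (100, 500) the ends match, so the first entry widens to (100, 500),
 * the second is collapsed out, and we rescan in case yet another
 * entry shares an end.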
*/ if (SEQ_LT(sack_blocks[j].start, sack_blocks[i].start)) { /* * The second block covers * more area use that */ sack_blocks[i].start = sack_blocks[j].start; } /* * Now collapse out the dup-sack and * lower the count */ for (k = (j + 1); k < num_sack_blks; k++) { sack_blocks[j].start = sack_blocks[k].start; sack_blocks[j].end = sack_blocks[k].end; j++; } num_sack_blks--; goto again; } } } } do_sack_work: rsm = rack->r_ctl.rc_sacklast; for (i = 0; i < num_sack_blks; i++) { acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts); if (acked) { rack->r_wanted_output++; changed += acked; sack_changed += acked; } } out: if (changed) { /* Something changed cancel the rack timer */ rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); } if ((sack_changed) && (!IN_RECOVERY(tp->t_flags))) { /* * Ok we have a high probability that we need to go in to * recovery since we have data sack'd */ struct rack_sendmap *rsm; uint32_t tsused; tsused = tcp_ts_getticks(); rsm = tcp_rack_output(tp, rack, tsused); if (rsm) { /* Enter recovery */ rack->r_ctl.rc_rsm_start = rsm->r_start; rack->r_ctl.rc_cwnd_at = tp->snd_cwnd; rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; entered_recovery = 1; rack_cong_signal(tp, NULL, CC_NDUPACK); /* * When we enter recovery we need to assure we send * one packet. */ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; rack->r_timer_override = 1; } } if (IN_RECOVERY(tp->t_flags) && (entered_recovery == 0)) { /* Deal with changed an PRR here (in recovery only) */ uint32_t pipe, snd_una; rack->r_ctl.rc_prr_delivered += changed; /* Compute prr_sndcnt */ if (SEQ_GT(tp->snd_una, th_ack)) { snd_una = tp->snd_una; } else { snd_una = th_ack; } pipe = ((tp->snd_max - snd_una) - rack->r_ctl.rc_sacked) + rack->r_ctl.rc_holes_rxt; if (pipe > tp->snd_ssthresh) { long sndcnt; sndcnt = rack->r_ctl.rc_prr_delivered * tp->snd_ssthresh; if (rack->r_ctl.rc_prr_recovery_fs > 0) sndcnt /= (long)rack->r_ctl.rc_prr_recovery_fs; else { rack->r_ctl.rc_prr_sndcnt = 0; sndcnt = 0; } sndcnt++; if (sndcnt > (long)rack->r_ctl.rc_prr_out) sndcnt -= rack->r_ctl.rc_prr_out; else sndcnt = 0; rack->r_ctl.rc_prr_sndcnt = sndcnt; } else { uint32_t limit; if (rack->r_ctl.rc_prr_delivered > rack->r_ctl.rc_prr_out) limit = (rack->r_ctl.rc_prr_delivered - rack->r_ctl.rc_prr_out); else limit = 0; if (changed > limit) limit = changed; limit += tp->t_maxseg; if (tp->snd_ssthresh > pipe) { rack->r_ctl.rc_prr_sndcnt = min((tp->snd_ssthresh - pipe), limit); } else { rack->r_ctl.rc_prr_sndcnt = min(0, limit); } } if (rack->r_ctl.rc_prr_sndcnt >= tp->t_maxseg) { rack->r_timer_override = 1; } } } /* * Return value of 1, we do not need to call rack_process_data(). * return value of 0, rack_process_data can be called. * For ret_val if its 0 the TCP is locked, if its non-zero * its unlocked and probably unsafe to touch the TCB. 
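 *
 * (On the PRR arithmetic just above, with illustrative numbers: for
 * ssthresh 15000, rc_prr_recovery_fs 30000 and 3000 bytes newly
 * delivered, sndcnt works out to 3000 * 15000 / 30000 + 1 = 1501
 * bytes, less whatever recovery has already sent -- i.e. roughly half
 * of each delivered chunk is re-injected while pipe > ssthresh.)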
*/ static int rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t * ti_locked, uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val) { int32_t ourfinisacked = 0; int32_t nsegs, acked_amount; int32_t acked; struct mbuf *mfree; struct tcp_rack *rack; int32_t recovery = 0; rack = (struct tcp_rack *)tp->t_fb_ptr; if (SEQ_GT(th->th_ack, tp->snd_max)) { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, ret_val); return (1); } if (SEQ_GEQ(th->th_ack, tp->snd_una) || to->to_nsacks) { rack_log_ack(tp, to, th); } if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) { /* * Old ack, behind (or duplicate to) the last one rcv'd * Note: Should mark reordering is occuring! We should also * look for sack blocks arriving e.g. ack 1, 4-4 then ack 1, * 3-3, 4-4 would be reording. As well as ack 1, 3-3 ack 3 */ return (0); } /* * If we reach this point, ACK is not a duplicate, i.e., it ACKs * something we sent. */ if (tp->t_flags & TF_NEEDSYN) { /* * T/TCP: Connection was half-synchronized, and our SYN has * been ACK'd (so connection is now fully synchronized). Go * to non-starred state, increment snd_una for ACK of SYN, * and check if we can do window scaling. */ tp->t_flags &= ~TF_NEEDSYN; tp->snd_una++; /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; /* Send window already scaled. */ } } nsegs = max(1, m->m_pkthdr.lro_nsegs); INP_WLOCK_ASSERT(tp->t_inpcb); acked = BYTES_THIS_ACK(tp, th); TCPSTAT_ADD(tcps_rcvackpack, nsegs); TCPSTAT_ADD(tcps_rcvackbyte, acked); /* * If we just performed our first retransmit, and the ACK arrives * within our recovery window, then it was a mistake to do the * retransmit in the first place. Recover our original cwnd and * ssthresh, and proceed to transmit where we left off. */ if (tp->t_flags & TF_PREVVALID) { tp->t_flags &= ~TF_PREVVALID; if (tp->t_rxtshift == 1 && (int)(ticks - tp->t_badrxtwin) < 0) rack_cong_signal(tp, th, CC_RTO_ERR); } /* * If we have a timestamp reply, update smoothed round trip time. If * no timestamp is present but transmit timer is running and timed * sequence number was acked, update smoothed round trip time. Since * we now have an rtt measurement, cancel the timer backoff (cf., * Phil Karn's retransmit alg.). Recompute the initial retransmit * timer. * * Some boxes send broken timestamp replies during the SYN+ACK * phase, ignore timestamps of 0 or we could calculate a huge RTT * and blow up the retransmit timer. */ /* * If all outstanding data is acked, stop retransmit timer and * remember to restart (more output or persist). If there is more * data to be acked, restart retransmit timer, using current * (possibly backed-off) value. */ if (th->th_ack == tp->snd_max) { rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); rack->r_wanted_output++; } /* * If no data (only SYN) was ACK'd, skip rest of ACK processing. */ if (acked == 0) { if (ofia) *ofia = ourfinisacked; return (0); } if (rack->r_ctl.rc_early_recovery) { if (IN_FASTRECOVERY(tp->t_flags)) { if (SEQ_LT(th->th_ack, tp->snd_recover)) { tcp_rack_partialack(tp, th); } else { rack_post_recovery(tp, th); recovery = 1; } } } /* * Let the congestion control algorithm update congestion control * related information. This typically means increasing the * congestion window. 
*/ rack_ack_received(tp, rack, th, nsegs, CC_ACK, recovery); SOCKBUF_LOCK(&so->so_snd); acked_amount = min(acked, (int)sbavail(&so->so_snd)); tp->snd_wnd -= acked_amount; mfree = sbcut_locked(&so->so_snd, acked_amount); if ((sbused(&so->so_snd) == 0) && (acked > acked_amount) && (tp->t_state >= TCPS_FIN_WAIT_1)) { ourfinisacked = 1; } /* NB: sowwakeup_locked() does an implicit unlock. */ sowwakeup_locked(so); m_freem(mfree); if (rack->r_ctl.rc_early_recovery == 0) { if (IN_FASTRECOVERY(tp->t_flags)) { if (SEQ_LT(th->th_ack, tp->snd_recover)) { tcp_rack_partialack(tp, th); } else { rack_post_recovery(tp, th); } } } tp->snd_una = th->th_ack; if (SEQ_GT(tp->snd_una, tp->snd_recover)) tp->snd_recover = tp->snd_una; if (SEQ_LT(tp->snd_nxt, tp->snd_una)) { tp->snd_nxt = tp->snd_una; } if (tp->snd_una == tp->snd_max) { /* Nothing left outstanding */ rack_log_progress_event(rack, tp, 0, PROGRESS_CLEAR, __LINE__); tp->t_acktime = 0; rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); /* Set need output so persist might get set */ rack->r_wanted_output++; if (rack_use_sack_filter) sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); if ((tp->t_state >= TCPS_FIN_WAIT_1) && (sbavail(&so->so_snd) == 0) && (tp->t_flags2 & TF2_DROP_AF_DATA)) { /* * The socket was gone and the * peer sent data, time to * reset him. */ *ret_val = 1; tp = tcp_close(tp); rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_UNLIMITED, tlen); return (1); } } if (ofia) *ofia = ourfinisacked; return (0); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { /* * Update window information. Don't look at window if no ACK: TAC's * send garbage on first SYN. */ int32_t nsegs; #ifdef TCP_RFC7413 int32_t tfo_syn; #else #define tfo_syn (FALSE) #endif struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; INP_WLOCK_ASSERT(tp->t_inpcb); nsegs = max(1, m->m_pkthdr.lro_nsegs); if ((thflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) || (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ if (tlen == 0 && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) TCPSTAT_INC(tcps_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; rack->r_wanted_output++; } else if (thflags & TH_ACK) { if ((tp->snd_wl2 == th->th_ack) && (tiwin < tp->snd_wnd)) { tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; } } /* Was persist timer active and now we have window space? */ if ((rack->rc_in_persist != 0) && tp->snd_wnd) { rack_exit_persist(tp, rack); tp->snd_nxt = tp->snd_max; /* Make sure we output to start the timer */ rack->r_wanted_output++; } /* * Process segments with URG. */ if ((thflags & TH_URG) && th->th_urp && TCPS_HAVERCVDFIN(tp->t_state) == 0) { /* * This is a kludge, but if we receive and accept random * urgent pointers, we'll crash in soreceive. It's hard to * imagine someone actually wanting to send this much urgent * data. 
*/ SOCKBUF_LOCK(&so->so_rcv); if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ goto dodata; /* XXX */ } /* * If this segment advances the known urgent pointer, then * mark the data stream. This should not happen in * CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since a * FIN has been received from the remote side. In these * states we ignore the URG. * * According to RFC961 (Assigned Protocols), the urgent * pointer points to the last octet of urgent data. We * continue, however, to consider it to indicate the first * octet of data past the urgent section as the original * spec states (in one of two places). */ if (SEQ_GT(th->th_seq + th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; so->so_oobmark = sbavail(&so->so_rcv) + (tp->rcv_up - tp->rcv_nxt) - 1; if (so->so_oobmark == 0) so->so_rcv.sb_state |= SBS_RCVATMARK; sohasoutofband(so); tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); } SOCKBUF_UNLOCK(&so->so_rcv); /* * Remove out of band data so doesn't get presented to user. * This can happen independent of advancing the URG pointer, * but if two URG's are pending at once, some out-of-band * data may creep in... ick. */ if (th->th_urp <= (uint32_t) tlen && !(so->so_options & SO_OOBINLINE)) { /* hdr drop is delayed */ tcp_pulloutofband(so, th, m, drop_hdrlen); } } else { /* * If no out of band data is expected, pull receive urgent * pointer along with the receive window. */ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) tp->rcv_up = tp->rcv_nxt; } dodata: /* XXX */ INP_WLOCK_ASSERT(tp->t_inpcb); /* * Process the segment text, merging it into the TCP sequencing * queue, and arranging for acknowledgment of receipt if necessary. * This process logically involves adjusting tp->rcv_wnd as data is * presented to the user (this happens in tcp_usrreq.c, case * PRU_RCVD). If a FIN has already been received on this connection * then we just ignore the text. */ #ifdef TCP_RFC7413 tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_flags & TF_FASTOPEN)); #endif if ((tlen || (thflags & TH_FIN) || tfo_syn) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; m_adj(m, drop_hdrlen); /* delayed header drop */ /* * Insert segment which includes th into TCP reassembly * queue with control block tp. Set thflags to whether * reassembly now includes a segment with FIN. This handles * the common case inline (segment is the next to be * received on an established connection, and the queue is * empty), avoiding linkage into and removal from the queue * and repetition of various conversions. Set DELACK for * segments received in order, but ack immediately when * segments are out of order (so fast retransmit can work). */ if (th->th_seq == tp->rcv_nxt && LIST_EMPTY(&tp->t_segq) && (TCPS_HAVEESTABLISHED(tp->t_state) || tfo_syn)) { if (DELAY_ACK(tp, tlen) || tfo_syn) { rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { rack->r_wanted_output++; tp->t_flags |= TF_ACKNOW; } tp->rcv_nxt += tlen; thflags = th->th_flags & TH_FIN; TCPSTAT_ADD(tcps_rcvpack, nsegs); TCPSTAT_ADD(tcps_rcvbyte, tlen); SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) m_freem(m); else sbappendstream_locked(&so->so_rcv, m, 0); /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); } else { /* * XXX: Due to the header drop above "th" is * theoretically invalid by now. 
Fortunately * m_adj() doesn't actually frees any mbufs when * trimming from the head. */ thflags = tcp_reass(tp, th, &tlen, m); tp->t_flags |= TF_ACKNOW; } if (tlen > 0) tcp_update_sack_list(tp, save_start, save_start + tlen); } else { m_freem(m); thflags &= ~TH_FIN; } /* * If FIN is received ACK the FIN and let the user know that the * connection is closing. */ if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); /* * If connection is half-synchronized (ie NEEDSYN * flag on) then delay ACK, so it may be piggybacked * when SYN is sent. Otherwise, since we received a * FIN then no more input can be expected, send ACK * now. */ if (tp->t_flags & TF_NEEDSYN) { rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; } tp->rcv_nxt++; } switch (tp->t_state) { /* * In SYN_RECEIVED and ESTABLISHED STATES enter the * CLOSE_WAIT state. */ case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tcp_state_change(tp, TCPS_CLOSE_WAIT); break; /* * If still in FIN_WAIT_1 STATE FIN has not been * acked so enter the CLOSING state. */ case TCPS_FIN_WAIT_1: rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tcp_state_change(tp, TCPS_CLOSING); break; /* * In FIN_WAIT_2 state enter the TIME_WAIT state, * starting the time-wait timer, turning off the * other standard timers. */ case TCPS_FIN_WAIT_2: rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); KASSERT(*ti_locked == TI_RLOCKED, ("%s: dodata " "TCP_FIN_WAIT_2 ti_locked: %d", __func__, *ti_locked)); tcp_twstart(tp); *ti_locked = TI_UNLOCKED; INP_INFO_RUNLOCK(&V_tcbinfo); return (1); } } if (*ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); *ti_locked = TI_UNLOCKED; } /* * Return any desired output. */ if ((tp->t_flags & TF_ACKNOW) || (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) { rack->r_wanted_output++; } KASSERT(*ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", __func__, *ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); return (0); } /* * Here nothing is really faster, its just that we * have broken out the fast-data path also just like * the fast-ack. */ static int rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t nxt_pkt) { int32_t nsegs; int32_t newsize = 0; /* automatic sockbuf scaling */ struct tcp_rack *rack; #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif /* * If last ACK falls within this segment's sequence numbers, record * the timestamp. NOTE that the test is modified according to the * latest proposal of the tcplw@cray.com list (Braden 1993/04/26). 
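 *
 * (Concretely: the ts_recent update below only fires when
 * th_seq <= last_ack_sent; on this strictly in-order path that is
 * the normal case, so a timestamping peer keeps ts_recent current
 * as each segment advances rcv_nxt.)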
*/ if (__predict_false(th->th_seq != tp->rcv_nxt)) { return (0); } if (__predict_false(tp->snd_nxt != tp->snd_max)) { return (0); } if (tiwin && tiwin != tp->snd_wnd) { return (0); } if (__predict_false((tp->t_flags & (TF_NEEDSYN | TF_NEEDFIN)))) { return (0); } if (__predict_false((to->to_flags & TOF_TS) && (TSTMP_LT(to->to_tsval, tp->ts_recent)))) { return (0); } if (__predict_false((th->th_ack != tp->snd_una))) { return (0); } if (__predict_false(tlen > sbspace(&so->so_rcv))) { return (0); } if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } rack = (struct tcp_rack *)tp->t_fb_ptr; /* * This is a pure, in-sequence data packet with nothing on the * reassembly queue and we have enough buffer space to take it. */ if (*ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); *ti_locked = TI_UNLOCKED; } nsegs = max(1, m->m_pkthdr.lro_nsegs); /* Clean receiver SACK report if present */ if (tp->rcv_numsacks) tcp_clean_sackreport(tp); TCPSTAT_INC(tcps_preddat); tp->rcv_nxt += tlen; /* * Pull snd_wl1 up to prevent seq wrap relative to th_seq. */ tp->snd_wl1 = th->th_seq; /* * Pull rcv_up up to prevent seq wrap relative to rcv_nxt. */ tp->rcv_up = tp->rcv_nxt; TCPSTAT_ADD(tcps_rcvpack, nsegs); TCPSTAT_ADD(tcps_rcvbyte, tlen); #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif newsize = tcp_autorcvbuf(m, th, so, tp, tlen); /* Add data to socket buffer. */ SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { /* * Set new socket buffer size. Give up when limit is * reached. */ if (newsize) if (!sbreserve_locked(&so->so_rcv, newsize, so, NULL)) so->so_rcv.sb_flags &= ~SB_AUTOSIZE; m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(&so->so_rcv, m, 0); rack_calc_rwin(so, tp); } /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); if (DELAY_ACK(tp, tlen)) { rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; rack->r_wanted_output++; } if ((tp->snd_una == tp->snd_max) && rack_use_sack_filter) sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); return (1); } /* * This subfunction is used to try to highly optimize the * fast path. We again allow window updates that are * in sequence to remain in the fast-path. We also add * in the __predict's to attempt to help the compiler. * Note that if we return a 0, then we can *not* process * it and the caller should push the packet into the * slow-path. */ static int rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t nxt_pkt, uint32_t cts) { int32_t acked; int32_t nsegs; #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, * now IPv6. */ u_char tcp_saveipgen[IP6_HDR_LEN]; struct tcphdr tcp_savetcp; short ostate = 0; #endif struct tcp_rack *rack; if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) { /* Old ack, behind (or duplicate to) the last one rcv'd */ return (0); } if (__predict_false(SEQ_GT(th->th_ack, tp->snd_max))) { /* Above what we have sent? 
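* An ACK above snd_max acknowledges data we have never sent and * cannot be handled in the fast path; the slow path * (rack_process_ack()) responds to it instead.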
*/ return (0); } if (__predict_false(tp->snd_nxt != tp->snd_max)) { /* We are retransmitting */ return (0); } if (__predict_false(tiwin == 0)) { /* zero window */ return (0); } if (__predict_false(tp->t_flags & (TF_NEEDSYN | TF_NEEDFIN))) { /* We need a SYN or a FIN, unlikely.. */ return (0); } if ((to->to_flags & TOF_TS) && __predict_false(TSTMP_LT(to->to_tsval, tp->ts_recent))) { /* Timestamp is behind .. old ack with seq wrap? */ return (0); } if (__predict_false(IN_RECOVERY(tp->t_flags))) { /* Still recovering */ return (0); } rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->r_ctl.rc_sacked) { /* We have sack holes on our scoreboard */ return (0); } /* Ok if we reach here, we can process a fast-ack */ nsegs = max(1, m->m_pkthdr.lro_nsegs); rack_log_ack(tp, to, th); /* Did the window get updated? */ if (tiwin != tp->snd_wnd) { tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; if (tp->snd_wnd > tp->max_sndwnd) tp->max_sndwnd = tp->snd_wnd; } if ((rack->rc_in_persist != 0) && (tp->snd_wnd >= tp->t_maxseg)) { rack_exit_persist(tp, rack); } /* * If last ACK falls within this segment's sequence numbers, record * the timestamp. NOTE that the test is modified according to the * latest proposal of the tcplw@cray.com list (Braden 1993/04/26). */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * This is a pure ack for outstanding data. */ if (*ti_locked == TI_RLOCKED) { INP_INFO_RUNLOCK(&V_tcbinfo); *ti_locked = TI_UNLOCKED; } TCPSTAT_INC(tcps_predack); /* * "bad retransmit" recovery. */ if (tp->t_flags & TF_PREVVALID) { tp->t_flags &= ~TF_PREVVALID; if (tp->t_rxtshift == 1 && (int)(ticks - tp->t_badrxtwin) < 0) rack_cong_signal(tp, th, CC_RTO_ERR); } /* * Recalculate the transmit timer / rtt. * * Some boxes send broken timestamp replies during the SYN+ACK * phase, ignore timestamps of 0 or we could calculate a huge RTT * and blow up the retransmit timer. */ acked = BYTES_THIS_ACK(tp, th); #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, to); #endif TCPSTAT_ADD(tcps_rcvackpack, nsegs); TCPSTAT_ADD(tcps_rcvackbyte, acked); sbdrop(&so->so_snd, acked); /* * Let the congestion control algorithm update congestion control * related information. This typically means increasing the * congestion window. */ rack_ack_received(tp, rack, th, nsegs, CC_ACK, 0); tp->snd_una = th->th_ack; /* * Pull snd_wl2 up to prevent seq wrap relative to th_ack. */ tp->snd_wl2 = th->th_ack; tp->t_dupacks = 0; m_freem(m); /* ND6_HINT(tp); *//* Some progress has been made. */ /* * If all outstanding data are acked, stop retransmit timer, * otherwise restart timer using current (possibly backed-off) * value. If process is waiting for space, wakeup/selwakeup/signal. * If data are ready to send, let tcp_output decide between more * output or persist. */ #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif if (tp->snd_una == tp->snd_max) { rack_log_progress_event(rack, tp, 0, PROGRESS_CLEAR, __LINE__); tp->t_acktime = 0; rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); } /* Wake up the socket if we have room to write more */ sowwakeup(so); if (sbavail(&so->so_snd)) { rack->r_wanted_output++; } return (1); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. 
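* A caller must honor that contract, roughly: retval = * (*rack->r_substate)(m, th, so, tp, ...); if retval is 0 the * inpcb write lock is still held and tp may be used, if retval * is 1 tp must not be touched again.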
*/ static int rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t todrop; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); /* * If the state is SYN_SENT: if seg contains an ACK, but not for our * SYN, drop the input. if seg contains a RST, then drop the * connection. if seg does not contain SYN, then drop it. Otherwise * this is an acceptable SYN segment initialize tp->rcv_nxt and * tp->irs if seg contains ack then advance tp->snd_una if seg * contains an ECE and ECN support is enabled, the stream is ECN * capable. if SYN has been acked change to ESTABLISHED else * SYN_RCVD state arrange for segment to be acked (eventually) * continue processing rest of data/controls, beginning with URG */ if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { TCP_PROBE5(connect__refused, NULL, tp, mtod(m, const char *), tp, th); tp = tcp_drop(tp, ECONNREFUSED); rack_do_drop(m, tp, ti_locked); return (1); } if (thflags & TH_RST) { rack_do_drop(m, tp, ti_locked); return (1); } if (!(thflags & TH_SYN)) { rack_do_drop(m, tp, ti_locked); return (1); } tp->irs = th->th_seq; tcp_rcvseqinit(tp); if (thflags & TH_ACK) { TCPSTAT_INC(tcps_connects); soisconnected(so); #ifdef MAC mac_socketpeer_set_from_mbuf(m, so); #endif /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; } tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale); /* * If there's data, delay ACK; if there's also a FIN ACKNOW * will be turned on later. */ if (DELAY_ACK(tp, tlen) && tlen != 0) { rack_timer_cancel(tp, (struct tcp_rack *)tp->t_fb_ptr, ((struct tcp_rack *)tp->t_fb_ptr)->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; } else { ((struct tcp_rack *)tp->t_fb_ptr)->r_wanted_output++; tp->t_flags |= TF_ACKNOW; } if ((thflags & TH_ECE) && V_tcp_do_ecn) { tp->t_flags |= TF_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } /* * Received in SYN_SENT[*] state. Transitions: * SYN_SENT --> ESTABLISHED SYN_SENT* --> FIN_WAIT_1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; thflags &= ~TH_SYN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(connect__established, NULL, tp, mtod(m, const char *), tp, th); cc_conn_init(tp); } } else { /* * Received initial SYN in SYN-SENT[*] state => simultaneous * open. If segment contains CC option and there is a * cached CC, apply TAO test. If it succeeds, connection is * * half-synchronized. Otherwise, do 3-way handshake: * SYN-SENT -> SYN-RECEIVED SYN-SENT* -> SYN-RECEIVED* If * there was no CC option, clear cached CC value. */ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); tcp_state_change(tp, TCPS_SYN_RECEIVED); } KASSERT(*ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " "ti_locked %d", __func__, *ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); /* * Advance th->th_seq to correspond to first data byte. If data, * trim to stay within window, dropping FIN if necessary. 
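* For example, with rcv_wnd = 1000 and tlen = 1400 the trailing * 400 bytes are cut off with m_adj(m, -todrop), tlen becomes * 1000, and any FIN riding on the segment is dropped with them.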
*/ th->th_seq++; if (tlen > tp->rcv_wnd) { todrop = tlen - tp->rcv_wnd; m_adj(m, -todrop); tlen = tp->rcv_wnd; thflags &= ~TH_FIN; TCPSTAT_INC(tcps_rcvpackafterwin); TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); } tp->snd_wl1 = th->th_seq - 1; tp->rcv_up = th->th_seq; /* * Client side of transaction: already sent SYN and data. If the * remote host used T/TCP to validate the SYN, our data will be * ACK'd; if so, enter normal data segment processing in the middle * of step 5, ack processing. Otherwise, goto step 6. */ if (thflags & TH_ACK) { if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) return (ret_val); /* We may have changed to FIN_WAIT_1 above */ if (tp->t_state == TCPS_FIN_WAIT_1) { /* * In FIN_WAIT_1 STATE in addition to the processing * for the ESTABLISHED state if our FIN is now * acknowledged then enter FIN_WAIT_2. */ if (ourfinisacked) { /* * If we can't receive any more data, then * closing user can proceed. Starting the * timer is contrary to the specification, * but if we don't get a FIN we'll hang * forever. * * XXXjl: we should release the tp also, and * use a compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } #ifdef TCP_RFC7413 if (tp->t_flags & TF_FASTOPEN) { /* * When a TFO connection is in SYN_RECEIVED, the only valid * packets are the initial SYN, a retransmit/copy of the * initial SYN (possibly with a subset of the original * data), a valid ACK, a FIN, or a RST. */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; if ((rack->r_ctl.rc_hpts_flags & PACE_TMR_RXT) || (rack->r_ctl.rc_hpts_flags & PACE_TMR_TLP) || (rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK)) { rack_do_drop(m, NULL, ti_locked); return (0); } } else if (!(thflags & (TH_ACK | TH_FIN | TH_RST))) { rack_do_drop(m, NULL, ti_locked); return (0); } } #endif if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp, ti_locked)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, ti_locked, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. 
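* TSTMP_LT() compares modulo 2^32, so "less than" means the * peer's timestamp clock appears to have run backwards, i.e. the * segment is an old duplicate, possibly from before a * sequence-number wrap (the PAWS case).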
*/ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) return (ret_val); } /* * In the SYN-RECEIVED state, validate that the packet belongs to * this connection before trimming the data to fit the receive * window. Check the sequence number versus IRS since we know the * sequence numbers haven't wrapped. This is a partial fix for the * "LAND" DoS attack. */ if (SEQ_LT(th->th_seq, tp->irs)) { rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { #ifdef TCP_RFC7413 if (tp->t_flags & TF_FASTOPEN) { tp->snd_wnd = tiwin; cc_conn_init(tp); } #endif return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } TCPSTAT_INC(tcps_connects); soisconnected(so); /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_wnd = tiwin; } /* * Make transitions: SYN-RECEIVED -> ESTABLISHED SYN-RECEIVED* -> * FIN-WAIT-1 */ tp->t_starttime = ticks; if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); tp->t_flags &= ~TF_NEEDFIN; } else { tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(accept__established, NULL, tp, mtod(m, const char *), tp, th); #ifdef TCP_RFC7413 if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; /* * Account for the ACK of our SYN prior to regular * ACK processing below. */ tp->snd_una++; } /* * TFO connections call cc_conn_init() during SYN * processing. Calling it again here for such connections * is not harmless as it would undo the snd_cwnd reduction * that occurs when a TFO SYN|ACK is retransmitted. */ if (!(tp->t_flags & TF_FASTOPEN)) #endif cc_conn_init(tp); } /* * If segment contains data or ACK, will call tcp_reass() later; if * not, do so now to pass queued data to user. 
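* (Calling tcp_reass() with a NULL tcphdr inserts nothing new; it * just delivers any already-queued, now in-sequence data to the * socket buffer.)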
*/ if (tlen == 0 && (thflags & TH_FIN) == 0) (void)tcp_reass(tp, (struct tcphdr *)0, 0, (struct mbuf *)0); tp->snd_wl1 = th->th_seq - 1; if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (tp->t_state == TCPS_FIN_WAIT_1) { /* We could have gone to FIN_WAIT_1 (or EST) above */ /* * In FIN_WAIT_1 STATE, in addition to the processing for the * ESTABLISHED state, if our FIN is now acknowledged then * enter FIN_WAIT_2. */ if (ourfinisacked) { /* * If we can't receive any more data, then closing * user can proceed. Starting the timer is contrary * to the specification, but if we don't get a FIN * we'll hang forever. * * XXXjl: we should release the tp also, and use a * compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; /* * Header prediction: check for the two common cases of a * uni-directional data xfer. If the packet has no control flags, * is in-sequence, the window didn't change and we're not * retransmitting, it's a candidate. If the length is zero and the * ack moved forward, we're the sender side of the xfer. Just free * the data acked & wake any higher level process that was blocked * waiting for space. If the length is non-zero and the ack didn't * move, we're the receiver side. If we're getting packets in-order * (the reassembly queue is empty), add the data to the socket * buffer and note that we need a delayed ack. Make sure that the * hidden state-flags are also off. Since we check for * TCPS_ESTABLISHED first, it can only be TF_NEEDSYN. */ if (__predict_true(((to->to_flags & TOF_SACK) == 0)) && __predict_true((thflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK) && __predict_true(LIST_EMPTY(&tp->t_segq)) && __predict_true(th->th_seq == tp->rcv_nxt)) { struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; if (tlen == 0) { if (rack_fastack(m, th, so, tp, to, drop_hdrlen, tlen, ti_locked, tiwin, nxt_pkt, rack->r_ctl.rc_rcvtime)) { return (0); } } else { if (rack_do_fastnewdata(m, th, so, tp, to, drop_hdrlen, tlen, ti_locked, tiwin, nxt_pkt)) { return (0); } } } rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp, ti_locked)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, ti_locked, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp.
NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL, ti_locked); return (0); } } /* * Ack processing. */ if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, NULL, thflags, &ret_val)) { return (ret_val); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } } /* State changes only happen in rack_process_data() */ return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp, ti_locked)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, ti_locked, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 
3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL, ti_locked); return (0); } } /* * Ack processing. */ if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, NULL, thflags, &ret_val)) { return (ret_val); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } static int rack_check_data_after_close(struct mbuf *m, struct tcpcb *tp, int32_t *ti_locked, int32_t *tlen, struct tcphdr *th, struct socket *so) { struct tcp_rack *rack; KASSERT(*ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " "CLOSE_WAIT && tlen ti_locked %d", __func__, *ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->rc_allow_data_af_clo == 0) { close_now: tp = tcp_close(tp); TCPSTAT_INC(tcps_rcvafterclose); rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_UNLIMITED, (*tlen)); return (1); } if (sbavail(&so->so_snd) == 0) goto close_now; /* Ok we allow data that is ignored and a followup reset */ tp->rcv_nxt = th->th_seq + *tlen; tp->t_flags2 |= TF2_DROP_AF_DATA; rack->r_wanted_output = 1; *tlen = 0; return (0); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp, ti_locked)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, ti_locked, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. */ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If new data are received on a connection after the user processes * are gone, then RST the other end. 
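* rack_check_data_after_close() either resets the peer and closes * the connection (the default, and also whenever nothing is left * in the send buffer), or, when rc_allow_data_af_clo is set, * quietly consumes the data by advancing rcv_nxt and setting * TF2_DROP_AF_DATA so the shutdown can still make progress.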
*/ if ((so->so_state & SS_NOFDREF) && tlen) { if (rack_check_data_after_close(m, tp, ti_locked, &tlen, th, so)) return (1); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL, ti_locked); return (0); } } /* * Ack processing. */ if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { /* * If we can't receive any more data, then closing user can * proceed. Starting the timer is contrary to the * specification, but if we don't get a FIN we'll hang * forever. * * XXXjl: we should release the tp also, and use a * compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { soisdisconnected(so); tcp_timer_activate(tp, TT_2MSL, (tcp_fast_finwait2_recycle ? tcp_finwait2_timeout : TP_MAXIDLE(tp))); } tcp_state_change(tp, TCPS_FIN_WAIT_2); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp, ti_locked)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, ti_locked, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. 
*/ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If new data are received on a connection after the user processes * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { if (rack_check_data_after_close(m, tp, ti_locked, &tlen, th, so)) return (1); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL, ti_locked); return (0); } } /* * Ack processing. */ if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tcp_twstart(tp); INP_INFO_RUNLOCK(&V_tcbinfo); *ti_locked = TI_UNLOCKED; m_freem(m); return (1); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp, ti_locked)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, ti_locked, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. 
*/ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If new data are received on a connection after the user processes * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { if (rack_check_data_after_close(m, tp, ti_locked, &tlen, th, so)) return (1); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL, ti_locked); return (0); } } /* * case TCPS_LAST_ACK: Ack processing. */ if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (ourfinisacked) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_close(tp); rack_do_drop(m, tp, ti_locked); return (1); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } /* * Return value of 1, the TCB is unlocked and most * likely gone, return value of 0, the TCP is still * locked. */ static int rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) { int32_t ret_val = 0; int32_t ourfinisacked = 0; rack_calc_rwin(so, tp); /* Reset receive buffer auto scaling when not in bulk receive mode. */ if (thflags & TH_RST) return (rack_process_rst(m, th, so, tp, ti_locked)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. */ if (thflags & TH_SYN) { rack_challenge_ack(m, th, tp, ti_locked, &ret_val); return (ret_val); } /* * RFC 1323 PAWS: If we have a timestamp reply on this segment and * it's less than ts_recent, drop it. 
*/ if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to->to_tsval, tp->ts_recent)) { if (rack_ts_check(m, th, tp, ti_locked, tlen, thflags, &ret_val)) return (ret_val); } if (rack_drop_checks(to, m, th, tp, &tlen, ti_locked, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* * If new data are received on a connection after the user processes * are gone, then RST the other end. */ if ((so->so_state & SS_NOFDREF) && tlen) { if (rack_check_data_after_close(m, tp, ti_locked, &tlen, th, so)) return (1); } /* * If last ACK falls within this segment's sequence numbers, record * its timestamp. NOTE: 1) That the test incorporates suggestions * from the latest proposal of the tcplw@cray.com list (Braden * 1993/04/26). 2) That updating only on newer timestamps interferes * with our earlier PAWS tests, so this check should be solely * predicated on the sequence space of this segment. 3) That we * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ * + SEG.Len instead of RFC1323's Last.ACK.Sent < SEG.SEQ + * SEG.Len, This modified check allows us to overcome RFC1323's * limitations as described in Stevens TCP/IP Illustrated Vol. 2 * p.869. In such cases, we can still calculate the RTT correctly * when RCV.NXT == Last.ACK.Sent. */ if ((to->to_flags & TOF_TS) != 0 && SEQ_LEQ(th->th_seq, tp->last_ack_sent) && SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + ((thflags & (TH_SYN | TH_FIN)) != 0))) { tp->ts_recent_age = tcp_ts_getticks(); tp->ts_recent = to->to_tsval; } /* * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN flag * is on (half-synchronized state), then queue data for later * processing; else drop segment and return. */ if ((thflags & TH_ACK) == 0) { if (tp->t_flags & TF_NEEDSYN) { return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } else if (tp->t_flags & TF_ACKNOW) { rack_do_dropafterack(m, tp, th, ti_locked, thflags, tlen, &ret_val); return (ret_val); } else { rack_do_drop(m, NULL, ti_locked); return (0); } } /* * Ack processing. */ if (rack_process_ack(m, th, so, tp, to, ti_locked, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) { return (ret_val); } if (sbavail(&so->so_snd)) { if (rack_progress_timeout_check(tp)) { tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); rack_do_dropwithreset(m, tp, th, ti_locked, BANDLIM_RST_OPENPORT, tlen); return (1); } } return (rack_process_data(m, th, so, tp, drop_hdrlen, tlen, ti_locked, tiwin, thflags, nxt_pkt)); } static void inline rack_clear_rate_sample(struct tcp_rack *rack) { rack->r_ctl.rack_rs.rs_flags = RACK_RTT_EMPTY; rack->r_ctl.rack_rs.rs_rtt_cnt = 0; rack->r_ctl.rack_rs.rs_rtt_tot = 0; } static int rack_init(struct tcpcb *tp) { struct tcp_rack *rack = NULL; tp->t_fb_ptr = uma_zalloc(rack_pcb_zone, M_NOWAIT); if (tp->t_fb_ptr == NULL) { /* * We need to allocate memory but can't. The INP and INP_INFO * locks are held and they are recursive (this happens during * setup).
So a * scheme to drop the locks fails :( * */ return (ENOMEM); } memset(tp->t_fb_ptr, 0, sizeof(struct tcp_rack)); rack = (struct tcp_rack *)tp->t_fb_ptr; TAILQ_INIT(&rack->r_ctl.rc_map); TAILQ_INIT(&rack->r_ctl.rc_free); TAILQ_INIT(&rack->r_ctl.rc_tmap); rack->rc_tp = tp; if (tp->t_inpcb) { rack->rc_inp = tp->t_inpcb; } /* Probably not needed but lets be sure */ rack_clear_rate_sample(rack); rack->r_cpu = 0; rack->r_ctl.rc_reorder_fade = rack_reorder_fade; rack->rc_allow_data_af_clo = rack_ignore_data_after_close; rack->r_ctl.rc_tlp_threshold = rack_tlp_thresh; rack->rc_pace_reduce = rack_slot_reduction; if (V_tcp_delack_enabled) tp->t_delayed_ack = 1; else tp->t_delayed_ack = 0; rack->rc_pace_max_segs = rack_hptsi_segments; rack->r_ctl.rc_early_recovery_segs = rack_early_recovery_max_seg; rack->r_ctl.rc_reorder_shift = rack_reorder_thresh; rack->r_ctl.rc_pkt_delay = rack_pkt_delay; rack->r_ctl.rc_prop_reduce = rack_use_proportional_reduce; rack->r_idle_reduce_largest = rack_reduce_largest_on_idle; rack->r_enforce_min_pace = rack_min_pace_time; rack->r_min_pace_seg_thresh = rack_min_pace_time_seg_req; rack->r_ctl.rc_prop_rate = rack_proportional_rate; rack->r_ctl.rc_tlp_cwnd_reduce = rack_lower_cwnd_at_tlp; rack->r_ctl.rc_early_recovery = rack_early_recovery; rack->rc_always_pace = rack_pace_every_seg; rack->r_ctl.rc_rate_sample_method = rack_rate_sample_method; rack->rack_tlp_threshold_use = rack_tlp_threshold_use; rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr; rack->r_ctl.rc_min_to = rack_min_to; rack->r_ctl.rc_prr_inc_var = rack_inc_var; rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0); if (tp->snd_una != tp->snd_max) { /* Create a send map for the current outstanding data */ struct rack_sendmap *rsm; rsm = rack_alloc(rack); if (rsm == NULL) { uma_zfree(rack_pcb_zone, tp->t_fb_ptr); tp->t_fb_ptr = NULL; return (ENOMEM); } rsm->r_flags = RACK_OVERMAX; rsm->r_tim_lastsent[0] = tcp_ts_getticks(); rsm->r_rtr_cnt = 1; rsm->r_rtr_bytes = 0; rsm->r_start = tp->snd_una; rsm->r_end = tp->snd_max; rsm->r_sndcnt = 0; TAILQ_INSERT_TAIL(&rack->r_ctl.rc_map, rsm, r_next); TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext); rsm->r_in_tmap = 1; } return (0); } static int rack_handoff_ok(struct tcpcb *tp) { if ((tp->t_state == TCPS_CLOSED) || (tp->t_state == TCPS_LISTEN)) { /* Sure no problem though it may not stick */ return (0); } if ((tp->t_state == TCPS_SYN_SENT) || (tp->t_state == TCPS_SYN_RECEIVED)) { /* * We really don't know you have to get to ESTAB or beyond * to tell. */ return (EAGAIN); } if (tp->t_flags & TF_SACK_PERMIT) { return (0); } /* * If we reach here we don't do SACK on this connection so we can * never do rack. 
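* RACK's loss detection is driven by the SACK scoreboard kept in * the rc_map send map, so a connection without TF_SACK_PERMIT * gives the algorithm nothing to work with.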
*/ return (EINVAL); } static void rack_fini(struct tcpcb *tp, int32_t tcb_is_purged) { if (tp->t_fb_ptr) { struct tcp_rack *rack; struct rack_sendmap *rsm; rack = (struct tcp_rack *)tp->t_fb_ptr; #ifdef TCP_BLACKBOX tcp_log_flowend(tp); #endif rsm = TAILQ_FIRST(&rack->r_ctl.rc_map); while (rsm) { TAILQ_REMOVE(&rack->r_ctl.rc_map, rsm, r_next); uma_zfree(rack_zone, rsm); rsm = TAILQ_FIRST(&rack->r_ctl.rc_map); } rsm = TAILQ_FIRST(&rack->r_ctl.rc_free); while (rsm) { TAILQ_REMOVE(&rack->r_ctl.rc_free, rsm, r_next); uma_zfree(rack_zone, rsm); rsm = TAILQ_FIRST(&rack->r_ctl.rc_free); } rack->rc_free_cnt = 0; uma_zfree(rack_pcb_zone, tp->t_fb_ptr); tp->t_fb_ptr = NULL; } } static void rack_set_state(struct tcpcb *tp, struct tcp_rack *rack) { switch (tp->t_state) { case TCPS_SYN_SENT: rack->r_state = TCPS_SYN_SENT; rack->r_substate = rack_do_syn_sent; break; case TCPS_SYN_RECEIVED: rack->r_state = TCPS_SYN_RECEIVED; rack->r_substate = rack_do_syn_recv; break; case TCPS_ESTABLISHED: rack->r_state = TCPS_ESTABLISHED; rack->r_substate = rack_do_established; break; case TCPS_CLOSE_WAIT: rack->r_state = TCPS_CLOSE_WAIT; rack->r_substate = rack_do_close_wait; break; case TCPS_FIN_WAIT_1: rack->r_state = TCPS_FIN_WAIT_1; rack->r_substate = rack_do_fin_wait_1; break; case TCPS_CLOSING: rack->r_state = TCPS_CLOSING; rack->r_substate = rack_do_closing; break; case TCPS_LAST_ACK: rack->r_state = TCPS_LAST_ACK; rack->r_substate = rack_do_lastack; break; case TCPS_FIN_WAIT_2: rack->r_state = TCPS_FIN_WAIT_2; rack->r_substate = rack_do_fin_wait_2; break; case TCPS_LISTEN: case TCPS_CLOSED: case TCPS_TIME_WAIT: default: #ifdef INVARIANTS panic("tcp tp:%p state:%d sees impossible state?", tp, tp->t_state); #endif break; }; } static void rack_timer_audit(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb) { /* * We received an ack, and then did not * call send or were bounced out due to the * hpts was running. Now a timer is up as well, is * it the right timer? */ struct rack_sendmap *rsm; int tmr_up; tmr_up = rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK; if (rack->rc_in_persist && (tmr_up == PACE_TMR_PERSIT)) return; rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); if (((rsm == NULL) || (tp->t_state < TCPS_ESTABLISHED)) && (tmr_up == PACE_TMR_RXT)) { /* Should be an RXT */ return; } if (rsm == NULL) { /* Nothing outstanding? */ if (tp->t_flags & TF_DELACK) { if (tmr_up == PACE_TMR_DELACK) /* We are supposed to have delayed ack up and we do */ return; } else if (sbavail(&tp->t_inpcb->inp_socket->so_snd) && (tmr_up == PACE_TMR_RXT)) { /* * if we hit enobufs then we would expect the possiblity * of nothing outstanding and the RXT up (and the hptsi timer). */ return; } else if (((tcp_always_keepalive || rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)) && (tmr_up == PACE_TMR_KEEP) && (tp->snd_max == tp->snd_una)) { /* We should have keep alive up and we do */ return; } } if (rsm && (rsm->r_flags & RACK_SACK_PASSED)) { if ((tp->t_flags & TF_SENTFIN) && ((tp->snd_max - tp->snd_una) == 1) && (rsm->r_flags & RACK_HAS_FIN)) { /* needs to be a RXT */ if (tmr_up == PACE_TMR_RXT) return; } else if (tmr_up == PACE_TMR_RACK) return; } else if (SEQ_GT(tp->snd_max,tp->snd_una) && ((tmr_up == PACE_TMR_TLP) || (tmr_up == PACE_TMR_RXT))) { /* * Either a TLP or RXT is fine if no sack-passed * is in place and data is outstanding. 
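* (Had RACK_SACK_PASSED been set on the head of the tmap, only * the rack timer would do; that case was handled just above.)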
*/ return; } else if (tmr_up == PACE_TMR_DELACK) { /* * If the delayed ack was going to go off * before the rtx/tlp/rack timer were going to * expire, then that would be the timer in control. * Note we don't check the time here trusting the * code is correct. */ return; } /* * Ok the timer originally started is not what we want now. * We will force the hpts to be stopped if any, and restart * with the slot set to what was in the saved slot. */ rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0); } static void rack_hpts_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, int32_t ti_locked, int32_t nxt_pkt, struct timeval *tv) { int32_t thflags, retval, did_out = 0; int32_t way_out = 0; uint32_t cts; uint32_t tiwin; struct tcpopt to; struct tcp_rack *rack; struct rack_sendmap *rsm; int32_t prev_state = 0; cts = tcp_tv_to_mssectick(tv); rack = (struct tcp_rack *)tp->t_fb_ptr; kern_prefetch(rack, &prev_state); prev_state = 0; thflags = th->th_flags; /* * If this is either a state-changing packet or current state isn't * established, we require a read lock on tcbinfo. Otherwise, we * allow the tcbinfo to be in either locked or unlocked, as the * caller may have unnecessarily acquired a lock due to a race. */ if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || tp->t_state != TCPS_ESTABLISHED) { KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " "SYN/FIN/RST/!EST", __func__, ti_locked)); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { #ifdef INVARIANTS if (ti_locked == TI_RLOCKED) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " "ti_locked: %d", __func__, ti_locked)); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); } #endif } INP_WLOCK_ASSERT(tp->t_inpcb); KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts; log.u_bbr.ininput = rack->rc_inp->inp_in_input; TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0, tlen, &log, true); } /* * Segment received on connection. Reset idle time and keep-alive * timer. XXX: This should be done after segment validation to * ignore broken/spoofed segs. */ if (tp->t_idle_reduce && (tp->snd_max == tp->snd_una)) { #ifdef NETFLIX_CWV if ((tp->cwv_enabled) && ((tp->cwv_cwnd_valid == 0) && TCPS_HAVEESTABLISHED(tp->t_state) && (tp->snd_cwnd > tp->snd_cwv.init_cwnd))) { tcp_newcwv_nvp_closedown(tp); } else #endif if ((ticks - tp->t_rcvtime) >= tp->t_rxtcur) { counter_u64_add(rack_input_idle_reduces, 1); rack_cc_after_idle(tp, (rack->r_idle_reduce_largest ? 1 :0)); } } rack->r_ctl.rc_rcvtime = cts; tp->t_rcvtime = ticks; #ifdef NETFLIX_CWV if (tp->cwv_enabled) { if ((tp->cwv_cwnd_valid == 0) && TCPS_HAVEESTABLISHED(tp->t_state) && (tp->snd_cwnd > tp->snd_cwv.init_cwnd)) tcp_newcwv_nvp_closedown(tp); } #endif /* * Unscale the window into a 32-bit value. For the SYN_SENT state * the scale is zero. */ tiwin = th->th_win << tp->snd_scale; #ifdef NETFLIX_STATS stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin); #endif /* * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move * this to occur after we've validated the segment. 
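* The handling below follows RFC 3168: CWR from the peer lets us * stop echoing ECE, a CE mark in the IP header makes us start * echoing ECE, and an ECE from the peer is fed to congestion * control as CC_ECN.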
*/ if (tp->t_flags & TF_ECN_PERMIT) { if (thflags & TH_CWR) tp->t_flags &= ~TF_ECN_SND_ECE; switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: tp->t_flags |= TF_ECN_SND_ECE; TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: TCPSTAT_INC(tcps_ecn_ect0); break; case IPTOS_ECN_ECT1: TCPSTAT_INC(tcps_ecn_ect1); break; } /* Congestion experienced. */ if (thflags & TH_ECE) { rack_cong_signal(tp, th, CC_ECN); } } /* * Parse options on any incoming segment. */ tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); /* * If echoed timestamp is later than the current time, fall back to * non RFC1323 RTT calculation. Normalize timestamp if syncookies * were used when this connection was established. */ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { to.to_tsecr -= tp->ts_offset; if (TSTMP_GT(to.to_tsecr, cts)) to.to_tsecr = 0; } /* * If its the first time in we need to take care of options and * verify we can do SACK for rack! */ if (rack->r_state == 0) { /* Should be init'd by rack_init() */ KASSERT(rack->rc_inp != NULL, ("%s: rack->rc_inp unexpectedly NULL", __func__)); if (rack->rc_inp == NULL) { rack->rc_inp = tp->t_inpcb; } /* * Process options only when we get SYN/ACK back. The SYN * case for incoming connections is handled in tcp_syncache. * According to RFC1323 the window field in a SYN (i.e., a * or ) segment itself is never scaled. XXX * this is traditional behavior, may need to be cleaned up. */ rack->r_cpu = inp_to_cpuid(tp->t_inpcb); if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; } /* * Initial send window. It will be updated with the * next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; if (to.to_flags & TOF_TS) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = cts; } if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACKPERM) == 0) tp->t_flags &= ~TF_SACK_PERMIT; } /* * At this point we are at the initial call. Here we decide * if we are doing RACK or not. We do this by seeing if * TF_SACK_PERMIT is set, if not rack is *not* possible and * we switch to the default code. */ if ((tp->t_flags & TF_SACK_PERMIT) == 0) { tcp_switch_back_to_default(tp); (*tp->t_fb->tfb_tcp_do_segment) (m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); return; } /* Set the flag */ rack->r_is_v6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; tcp_set_hpts(tp->t_inpcb); rack_stop_all_timers(tp); sack_filter_clear(&rack->r_ctl.rack_sf, th->th_ack); } /* * This is the one exception case where we set the rack state * always. All other times (timers etc) we must have a rack-state * set (so we assure we have done the checks above for SACK). 
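* rack_set_state() re-points r_substate at the matching per-state * handler (rack_do_established() and friends), which is what gets * dispatched through a few lines below.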
*/ if (rack->r_state != tp->t_state) rack_set_state(tp, rack); if (SEQ_GT(th->th_ack, tp->snd_una) && (rsm = TAILQ_FIRST(&rack->r_ctl.rc_map)) != NULL) kern_prefetch(rsm, &prev_state); prev_state = rack->r_state; rack->r_ctl.rc_tlp_send_cnt = 0; rack_clear_rate_sample(rack); retval = (*rack->r_substate) (m, th, so, tp, &to, drop_hdrlen, tlen, &ti_locked, tiwin, thflags, nxt_pkt); #ifdef INVARIANTS if ((retval == 0) && (tp->t_inpcb == NULL)) { panic("retval:%d tp:%p t_inpcb:NULL state:%d", retval, tp, prev_state); } #endif if (ti_locked != TI_UNLOCKED) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_INFO_RUNLOCK(&V_tcbinfo); ti_locked = TI_UNLOCKED; } if (retval == 0) { /* * If retval is 1 the tcb is unlocked and most likely the tp * is gone. */ INP_WLOCK_ASSERT(tp->t_inpcb); tcp_rack_xmit_timer_commit(rack, tp); if (((tp->snd_max - tp->snd_una) > tp->snd_wnd) && (rack->rc_in_persist == 0)){ /* * The peer shrunk its window on us to the point * where we have sent too much. The only thing * we can do here is stop any timers and * enter persist. We most likely lost the last * bytes we sent but oh well, we will have to * retransmit them after the peer is caught up. */ if (rack->rc_inp->inp_in_hpts) tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); rack_timer_cancel(tp, rack, cts, __LINE__); rack_enter_persist(tp, rack, cts); rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0); way_out = 3; goto done_with_input; } if (nxt_pkt == 0) { if (rack->r_wanted_output != 0) { did_out = 1; (void)tp->t_fb->tfb_tcp_output(tp); } rack_start_hpts_timer(rack, tp, cts, __LINE__, 0, 0, 0); } if (((rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) == 0) && (SEQ_GT(tp->snd_max, tp->snd_una) || (tp->t_flags & TF_DELACK) || ((tcp_always_keepalive || rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)))) { /* We could not send (probably in the hpts but stopped the timer earlier)? */ if ((tp->snd_max == tp->snd_una) && ((tp->t_flags & TF_DELACK) == 0) && (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) { /* keep alive not needed if we are hptsi output yet */ ; } else { if (rack->rc_inp->inp_in_hpts) tcp_hpts_remove(rack->rc_inp, HPTS_REMOVE_OUTPUT); rack_start_hpts_timer(rack, tp, tcp_ts_getticks(), __LINE__, 0, 0, 0); } way_out = 1; } else { /* Do we have the correct timer running? */ rack_timer_audit(tp, rack, &so->so_snd); way_out = 2; } done_with_input: rack_log_doseg_done(rack, cts, nxt_pkt, did_out, way_out); if (did_out) rack->r_wanted_output = 0; #ifdef INVARIANTS if (tp->t_inpcb == NULL) { panic("OP:%d retval:%d tp:%p t_inpcb:NULL state:%d", did_out, retval, tp, prev_state); } #endif INP_WUNLOCK(tp->t_inpcb); } } void rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, int32_t ti_locked) { struct timeval tv; #ifdef RSS struct tcp_function_block *tfb; struct tcp_rack *rack; struct inpcb *inp; rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->r_state == 0) { /* * Initial input (ACK to SYN-ACK etc)lets go ahead and get * it processed */ if (ti_locked != TI_RLOCKED && INP_INFO_TRY_RLOCK(&V_tcbinfo)) ti_locked = TI_RLOCKED; if (ti_locked != TI_RLOCKED) { inp = tp->t_inpcb; tfb = tp->t_fb; in_pcbref(inp); INP_WUNLOCK(inp); INP_INFO_RLOCK(&V_tcbinfo); ti_locked = TI_RLOCKED; INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) inp = NULL; if (inp == NULL || (inp->inp_flags2 & INP_FREED) || (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) { /* The TCPCB went away. Free the packet. 
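* (The inpcb had to be dropped while the tcbinfo read lock was * acquired, leaving a window in which another thread could have * dropped or freed the connection.)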
*/ INP_INFO_RUNLOCK(&V_tcbinfo); if (inp) INP_WUNLOCK(inp); m_freem(m); return; } /* If the stack changed, call the correct stack. */ if (tp->t_fb != tfb) { tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); return; } } tcp_get_usecs(&tv); rack_hpts_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked, 0, &tv); return; } if (ti_locked == TI_RLOCKED) INP_INFO_RUNLOCK(&V_tcbinfo); tcp_queue_to_input(tp, m, th, tlen, drop_hdrlen, iptos, (uint8_t) ti_locked); INP_WUNLOCK(tp->t_inpcb); #else tcp_get_usecs(&tv); rack_hpts_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked, 0, &tv); #endif } struct rack_sendmap * tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tsused) { struct rack_sendmap *rsm = NULL; int32_t idx; uint32_t srtt_cur, srtt = 0, thresh = 0, ts_low = 0; /* Return the next guy to be re-transmitted */ if (TAILQ_EMPTY(&rack->r_ctl.rc_map)) { return (NULL); } if (tp->t_flags & TF_SENTFIN) { /* retran the end FIN? */ return (NULL); } /* ok lets look at this one */ rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap); if (rsm && ((rsm->r_flags & RACK_ACKED) == 0)) { goto check_it; } rsm = rack_find_lowest_rsm(rack); if (rsm == NULL) { return (NULL); } check_it: srtt_cur = tp->t_srtt >> TCP_RTT_SHIFT; srtt = TICKS_2_MSEC(srtt_cur); if (rack->rc_rack_rtt && (srtt > rack->rc_rack_rtt)) srtt = rack->rc_rack_rtt; if (rsm->r_flags & RACK_ACKED) { return (NULL); } if ((rsm->r_flags & RACK_SACK_PASSED) == 0) { /* Its not yet ready */ return (NULL); } idx = rsm->r_rtr_cnt - 1; ts_low = rsm->r_tim_lastsent[idx]; thresh = rack_calc_thresh_rack(rack, srtt, tsused); if (tsused <= ts_low) { return (NULL); } if ((tsused - ts_low) >= thresh) { return (rsm); } return (NULL); } static int rack_output(struct tcpcb *tp) { struct socket *so; uint32_t recwin, sendwin; uint32_t sb_offset; int32_t len, flags, error = 0; struct mbuf *m; struct mbuf *mb; uint32_t if_hw_tsomaxsegcount = 0; uint32_t if_hw_tsomaxsegsize; long tot_len_this_send = 0; struct ip *ip = NULL; #ifdef TCPDEBUG struct ipovly *ipov = NULL; #endif struct udphdr *udp = NULL; struct tcp_rack *rack; struct tcphdr *th; uint8_t pass = 0; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen, ulen=0; uint32_t rack_seq; #if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; #endif int32_t idle, sendalot; int32_t sub_from_prr = 0; volatile int32_t sack_rxmit; struct rack_sendmap *rsm = NULL; int32_t tso, mtu, would_have_fin = 0; struct tcpopt to; int32_t slot = 0; uint32_t cts; uint8_t hpts_calling, doing_tlp = 0; int32_t do_a_prefetch; int32_t prefetch_rsm = 0; int32_t prefetch_so_done = 0; struct tcp_log_buffer *lgb = NULL; struct inpcb *inp; struct sockbuf *sb; #ifdef INET6 struct ip6_hdr *ip6 = NULL; int32_t isipv6; #endif /* setup and take the cache hits here */ rack = (struct tcp_rack *)tp->t_fb_ptr; inp = rack->rc_inp; so = inp->inp_socket; sb = &so->so_snd; kern_prefetch(sb, &do_a_prefetch); do_a_prefetch = 1; INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) return (tcp_offload_output(tp)); #endif #ifdef TCP_RFC7413 /* * For TFO connections in SYN_RECEIVED, only allow the initial * SYN|ACK and those sent by the retransmit timer. 
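* That is, suppress any send that is neither the very first * SYN|ACK (nothing outstanding yet) nor a retransmission of it * (snd_nxt == snd_una), which is what RFC 7413 expects from the * passive opener.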
	 */
	if ((tp->t_flags & TF_FASTOPEN) &&
	    (tp->t_state == TCPS_SYN_RECEIVED) &&
	    SEQ_GT(tp->snd_max, tp->snd_una) &&	/* initial SYN|ACK sent */
	    (tp->snd_nxt != tp->snd_una))	/* not a retransmit */
		return (0);
#endif
#ifdef INET6
	if (rack->r_state) {
		/* Use the cache line loaded if possible */
		isipv6 = rack->r_is_v6;
	} else {
		isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
	}
#endif
	cts = tcp_ts_getticks();
	if (((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) &&
	    inp->inp_in_hpts) {
		/*
		 * We are on the hpts for some timer but not hptsi output.
		 * Remove from the hpts unconditionally.
		 */
		rack_timer_cancel(tp, rack, cts, __LINE__);
	}
	/* Mark that we have called rack_output(). */
	if ((rack->r_timer_override) ||
	    (tp->t_flags & TF_FORCEDATA) ||
	    (tp->t_state < TCPS_ESTABLISHED)) {
		if (tp->t_inpcb->inp_in_hpts)
			tcp_hpts_remove(tp->t_inpcb, HPTS_REMOVE_OUTPUT);
	} else if (tp->t_inpcb->inp_in_hpts) {
		/*
		 * On the hpts you can't pass; even if ACKNOW is on, we
		 * will send when the hpts fires.
		 */
		counter_u64_add(rack_out_size[TCP_MSS_ACCT_INPACE], 1);
		return (0);
	}
	hpts_calling = inp->inp_hpts_calls;
	inp->inp_hpts_calls = 0;
	if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
		if (rack_process_timers(tp, rack, cts, hpts_calling)) {
			counter_u64_add(rack_out_size[TCP_MSS_ACCT_ATIMER], 1);
			return (0);
		}
	}
	rack->r_wanted_output = 0;
	rack->r_timer_override = 0;
	/*
	 * Determine length of data that should be transmitted, and flags
	 * that will be used. If there is some data or critical controls
	 * (SYN, RST) to send, then transmit; otherwise, investigate
	 * further.
	 */
	idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
#ifdef NETFLIX_CWV
	if (tp->cwv_enabled) {
		if ((tp->cwv_cwnd_valid == 0) &&
		    TCPS_HAVEESTABLISHED(tp->t_state) &&
		    (tp->snd_cwnd > tp->snd_cwv.init_cwnd))
			tcp_newcwv_nvp_closedown(tp);
	} else
#endif
	if (tp->t_idle_reduce) {
		if (idle && ((ticks - tp->t_rcvtime) >= tp->t_rxtcur))
			rack_cc_after_idle(tp,
			    (rack->r_idle_reduce_largest ? 1 : 0));
	}
	tp->t_flags &= ~TF_LASTIDLE;
	if (idle) {
		if (tp->t_flags & TF_MORETOCOME) {
			tp->t_flags |= TF_LASTIDLE;
			idle = 0;
		}
	}
again:
	/*
	 * If we've recently taken a timeout, snd_max will be greater than
	 * snd_nxt. There may be SACK information that allows us to avoid
	 * resending already delivered data. Adjust snd_nxt accordingly.
	 */
	sendalot = 0;
	cts = tcp_ts_getticks();
	tso = 0;
	mtu = 0;
	sb_offset = tp->snd_max - tp->snd_una;
	sendwin = min(tp->snd_wnd, tp->snd_cwnd);
	flags = tcp_outflags[tp->t_state];
	/*
	 * Send any SACK-generated retransmissions. If we're explicitly
	 * trying to send out new data (when sendalot is 1), bypass this
	 * function. If we retransmit in fast recovery mode, decrement
	 * snd_cwnd, since we're replacing a (future) new transmission with
	 * a retransmission now, and we previously incremented snd_cwnd in
	 * tcp_input().
	 */
	/*
	 * Still in SACK recovery; reset the rxmit flag to zero.
*/ while (rack->rc_free_cnt < rack_free_cache) { rsm = rack_alloc(rack); if (rsm == NULL) { if (inp->inp_hpts_calls) /* Retry in a ms */ slot = 1; goto just_return_nolock; } TAILQ_INSERT_TAIL(&rack->r_ctl.rc_free, rsm, r_next); rack->rc_free_cnt++; rsm = NULL; } if (inp->inp_hpts_calls) inp->inp_hpts_calls = 0; sack_rxmit = 0; len = 0; rsm = NULL; if (flags & TH_RST) { SOCKBUF_LOCK(sb); goto send; } if (rack->r_ctl.rc_tlpsend) { /* Tail loss probe */ long cwin; long tlen; doing_tlp = 1; rsm = rack->r_ctl.rc_tlpsend; rack->r_ctl.rc_tlpsend = NULL; sack_rxmit = 1; tlen = rsm->r_end - rsm->r_start; if (tlen > tp->t_maxseg) tlen = tp->t_maxseg; #ifdef INVARIANTS if (SEQ_GT(tp->snd_una, rsm->r_start)) { panic("tp:%p rack:%p snd_una:%u rsm:%p r_start:%u", tp, rack, tp->snd_una, rsm, rsm->r_start); } #endif sb_offset = rsm->r_start - tp->snd_una; cwin = min(tp->snd_wnd, tlen); len = cwin; } else if (rack->r_ctl.rc_resend) { /* Retransmit timer */ rsm = rack->r_ctl.rc_resend; rack->r_ctl.rc_resend = NULL; len = rsm->r_end - rsm->r_start; sack_rxmit = 1; sendalot = 0; sb_offset = rsm->r_start - tp->snd_una; if (len >= tp->t_maxseg) { len = tp->t_maxseg; } KASSERT(sb_offset >= 0, ("%s: sack block to the left of una : %d", __func__, sb_offset)); } else if ((rack->rc_in_persist == 0) && ((rsm = tcp_rack_output(tp, rack, cts)) != NULL)) { long tlen; if ((!IN_RECOVERY(tp->t_flags)) && ((tp->t_flags & (TF_WASFRECOVERY | TF_WASCRECOVERY)) == 0)) { /* Enter recovery if not induced by a time-out */ rack->r_ctl.rc_rsm_start = rsm->r_start; rack->r_ctl.rc_cwnd_at = tp->snd_cwnd; rack->r_ctl.rc_ssthresh_at = tp->snd_ssthresh; rack_cong_signal(tp, NULL, CC_NDUPACK); /* * When we enter recovery we need to assure we send * one packet. */ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; } #ifdef INVARIANTS if (SEQ_LT(rsm->r_start, tp->snd_una)) { panic("Huh, tp:%p rack:%p rsm:%p start:%u < snd_una:%u\n", tp, rack, rsm, rsm->r_start, tp->snd_una); } #endif tlen = rsm->r_end - rsm->r_start; sb_offset = rsm->r_start - tp->snd_una; if (tlen > rack->r_ctl.rc_prr_sndcnt) { len = rack->r_ctl.rc_prr_sndcnt; } else { len = tlen; } if (len >= tp->t_maxseg) { sendalot = 1; len = tp->t_maxseg; } else { sendalot = 0; if ((rack->rc_timer_up == 0) && (len < tlen)) { /* * If its not a timer don't send a partial * segment. */ len = 0; goto just_return_nolock; } } KASSERT(sb_offset >= 0, ("%s: sack block to the left of una : %d", __func__, sb_offset)); if (len > 0) { sub_from_prr = 1; sack_rxmit = 1; TCPSTAT_INC(tcps_sack_rexmits); TCPSTAT_ADD(tcps_sack_rexmit_bytes, min(len, tp->t_maxseg)); counter_u64_add(rack_rtm_prr_retran, 1); } } if (rsm && (rsm->r_flags & RACK_HAS_FIN)) { /* we are retransmitting the fin */ len--; if (len) { /* * When retransmitting data do *not* include the * FIN. This could happen from a TLP probe. */ flags &= ~TH_FIN; } } #ifdef INVARIANTS /* For debugging */ rack->r_ctl.rc_rsm_at_retran = rsm; #endif /* * Get standard flags, and add SYN or FIN if requested by 'hidden' * state flags. */ if (tp->t_flags & TF_NEEDFIN) flags |= TH_FIN; if (tp->t_flags & TF_NEEDSYN) flags |= TH_SYN; if ((sack_rxmit == 0) && (prefetch_rsm == 0)) { void *end_rsm; end_rsm = TAILQ_LAST_FAST(&rack->r_ctl.rc_tmap, rack_sendmap, r_tnext); if (end_rsm) kern_prefetch(end_rsm, &prefetch_rsm); prefetch_rsm = 1; } SOCKBUF_LOCK(sb); /* * If in persist timeout with window of 0, send 1 byte. Otherwise, * if window is small but nonzero and time TF_SENTFIN expired, we * will send what we can and go to transmit state. 
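 *
 * (Worked example, hypothetical numbers: if the peer advertises a
 * zero window while 100 bytes sit unsent in the socket buffer,
 * the persist path below forces sendwin to 1 so a single-byte
 * probe goes out and solicits a fresh window update.)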
*/ if (tp->t_flags & TF_FORCEDATA) { if (sendwin == 0) { /* * If we still have some data to send, then clear * the FIN bit. Usually this would happen below * when it realizes that we aren't sending all the * data. However, if we have exactly 1 byte of * unsent data, then it won't clear the FIN bit * below, and if we are in persist state, we wind up * sending the packet without recording that we sent * the FIN bit. * * We can't just blindly clear the FIN bit, because * if we don't have any more data to send then the * probe will be the FIN itself. */ if (sb_offset < sbused(sb)) flags &= ~TH_FIN; sendwin = 1; } else { if (rack->rc_in_persist) rack_exit_persist(tp, rack); /* * If we are dropping persist mode then we need to * correct snd_nxt/snd_max and off. */ tp->snd_nxt = tp->snd_max; sb_offset = tp->snd_nxt - tp->snd_una; } } /* * If snd_nxt == snd_max and we have transmitted a FIN, the * sb_offset will be > 0 even if so_snd.sb_cc is 0, resulting in a * negative length. This can also occur when TCP opens up its * congestion window while receiving additional duplicate acks after * fast-retransmit because TCP will reset snd_nxt to snd_max after * the fast-retransmit. * * In the normal retransmit-FIN-only case, however, snd_nxt will be * set to snd_una, the sb_offset will be 0, and the length may wind * up 0. * * If sack_rxmit is true we are retransmitting from the scoreboard * in which case len is already set. */ if (sack_rxmit == 0) { uint32_t avail; avail = sbavail(sb); if (SEQ_GT(tp->snd_nxt, tp->snd_una)) sb_offset = tp->snd_nxt - tp->snd_una; else sb_offset = 0; if (IN_RECOVERY(tp->t_flags) == 0) { if (rack->r_ctl.rc_tlp_new_data) { /* TLP is forcing out new data */ if (rack->r_ctl.rc_tlp_new_data > (uint32_t) (avail - sb_offset)) { rack->r_ctl.rc_tlp_new_data = (uint32_t) (avail - sb_offset); } if (rack->r_ctl.rc_tlp_new_data > tp->snd_wnd) len = tp->snd_wnd; else len = rack->r_ctl.rc_tlp_new_data; rack->r_ctl.rc_tlp_new_data = 0; doing_tlp = 1; } else { if (sendwin > avail) { /* use the available */ if (avail > sb_offset) { len = (int32_t)(avail - sb_offset); } else { len = 0; } } else { if (sendwin > sb_offset) { len = (int32_t)(sendwin - sb_offset); } else { len = 0; } } } } else { uint32_t outstanding; /* * We are inside of a SACK recovery episode and are * sending new data, having retransmitted all the * data possible so far in the scoreboard. */ outstanding = tp->snd_max - tp->snd_una; if ((rack->r_ctl.rc_prr_sndcnt + outstanding) > tp->snd_wnd) len = 0; else if (avail > sb_offset) len = avail - sb_offset; else len = 0; if (len > 0) { if (len > rack->r_ctl.rc_prr_sndcnt) len = rack->r_ctl.rc_prr_sndcnt; if (len > 0) { sub_from_prr = 1; counter_u64_add(rack_rtm_prr_newdata, 1); } } if (len > tp->t_maxseg) { /* * We should never send more than a MSS when * retransmitting or sending new data in prr * mode unless the override flag is on. Most * likely the PRR algorithm is not going to * let us send a lot as well :-) */ if (rack->r_ctl.rc_prr_sendalot == 0) len = tp->t_maxseg; } else if (len < tp->t_maxseg) { /* * Do we send any? The idea here is if the * send empty's the socket buffer we want to * do it. However if not then lets just wait * for our prr_sndcnt to get bigger. */ long leftinsb; leftinsb = sbavail(sb) - sb_offset; if (leftinsb > len) { /* This send does not empty the sb */ len = 0; } } } } if (prefetch_so_done == 0) { kern_prefetch(so, &prefetch_so_done); prefetch_so_done = 1; } /* * Lop off SYN bit if it has already been sent. 
However, if this is * SYN-SENT state and if segment contains data and if we don't know * that foreign host supports TAO, suppress sending segment. */ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { if ((tp->t_state != TCPS_SYN_RECEIVED) && (tp->t_state != TCPS_SYN_SENT)) flags &= ~TH_SYN; #ifdef TCP_RFC7413 /* * When sending additional segments following a TFO SYN|ACK, * do not include the SYN bit. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED)) flags &= ~TH_SYN; #endif sb_offset--, len++; if (sbavail(sb) == 0) len = 0; } /* * Be careful not to send data and/or FIN on SYN segments. This * measure is needed to prevent interoperability problems with not * fully conformant TCP implementations. */ if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) { len = 0; flags &= ~TH_FIN; } #ifdef TCP_RFC7413 /* * When retransmitting SYN|ACK on a passively-created TFO socket, * don't include data, as the presence of data may have caused the * original SYN|ACK to have been dropped by a middlebox. */ if ((tp->t_flags & TF_FASTOPEN) && ((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0))) len = 0; #endif if (len <= 0) { /* * If FIN has been sent but not acked, but we haven't been * called to retransmit, len will be < 0. Otherwise, window * shrank after we sent into it. If window shrank to 0, * cancel pending retransmit, pull snd_nxt back to (closed) * window, and set the persist timer if it isn't already * going. If the window didn't close completely, just wait * for an ACK. * * We also do a general check here to ensure that we will * set the persist timer when we have data to send, but a * 0-byte window. This makes sure the persist timer is set * even if the packet hits one of the "goto send" lines * below. */ len = 0; if ((tp->snd_wnd == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) && (sb_offset < (int)sbavail(sb))) { tp->snd_nxt = tp->snd_una; rack_enter_persist(tp, rack, cts); } } /* len will be >= 0 after this point. */ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); tcp_sndbuf_autoscale(tp, so, sendwin); /* * Decide if we can use TCP Segmentation Offloading (if supported by * hardware). * * TSO may only be used if we are in a pure bulk sending state. The * presence of TCP-MD5, SACK retransmits, SACK advertizements and IP * options prevent using TSO. With TSO the TCP header is the same * (except for the sequence number) for all generated packets. This * makes it impossible to transmit any options which vary per * generated segment or packet. * * IPv4 handling has a clear separation of ip options and ip header * flags while IPv6 combines both in in6p_outputopts. ip6_optlen() does * the right thing below to provide length of just ip options and thus * checking for ipoptlen is enough to decide if ip options are present. */ #ifdef INET6 if (isipv6) ipoptlen = ip6_optlen(tp->t_inpcb); else #endif if (tp->t_inpcb->inp_options) ipoptlen = tp->t_inpcb->inp_options->m_len - offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Pre-calculate here as we save another lookup into the darknesses * of IPsec that way and can actually decide if TSO is ok. 
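 *
 * (Clarifying note, added editorially: ipsec_optlen computed here
 * is folded into ipoptlen below, and the TSO test requires
 * ipoptlen == 0, so any IPsec header overhead implicitly disables
 * TSO on this connection.)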
*/ #ifdef INET6 if (isipv6 && IPSEC_ENABLED(ipv6)) ipsec_optlen = IPSEC_HDRSIZE(ipv6, tp->t_inpcb); #ifdef INET else #endif #endif /* INET6 */ #ifdef INET if (IPSEC_ENABLED(ipv4)) ipsec_optlen = IPSEC_HDRSIZE(ipv4, tp->t_inpcb); #endif /* INET */ #endif #if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; #endif if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && (tp->t_port == 0) && ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && sack_rxmit == 0 && ipoptlen == 0) tso = 1; { uint32_t outstanding; outstanding = tp->snd_max - tp->snd_una; if (tp->t_flags & TF_SENTFIN) { /* * If we sent a fin, snd_max is 1 higher than * snd_una */ outstanding--; } if (outstanding > 0) { /* * This is sub-optimal. We only send a stand alone * FIN on its own segment. */ if (flags & TH_FIN) { flags &= ~TH_FIN; would_have_fin = 1; } } else if (sack_rxmit) { if ((rsm->r_flags & RACK_HAS_FIN) == 0) flags &= ~TH_FIN; } else { if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + sbused(sb))) flags &= ~TH_FIN; } } recwin = sbspace(&so->so_rcv); /* * Sender silly window avoidance. We transmit under the following * conditions when len is non-zero: * * - We have a full segment (or more with TSO) - This is the last * buffer in a write()/send() and we are either idle or running * NODELAY - we've timed out (e.g. persist timer) - we have more * then 1/2 the maximum send window's worth of data (receiver may be * limited the window size) - we need to retransmit */ if (len) { if (len >= tp->t_maxseg) { pass = 1; goto send; } /* * NOTE! on localhost connections an 'ack' from the remote * end may occur synchronously with the output and cause us * to flush a buffer queued with moretocome. XXX * */ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ (idle || (tp->t_flags & TF_NODELAY)) && ((uint32_t)len + (uint32_t)sb_offset >= sbavail(&so->so_snd)) && (tp->t_flags & TF_NOPUSH) == 0) { pass = 2; goto send; } if (tp->t_flags & TF_FORCEDATA) { /* typ. timeout case */ pass = 3; goto send; } if ((tp->snd_una == tp->snd_max) && len) { /* Nothing outstanding */ goto send; } if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) { pass = 4; goto send; } if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { /* retransmit case */ pass = 5; goto send; } if (sack_rxmit) { pass = 6; goto send; } } /* * Sending of standalone window updates. * * Window updates are important when we close our window due to a * full socket buffer and are opening it again after the application * reads data from it. Once the window has opened again and the * remote end starts to send again the ACK clock takes over and * provides the most current window information. * * We must avoid the silly window syndrome whereas every read from * the receive buffer, no matter how small, causes a window update * to be sent. We also should avoid sending a flurry of window * updates when the socket buffer had queued a lot of data and the * application is doing small reads. * * Prevent a flurry of pointless window updates by only sending an * update when we can increase the advertized window by more than * 1/4th of the socket buffer capacity. When the buffer is getting * full or is very small be more aggressive and send an update * whenever we can increase by two mss sized segments. In all other * situations the ACK's to new incoming data will carry further * window increases. 
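 *
 * (Worked example, hypothetical values: with sb_hiwat of 65536
 * bytes and a 1460-byte t_maxseg, the main test below sends an
 * update once adv exceeds 16384 bytes (hiwat / 4), or already at
 * 2920 bytes (2 * MSS) when the buffer is nearly full
 * (recwin <= hiwat / 8) or small (hiwat <= 8 * MSS).)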
* * Don't send an independent window update if a delayed ACK is * pending (it will get piggy-backed on it) or the remote side * already has done a half-close and won't send more data. Skip * this if the connection is in T/TCP half-open state. */ if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) && !(tp->t_flags & TF_DELACK) && !TCPS_HAVERCVDFIN(tp->t_state)) { /* * "adv" is the amount we could increase the window, taking * into account that we are limited by TCP_MAXWIN << * tp->rcv_scale. */ int32_t adv; int oldwin; adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale); if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { oldwin = (tp->rcv_adv - tp->rcv_nxt); adv -= oldwin; } else oldwin = 0; /* * If the new window size ends up being the same as the old * size when it is scaled, then don't force a window update. */ if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale) goto dontupdate; if (adv >= (int32_t)(2 * tp->t_maxseg) && (adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) || recwin <= (int32_t)(so->so_rcv.sb_hiwat / 8) || so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg)) { pass = 7; goto send; } if (2 * adv >= (int32_t) so->so_rcv.sb_hiwat) goto send; } dontupdate: /* * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW * is also a catch-all for the retransmit timer timeout case. */ if (tp->t_flags & TF_ACKNOW) { pass = 8; goto send; } if (((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) { pass = 9; goto send; } if (SEQ_GT(tp->snd_up, tp->snd_una)) { pass = 10; goto send; } /* * If our state indicates that FIN should be sent and we have not * yet done so, then we need to send. */ if (flags & TH_FIN) { if ((tp->t_flags & TF_SENTFIN) || (((tp->t_flags & TF_SENTFIN) == 0) && (tp->snd_nxt == tp->snd_una))) { pass = 11; goto send; } } /* * No reason to send a segment, just return. */ just_return: SOCKBUF_UNLOCK(sb); just_return_nolock: if (tot_len_this_send == 0) counter_u64_add(rack_out_size[TCP_MSS_ACCT_JUSTRET], 1); rack_start_hpts_timer(rack, tp, cts, __LINE__, slot, tot_len_this_send, 1); rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling); tp->t_flags &= ~TF_FORCEDATA; return (0); send: if (doing_tlp == 0) { /* * Data not a TLP, and its not the rxt firing. If it is the * rxt firing, we want to leave the tlp_in_progress flag on * so we don't send another TLP. It has to be a rack timer * or normal send (response to acked data) to clear the tlp * in progress flag. */ rack->rc_tlp_in_progress = 0; } SOCKBUF_LOCK_ASSERT(sb); if (len > 0) { if (len >= tp->t_maxseg) tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT; else tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT; } /* * Before ESTABLISHED, force sending of initial options unless TCP * set not to do any options. NOTE: we assume that the IP/TCP header * plus TCP options always fit in a single mbuf, leaving room for a * maximum link header, i.e. max_linkhdr + sizeof (struct tcpiphdr) * + optlen <= MCLBYTES */ optlen = 0; #ifdef INET6 if (isipv6) hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else #endif hdrlen = sizeof(struct tcpiphdr); /* * Compute options for segment. We only have to care about SYN and * established connection segments. Options for SYN-ACK segments * are handled in TCP syncache. */ to.to_flags = 0; if ((tp->t_flags & TF_NOOPT) == 0) { /* Maximum segment size. 
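 * (Illustrative only: tcp_mssopt() would yield 1460 on a standard
 * Ethernet IPv4 path; when a UDP tunneling port is in use the
 * advertised MSS is further reduced by the tunneling overhead
 * below.)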
*/ if (flags & TH_SYN) { tp->snd_nxt = tp->iss; to.to_mss = tcp_mssopt(&inp->inp_inc); #ifdef NETFLIX_TCPOUDP if (tp->t_port) to.to_mss -= V_tcp_udp_tunneling_overhead; #endif to.to_flags |= TOF_MSS; #ifdef TCP_RFC7413 /* * Only include the TFO option on the first * transmission of the SYN|ACK on a * passively-created TFO socket, as the presence of * the TFO option may have caused the original * SYN|ACK to have been dropped by a middlebox. */ if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift == 0)) { to.to_tfo_len = TCP_FASTOPEN_MAX_COOKIE_LEN; to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie; to.to_flags |= TOF_FASTOPEN; } #endif } /* Window scaling. */ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { to.to_wscale = tp->request_r_scale; to.to_flags |= TOF_SCALE; } /* Timestamps. */ if ((tp->t_flags & TF_RCVD_TSTMP) || ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { to.to_tsval = cts + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; } /* Set receive buffer autosizing timestamp. */ if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE)) tp->rfbuf_ts = tcp_ts_getticks(); /* Selective ACK's. */ if (flags & TH_SYN) to.to_flags |= TOF_SACKPERM; else if (TCPS_HAVEESTABLISHED(tp->t_state) && tp->rcv_numsacks > 0) { to.to_flags |= TOF_SACK; to.to_nsacks = tp->rcv_numsacks; to.to_sacks = (u_char *)tp->sackblks; } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* TCP_SIGNATURE */ /* Processing the options. */ hdrlen += optlen = tcp_addoptions(&to, opt); } #ifdef NETFLIX_TCPOUDP if (tp->t_port) { if (V_tcp_udp_tunneling_port == 0) { /* The port was removed?? */ SOCKBUF_UNLOCK(&so->so_snd); return (EHOSTUNREACH); } hdrlen += sizeof(struct udphdr); } #endif ipoptlen = 0; #if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; #endif /* * Adjust data length if insertion of options will bump the packet * length beyond the t_maxseg length. Clear the FIN bit because we * cut off the tail of the segment. */ if (len + optlen + ipoptlen > tp->t_maxseg) { if (flags & TH_FIN) { would_have_fin = 1; flags &= ~TH_FIN; } if (tso) { uint32_t if_hw_tsomax; uint32_t moff; int32_t max_len; /* extract TSO information */ if_hw_tsomax = tp->t_tsomax; if_hw_tsomaxsegcount = tp->t_tsomaxsegcount; if_hw_tsomaxsegsize = tp->t_tsomaxsegsize; KASSERT(ipoptlen == 0, ("%s: TSO can't do IP options", __func__)); /* * Check if we should limit by maximum payload * length: */ if (if_hw_tsomax != 0) { /* compute maximum TSO length */ max_len = (if_hw_tsomax - hdrlen - max_linkhdr); if (max_len <= 0) { len = 0; } else if (len > max_len) { sendalot = 1; len = max_len; } } /* * Prevent the last segment from being fractional * unless the send sockbuf can be emptied: */ max_len = (tp->t_maxseg - optlen); if ((sb_offset + len) < sbavail(sb)) { moff = len % (u_int)max_len; if (moff != 0) { len -= moff; sendalot = 1; } } /* * In case there are too many small fragments don't * use TSO: */ if (len <= max_len) { len = max_len; sendalot = 1; tso = 0; } /* * Send the FIN in a separate segment after the bulk * sending is done. We don't trust the TSO * implementations to clear the FIN flag on all but * the last segment. 
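 *
 * (Worked example, hypothetical values: if_hw_tsomax = 65535,
 * hdrlen = 52 and max_linkhdr = 16 give max_len = 65467 above, so
 * a larger request is clipped there and sendalot forces another
 * pass, letting the FIN ride a later, smaller segment.)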
*/ if (tp->t_flags & TF_NEEDFIN) sendalot = 1; } else { len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; } } else tso = 0; KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET, ("%s: len > IP_MAXPACKET", __func__)); #ifdef DIAGNOSTIC #ifdef INET6 if (max_linkhdr + hdrlen > MCLBYTES) #else if (max_linkhdr + hdrlen > MHLEN) #endif panic("tcphdr too big"); #endif /* * This KASSERT is here to catch edge cases at a well defined place. * Before, those had triggered (random) panic conditions further * down. */ KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); if ((len == 0) && (flags & TH_FIN) && (sbused(sb))) { /* * We have outstanding data, don't send a fin by itself!. */ goto just_return; } /* * Grab a header mbuf, attaching a copy of data to be transmitted, * and initialize the header from the template for sends on this * connection. */ if (len) { uint32_t max_val; uint32_t moff; if (rack->rc_pace_max_segs) max_val = rack->rc_pace_max_segs * tp->t_maxseg; else max_val = len; /* * We allow a limit on sending with hptsi. */ if (len > max_val) { len = max_val; } #ifdef INET6 if (MHLEN < hdrlen + max_linkhdr) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else #endif m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { SOCKBUF_UNLOCK(sb); error = ENOBUFS; sack_rxmit = 0; goto out; } m->m_data += max_linkhdr; m->m_len = hdrlen; /* * Start the m_copy functions from the closest mbuf to the * sb_offset in the socket buffer chain. */ mb = sbsndptr_noadv(sb, sb_offset, &moff); if (len <= MHLEN - hdrlen - max_linkhdr) { m_copydata(mb, moff, (int)len, mtod(m, caddr_t)+hdrlen); if (SEQ_LT(tp->snd_nxt, tp->snd_max)) sbsndptr_adv(sb, mb, len); m->m_len += len; } else { struct sockbuf *msb; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) msb = NULL; else msb = sb; m->m_next = tcp_m_copym(mb, moff, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb); if (len <= (tp->t_maxseg - optlen)) { /* * Must have ran out of mbufs for the copy * shorten it to no longer need tso. Lets * not put on sendalot since we are low on * mbufs. */ tso = 0; } if (m->m_next == NULL) { SOCKBUF_UNLOCK(sb); (void)m_free(m); error = ENOBUFS; sack_rxmit = 0; goto out; } } if ((tp->t_flags & TF_FORCEDATA) && len == 1) { TCPSTAT_INC(tcps_sndprobe); #ifdef NETFLIX_STATS if (SEQ_LT(tp->snd_nxt, tp->snd_max)) stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB, len); else stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB, len); #endif } else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { if (rsm && (rsm->r_flags & RACK_TLP)) { /* * TLP should not count in retran count, but * in its own bin */ counter_u64_add(rack_tlp_retran, 1); counter_u64_add(rack_tlp_retran_bytes, len); } else { tp->t_sndrexmitpack++; TCPSTAT_INC(tcps_sndrexmitpack); TCPSTAT_ADD(tcps_sndrexmitbyte, len); } #ifdef NETFLIX_STATS stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB, len); #endif } else { TCPSTAT_INC(tcps_sndpack); TCPSTAT_ADD(tcps_sndbyte, len); #ifdef NETFLIX_STATS stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB, len); #endif } /* * If we're sending everything we've got, set PUSH. (This * will keep happy those implementations which only give * data to the user when a buffer fills or a PUSH comes in.) */ if (sb_offset + len == sbused(sb) && sbused(sb) && !(flags & TH_SYN)) flags |= TH_PUSH; /* * Are we doing hptsi, if so we must calculate the slot. We * only do hptsi in ESTABLISHED and with no RESET being * sent where we have data to send. 
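 *
 * (Worked example, hypothetical numbers: a cwnd of 14600 bytes
 * and an srtt of 10 ms give tr_perms = 1460 bytes/ms below, so
 * queuing 4380 bytes this pass yields a slot of 3 ms before the
 * hpts calls us again; rc_pace_reduce, if set, shaves that
 * further.)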
*/ if (((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_CLOSE_WAIT) || ((tp->t_state == TCPS_FIN_WAIT_1) && ((tp->t_flags & TF_SENTFIN) == 0) && ((flags & TH_FIN) == 0))) && ((flags & TH_RST) == 0) && (rack->rc_always_pace)) { /* * We use the most optimistic possible cwnd/srtt for * sending calculations. This will make our * calculation anticipate getting more through * quicker then possible. But thats ok we don't want * the peer to have a gap in data sending. */ uint32_t srtt, cwnd, tr_perms = 0; if (rack->r_ctl.rc_rack_min_rtt) srtt = rack->r_ctl.rc_rack_min_rtt; else srtt = TICKS_2_MSEC((tp->t_srtt >> TCP_RTT_SHIFT)); if (rack->r_ctl.rc_rack_largest_cwnd) cwnd = rack->r_ctl.rc_rack_largest_cwnd; else cwnd = tp->snd_cwnd; tr_perms = cwnd / srtt; if (tr_perms == 0) { tr_perms = tp->t_maxseg; } tot_len_this_send += len; /* * Calculate how long this will take to drain, if * the calculation comes out to zero, thats ok we * will use send_a_lot to possibly spin around for * more increasing tot_len_this_send to the point * that its going to require a pace, or we hit the * cwnd. Which in that case we are just waiting for * a ACK. */ slot = tot_len_this_send / tr_perms; /* Now do we reduce the time so we don't run dry? */ if (slot && rack->rc_pace_reduce) { int32_t reduce; reduce = (slot / rack->rc_pace_reduce); if (reduce < slot) { slot -= reduce; } else slot = 0; } if (rack->r_enforce_min_pace && (slot == 0) && (tot_len_this_send >= (rack->r_min_pace_seg_thresh * tp->t_maxseg))) { /* We are enforcing a minimum pace time of 1ms */ slot = rack->r_enforce_min_pace; } } SOCKBUF_UNLOCK(sb); } else { SOCKBUF_UNLOCK(sb); if (tp->t_flags & TF_ACKNOW) TCPSTAT_INC(tcps_sndacks); else if (flags & (TH_SYN | TH_FIN | TH_RST)) TCPSTAT_INC(tcps_sndctrl); else if (SEQ_GT(tp->snd_up, tp->snd_una)) TCPSTAT_INC(tcps_sndurg); else TCPSTAT_INC(tcps_sndwinup); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; sack_rxmit = 0; goto out; } #ifdef INET6 if (isipv6 && (MHLEN < hdrlen + max_linkhdr) && MHLEN >= hdrlen) { M_ALIGN(m, hdrlen); } else #endif m->m_data += max_linkhdr; m->m_len = hdrlen; } SOCKBUF_UNLOCK_ASSERT(sb); m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); #ifdef NETFLIX_TCPOUDP if (tp->t_port) { udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr)); udp->uh_sport = htons(V_tcp_udp_tunneling_port); udp->uh_dport = tp->t_port; ulen = hdrlen + len - sizeof(struct ip6_hdr); udp->uh_ulen = htons(ulen); th = (struct tcphdr *)(udp + 1); } else #endif th = (struct tcphdr *)(ip6 + 1); tcpip_fillheaders(inp, ip6, th); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); #ifdef TCPDEBUG ipov = (struct ipovly *)ip; #endif #ifdef NETFLIX_TCPOUDP if (tp->t_port) { udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip)); udp->uh_sport = htons(V_tcp_udp_tunneling_port); udp->uh_dport = tp->t_port; ulen = hdrlen + len - sizeof(struct ip); udp->uh_ulen = htons(ulen); th = (struct tcphdr *)(udp + 1); } else #endif th = (struct tcphdr *)(ip + 1); tcpip_fillheaders(inp, ip, th); } /* * Fill in fields, remembering maximum advertised window for use in * delaying messages about window sizes. If resending a FIN, be sure * not to use a new sequence number. */ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && tp->snd_nxt == tp->snd_max) tp->snd_nxt--; /* * If we are starting a connection, send ECN setup SYN packet. 
If we * are on a retransmit, we may resend those bits a number of times * as per RFC 3168. */ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) { if (tp->t_rxtshift >= 1) { if (tp->t_rxtshift <= V_tcp_ecn_maxretries) flags |= TH_ECE | TH_CWR; } else flags |= TH_ECE | TH_CWR; } if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags & TF_ECN_PERMIT)) { /* * If the peer has ECN, mark data packets with ECN capable * transmission (ECT). Ignore pure ack packets, * retransmissions and window probes. */ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && !((tp->t_flags & TF_FORCEDATA) && len == 1)) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); else #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); } /* * Reply with proper ECN notifications. */ if (tp->t_flags & TF_ECN_SND_CWR) { flags |= TH_CWR; tp->t_flags &= ~TF_ECN_SND_CWR; } if (tp->t_flags & TF_ECN_SND_ECE) flags |= TH_ECE; } /* * If we are doing retransmissions, then snd_nxt will not reflect * the first unsent octet. For ACK only packets, we do not want the * sequence number of the retransmitted packet, we want the sequence * number of the next unsent octet. So, if there is no data (and no * SYN or FIN), use snd_max instead of snd_nxt when filling in * ti_seq. But if we are in persist state, snd_max might reflect * one byte beyond the right edge of the window, so use snd_nxt in * that case, since we know we aren't doing a retransmission. * (retransmit and persist are mutually exclusive...) */ if (sack_rxmit == 0) { if (len || (flags & (TH_SYN | TH_FIN)) || rack->rc_in_persist) { th->th_seq = htonl(tp->snd_nxt); rack_seq = tp->snd_nxt; } else if (flags & TH_RST) { /* * For a Reset send the last cum ack in sequence * (this like any other choice may still generate a * challenge ack, if a ack-update packet is in * flight). */ th->th_seq = htonl(tp->snd_una); rack_seq = tp->snd_una; } else { th->th_seq = htonl(tp->snd_max); rack_seq = tp->snd_max; } } else { th->th_seq = htonl(rsm->r_start); rack_seq = rsm->r_start; } th->th_ack = htonl(tp->rcv_nxt); if (optlen) { bcopy(opt, th + 1, optlen); th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; } th->th_flags = flags; /* * Calculate receive window. Don't shrink window, but avoid silly * window syndrome. */ if (recwin < (long)(so->so_rcv.sb_hiwat / 4) && recwin < (long)tp->t_maxseg) recwin = 0; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && recwin < (long)(tp->rcv_adv - tp->rcv_nxt)) recwin = (long)(tp->rcv_adv - tp->rcv_nxt); if (recwin > (long)TCP_MAXWIN << tp->rcv_scale) recwin = (long)TCP_MAXWIN << tp->rcv_scale; /* * According to RFC1323 the window field in a SYN (i.e., a or * ) segment itself is never scaled. The case is * handled in syncache. */ if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(&so->so_rcv), TCP_MAXWIN))); else th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); /* * Adjust the RXWIN0SENT flag - indicate that we have advertised a 0 * window. This may cause the remote transmitter to stall. This * flag tells soreceive() to disable delayed acknowledgements when * draining the buffer. This can occur if the receiver is * attempting to read more data than can be buffered prior to * transmitting on the connection. 
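 *
 * (Clarifying note, added editorially: TF_RXWIN0SENT is cleared
 * again below as soon as a non-zero window is advertised, so the
 * delayed-ACK suppression in soreceive() lasts only while the
 * window is actually closed.)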
*/ if (th->th_win == 0) { tp->t_sndzerowin++; tp->t_flags |= TF_RXWIN0SENT; } else tp->t_flags &= ~TF_RXWIN0SENT; if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); th->th_flags |= TH_URG; } else /* * If no urgent pointer to send, then we pull the urgent * pointer to the left edge of the send window so that it * doesn't drift into the send window on sequence number * wraparound. */ tp->snd_up = tp->snd_una; /* drag it along */ #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (to.to_flags & TOF_SIGNATURE) { /* * Calculate MD5 signature and put it into the place * determined before. * NOTE: since TCP options buffer doesn't point into * mbuf's data, calculate offset and use it. */ if (!TCPMD5_ENABLED() || TCPMD5_OUTPUT(m, th, (u_char *)(th + 1) + (to.to_signature - opt)) != 0) { /* * Do not send segment if the calculation of MD5 * digest has failed. */ goto out; } } #endif /* * Put TCP length in extended header, and then checksum extended * header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ #ifdef INET6 if (isipv6) { /* * ip6_plen is not need to be filled now, and will be filled * in ip6_output. */ if (tp->t_port) { m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0); th->th_sum = htons(0); } else { m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP, 0); } } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET { if (tp->t_port) { m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP)); th->th_sum = htons(0); } else { m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); } /* IP version must be set here for ipv4/ipv6 checking later */ KASSERT(ip->ip_v == IPVERSION, ("%s: IP version incorrect: %d", __func__, ip->ip_v)); } #endif /* * Enable TSO and specify the size of the segments. The TCP pseudo * header checksum is always provided. XXX: Fixme: This is currently * not the case for IPv6. */ if (tso) { KASSERT(len > tp->t_maxseg - optlen, ("%s: len <= tso_segsz", __func__)); m->m_pkthdr.csum_flags |= CSUM_TSO; m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; } #if defined(IPSEC) || defined(IPSEC_SUPPORT) KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), ("%s: mbuf chain shorter than expected: %d + %u + %u - %u != %u", __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); #else KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL), ("%s: mbuf chain shorter than expected: %d + %u + %u != %u", __func__, len, hdrlen, ipoptlen, m_length(m, NULL))); #endif #ifdef TCP_HHOOK /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */ hhook_run_tcp_est_out(tp, th, &to, len, tso); #endif #ifdef TCPDEBUG /* * Trace. */ if (so->so_options & SO_DEBUG) { u_short save = 0; #ifdef INET6 if (!isipv6) #endif { save = ipov->ih_len; ipov->ih_len = htons(m->m_pkthdr.len /* - hdrlen + * (th->th_off << 2) */ ); } tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0); #ifdef INET6 if (!isipv6) #endif ipov->ih_len = save; } #endif /* TCPDEBUG */ /* We're getting ready to send; log now. 
	 */
	if (tp->t_logstate != TCP_LOG_STATE_OFF) {
		union tcp_log_stackspecific log;

		memset(&log.u_bbr, 0, sizeof(log.u_bbr));
		log.u_bbr.inhpts = rack->rc_inp->inp_in_hpts;
		log.u_bbr.ininput = rack->rc_inp->inp_in_input;
		log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
		if (rsm || sack_rxmit) {
			log.u_bbr.flex8 = 1;
		} else {
			log.u_bbr.flex8 = 0;
		}
		lgb = tcp_log_event_(tp, th, &so->so_rcv, &so->so_snd,
		    TCP_LOG_OUT, ERRNO_UNK, len, &log, false, NULL, NULL, 0,
		    NULL);
	} else
		lgb = NULL;
	/*
	 * Fill in IP length and desired time to live and send to IP level.
	 * There should be a better way to handle ttl and tos; we could keep
	 * them in the template, but need a way to checksum without them.
	 */
	/*
	 * m->m_pkthdr.len should have been set before the cksum
	 * calculation, because in6_cksum() needs it.
	 */
#ifdef INET6
	if (isipv6) {
		/*
		 * We separately set hoplimit for every segment, since the
		 * user might want to change the value via setsockopt. Also,
		 * desired default hop limit might be changed via Neighbor
		 * Discovery.
		 */
		ip6->ip6_hlim = in6_selecthlim(inp, NULL);
		/*
		 * Set the packet size here for the benefit of DTrace
		 * probes. ip6_output() will set it properly; it's supposed
		 * to include the option header lengths as well.
		 */
		ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
		if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
			tp->t_flags2 |= TF2_PLPMTU_PMTUD;
		else
			tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
		if (tp->t_state == TCPS_SYN_SENT)
			TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
		TCP_PROBE5(send, NULL, tp, ip6, tp, th);
		/* TODO: IPv6 IP6TOS_ECT bit on */
		error = ip6_output(m, tp->t_inpcb->in6p_outputopts,
		    &inp->inp_route6,
		    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
		    NULL, NULL, inp);
		if (error == EMSGSIZE && inp->inp_route6.ro_rt != NULL)
			mtu = inp->inp_route6.ro_rt->rt_mtu;
	}
#endif				/* INET6 */
#if defined(INET) && defined(INET6)
	else
#endif
#ifdef INET
	{
		ip->ip_len = htons(m->m_pkthdr.len);
#ifdef INET6
		if (inp->inp_vflag & INP_IPV6PROTO)
			ip->ip_ttl = in6_selecthlim(inp, NULL);
#endif				/* INET6 */
		/*
		 * If we do path MTU discovery, then we set DF on every
		 * packet. This might not be the best thing to do according
		 * to RFC3390 Section 2. However the tcp hostcache mitigates
		 * the problem so it affects only the first tcp connection
		 * with a host.
		 *
		 * NB: Don't set DF on small MTU/MSS to have a safe
		 * fallback.
		 */
		if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
			tp->t_flags2 |= TF2_PLPMTU_PMTUD;
			if (tp->t_port == 0 || len < V_tcp_minmss) {
				ip->ip_off |= htons(IP_DF);
			}
		} else {
			tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
		}
		if (tp->t_state == TCPS_SYN_SENT)
			TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
		TCP_PROBE5(send, NULL, tp, ip, tp, th);
		error = ip_output(m, tp->t_inpcb->inp_options, &inp->inp_route,
		    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
		    inp);
		if (error == EMSGSIZE && inp->inp_route.ro_rt != NULL)
			mtu = inp->inp_route.ro_rt->rt_mtu;
	}
#endif				/* INET */
out:
	if (lgb) {
		lgb->tlb_errno = error;
		lgb = NULL;
	}
	/*
	 * In transmit state, time the transmission and arrange for the
	 * retransmit. In persist state, just set snd_max.
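	 *
	 * (Illustrative example, hypothetical numbers: a 2896-byte send
	 * from snd_nxt == snd_max advances both by 2896 (plus one each
	 * for SYN and FIN, if set) and stamps t_acktime; a persist-mode
	 * probe instead bumps only snd_max, leaving snd_nxt unchanged.)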
*/ if (error == 0) { if (len == 0) counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1); else if (len == 1) { counter_u64_add(rack_out_size[TCP_MSS_ACCT_PERSIST], 1); } else if (len > 1) { int idx; idx = (len / tp->t_maxseg) + 3; if (idx >= TCP_MSS_ACCT_ATIMER) counter_u64_add(rack_out_size[(TCP_MSS_ACCT_ATIMER-1)], 1); else counter_u64_add(rack_out_size[idx], 1); } } if (sub_from_prr && (error == 0)) { rack->r_ctl.rc_prr_sndcnt -= len; } sub_from_prr = 0; rack_log_output(tp, &to, len, rack_seq, (uint8_t) flags, error, cts, pass, rsm); if ((tp->t_flags & TF_FORCEDATA) == 0 || (rack->rc_in_persist == 0)) { #ifdef NETFLIX_STATS tcp_seq startseq = tp->snd_nxt; #endif /* * Advance snd_nxt over sequence space of this segment. */ if (error) /* We don't log or do anything with errors */ goto timer; if (flags & (TH_SYN | TH_FIN)) { if (flags & TH_SYN) tp->snd_nxt++; if (flags & TH_FIN) { tp->snd_nxt++; tp->t_flags |= TF_SENTFIN; } } /* In the ENOBUFS case we do *not* update snd_max */ if (sack_rxmit) goto timer; tp->snd_nxt += len; if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { if (tp->snd_una == tp->snd_max) { /* * Update the time we just added data since * none was outstanding. */ rack_log_progress_event(rack, tp, ticks, PROGRESS_START, __LINE__); tp->t_acktime = ticks; } tp->snd_max = tp->snd_nxt; #ifdef NETFLIX_STATS if (!(tp->t_flags & TF_GPUTINPROG) && len) { tp->t_flags |= TF_GPUTINPROG; tp->gput_seq = startseq; tp->gput_ack = startseq + ulmin(sbavail(sb) - sb_offset, sendwin); tp->gput_ts = tcp_ts_getticks(); } #endif } /* * Set retransmit timer if not currently set, and not doing * a pure ack or a keep-alive probe. Initial value for * retransmit timer is smoothed round-trip time + 2 * * round-trip time variance. Initialize shift counter which * is used for backoff of retransmit time. */ timer: if ((tp->snd_wnd == 0) && TCPS_HAVEESTABLISHED(tp->t_state)) { /* * If the persists timer was set above (right before * the goto send), and still needs to be on. Lets * make sure all is canceled. If the persist timer * is not running, we want to get it up. */ if (rack->rc_in_persist == 0) { rack_enter_persist(tp, rack, cts); } } } else { /* * Persist case, update snd_max but since we are in persist * mode (no window) we do not update snd_nxt. */ int32_t xlen = len; if (error) goto nomore; if (flags & TH_SYN) ++xlen; if (flags & TH_FIN) { ++xlen; tp->t_flags |= TF_SENTFIN; } /* In the ENOBUFS case we do *not* update snd_max */ if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) { if (tp->snd_una == tp->snd_max) { /* * Update the time we just added data since * none was outstanding. */ rack_log_progress_event(rack, tp, ticks, PROGRESS_START, __LINE__); tp->t_acktime = ticks; } tp->snd_max = tp->snd_nxt + len; } } nomore: if (error) { SOCKBUF_UNLOCK_ASSERT(sb); /* Check gotos. */ /* * Failures do not advance the seq counter above. For the * case of ENOBUFS we will fall out and retry in 1ms with * the hpts. Everything else will just have to retransmit * with the timer. * * In any case, we do not want to loop around for another * send without a good reason. 
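 *
 * (Worked example: on the first ENOBUFS the slot below starts at
 * 1 + rc_enobuf = 1 and is then raised to the 10 ms floor;
 * repeated ENOBUFS grows rc_enobuf, backing the retry off until
 * the rc_rack_rtt / 2 clamp takes hold.)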
		 */
		sendalot = 0;
		switch (error) {
		case EPERM:
			tp->t_flags &= ~TF_FORCEDATA;
			tp->t_softerror = error;
			return (error);
		case ENOBUFS:
			if (slot == 0) {
				/*
				 * Pace us right away to retry in some
				 * time
				 */
				slot = 1 + rack->rc_enobuf;
				if (rack->rc_enobuf < 255)
					rack->rc_enobuf++;
				if (slot > (rack->rc_rack_rtt / 2)) {
					slot = rack->rc_rack_rtt / 2;
				}
				if (slot < 10)
					slot = 10;
			}
			counter_u64_add(rack_saw_enobuf, 1);
			error = 0;
			goto enobufs;
		case EMSGSIZE:
			/*
			 * For some reason the interface we used initially
			 * to send segments changed to another or lowered
			 * its MTU. If TSO was active we either got an
			 * interface without TSO capabilities or TSO was
			 * turned off. If we obtained mtu from ip_output()
			 * then update it and try again.
			 */
			if (tso)
				tp->t_flags &= ~TF_TSO;
			if (mtu != 0) {
				tcp_mss_update(tp, -1, mtu, NULL, NULL);
				goto again;
			}
			slot = 10;
			rack_start_hpts_timer(rack, tp, cts, __LINE__, slot, 0, 1);
			tp->t_flags &= ~TF_FORCEDATA;
			return (error);
		case ENETUNREACH:
			counter_u64_add(rack_saw_enetunreach, 1);
		case EHOSTDOWN:
		case EHOSTUNREACH:
		case ENETDOWN:
			if (TCPS_HAVERCVDSYN(tp->t_state)) {
				tp->t_softerror = error;
			}
			/* FALLTHROUGH */
		default:
			slot = 10;
			rack_start_hpts_timer(rack, tp, cts, __LINE__, slot, 0, 1);
			tp->t_flags &= ~TF_FORCEDATA;
			return (error);
		}
	} else {
		rack->rc_enobuf = 0;
	}
	TCPSTAT_INC(tcps_sndtotal);
	/*
	 * Data sent (as far as we can tell). If this advertises a larger
	 * window than any other segment, then remember the size of the
	 * advertised window. Any pending ACK has now been sent.
	 */
	if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
		tp->rcv_adv = tp->rcv_nxt + recwin;
	tp->last_ack_sent = tp->rcv_nxt;
	tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
enobufs:
	rack->r_tlp_running = 0;
	if ((flags & TH_RST) || (would_have_fin == 1)) {
		/*
		 * We don't send again after a RST. We also do *not* send
		 * again if we would have had a FIN, but now have
		 * outstanding data.
		 */
		slot = 0;
		sendalot = 0;
	}
	if (slot) {
		/* set the rack tcb into the slot N */
		counter_u64_add(rack_paced_segments, 1);
	} else if (sendalot) {
		if (len)
			counter_u64_add(rack_unpaced_segments, 1);
		sack_rxmit = 0;
		tp->t_flags &= ~TF_FORCEDATA;
		goto again;
	} else if (len) {
		counter_u64_add(rack_unpaced_segments, 1);
	}
	tp->t_flags &= ~TF_FORCEDATA;
	rack_start_hpts_timer(rack, tp, cts, __LINE__, slot, tot_len_this_send, 1);
	return (error);
}

/*
 * rack_ctloutput() must drop the inpcb lock before performing copyin on
 * socket option arguments. When it re-acquires the lock after the copy, it
 * has to revalidate that the connection is still valid for the socket
 * option.
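 *
 * (Editorial sketch of the pattern used below: INP_WUNLOCK(),
 * sooptcopyin() into a local optval, INP_WLOCK() again, then
 * fail with ECONNRESET if INP_TIMEWAIT or INP_DROPPED was set
 * while unlocked, and re-fetch tp and rack before applying the
 * option.)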
*/ static int rack_set_sockopt(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack) { int32_t error = 0, optval; switch (sopt->sopt_name) { case TCP_RACK_PROP_RATE: case TCP_RACK_PROP: case TCP_RACK_TLP_REDUCE: case TCP_RACK_EARLY_RECOV: case TCP_RACK_PACE_ALWAYS: case TCP_DELACK: case TCP_RACK_PACE_REDUCE: case TCP_RACK_PACE_MAX_SEG: case TCP_RACK_PRR_SENDALOT: case TCP_RACK_MIN_TO: case TCP_RACK_EARLY_SEG: case TCP_RACK_REORD_THRESH: case TCP_RACK_REORD_FADE: case TCP_RACK_TLP_THRESH: case TCP_RACK_PKT_DELAY: case TCP_RACK_TLP_USE: case TCP_RACK_TLP_INC_VAR: case TCP_RACK_IDLE_REDUCE_HIGH: case TCP_RACK_MIN_PACE: case TCP_RACK_MIN_PACE_SEG: case TCP_BBR_RACK_RTT_USE: case TCP_DATA_AFTER_CLOSE: break; default: return (tcp_default_ctloutput(so, sopt, inp, tp)); break; } INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) return (error); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); rack = (struct tcp_rack *)tp->t_fb_ptr; switch (sopt->sopt_name) { case TCP_RACK_PROP_RATE: if ((optval <= 0) || (optval >= 100)) { error = EINVAL; break; } RACK_OPTS_INC(tcp_rack_prop_rate); rack->r_ctl.rc_prop_rate = optval; break; case TCP_RACK_TLP_USE: if ((optval < TLP_USE_ID) || (optval > TLP_USE_TWO_TWO)) { error = EINVAL; break; } RACK_OPTS_INC(tcp_tlp_use); rack->rack_tlp_threshold_use = optval; break; case TCP_RACK_PROP: /* RACK proportional rate reduction (bool) */ RACK_OPTS_INC(tcp_rack_prop); rack->r_ctl.rc_prop_reduce = optval; break; case TCP_RACK_TLP_REDUCE: /* RACK TLP cwnd reduction (bool) */ RACK_OPTS_INC(tcp_rack_tlp_reduce); rack->r_ctl.rc_tlp_cwnd_reduce = optval; break; case TCP_RACK_EARLY_RECOV: /* Should recovery happen early (bool) */ RACK_OPTS_INC(tcp_rack_early_recov); rack->r_ctl.rc_early_recovery = optval; break; case TCP_RACK_PACE_ALWAYS: /* Use the always pace method (bool) */ RACK_OPTS_INC(tcp_rack_pace_always); if (optval > 0) rack->rc_always_pace = 1; else rack->rc_always_pace = 0; break; case TCP_RACK_PACE_REDUCE: /* RACK Hptsi reduction factor (divisor) */ RACK_OPTS_INC(tcp_rack_pace_reduce); if (optval) /* Must be non-zero */ rack->rc_pace_reduce = optval; else error = EINVAL; break; case TCP_RACK_PACE_MAX_SEG: /* Max segments in a pace */ RACK_OPTS_INC(tcp_rack_max_seg); rack->rc_pace_max_segs = optval; break; case TCP_RACK_PRR_SENDALOT: /* Allow PRR to send more than one seg */ RACK_OPTS_INC(tcp_rack_prr_sendalot); rack->r_ctl.rc_prr_sendalot = optval; break; case TCP_RACK_MIN_TO: /* Minimum time between rack t-o's in ms */ RACK_OPTS_INC(tcp_rack_min_to); rack->r_ctl.rc_min_to = optval; break; case TCP_RACK_EARLY_SEG: /* If early recovery max segments */ RACK_OPTS_INC(tcp_rack_early_seg); rack->r_ctl.rc_early_recovery_segs = optval; break; case TCP_RACK_REORD_THRESH: /* RACK reorder threshold (shift amount) */ RACK_OPTS_INC(tcp_rack_reord_thresh); if ((optval > 0) && (optval < 31)) rack->r_ctl.rc_reorder_shift = optval; else error = EINVAL; break; case TCP_RACK_REORD_FADE: /* Does reordering fade after ms time */ RACK_OPTS_INC(tcp_rack_reord_fade); rack->r_ctl.rc_reorder_fade = optval; break; case TCP_RACK_TLP_THRESH: /* RACK TLP theshold i.e. srtt+(srtt/N) */ RACK_OPTS_INC(tcp_rack_tlp_thresh); if (optval) rack->r_ctl.rc_tlp_threshold = optval; else error = EINVAL; break; case TCP_RACK_PKT_DELAY: /* RACK added ms i.e. 
rack-rtt + reord + N */ RACK_OPTS_INC(tcp_rack_pkt_delay); rack->r_ctl.rc_pkt_delay = optval; break; case TCP_RACK_TLP_INC_VAR: /* Does TLP include rtt variance in t-o */ RACK_OPTS_INC(tcp_rack_tlp_inc_var); rack->r_ctl.rc_prr_inc_var = optval; break; case TCP_RACK_IDLE_REDUCE_HIGH: RACK_OPTS_INC(tcp_rack_idle_reduce_high); if (optval) rack->r_idle_reduce_largest = 1; else rack->r_idle_reduce_largest = 0; break; case TCP_DELACK: if (optval == 0) tp->t_delayed_ack = 0; else tp->t_delayed_ack = 1; if (tp->t_flags & TF_DELACK) { tp->t_flags &= ~TF_DELACK; tp->t_flags |= TF_ACKNOW; rack_output(tp); } break; case TCP_RACK_MIN_PACE: RACK_OPTS_INC(tcp_rack_min_pace); if (optval > 3) rack->r_enforce_min_pace = 3; else rack->r_enforce_min_pace = optval; break; case TCP_RACK_MIN_PACE_SEG: RACK_OPTS_INC(tcp_rack_min_pace_seg); if (optval >= 16) rack->r_min_pace_seg_thresh = 15; else rack->r_min_pace_seg_thresh = optval; break; case TCP_BBR_RACK_RTT_USE: if ((optval != USE_RTT_HIGH) && (optval != USE_RTT_LOW) && (optval != USE_RTT_AVG)) error = EINVAL; else rack->r_ctl.rc_rate_sample_method = optval; break; case TCP_DATA_AFTER_CLOSE: if (optval) rack->rc_allow_data_af_clo = 1; else rack->rc_allow_data_af_clo = 0; break; default: return (tcp_default_ctloutput(so, sopt, inp, tp)); break; } #ifdef NETFLIX_STATS tcp_log_socket_option(tp, sopt->sopt_name, optval, error); #endif INP_WUNLOCK(inp); return (error); } static int rack_get_sockopt(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack) { int32_t error, optval; /* * Because all our options are either boolean or an int, we can just * pull everything into optval and then unlock and copy. If we ever * add a option that is not a int, then this will have quite an * impact to this routine. */ switch (sopt->sopt_name) { case TCP_RACK_PROP_RATE: optval = rack->r_ctl.rc_prop_rate; break; case TCP_RACK_PROP: /* RACK proportional rate reduction (bool) */ optval = rack->r_ctl.rc_prop_reduce; break; case TCP_RACK_TLP_REDUCE: /* RACK TLP cwnd reduction (bool) */ optval = rack->r_ctl.rc_tlp_cwnd_reduce; break; case TCP_RACK_EARLY_RECOV: /* Should recovery happen early (bool) */ optval = rack->r_ctl.rc_early_recovery; break; case TCP_RACK_PACE_REDUCE: /* RACK Hptsi reduction factor (divisor) */ optval = rack->rc_pace_reduce; break; case TCP_RACK_PACE_MAX_SEG: /* Max segments in a pace */ optval = rack->rc_pace_max_segs; break; case TCP_RACK_PACE_ALWAYS: /* Use the always pace method */ optval = rack->rc_always_pace; break; case TCP_RACK_PRR_SENDALOT: /* Allow PRR to send more than one seg */ optval = rack->r_ctl.rc_prr_sendalot; break; case TCP_RACK_MIN_TO: /* Minimum time between rack t-o's in ms */ optval = rack->r_ctl.rc_min_to; break; case TCP_RACK_EARLY_SEG: /* If early recovery max segments */ optval = rack->r_ctl.rc_early_recovery_segs; break; case TCP_RACK_REORD_THRESH: /* RACK reorder threshold (shift amount) */ optval = rack->r_ctl.rc_reorder_shift; break; case TCP_RACK_REORD_FADE: /* Does reordering fade after ms time */ optval = rack->r_ctl.rc_reorder_fade; break; case TCP_RACK_TLP_THRESH: /* RACK TLP theshold i.e. srtt+(srtt/N) */ optval = rack->r_ctl.rc_tlp_threshold; break; case TCP_RACK_PKT_DELAY: /* RACK added ms i.e. 
rack-rtt + reord + N */ optval = rack->r_ctl.rc_pkt_delay; break; case TCP_RACK_TLP_USE: optval = rack->rack_tlp_threshold_use; break; case TCP_RACK_TLP_INC_VAR: /* Does TLP include rtt variance in t-o */ optval = rack->r_ctl.rc_prr_inc_var; break; case TCP_RACK_IDLE_REDUCE_HIGH: optval = rack->r_idle_reduce_largest; break; case TCP_RACK_MIN_PACE: optval = rack->r_enforce_min_pace; break; case TCP_RACK_MIN_PACE_SEG: optval = rack->r_min_pace_seg_thresh; break; case TCP_BBR_RACK_RTT_USE: optval = rack->r_ctl.rc_rate_sample_method; break; case TCP_DELACK: optval = tp->t_delayed_ack; break; case TCP_DATA_AFTER_CLOSE: optval = rack->rc_allow_data_af_clo; break; default: return (tcp_default_ctloutput(so, sopt, inp, tp)); break; } INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); return (error); } static int rack_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp) { int32_t error = EINVAL; struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack == NULL) { /* Huh? */ goto out; } if (sopt->sopt_dir == SOPT_SET) { return (rack_set_sockopt(so, sopt, inp, tp, rack)); } else if (sopt->sopt_dir == SOPT_GET) { return (rack_get_sockopt(so, sopt, inp, tp, rack)); } out: INP_WUNLOCK(inp); return (error); } struct tcp_function_block __tcp_rack = { .tfb_tcp_block_name = __XSTRING(STACKNAME), .tfb_tcp_output = rack_output, .tfb_tcp_do_segment = rack_do_segment, .tfb_tcp_hpts_do_segment = rack_hpts_do_segment, .tfb_tcp_ctloutput = rack_ctloutput, .tfb_tcp_fb_init = rack_init, .tfb_tcp_fb_fini = rack_fini, .tfb_tcp_timer_stop_all = rack_stopall, .tfb_tcp_timer_activate = rack_timer_activate, .tfb_tcp_timer_active = rack_timer_active, .tfb_tcp_timer_stop = rack_timer_stop, .tfb_tcp_rexmit_tmr = rack_remxt_tmr, .tfb_tcp_handoff_ok = rack_handoff_ok }; static const char *rack_stack_names[] = { __XSTRING(STACKNAME), #ifdef STACKALIAS __XSTRING(STACKALIAS), #endif }; static int rack_ctor(void *mem, int32_t size, void *arg, int32_t how) { memset(mem, 0, size); return (0); } static void rack_dtor(void *mem, int32_t size, void *arg) { } static bool rack_mod_inited = false; static int tcp_addrack(module_t mod, int32_t type, void *data) { int32_t err = 0; int num_stacks; switch (type) { case MOD_LOAD: rack_zone = uma_zcreate(__XSTRING(MODNAME) "_map", sizeof(struct rack_sendmap), rack_ctor, rack_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); rack_pcb_zone = uma_zcreate(__XSTRING(MODNAME) "_pcb", sizeof(struct tcp_rack), rack_ctor, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); sysctl_ctx_init(&rack_sysctl_ctx); rack_sysctl_root = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_STATIC_CHILDREN(_net_inet_tcp), OID_AUTO, __XSTRING(STACKNAME), CTLFLAG_RW, 0, ""); if (rack_sysctl_root == NULL) { printf("Failed to add sysctl node\n"); err = EFAULT; goto free_uma; } rack_init_sysctls(); num_stacks = nitems(rack_stack_names); err = register_tcp_functions_as_names(&__tcp_rack, M_WAITOK, rack_stack_names, &num_stacks); if (err) { printf("Failed to register %s stack name for " "%s module\n", rack_stack_names[num_stacks], __XSTRING(MODNAME)); sysctl_ctx_free(&rack_sysctl_ctx); free_uma: uma_zdestroy(rack_zone); uma_zdestroy(rack_pcb_zone); rack_counter_destroy(); printf("Failed to register rack module -- err:%d\n", err); return (err); } rack_mod_inited = true; break; case MOD_QUIESCE: err = deregister_tcp_functions(&__tcp_rack, true, false); break; case MOD_UNLOAD: err = deregister_tcp_functions(&__tcp_rack, false, true); if (err == EBUSY) break; if (rack_mod_inited) { 
uma_zdestroy(rack_zone); uma_zdestroy(rack_pcb_zone); sysctl_ctx_free(&rack_sysctl_ctx); rack_counter_destroy(); rack_mod_inited = false; } err = 0; break; default: return (EOPNOTSUPP); } return (err); } static moduledata_t tcp_rack = { .name = __XSTRING(MODNAME), .evhand = tcp_addrack, .priv = 0 }; MODULE_VERSION(MODNAME, 1); DECLARE_MODULE(MODNAME, tcp_rack, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_DEPEND(MODNAME, tcphpts, 1, 1, 1); Index: projects/pnfs-planb-server/sys/sys/mouse.h =================================================================== --- projects/pnfs-planb-server/sys/sys/mouse.h (revision 334966) +++ projects/pnfs-planb-server/sys/sys/mouse.h (revision 334967) @@ -1,398 +1,383 @@ /*- * SPDX-License-Identifier: BSD-1-Clause * * Copyright (c) 1992, 1993 Erik Forsberg. * Copyright (c) 1996, 1997 Kazutaka YOKOTA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL I BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_MOUSE_H_ #define _SYS_MOUSE_H_ #include #include /* ioctls */ #define MOUSE_GETSTATUS _IOR('M', 0, mousestatus_t) #define MOUSE_GETHWINFO _IOR('M', 1, mousehw_t) #define MOUSE_GETMODE _IOR('M', 2, mousemode_t) #define MOUSE_SETMODE _IOW('M', 3, mousemode_t) #define MOUSE_GETLEVEL _IOR('M', 4, int) #define MOUSE_SETLEVEL _IOW('M', 5, int) -#define MOUSE_GETVARS _IOR('M', 6, mousevar_t) -#define MOUSE_SETVARS _IOW('M', 7, mousevar_t) #define MOUSE_READSTATE _IOWR('M', 8, mousedata_t) #define MOUSE_READDATA _IOWR('M', 9, mousedata_t) #ifdef notyet #define MOUSE_SETRESOLUTION _IOW('M', 10, int) #define MOUSE_SETSCALING _IOW('M', 11, int) #define MOUSE_SETRATE _IOW('M', 12, int) #define MOUSE_GETHWID _IOR('M', 13, int) #endif #define MOUSE_SYN_GETHWINFO _IOR('M', 100, synapticshw_t) /* mouse status block */ typedef struct mousestatus { int flags; /* state change flags */ int button; /* button status */ int obutton; /* previous button status */ int dx; /* x movement */ int dy; /* y movement */ int dz; /* z movement */ } mousestatus_t; /* button */ #define MOUSE_BUTTON1DOWN 0x0001 /* left */ #define MOUSE_BUTTON2DOWN 0x0002 /* middle */ #define MOUSE_BUTTON3DOWN 0x0004 /* right */ #define MOUSE_BUTTON4DOWN 0x0008 #define MOUSE_BUTTON5DOWN 0x0010 #define MOUSE_BUTTON6DOWN 0x0020 #define MOUSE_BUTTON7DOWN 0x0040 #define MOUSE_BUTTON8DOWN 0x0080 #define MOUSE_MAXBUTTON 31 #define MOUSE_STDBUTTONS 0x0007 /* buttons 1-3 */ #define MOUSE_EXTBUTTONS 0x7ffffff8 /* the others (28 of them!) 
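 *
 * (Usage note, not from the original header: drivers report
 * presses by setting bits in mousestatus_t.button, e.g. a left
 * press shows as (status.button & MOUSE_BUTTON1DOWN) != 0, while
 * the flags word carries the MOUSE_BUTTONSCHANGED /
 * MOUSE_POSCHANGED deltas defined below.)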
 */
#define MOUSE_BUTTONS	(MOUSE_STDBUTTONS | MOUSE_EXTBUTTONS)

/* flags */
#define MOUSE_STDBUTTONSCHANGED	MOUSE_STDBUTTONS
#define MOUSE_EXTBUTTONSCHANGED	MOUSE_EXTBUTTONS
#define MOUSE_BUTTONSCHANGED	MOUSE_BUTTONS
#define MOUSE_POSCHANGED	0x80000000

typedef struct mousehw {
	int buttons;	/* -1 if unknown */
	int iftype;	/* MOUSE_IF_XXX */
	int type;	/* mouse/track ball/pad... */
	int model;	/* I/F dependent model ID: MOUSE_MODEL_XXX */
	int hwid;	/* I/F dependent hardware ID
			 * for the PS/2 mouse, it will be PSM_XXX_ID */
} mousehw_t;

typedef struct synapticshw {
	int infoMajor;
	int infoMinor;
	int infoRot180;
	int infoPortrait;
	int infoSensor;
	int infoHardware;
	int infoNewAbs;
	int capPen;
	int infoSimplC;
	int infoGeometry;
	int capExtended;
	int capSleep;
	int capFourButtons;
	int capMultiFinger;
	int capPalmDetect;
	int capPassthrough;
	int capMiddle;
	int capLowPower;
	int capMultiFingerReport;
	int capBallistics;
	int nExtendedButtons;
	int nExtendedQueries;
	int capClickPad;
	int capDeluxeLEDs;
	int noAbsoluteFilter;
	int capReportsV;
	int capUniformClickPad;
	int capReportsMin;
	int capInterTouch;
	int capReportsMax;
	int capClearPad;
	int capAdvancedGestures;
	int multiFingerMode;
	int capCoveredPad;
	int verticalScroll;
	int horizontalScroll;
	int verticalWheel;
	int capEWmode;
	int minimumXCoord;
	int minimumYCoord;
	int maximumXCoord;
	int maximumYCoord;
	int infoXupmm;
	int infoYupmm;
	int forcePad;
} synapticshw_t;

/* iftype */
#define MOUSE_IF_UNKNOWN	(-1)
#define MOUSE_IF_SERIAL		0
#define MOUSE_IF_BUS		1
#define MOUSE_IF_INPORT		2
#define MOUSE_IF_PS2		3
#define MOUSE_IF_SYSMOUSE	4
#define MOUSE_IF_USB		5

/* type */
#define MOUSE_UNKNOWN	(-1)	/* should be treated as a mouse */
#define MOUSE_MOUSE	0
#define MOUSE_TRACKBALL	1
#define MOUSE_STICK	2
#define MOUSE_PAD	3

/* model */
#define MOUSE_MODEL_UNKNOWN	(-1)
#define MOUSE_MODEL_GENERIC	0
#define MOUSE_MODEL_GLIDEPOINT	1
#define MOUSE_MODEL_NETSCROLL	2
#define MOUSE_MODEL_NET		3
#define MOUSE_MODEL_INTELLI	4
#define MOUSE_MODEL_THINK	5
#define MOUSE_MODEL_EASYSCROLL	6
#define MOUSE_MODEL_MOUSEMANPLUS 7
#define MOUSE_MODEL_KIDSPAD	8
#define MOUSE_MODEL_VERSAPAD	9
#define MOUSE_MODEL_EXPLORER	10
#define MOUSE_MODEL_4D		11
#define MOUSE_MODEL_4DPLUS	12
#define MOUSE_MODEL_SYNAPTICS	13
#define MOUSE_MODEL_TRACKPOINT	14
#define MOUSE_MODEL_ELANTECH	15

typedef struct mousemode {
	int protocol;		/* MOUSE_PROTO_XXX */
	int rate;		/* report rate (per sec), -1 if unknown */
	int resolution;		/* MOUSE_RES_XXX, -1 if unknown */
	int accelfactor;	/* acceleration factor (must be 1 or greater) */
	int level;		/* driver operation level */
	int packetsize;		/* the length of the data packet */
	unsigned char syncmask[2]; /* sync. data bits in the header byte */
} mousemode_t;

/* protocol */
/*
 * Serial protocols:
 *   Microsoft, MouseSystems, Logitech, MM series, MouseMan, Hitachi Tablet,
 *   GlidePoint, IntelliMouse, Thinking Mouse, MouseRemote, Kidspad,
 *   VersaPad
 * Bus mouse protocols:
 *   bus, InPort
 * PS/2 mouse protocol:
 *   PS/2
 */
#define MOUSE_PROTO_UNKNOWN	(-1)
#define MOUSE_PROTO_MS		0	/* Microsoft Serial, 3 bytes */
#define MOUSE_PROTO_MSC		1	/* Mouse Systems, 5 bytes */
#define MOUSE_PROTO_LOGI	2	/* Logitech, 3 bytes */
#define MOUSE_PROTO_MM		3	/* MM series, 3 bytes */
#define MOUSE_PROTO_LOGIMOUSEMAN 4	/* Logitech MouseMan 3/4 bytes */
#define MOUSE_PROTO_BUS		5	/* MS/Logitech bus mouse */
#define MOUSE_PROTO_INPORT	6	/* MS/ATI InPort mouse */
#define MOUSE_PROTO_PS2		7	/* PS/2 mouse, 3 bytes */
#define MOUSE_PROTO_HITTAB	8	/* Hitachi Tablet 3 bytes */
#define MOUSE_PROTO_GLIDEPOINT	9	/* ALPS GlidePoint, 3/4 bytes */
#define MOUSE_PROTO_INTELLI	10	/* MS IntelliMouse, 4 bytes */
#define MOUSE_PROTO_THINK	11	/* Kensington Thinking Mouse, 3/4 bytes */
#define MOUSE_PROTO_SYSMOUSE	12	/* /dev/sysmouse */
#define MOUSE_PROTO_X10MOUSEREM	13	/* X10 MouseRemote, 3 bytes */
#define MOUSE_PROTO_KIDSPAD	14	/* Genius Kidspad */
#define MOUSE_PROTO_VERSAPAD	15	/* Interlink VersaPad, 6 bytes */
#define MOUSE_PROTO_JOGDIAL	16	/* Vaio's JogDial */
#define MOUSE_PROTO_GTCO_DIGIPAD	17

#define MOUSE_RES_UNKNOWN	(-1)
#define MOUSE_RES_DEFAULT	0
#define MOUSE_RES_LOW		(-2)
#define MOUSE_RES_MEDIUMLOW	(-3)
#define MOUSE_RES_MEDIUMHIGH	(-4)
#define MOUSE_RES_HIGH		(-5)

typedef struct mousedata {
	int len;	/* # of data in the buffer */
	int buf[16];	/* data buffer */
} mousedata_t;
-
-#if (defined(MOUSE_GETVARS))
-
-typedef struct mousevar {
-	int var[16];
-} mousevar_t;
-
-/* magic numbers in var[0] */
-#define MOUSE_VARS_PS2_SIG	0x00325350	/* 'PS2' */
-#define MOUSE_VARS_BUS_SIG	0x00535542	/* 'BUS' */
-#define MOUSE_VARS_INPORT_SIG	0x00504e49	/* 'INP' */
-
-#endif /* MOUSE_GETVARS */

/* Synaptics Touchpad */
#define MOUSE_SYNAPTICS_PACKETSIZE	6	/* '3' works better */

/* Elantech Touchpad */
#define MOUSE_ELANTECH_PACKETSIZE	6

/* Microsoft Serial mouse data packet */
#define MOUSE_MSS_PACKETSIZE	3
#define MOUSE_MSS_SYNCMASK	0x40
#define MOUSE_MSS_SYNC		0x40
#define MOUSE_MSS_BUTTONS	0x30
#define MOUSE_MSS_BUTTON1DOWN	0x20	/* left */
#define MOUSE_MSS_BUTTON2DOWN	0x00	/* no middle button */
#define MOUSE_MSS_BUTTON3DOWN	0x10	/* right */

/* Logitech MouseMan data packet (M+ protocol) */
#define MOUSE_LMAN_BUTTON2DOWN	0x20	/* middle button, the 4th byte */

/* ALPS GlidePoint extension (variant of M+ protocol) */
#define MOUSE_ALPS_BUTTON2DOWN	0x20	/* middle button, the 4th byte */
#define MOUSE_ALPS_TAP		0x10	/* `tapping' action, the 4th byte */

/* Kensington Thinking Mouse extension (variant of M+ protocol) */
#define MOUSE_THINK_BUTTON2DOWN	0x20	/* lower-left button, the 4th byte */
#define MOUSE_THINK_BUTTON4DOWN	0x10	/* lower-right button, the 4th byte */

/* MS IntelliMouse (variant of MS Serial) */
#define MOUSE_INTELLI_PACKETSIZE 4
#define MOUSE_INTELLI_BUTTON2DOWN 0x10	/* middle button in the 4th byte */

/* Mouse Systems Corp. mouse data packet */
#define MOUSE_MSC_PACKETSIZE	5
#define MOUSE_MSC_SYNCMASK	0xf8
#define MOUSE_MSC_SYNC		0x80
#define MOUSE_MSC_BUTTONS	0x07
#define MOUSE_MSC_BUTTON1UP	0x04	/* left */
#define MOUSE_MSC_BUTTON2UP	0x02	/* middle */
#define MOUSE_MSC_BUTTON3UP	0x01	/* right */
#define MOUSE_MSC_MAXBUTTON	3

/* MM series mouse data packet */
#define MOUSE_MM_PACKETSIZE	3
#define MOUSE_MM_SYNCMASK	0xe0
#define MOUSE_MM_SYNC		0x80
#define MOUSE_MM_BUTTONS	0x07
#define MOUSE_MM_BUTTON1DOWN	0x04	/* left */
#define MOUSE_MM_BUTTON2DOWN	0x02	/* middle */
#define MOUSE_MM_BUTTON3DOWN	0x01	/* right */
#define MOUSE_MM_XPOSITIVE	0x10
#define MOUSE_MM_YPOSITIVE	0x08

/* PS/2 mouse data packet */
#define MOUSE_PS2_PACKETSIZE	3
#define MOUSE_PS2_SYNCMASK	0xc8
#define MOUSE_PS2_SYNC		0x08
#define MOUSE_PS2_BUTTONS	0x07	/* 0x03 for 2 button mouse */
#define MOUSE_PS2_BUTTON1DOWN	0x01	/* left */
#define MOUSE_PS2_BUTTON2DOWN	0x04	/* middle */
#define MOUSE_PS2_BUTTON3DOWN	0x02	/* right */
#define MOUSE_PS2_TAP		MOUSE_PS2_SYNC /* GlidePoint (PS/2) `tapping'
						* Yes! this is the same bit
						* as SYNC! */
#define MOUSE_PS2_XNEG		0x10
#define MOUSE_PS2_YNEG		0x20
#define MOUSE_PS2_XOVERFLOW	0x40
#define MOUSE_PS2_YOVERFLOW	0x80

/* Logitech MouseMan+ (PS/2) data packet (PS/2++ protocol) */
#define MOUSE_PS2PLUS_SYNCMASK	0x48
#define MOUSE_PS2PLUS_SYNC	0x48
#define MOUSE_PS2PLUS_ZNEG	0x08	/* sign bit */
#define MOUSE_PS2PLUS_BUTTON4DOWN 0x10	/* 4th button on MouseMan+ */
#define MOUSE_PS2PLUS_BUTTON5DOWN 0x20

/* IBM ScrollPoint (PS/2) also uses PS/2++ protocol */
#define MOUSE_SPOINT_ZNEG	0x80	/* sign bits */
#define MOUSE_SPOINT_WNEG	0x08

/* MS IntelliMouse (PS/2) data packet */
#define MOUSE_PS2INTELLI_PACKETSIZE 4
/* some compatible mice have additional buttons */
#define MOUSE_PS2INTELLI_BUTTON4DOWN 0x40
#define MOUSE_PS2INTELLI_BUTTON5DOWN 0x80

/* MS IntelliMouse Explorer (PS/2) data packet (variation of IntelliMouse) */
#define MOUSE_EXPLORER_ZNEG	0x08	/* sign bit */
/* IntelliMouse Explorer has additional button data in the fourth byte */
#define MOUSE_EXPLORER_BUTTON4DOWN 0x10
#define MOUSE_EXPLORER_BUTTON5DOWN 0x20

/* Interlink VersaPad (serial I/F) data packet */
#define MOUSE_VERSA_PACKETSIZE	6
#define MOUSE_VERSA_IN_USE	0x04
#define MOUSE_VERSA_SYNCMASK	0xc3
#define MOUSE_VERSA_SYNC	0xc0
#define MOUSE_VERSA_BUTTONS	0x30
#define MOUSE_VERSA_BUTTON1DOWN	0x20	/* left */
#define MOUSE_VERSA_BUTTON2DOWN	0x00	/* middle */
#define MOUSE_VERSA_BUTTON3DOWN	0x10	/* right */
#define MOUSE_VERSA_TAP		0x08

/* Interlink VersaPad (PS/2 I/F) data packet */
#define MOUSE_PS2VERSA_PACKETSIZE	6
#define MOUSE_PS2VERSA_IN_USE		0x10
#define MOUSE_PS2VERSA_SYNCMASK		0xe8
#define MOUSE_PS2VERSA_SYNC		0xc8
#define MOUSE_PS2VERSA_BUTTONS		0x05
#define MOUSE_PS2VERSA_BUTTON1DOWN	0x04	/* left */
#define MOUSE_PS2VERSA_BUTTON2DOWN	0x00	/* middle */
#define MOUSE_PS2VERSA_BUTTON3DOWN	0x01	/* right */
#define MOUSE_PS2VERSA_TAP		0x02

/* A4 Tech 4D Mouse (PS/2) data packet */
#define MOUSE_4D_PACKETSIZE	3
#define MOUSE_4D_WHEELBITS	0xf0

/* A4 Tech 4D+ Mouse (PS/2) data packet */
#define MOUSE_4DPLUS_PACKETSIZE	3
#define MOUSE_4DPLUS_ZNEG	0x04	/* sign bit */
#define MOUSE_4DPLUS_BUTTON4DOWN 0x08

/* sysmouse extended data packet */
/*
 * /dev/sysmouse sends data in two formats, depending on the protocol
 * level.  At the level 0, format is exactly the same as MouseSystems'
 * five byte packet.  At the level 1, the first five bytes are the same
 * as at the level 0.  There are three additional bytes which show
 * `dz' and the states of additional buttons.  `dz' is expressed as the
 * sum of the bytes 5 and 6, which contain signed seven bit values.
 * The states of the buttons 4 through 10 are in the bits 0 through 6
 * of the byte 7 respectively: 1 indicates the button is up.
 */
#define MOUSE_SYS_PACKETSIZE	8
#define MOUSE_SYS_SYNCMASK	0xf8
#define MOUSE_SYS_SYNC		0x80
#define MOUSE_SYS_BUTTON1UP	0x04	/* left, 1st byte */
#define MOUSE_SYS_BUTTON2UP	0x02	/* middle, 1st byte */
#define MOUSE_SYS_BUTTON3UP	0x01	/* right, 1st byte */
#define MOUSE_SYS_BUTTON4UP	0x0001	/* 7th byte */
#define MOUSE_SYS_BUTTON5UP	0x0002
#define MOUSE_SYS_BUTTON6UP	0x0004
#define MOUSE_SYS_BUTTON7UP	0x0008
#define MOUSE_SYS_BUTTON8UP	0x0010
#define MOUSE_SYS_BUTTON9UP	0x0020
#define MOUSE_SYS_BUTTON10UP	0x0040
#define MOUSE_SYS_MAXBUTTON	10
#define MOUSE_SYS_STDBUTTONS	0x07
#define MOUSE_SYS_EXTBUTTONS	0x7f	/* the others */
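(Aside: the level-1 `dz' encoding above is easy to misread, since the value arrives as two signed seven-bit bytes. The following is a minimal decoding sketch, not part of this change; the function name and the assumption that `pkt' already points at a complete, synchronized 8-byte packet are invented for illustration.)

/*
 * Sketch: recover dz and the down-state of buttons 4-10 from a
 * level-1 sysmouse packet whose first byte already satisfied
 * (pkt[0] & MOUSE_SYS_SYNCMASK) == MOUSE_SYS_SYNC.
 */
static void
sysmouse_decode_ext(const unsigned char *pkt, int *dz, int *extdown)
{
	/*
	 * Bytes 5 and 6 each hold a signed seven-bit value; shifting
	 * left one bit and dividing by two sign-extends bit 6.
	 */
	*dz = (int8_t)(pkt[5] << 1) / 2 + (int8_t)(pkt[6] << 1) / 2;

	/*
	 * Byte 7 reports buttons 4-10 in bits 0-6 with 1 meaning "up",
	 * so invert and shift into the MOUSE_BUTTON4DOWN.. positions.
	 */
	*extdown = (~pkt[7] & MOUSE_SYS_EXTBUTTONS) << 3;
}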
/* Mouse remote socket */
#define _PATH_MOUSEREMOTE	"/var/run/MouseRemote"

#endif /* _SYS_MOUSE_H_ */
Index: projects/pnfs-planb-server/sys/x86/x86/nexus.c
===================================================================
--- projects/pnfs-planb-server/sys/x86/x86/nexus.c	(revision 334966)
+++ projects/pnfs-planb-server/sys/x86/x86/nexus.c	(revision 334967)
@@ -1,907 +1,908 @@
/*-
 * Copyright 1998 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

/*
 * This code implements a `root nexus' for Intel Architecture
 * machines.  The function of the root nexus is to serve as an
 * attachment point for both processors and buses, and to manage
 * resources which are common to all of them.  In particular,
 * this code implements the core resource managers for interrupt
 * requests, DMA requests (which rightfully should be a part of the
 * ISA code but it's easier to do it here for now), I/O port addresses,
 * and I/O memory address space.
 */
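(Aside: the rman pools initialized in nexus_init_resources() below are what ultimately backs a leaf driver's resource requests anywhere in the device tree. A hypothetical attach routine is sketched here for orientation; it is not part of this change, and `foo_attach' plus its resource IDs are invented.)

/* Sketch: a leaf driver whose requests bubble up to nexus_alloc_resource(). */
static int
foo_attach(device_t dev)
{
	struct resource *port, *irq;
	int prid = 0, irid = 0;	/* resource IDs populated by the parent bus */

	/* Reserve and activate an I/O port range, backed by port_rman. */
	port = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &prid, RF_ACTIVE);
	if (port == NULL)
		return (ENXIO);

	/* Reserve an interrupt line from irq_rman; sharing is allowed. */
	irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irid,
	    RF_ACTIVE | RF_SHAREABLE);
	if (irq == NULL) {
		bus_release_resource(dev, SYS_RES_IOPORT, prid, port);
		return (ENXIO);
	}
	/* ... bus_setup_intr() and I/O via bus_read_1()/bus_write_1() ... */
	return (0);
}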
#ifdef __amd64__
#define	DEV_APIC
#else
#include "opt_apic.h"
#endif
#include "opt_isa.h"
+#include "opt_pci.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#ifdef DEV_APIC
#include "pcib_if.h"
#endif

#ifdef DEV_ISA
#include
#include
#endif

#include

#define ELF_KERN_STR	("elf"__XSTRING(__ELF_WORD_SIZE)" kernel")

static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device");

#define DEVTONX(dev)	((struct nexus_device *)device_get_ivars(dev))

struct rman irq_rman, drq_rman, port_rman, mem_rman;

static	int nexus_probe(device_t);
static	int nexus_attach(device_t);
static	int nexus_print_all_resources(device_t dev);
static	int nexus_print_child(device_t, device_t);
static device_t nexus_add_child(device_t bus, u_int order, const char *name,
    int unit);
static struct resource *nexus_alloc_resource(device_t, device_t, int, int *,
    rman_res_t, rman_res_t, rman_res_t, u_int);
static	int nexus_adjust_resource(device_t, device_t, int, struct resource *,
    rman_res_t, rman_res_t);
#ifdef SMP
static	int nexus_bind_intr(device_t, device_t, struct resource *, int);
#endif
static	int nexus_config_intr(device_t, int, enum intr_trigger,
    enum intr_polarity);
static	int nexus_describe_intr(device_t dev, device_t child,
    struct resource *irq, void *cookie, const char *descr);
static	int nexus_activate_resource(device_t, device_t, int, int,
    struct resource *);
static	int nexus_deactivate_resource(device_t, device_t, int, int,
    struct resource *);
static	int nexus_map_resource(device_t bus, device_t child, int type,
    struct resource *r, struct resource_map_request *argsp,
    struct resource_map *map);
static	int nexus_unmap_resource(device_t bus, device_t child, int type,
    struct resource *r, struct resource_map *map);
static	int nexus_release_resource(device_t, device_t, int, int,
    struct resource *);
static	int nexus_setup_intr(device_t, device_t, struct resource *, int flags,
    driver_filter_t filter, void (*)(void *), void *, void **);
static	int nexus_teardown_intr(device_t, device_t, struct resource *,
    void *);
static struct resource_list *nexus_get_reslist(device_t dev, device_t child);
static	int nexus_set_resource(device_t, device_t, int, int, rman_res_t,
    rman_res_t);
static	int nexus_get_resource(device_t, device_t, int, int, rman_res_t *,
    rman_res_t *);
static void nexus_delete_resource(device_t, device_t, int, int);
static	int nexus_get_cpus(device_t, device_t, enum cpu_sets, size_t,
    cpuset_t *);
-#ifdef DEV_APIC
+#if defined(DEV_APIC) && defined(DEV_PCI)
static	int nexus_alloc_msi(device_t pcib, device_t dev, int count,
    int maxcount, int *irqs);
static	int nexus_release_msi(device_t pcib, device_t dev, int count,
    int *irqs);
static	int nexus_alloc_msix(device_t pcib, device_t dev, int *irq);
static	int nexus_release_msix(device_t pcib, device_t dev, int irq);
static	int nexus_map_msi(device_t pcib, device_t dev, int irq,
    uint64_t *addr, uint32_t *data);
#endif

static device_method_t nexus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		nexus_probe),
	DEVMETHOD(device_attach,	nexus_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	nexus_print_child),
	DEVMETHOD(bus_add_child,	nexus_add_child),
	DEVMETHOD(bus_alloc_resource,	nexus_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	nexus_adjust_resource),
	DEVMETHOD(bus_release_resource,	nexus_release_resource),
	DEVMETHOD(bus_activate_resource, nexus_activate_resource),
	DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource),
	DEVMETHOD(bus_map_resource,	nexus_map_resource),
	DEVMETHOD(bus_unmap_resource,	nexus_unmap_resource),
	DEVMETHOD(bus_setup_intr,	nexus_setup_intr),
	DEVMETHOD(bus_teardown_intr,	nexus_teardown_intr),
#ifdef SMP
	DEVMETHOD(bus_bind_intr,	nexus_bind_intr),
#endif
	DEVMETHOD(bus_config_intr,	nexus_config_intr),
	DEVMETHOD(bus_describe_intr,	nexus_describe_intr),
	DEVMETHOD(bus_get_resource_list, nexus_get_reslist),
	DEVMETHOD(bus_set_resource,	nexus_set_resource),
	DEVMETHOD(bus_get_resource,	nexus_get_resource),
	DEVMETHOD(bus_delete_resource,	nexus_delete_resource),
	DEVMETHOD(bus_get_cpus,		nexus_get_cpus),

	/* pcib interface */
-#ifdef DEV_APIC
+#if defined(DEV_APIC) && defined(DEV_PCI)
	DEVMETHOD(pcib_alloc_msi,	nexus_alloc_msi),
	DEVMETHOD(pcib_release_msi,	nexus_release_msi),
	DEVMETHOD(pcib_alloc_msix,	nexus_alloc_msix),
	DEVMETHOD(pcib_release_msix,	nexus_release_msix),
	DEVMETHOD(pcib_map_msi,		nexus_map_msi),
#endif

	{ 0, 0 }
};

DEFINE_CLASS_0(nexus, nexus_driver, nexus_methods, 1);
static devclass_t nexus_devclass;

DRIVER_MODULE(nexus, root, nexus_driver, nexus_devclass, 0, 0);

static int
nexus_probe(device_t dev)
{

	device_quiet(dev);	/* suppress attach message for neatness */
	return (BUS_PROBE_GENERIC);
}

void
nexus_init_resources(void)
{
	int irq;

	/*
	 * XXX working notes:
	 *
	 * - IRQ resource creation should be moved to the PIC/APIC driver.
	 * - DRQ resource creation should be moved to the DMAC driver.
	 * - The above should be sorted to probe earlier than any child buses.
	 *
	 * - Leave I/O and memory creation here, as child probes may need them.
	 *   (especially eg. ACPI)
	 */

	/*
	 * IRQ's are on the mainboard on old systems, but on the ISA part
	 * of PCI->ISA bridges.  There would be multiple sets of IRQs on
	 * multi-ISA-bus systems.  PCI interrupts are routed to the ISA
	 * component, so in a way, PCI can be a partial child of an ISA bus(!).
	 * APIC interrupts are global though.
	 */
	irq_rman.rm_start = 0;
	irq_rman.rm_type = RMAN_ARRAY;
	irq_rman.rm_descr = "Interrupt request lines";
	irq_rman.rm_end = NUM_IO_INTS - 1;
	if (rman_init(&irq_rman))
		panic("nexus_init_resources irq_rman");

	/*
	 * We search for regions of existing IRQs and add those to the IRQ
	 * resource manager.
	 */
	for (irq = 0; irq < NUM_IO_INTS; irq++)
		if (intr_lookup_source(irq) != NULL)
			if (rman_manage_region(&irq_rman, irq, irq) != 0)
				panic("nexus_init_resources irq_rman add");

	/*
	 * ISA DMA on PCI systems is implemented in the ISA part of each
	 * PCI->ISA bridge and the channels can be duplicated if there are
	 * multiple bridges.  (eg: laptops with docking stations)
	 */
	drq_rman.rm_start = 0;
	drq_rman.rm_end = 7;
	drq_rman.rm_type = RMAN_ARRAY;
	drq_rman.rm_descr = "DMA request lines";
	/* XXX drq 0 not available on some machines */
	if (rman_init(&drq_rman) ||
	    rman_manage_region(&drq_rman, drq_rman.rm_start, drq_rman.rm_end))
		panic("nexus_init_resources drq_rman");

	/*
	 * However, I/O ports and memory truly are global at this level,
	 * as are APIC interrupts (however many IO APICS there turn out
	 * to be on large systems..)
*/ port_rman.rm_start = 0; port_rman.rm_end = 0xffff; port_rman.rm_type = RMAN_ARRAY; port_rman.rm_descr = "I/O ports"; if (rman_init(&port_rman) || rman_manage_region(&port_rman, 0, 0xffff)) panic("nexus_init_resources port_rman"); mem_rman.rm_start = 0; #ifndef PAE mem_rman.rm_end = BUS_SPACE_MAXADDR; #else mem_rman.rm_end = ((1ULL << cpu_maxphyaddr) - 1); #endif mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) || rman_manage_region(&mem_rman, 0, mem_rman.rm_end)) panic("nexus_init_resources mem_rman"); } static int nexus_attach(device_t dev) { nexus_init_resources(); bus_generic_probe(dev); /* * Explicitly add the legacy0 device here. Other platform * types (such as ACPI), use their own nexus(4) subclass * driver to override this routine and add their own root bus. */ if (BUS_ADD_CHILD(dev, 10, "legacy", 0) == NULL) panic("legacy: could not attach"); bus_generic_attach(dev); return 0; } static int nexus_print_all_resources(device_t dev) { struct nexus_device *ndev = DEVTONX(dev); struct resource_list *rl = &ndev->nx_resources; int retval = 0; if (STAILQ_FIRST(rl)) retval += printf(" at"); retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#jx"); retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#jx"); retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd"); return retval; } static int nexus_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += nexus_print_all_resources(child); if (device_get_flags(child)) retval += printf(" flags %#x", device_get_flags(child)); retval += printf(" on motherboard\n"); /* XXX "motherboard", ick */ return (retval); } static device_t nexus_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct nexus_device *ndev; ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); if (!ndev) return(0); resource_list_init(&ndev->nx_resources); child = device_add_child_ordered(bus, order, name, unit); /* should we free this in nexus_child_detached? */ device_set_ivars(child, ndev); return(child); } static struct rman * nexus_rman(int type) { switch (type) { case SYS_RES_IRQ: return (&irq_rman); case SYS_RES_DRQ: return (&drq_rman); case SYS_RES_IOPORT: return (&port_rman); case SYS_RES_MEMORY: return (&mem_rman); default: return (NULL); } } /* * Allocate a resource on behalf of child. NB: child is usually going to be a * child of one of our descendants, not a direct child of nexus0. * (Exceptions include npx.) */ static struct resource * nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct nexus_device *ndev = DEVTONX(child); struct resource *rv; struct resource_list_entry *rle; struct rman *rm; int needactivate = flags & RF_ACTIVE; /* * If this is an allocation of the "default" range for a given * RID, and we know what the resources for this device are * (ie. they aren't maintained by a child bus), then work out * the start/end values. 
*/ if (RMAN_IS_DEFAULT_RANGE(start, end) && (count == 1)) { if (device_get_parent(child) != bus || ndev == NULL) return(NULL); rle = resource_list_find(&ndev->nx_resources, type, *rid); if (rle == NULL) return(NULL); start = rle->start; end = rle->end; count = rle->count; } flags &= ~RF_ACTIVE; rm = nexus_rman(type); if (rm == NULL) return (NULL); rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == NULL) return 0; rman_set_rid(rv, *rid); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { rman_release_resource(rv); return 0; } } return rv; } static int nexus_adjust_resource(device_t bus, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end) { struct rman *rm; rm = nexus_rman(type); if (rm == NULL) return (ENXIO); if (!rman_is_region_manager(r, rm)) return (EINVAL); return (rman_adjust_resource(r, start, end)); } static int nexus_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { struct resource_map map; int error; error = rman_activate_resource(r); if (error != 0) return (error); if (!(rman_get_flags(r) & RF_UNMAPPED) && (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT)) { error = nexus_map_resource(bus, child, type, r, NULL, &map); if (error) { rman_deactivate_resource(r); return (error); } rman_set_mapping(r,&map); } return (0); } static int nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { struct resource_map map; int error; error = rman_deactivate_resource(r); if (error) return (error); if (!(rman_get_flags(r) & RF_UNMAPPED) && (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT)) { rman_get_mapping(r, &map); nexus_unmap_resource(bus, child, type, r, &map); } return (0); } static int nexus_map_resource(device_t bus, device_t child, int type, struct resource *r, struct resource_map_request *argsp, struct resource_map *map) { struct resource_map_request args; rman_res_t end, length, start; /* Resources must be active to be mapped. */ if (!(rman_get_flags(r) & RF_ACTIVE)) return (ENXIO); /* Mappings are only supported on I/O and memory resources. */ switch (type) { case SYS_RES_IOPORT: case SYS_RES_MEMORY: break; default: return (EINVAL); } resource_init_map_request(&args); if (argsp != NULL) bcopy(argsp, &args, imin(argsp->size, args.size)); start = rman_get_start(r) + args.offset; if (args.length == 0) length = rman_get_size(r); else length = args.length; end = start + length - 1; if (start > rman_get_end(r) || start < rman_get_start(r)) return (EINVAL); if (end > rman_get_end(r) || end < start) return (EINVAL); /* * If this is a memory resource, map it into the kernel. */ switch (type) { case SYS_RES_IOPORT: map->r_bushandle = start; map->r_bustag = X86_BUS_SPACE_IO; map->r_size = length; map->r_vaddr = NULL; break; case SYS_RES_MEMORY: map->r_vaddr = pmap_mapdev_attr(start, length, args.memattr); map->r_bustag = X86_BUS_SPACE_MEM; map->r_size = length; /* * The handle is the virtual address. */ map->r_bushandle = (bus_space_handle_t)map->r_vaddr; break; } return (0); } static int nexus_unmap_resource(device_t bus, device_t child, int type, struct resource *r, struct resource_map *map) { /* * If this is a memory resource, unmap it. 
 */
	switch (type) {
	case SYS_RES_MEMORY:
		pmap_unmapdev((vm_offset_t)map->r_vaddr, map->r_size);
		/* FALLTHROUGH */
	case SYS_RES_IOPORT:
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static int
nexus_release_resource(device_t bus, device_t child, int type, int rid,
    struct resource *r)
{

	if (rman_get_flags(r) & RF_ACTIVE) {
		int error = bus_deactivate_resource(child, type, rid, r);
		if (error)
			return error;
	}
	return (rman_release_resource(r));
}

/*
 * Currently this uses the really grody interface from kern/kern_intr.c
 * (which really doesn't belong in kern/anything.c).  Eventually, all of
 * the code in kern_intr.c and machdep_intr.c should get moved here, since
 * this is going to be the official interface.
 */
static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
    int flags, driver_filter_t filter, void (*ihand)(void *), void *arg,
    void **cookiep)
{
	int error, domain;

	/* somebody tried to setup an irq that failed to allocate! */
	if (irq == NULL)
		panic("nexus_setup_intr: NULL irq resource!");

	*cookiep = NULL;
	if ((rman_get_flags(irq) & RF_SHAREABLE) == 0)
		flags |= INTR_EXCL;

	/*
	 * We depend here on rman_activate_resource() being idempotent.
	 */
	error = rman_activate_resource(irq);
	if (error)
		return (error);
	if (bus_get_domain(child, &domain) != 0)
		domain = 0;

	error = intr_add_handler(device_get_nameunit(child),
	    rman_get_start(irq), filter, ihand, arg, flags, cookiep, domain);
	return (error);
}

static int
nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih)
{

	return (intr_remove_handler(ih));
}

#ifdef SMP
static int
nexus_bind_intr(device_t dev, device_t child, struct resource *irq, int cpu)
{

	return (intr_bind(rman_get_start(irq), cpu));
}
#endif

static int
nexus_config_intr(device_t dev, int irq, enum intr_trigger trig,
    enum intr_polarity pol)
{

	return (intr_config_intr(irq, trig, pol));
}

static int
nexus_describe_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie, const char *descr)
{

	return (intr_describe(rman_get_start(irq), cookie, descr));
}

static struct resource_list *
nexus_get_reslist(device_t dev, device_t child)
{
	struct nexus_device *ndev = DEVTONX(child);

	return (&ndev->nx_resources);
}

static int
nexus_set_resource(device_t dev, device_t child, int type, int rid,
    rman_res_t start, rman_res_t count)
{
	struct nexus_device *ndev = DEVTONX(child);
	struct resource_list *rl = &ndev->nx_resources;

	/* XXX this should return a success/failure indicator */
	resource_list_add(rl, type, rid, start, start + count - 1, count);
	return (0);
}

static int
nexus_get_resource(device_t dev, device_t child, int type, int rid,
    rman_res_t *startp, rman_res_t *countp)
{
	struct nexus_device *ndev = DEVTONX(child);
	struct resource_list *rl = &ndev->nx_resources;
	struct resource_list_entry *rle;

	rle = resource_list_find(rl, type, rid);
	if (!rle)
		return (ENOENT);
	if (startp)
		*startp = rle->start;
	if (countp)
		*countp = rle->count;
	return (0);
}

static void
nexus_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct nexus_device *ndev = DEVTONX(child);
	struct resource_list *rl = &ndev->nx_resources;

	resource_list_delete(rl, type, rid);
}

static int
nexus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
    cpuset_t *cpuset)
{

	switch (op) {
#ifdef SMP
	case INTR_CPUS:
		if (setsize != sizeof(cpuset_t))
			return (EINVAL);
		*cpuset = intr_cpus;
		return (0);
#endif
	default:
		return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
	}
}
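(Aside: the driver-visible side of nexus_setup_intr() above looks roughly like the following sketch, which is not part of this change; `foo_intr', `foo_setup_interrupt', and the flag choices are invented. Note that a resource allocated without RF_SHAREABLE would have INTR_EXCL forced on it by nexus.)

static void foo_intr(void *arg);	/* invented ithread handler */

/* Sketch: how a driver's bus_setup_intr() call reaches intr_add_handler(). */
static int
foo_setup_interrupt(device_t dev, struct resource *irq, void *sc,
    void **cookiep)
{
	/*
	 * No filter is supplied (NULL), so only an ithread handler is
	 * registered.  Because the resource was allocated RF_SHAREABLE,
	 * nexus_setup_intr() will not set INTR_EXCL on our behalf.
	 */
	return (bus_setup_intr(dev, irq, INTR_TYPE_MISC | INTR_MPSAFE,
	    NULL, foo_intr, sc, cookiep));
}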
/* Called from the MSI code to add new IRQs to the IRQ rman. */
void
nexus_add_irq(u_long irq)
{

	if (rman_manage_region(&irq_rman, irq, irq) != 0)
		panic("%s: failed", __func__);
}

-#ifdef DEV_APIC
+#if defined(DEV_APIC) && defined(DEV_PCI)
static int
nexus_alloc_msix(device_t pcib, device_t dev, int *irq)
{

	return (msix_alloc(dev, irq));
}

static int
nexus_release_msix(device_t pcib, device_t dev, int irq)
{

	return (msix_release(irq));
}

static int
nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs)
{

	return (msi_alloc(dev, count, maxcount, irqs));
}

static int
nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs)
{

	return (msi_release(irqs, count));
}

static int
nexus_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
    uint32_t *data)
{

	return (msi_map(irq, addr, data));
}
-#endif
+#endif /* DEV_APIC && DEV_PCI */

/* Placeholder for system RAM. */
static void
ram_identify(driver_t *driver, device_t parent)
{

	if (resource_disabled("ram", 0))
		return;
	if (BUS_ADD_CHILD(parent, 0, "ram", 0) == NULL)
		panic("ram_identify");
}

static int
ram_probe(device_t dev)
{

	device_quiet(dev);
	device_set_desc(dev, "System RAM");
	return (0);
}

static int
ram_attach(device_t dev)
{
	struct bios_smap *smapbase, *smap, *smapend;
	struct resource *res;
	vm_paddr_t *p;
	caddr_t kmdp;
	uint32_t smapsize;
	int error, rid;

	/* Retrieve the system memory map from the loader. */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type(ELF_KERN_STR);
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase != NULL) {
		smapsize = *((u_int32_t *)smapbase - 1);
		smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

		rid = 0;
		for (smap = smapbase; smap < smapend; smap++) {
			if (smap->type != SMAP_TYPE_MEMORY ||
			    smap->length == 0)
				continue;
#ifdef __i386__
			/*
			 * Resources use longs to track resources, so
			 * we can't include memory regions above 4GB.
			 */
			if (smap->base > ~0ul)
				continue;
#endif
			error = bus_set_resource(dev, SYS_RES_MEMORY, rid,
			    smap->base, smap->length);
			if (error)
				panic(
				    "ram_attach: resource %d failed set with %d",
				    rid, error);
			res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
			    0);
			if (res == NULL)
				panic("ram_attach: resource %d failed to attach",
				    rid);
			rid++;
		}
		return (0);
	}

	/*
	 * If the system map is not available, fall back to using
	 * dump_avail[].  We use the dump_avail[] array rather than
	 * phys_avail[] for the memory map as phys_avail[] contains
	 * holes for kernel memory, page 0, the message buffer, and
	 * the dcons buffer.  We test the end address in the loop
	 * instead of the start since the start address for the first
	 * segment is 0.
	 */
	for (rid = 0, p = dump_avail; p[1] != 0; rid++, p += 2) {
#ifdef PAE
		/*
		 * Resources use longs to track resources, so we can't
		 * include memory regions above 4GB.
*/ if (p[0] > ~0ul) break; #endif error = bus_set_resource(dev, SYS_RES_MEMORY, rid, p[0], p[1] - p[0]); if (error) panic("ram_attach: resource %d failed set with %d", rid, error); res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 0); if (res == NULL) panic("ram_attach: resource %d failed to attach", rid); } return (0); } static device_method_t ram_methods[] = { /* Device interface */ DEVMETHOD(device_identify, ram_identify), DEVMETHOD(device_probe, ram_probe), DEVMETHOD(device_attach, ram_attach), { 0, 0 } }; static driver_t ram_driver = { "ram", ram_methods, 1, /* no softc */ }; static devclass_t ram_devclass; DRIVER_MODULE(ram, nexus, ram_driver, ram_devclass, 0, 0); #ifdef DEV_ISA /* * Placeholder which claims PnP 'devices' which describe system * resources. */ static struct isa_pnp_id sysresource_ids[] = { { 0x010cd041 /* PNP0c01 */, "System Memory" }, { 0x020cd041 /* PNP0c02 */, "System Resource" }, { 0 } }; static int sysresource_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, sysresource_ids)) <= 0) { device_quiet(dev); } return(result); } static int sysresource_attach(device_t dev) { return(0); } static device_method_t sysresource_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sysresource_probe), DEVMETHOD(device_attach, sysresource_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t sysresource_driver = { "sysresource", sysresource_methods, 1, /* no softc */ }; static devclass_t sysresource_devclass; DRIVER_MODULE(sysresource, isa, sysresource_driver, sysresource_devclass, 0, 0); ISA_PNP_INFO(sysresource_ids); #endif /* DEV_ISA */ Index: projects/pnfs-planb-server/sys =================================================================== --- projects/pnfs-planb-server/sys (revision 334966) +++ projects/pnfs-planb-server/sys (revision 334967) Property changes on: projects/pnfs-planb-server/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r334905-334966
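(Aside: the common thread of the machdep.c and nexus.c hunks merged here is that the x86 MSI plumbing is now compiled in and advertised only when the kernel configuration includes PCI. PCI drivers are unaffected and still request vectors as in the following sketch, which is not part of this change; `bar_alloc_irqs' and the single-vector policy are invented.)

#include <dev/pci/pcivar.h>	/* pci_alloc_msi(), pci_release_msi() */

/* Sketch: MSI requests travel down the pcib chain to nexus_alloc_msi(). */
static int
bar_alloc_irqs(device_t dev)
{
	int count = 1;

	if (pci_alloc_msi(dev, &count) == 0)
		return (0);	/* got `count' MSI vectors */
	return (ENXIO);		/* caller falls back to a legacy INTx IRQ */
}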