diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index 646524717c2c..7551cc5c8b93 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -1,326 +1,342 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD$ */ /* * Interrupt entry points for external interrupts triggered by I/O APICs * as well as IPI handlers. */ #include "opt_smp.h" #include #include #include "assym.s" /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word * in the ISR. The handler determines the highest bit set in the ISR, * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. */ #define ISR_VEC(index, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ FAKE_MCOUNT(TF_RIP(%rsp)) ; \ movq lapic, %rdx ; /* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ jz 1f ; \ addl $(32 * index),%eax ; \ movq %rsp, %rsi ; \ movl %eax, %edi ; /* pass the IRQ */ \ call lapic_handle_intr ; \ 1: ; \ MEXITCOUNT ; \ jmp doreti /* * Handle "spurious INTerrupts". * Notes: * This is different than the "spurious INTerrupt" generated by an * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ .text SUPERALIGN_TEXT IDTVEC(spuriousint) /* No EOI cycle used here */ jmp doreti_iret ISR_VEC(1, apic_isr1) ISR_VEC(2, apic_isr2) ISR_VEC(3, apic_isr3) ISR_VEC(4, apic_isr4) ISR_VEC(5, apic_isr5) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) /* * Local APIC periodic timer handler. */ .text SUPERALIGN_TEXT IDTVEC(timerint) PUSH_FRAME FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp, %rdi call lapic_handle_timer MEXITCOUNT jmp doreti /* * Local APIC CMCI handler. 
*/ .text SUPERALIGN_TEXT IDTVEC(cmcint) PUSH_FRAME FAKE_MCOUNT(TF_RIP(%rsp)) call lapic_handle_cmc MEXITCOUNT jmp doreti /* * Local APIC error interrupt handler. */ .text SUPERALIGN_TEXT IDTVEC(errorint) PUSH_FRAME FAKE_MCOUNT(TF_RIP(%rsp)) call lapic_handle_error MEXITCOUNT jmp doreti +#ifdef XENHVM +/* + * Xen event channel upcall interrupt handler. + * Only used when the hypervisor supports direct vector callbacks. + */ + .text + SUPERALIGN_TEXT +IDTVEC(xen_intr_upcall) + PUSH_FRAME + FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp, %rdi + call xen_intr_handle_upcall + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invltlb) #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME movl PCPU(CPUID), %eax #ifdef COUNT_XINVLTLB_HITS incl xhits_gbl(,%rax,4) #endif #ifdef COUNT_IPIS movq ipi_invltlb_counts(,%rax,8),%rax incq (%rax) #endif POP_FRAME #endif pushq %rax movq %cr3, %rax /* invalidate the TLB */ movq %rax, %cr3 movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rax jmp doreti_iret /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT IDTVEC(invlpg) #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME movl PCPU(CPUID), %eax #ifdef COUNT_XINVLTLB_HITS incl xhits_pg(,%rax,4) #endif #ifdef COUNT_IPIS movq ipi_invlpg_counts(,%rax,8),%rax incq (%rax) #endif POP_FRAME #endif pushq %rax movq smp_tlb_addr1, %rax invlpg (%rax) /* invalidate single page */ movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rax jmp doreti_iret /* * Page range TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invlrng) #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME movl PCPU(CPUID), %eax #ifdef COUNT_XINVLTLB_HITS incl xhits_rng(,%rax,4) #endif #ifdef COUNT_IPIS movq ipi_invlrng_counts(,%rax,8),%rax incq (%rax) #endif POP_FRAME #endif pushq %rax pushq %rdx movq smp_tlb_addr1, %rdx movq smp_tlb_addr2, %rax 1: invlpg (%rdx) /* invalidate single page */ addq $PAGE_SIZE, %rdx cmpq %rax, %rdx jb 1b movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rdx popq %rax jmp doreti_iret /* * Invalidate cache. */ .text SUPERALIGN_TEXT IDTVEC(invlcache) #ifdef COUNT_IPIS PUSH_FRAME movl PCPU(CPUID), %eax movq ipi_invlcache_counts(,%rax,8),%rax incq (%rax) POP_FRAME #endif pushq %rax wbinvd movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rax jmp doreti_iret /* * Handler for IPIs sent via the per-cpu IPI bitmap. */ .text SUPERALIGN_TEXT IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME movq lapic, %rdx movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(TF_RIP(%rsp)) call ipi_bitmap_handler MEXITCOUNT jmp doreti /* * Executed by a CPU when it receives an IPI_STOP from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpustop) PUSH_FRAME movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ call cpustop_handler jmp doreti /* * Executed by a CPU when it receives an IPI_SUSPEND from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpususpend) PUSH_FRAME call cpususpend_handler movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ jmp doreti /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. * * - Calls the generic rendezvous action function. 
*/ .text SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME #ifdef COUNT_IPIS movl PCPU(CPUID), %eax movq ipi_rendezvous_counts(,%rax,8), %rax incq (%rax) #endif call smp_rendezvous_action movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ jmp doreti #endif /* SMP */ diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 7a39ef8f43b0..7f7e54a5a191 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1,2594 +1,2592 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atalk.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_sched.h" #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_ATPIC #include #else #include #endif #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern u_int64_t hammer_time(u_int64_t, u_int64_t); extern void printcpuinfo(void); /* XXX header file */ extern void identify_cpu(void); extern void panicifcpuunsupported(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its value is * the physical address at which the kernel is loaded. */ extern char kernphys[]; #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif struct msgbuf *msgbufp; /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; struct mtx dt_lock; /* lock for GDT and LDT */ static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif realmem = Maxmem; /* * Display physical memory if SMBIOS reports reasonable amount. 
*/ memsize = 0; sysenv = getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct pcb *pcb; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; pcb = td->td_pcb; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, sizeof(sf.sf_uc.uc_mcontext.mc_spare)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned long)sp & ~0x3Ful); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. 
*/ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = p->p_sysent->sv_sigcode_base; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct pcb *pcb; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; long rflags; int cs, error, ret; ksiginfo_t ksi; pcb = td->td_pcb; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) { uprintf("pid %d (%s): sigreturn copyin failed\n", p->p_pid, td->td_name); return (error); } ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid, td->td_name, rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) { uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); set_pcb_flags(pcb, PCB_FULL_IRET); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* Not applicable */ } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { uint64_t tsc1, tsc2; uint64_t acnt, mcnt, perf; register_t reg; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); /* * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, * DELAY(9) based logic fails. */ if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); } #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); if (tsc_is_invariant) { wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); tsc1 = rdtsc(); DELAY(1000); mcnt = rdmsr(MSR_MPERF); acnt = rdmsr(MSR_APERF); tsc2 = rdtsc(); intr_restore(reg); perf = 1000 * acnt / mcnt; *rate = (tsc2 - tsc1) * perf; } else { tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); *rate = (tsc2 - tsc1) * 1000; } #ifdef SMP if (smp_cpus > 1) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } #endif return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) halt(); } void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. 
*/ TUNABLE_INT("machdep.idle_mwait", &idle_mwait); SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, 0, "Use MONITOR/MWAIT for short idle"); #define STATE_RUNNING 0x0 #define STATE_MWAIT 0x1 #define STATE_SLEEPING 0x2 static void cpu_idle_acpi(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(sbt); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } static void cpu_idle_hlt(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* * Since we may be in a critical section from cpu_idle(), if * an interrupt fires during that critical section we may have * a pending preemption. If the CPU halts, then that thread * may not execute until a later interrupt awakens the CPU. * To handle this race, check for a runnable thread after * disabling interrupts and immediately return if one is * found. Also, we must absolutely guarentee that hlt is * the next instruction after sti. This ensures that any * interrupt that fires after the call to disable_intr() will * immediately awaken the CPU from hlt. Finally, please note * that on x86 this works fine because of interrupts enabled only * after the instruction following sti takes place, while IF is set * to 1 immediately, allowing hlt instruction to acknowledge the * interrupt. */ disable_intr(); if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } /* * MWAIT cpu power states. Lower 4 bits are sub-states. */ #define MWAIT_C0 0xf0 #define MWAIT_C1 0x00 #define MWAIT_C2 0x10 #define MWAIT_C3 0x20 #define MWAIT_C4 0x30 static void cpu_idle_mwait(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_MWAIT; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) { enable_intr(); *state = STATE_RUNNING; return; } cpu_monitor(state, 0, 0); if (*state == STATE_MWAIT) __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else enable_intr(); *state = STATE_RUNNING; } static void cpu_idle_spin(sbintime_t sbt) { int *state; int i; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_RUNNING; /* * The sched_runnable() call is racy but as long as there is * a loop missing it one time will have just a little impact if any * (and it is much better than missing the check at all). */ for (i = 0; i < 1000; i++) { if (sched_runnable()) return; cpu_spinwait(); } } /* * C1E renders the local APIC timer dead, so we disable it by * reading the Interrupt Pending Message register and clearing * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). * * Reference: * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" * #32559 revision 3.00+ */ #define MSR_AMDK8_IPM 0xc0010055 #define AMDK8_SMIONCMPHALT (1ULL << 27) #define AMDK8_C1EONCMPHALT (1ULL << 28) #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) static void cpu_probe_amdc1e(void) { /* * Detect the presence of C1E capability mostly on latest * dual-cores (or future) k8 family. 
*/ if (cpu_vendor_id == CPU_VENDOR_AMD && (cpu_id & 0x00000f00) == 0x00000f00 && (cpu_id & 0x0fff0000) >= 0x00040000) { cpu_ident_amdc1e = 1; } } void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; void cpu_idle(int busy) { uint64_t msr; sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); #ifdef MP_WATCHDOG ap_watchdog(PCPU_GET(cpuid)); #endif /* If we are busy - try to use fast methods. */ if (busy) { if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { cpu_idle_mwait(busy); goto out; } } /* If we have time - switch timers into idle mode. */ if (!busy) { critical_enter(); sbt = cpu_idleclock(); } /* Apply AMD APIC timer C1E workaround. */ if (cpu_ident_amdc1e && cpu_disable_deep_sleep) { msr = rdmsr(MSR_AMDK8_IPM); if (msr & AMDK8_CMPHALT) wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } /* Call main idle method. */ cpu_idle_fn(sbt); /* Switch timers mack into active mode. */ if (!busy) { cpu_activeclock(); critical_exit(); } out: CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; int *state; pcpu = pcpu_find(cpu); state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ if (*state == STATE_SLEEPING) return (0); if (*state == STATE_MWAIT) *state = STATE_RUNNING; return (1); } /* * Ordered by speed/power consumption. */ struct { void *id_fn; char *id_name; } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, { cpu_idle_hlt, "hlt" }, { cpu_idle_acpi, "acpi" }, { NULL, NULL } }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; p += sprintf(p, "%s%s", p != avail ? ", " : "", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); static int idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; char *p; int i; p = "unknown"; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; if (strcmp(idle_tbl[i].id_name, buf)) continue; cpu_idle_fn = idle_tbl[i].id_fn; return (0); } return (EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); /* * Reset registers to default values on exec. 
*/ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; regs->tf_rdi = stack; /* argv */ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; td->td_retval[1] = 0; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { register_t cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); } /* * Initialize amd64 and configure to run kernel */ /* * Initialize segments & interrupt table */ struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; /* * Software prototypes -- in more palatable form. * * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same * slots as corresponding segments for i386 kernel. 
*/ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GNULL2_SEL 1 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 8 64 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE - 1, .ssd_type = SDT_SYSTSS, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Actually, the TSS is a system descriptor which is double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 LDT Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 12 LDT Descriptor, double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; void setidt(idx, func, typ, dpl, ist) int idx; inthand_t *func; int typ; int dpl; int ist; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((uintptr_t)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), +#endif +#ifdef XENHVM + IDTVEC(xen_intr_upcall), #endif IDTVEC(fast_syscall), IDTVEC(fast_syscall32); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. 
*/ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { struct { uint16_t limit; uint64_t base; } __packed idtr, gdtr; uint16_t ldt, tr; __asm __volatile("sidt %0" : "=m" (idtr)); db_printf("idtr\t0x%016lx/%04x\n", (u_long)idtr.base, (u_int)idtr.limit); __asm __volatile("sgdt %0" : "=m" (gdtr)); db_printf("gdtr\t0x%016lx/%04x\n", (u_long)gdtr.base, (u_int)gdtr.limit); __asm __volatile("sldt %0" : "=r" (ldt)); db_printf("ldtr\t0x%04x\n", ldt); __asm __volatile("str %0" : "=r" (tr)); db_printf("tr\t0x%04x\n", tr); db_printf("cr0\t0x%016lx\n", rcr0()); db_printf("cr2\t0x%016lx\n", rcr2()); db_printf("cr3\t0x%016lx\n", rcr3()); db_printf("cr4\t0x%016lx\n", rcr4()); db_printf("EFER\t%016lx\n", rdmsr(MSR_EFER)); db_printf("FEATURES_CTL\t%016lx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); db_printf("DEBUG_CTL\t%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); db_printf("PAT\t%016lx\n", rdmsr(MSR_PAT)); db_printf("GSBASE\t%016lx\n", rdmsr(MSR_GSBASE)); } #endif void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct soft_segment_descriptor *ssd; struct system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } #if !defined(DEV_ATPIC) && defined(DEV_ISA) #include #include /* * Return a bitmap of the current interrupt requests. This is 8259-specific * and is only suitable for use at probe time. * This is only here to pacify sio. It is NOT FATAL if this doesn't work. * It shouldn't be here. There should probably be an APIC centric * implementation in the apic driver code, if at all. */ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } #endif u_int basemem; static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); if (smap->length == 0) return (0); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. 
*/ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (smap->base + smap->length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && smap->base + smap->length == physmap[insert_idx]) { physmap[insert_idx] = smap->base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && smap->base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += smap->length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = smap->base; physmap[insert_idx + 1] = smap->base + smap->length; return (1); } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx, da_indx; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; u_long physmem_start, physmem_tunable, memtest; pt_entry_t *pte; struct bios_smap *smapbase, *smap, *smapend; u_int32_t smapsize; quad_t dcons_addr, dcons_size; bzero(physmap, sizeof(physmap)); basemem = 0; physmap_idx = 0; /* * get memory map from INT 15:E820, kindly supplied by the loader. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. */ smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) panic("No BIOS smap info from loader!"); smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, &physmap_idx)) break; /* * Find the 'base memory' segment for SMP */ basemem = 0; for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) panic("BIOS smap did not include a basemem segment!"); #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 
0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); /* * Don't allow MAXMEM or hw.physmem to extend the amount of memory * in the system. */ if (Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. * * XXX Some BIOSes corrupt low 64KB between suspend and resume. * By default, mask off the first 16 pages unless we appear to be * running in a VM. */ physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); if (physmem_start < PAGE_SIZE) physmap[0] = PAGE_SIZE; else if (physmem_start >= physmap[1]) physmap[0] = round_page(physmap[1] - PAGE_SIZE); else physmap[0] = round_page(physmem_start); pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= (vm_paddr_t)kernphys && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. 
*/ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]); } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; struct nmi_pcpu *np; struct xstate_hdr *xhdr; u_int64_t msr; char *env; size_t kstack0_sz; thread0.td_kstack = physfree + KERNBASE; thread0.td_kstack_pages = KSTACK_PAGES; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; bzero((void *)thread0.td_kstack, kstack0_sz); physfree += kstack0_sz; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE; #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); #endif /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments */ for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); dpcpu_init((void *)(physfree + KERNBASE), 0); physfree += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(tssp, &common_tss[0]); PCPU_SET(commontssp, &common_tss[0]); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. 
*/ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); #endif +#ifdef XENHVM + setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); #endif #else #error "have you forgotten the isa device?"; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif identify_cpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* * NMI stack, runs on ist2. The pcpu pointer is stored just * above the start of the ist2 stack. 
*/ np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist2 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, msgbufsize); fpuinit(); /* * Set up thread0 pcb after fpuinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; /* Ensure the stack is aligned to 16 bytes */ common_tss[0].tss_rsp0 &= ~0xFul; PCPU_SET(rsp0, common_tss[0].tss_rsp0); PCPU_SET(curpcb, thread0.td_pcb); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); _ufssel = GSEL(GUFS32_SEL, SEL_UPL); _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_cr3 = KPML4phys; thread0.td_frame = &proc0_tf; env = getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); -#ifdef XENHVM - if (inw(0x10) == 0x49d2) { - if (bootverbose) - printf("Xen detected: disabling emulated block and network devices\n"); - outw(0x10, 3); - } -#endif - cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_r12 = tf->tf_r12; pcb->pcb_r13 = tf->tf_r13; pcb->pcb_r14 = tf->tf_r14; pcb->pcb_r15 = tf->tf_r15; pcb->pcb_rbp = tf->tf_rbp; pcb->pcb_rbx = tf->tf_rbx; pcb->pcb_rip = tf->tf_rip; pcb->pcb_rsp = tf->tf_rsp; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_rflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_rflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; tp = td->td_frame; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; if (tp->tf_flags & TF_HASSEGS) { regs->r_ds = tp->tf_ds; regs->r_es = tp->tf_es; regs->r_fs = tp->tf_fs; regs->r_gs = tp->tf_gs; } else { regs->r_ds = 0; regs->r_es = 0; regs->r_fs = 0; regs->r_gs = 0; } return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; register_t rflags; tp = td->td_frame; rflags = regs->r_rflags & 0xffffffff; if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; if (0) { /* XXXKIB */ tp->tf_ds = regs->r_ds; tp->tf_es = regs->r_es; tp->tf_fs = regs->r_fs; tp->tf_gs = regs->r_gs; tp->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } return (0); } /* XXX check all this stuff! 
*/ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); fpugetregs(td); fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); fpuuserinited(td); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; struct trapframe *tp; pcb = td->td_pcb; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; mcp->mc_rflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; mcp->mc_rflags &= ~PSL_C; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_ds = tp->tf_ds; mcp->mc_es = tp->tf_es; mcp->mc_fs = tp->tf_fs; mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
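 *
 * Concretely, the rflags handed in are merged below as
 *
 *      rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
 *               (tp->tf_rflags & ~PSL_USERCHANGE);
 *
 * so a context may change the arithmetic flags, PSL_T and the other
 * user-modifiable bits, while attempts to raise IOPL or clear PSL_I are
 * silently ignored.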
*/ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tp; char *xfpustate; long rflags; int ret; pcb = td->td_pcb; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; tp->tf_r14 = mcp->mc_r14; tp->tf_r13 = mcp->mc_r13; tp->tf_r12 = mcp->mc_r12; tp->tf_r11 = mcp->mc_r11; tp->tf_r10 = mcp->mc_r10; tp->tf_r9 = mcp->mc_r9; tp->tf_r8 = mcp->mc_r8; tp->tf_rdi = mcp->mc_rdi; tp->tf_rsi = mcp->mc_rsi; tp->tf_rbp = mcp->mc_rbp; tp->tf_rbx = mcp->mc_rbx; tp->tf_rdx = mcp->mc_rdx; tp->tf_rcx = mcp->mc_rcx; tp->tf_rax = mcp->mc_rax; tp->tf_rip = mcp->mc_rip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; tp->tf_flags = mcp->mc_flags; if (tp->tf_flags & TF_HASSEGS) { tp->tf_ds = mcp->mc_ds; tp->tf_es = mcp->mc_es; tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } set_pcb_flags(pcb, PCB_FULL_IRET); return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; mcp->mc_ownedfp = fpugetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(struct savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { struct savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); if (PCPU_GET(fpcurthread) == td) fpudrop(); /* * XXX force a full drop of the fpu. The above only drops it if we * owned it. * * XXX I don't much like fpugetuserregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of fpugetuserregs()... perhaps we just * have too many layers. 
*/ clear_pcb_flags(curthread->td_pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[8] = 0; dbregs->dr[9] = 0; dbregs->dr[10] = 0; dbregs->dr[11] = 0; dbregs->dr[12] = 0; dbregs->dr[13] = 0; dbregs->dr[14] = 0; dbregs->dr[15] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP or a general protection fault right here. * Upper bits of dr6 and dr7 must not be set */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (td->td_frame->tf_cs == _ucode32sel && DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) return (EINVAL); } if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || (dbregs->dr[7] & 0xffffffff00000000ul) != 0) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; set_pcb_flags(pcb, PCB_DBREGS); } return (0); } void reset_dbregs(void) { load_dr7(0); /* Turn off the control bits first */ load_dr0(0); load_dr1(0); load_dr2(0); load_dr3(0); load_dr6(0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
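 *
 * For reference, the debug register bits consulted below (architectural
 * layout, summarized here rather than defined in this file):
 *
 *      %dr7 bits 0..7:  L0,G0, L1,G1, L2,G2, L3,G3   per-breakpoint enables
 *      %dr6 bits 0..3:  B0..B3                       which breakpoint(s) fired
 *
 * Worked example: %dr6 == 0x5 means breakpoints 0 and 2 triggered, so the
 * addresses in %dr0 and %dr2 are the ones compared against
 * VM_MAXUSER_ADDRESS.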
*/ int user_dbreg_trap(void) { u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int64_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 79aeb9c32291..267b933cefd2 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1,1497 +1,1506 @@ /*- * Copyright (c) 1996, by Steve Passe * Copyright (c) 2003, by Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_sched.h" #include "opt_smp.h" #include #include #include #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#ifdef XENHVM +#include +#endif + #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) /* lock region used by kernel profiling */ int mcount_lock; int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern struct pcpu __pcpu[]; /* AP uses this during bootstrap. Do not staticize. */ char *bootSTK; static int bootAP; /* Free these after use */ void *bootstacks[MAXCPU]; /* Temporary variables for init_secondary() */ char *doublefault_stack; char *nmi_stack; void *dpcpu; struct pcb stoppcbs[MAXCPU]; struct pcb **susppcbs; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; #ifdef COUNT_IPIS /* Interrupt counts. */ static u_long *ipi_preempt_counts[MAXCPU]; static u_long *ipi_ast_counts[MAXCPU]; u_long *ipi_invltlb_counts[MAXCPU]; u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); /* * Local data and functions. */ static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready = 0; /* * Store data from cpu_add() until later in the boot when we actually setup * the APs. */ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; int cpu_hyperthread:1; } static cpu_info[MAX_APIC_ID + 1]; int cpu_apic_ids[MAXCPU]; int apic_cpuids[MAX_APIC_ID + 1]; /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; static int cpu_logical; /* logical cpus per core */ static int cpu_cores; /* cores per package */ static void assign_cpu_ids(void); static void set_interrupt_apic_ids(void); static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ static int hyperthreading_allowed = 1; static u_int bootMP_size; static void mem_range_AP_init(void) { if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) mem_range_softc.mr_op->initAP(&mem_range_softc); } static void topo_probe_amd(void) { int core_id_bits; int id; /* AMD processors do not support HTT. */ cpu_logical = 1; if ((amd_feature2 & AMDID2_CMP) == 0) { cpu_cores = 1; return; } core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >> AMDID_COREID_SIZE_SHIFT; if (core_id_bits == 0) { cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; return; } /* Fam 10h and newer should get here. */ for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. */ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. 
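 *
 * Worked example (illustrative numbers, not from a particular part): with
 * core_id_bits == 2 the package ID is APIC ID >> 2, so for boot_cpu_id == 1:
 *
 *      id = 3  ->  3 >> 2 == 0 == 1 >> 2    same package, counted
 *      id = 5  ->  5 >> 2 == 1 != 0         other package, skipped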
*/ if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits)) continue; cpu_cores++; } } /* * Round up to the next power of two, if necessary, and then * take log2. * Returns -1 if argument is zero. */ static __inline int mask_width(u_int x) { return (fls(x << (1 - powerof2(x))) - 1); } static void topo_probe_0x4(void) { u_int p[4]; int pkg_id_bits; int core_id_bits; int max_cores; int max_logical; int id; /* Both zero and one here mean one logical processor per package. */ max_logical = (cpu_feature & CPUID_HTT) != 0 ? (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; if (max_logical <= 1) return; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. Further, we count number of * logical processors that belong to the same core * as BSP thus deducing number of threads per core. */ if (cpu_high >= 0x4) { cpuid_count(0x04, 0, p); max_cores = ((p[0] >> 26) & 0x3f) + 1; } else max_cores = 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; pkg_id_bits = core_id_bits + mask_width(max_cores); for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. */ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. */ if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits)) continue; cpu_cores++; /* Check if logical CPU has the same package and core IDs. */ if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits)) cpu_logical++; } KASSERT(cpu_cores >= 1 && cpu_logical >= 1, ("topo_probe_0x4 couldn't find BSP")); cpu_cores /= cpu_logical; hyperthreading_cpus = cpu_logical; } static void topo_probe_0xb(void) { u_int p[4]; int bits; int cnt; int i; int logical; int type; int x; /* We only support three levels for now. */ for (i = 0; i < 3; i++) { cpuid_count(0x0b, i, p); /* Fall back if CPU leaf 11 doesn't really exist. */ if (i == 0 && p[1] == 0) { topo_probe_0x4(); return; } bits = p[0] & 0x1f; logical = p[1] &= 0xffff; type = (p[2] >> 8) & 0xff; if (type == 0 || logical == 0) break; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. */ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_disabled) continue; if (x >> bits == boot_cpu_id >> bits) cnt++; } if (type == CPUID_TYPE_SMT) cpu_logical = cnt; else if (type == CPUID_TYPE_CORE) cpu_cores = cnt; } if (cpu_logical == 0) cpu_logical = 1; cpu_cores /= cpu_logical; } /* * Both topology discovery code and code that consumes topology * information assume top-down uniformity of the topology. * That is, all physical packages must be identical and each * core in a package must have the same number of threads. * Topology information is queried only on BSP, on which this * code runs and for which it can query CPUID information. * Then topology is extrapolated on all packages using the * uniformity assumption. */ static void topo_probe(void) { static int cpu_topo_probed = 0; if (cpu_topo_probed) return; CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) topo_probe_amd(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) { /* * See Intel(R) 64 Architecture Processor * Topology Enumeration article for details. * * Note that 0x1 <= cpu_high < 4 case should be * compatible with topo_probe_0x4() logic when * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) * or it should trigger the fallback otherwise. 
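 *
 * A worked example of the derivation topo_probe_0x4() performs (numbers
 * are illustrative): with CPUID.1:EBX[23:16] == 16 logical CPUs per
 * package and CPUID.4 reporting max_cores == 6,
 *
 *      core_id_bits = mask_width(16 / 6) = mask_width(2) = 1
 *      pkg_id_bits  = 1 + mask_width(6)  = 1 + 3         = 4
 *
 * so APIC IDs that agree from bit 4 up share a package, and IDs that agree
 * from bit 1 up share a core.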
*/ if (cpu_high >= 0xb) topo_probe_0xb(); else if (cpu_high >= 0x1) topo_probe_0x4(); } /* * Fallback: assume each logical CPU is in separate * physical package. That is, no multi-core, no SMT. */ if (cpu_cores == 0 || cpu_logical == 0) cpu_cores = cpu_logical = 1; cpu_topo_probed = 1; } struct cpu_group * cpu_topo(void) { int cg_flags; /* * Determine whether any threading flags are * necessry. */ topo_probe(); if (cpu_logical > 1 && hyperthreading_cpus) cg_flags = CG_FLAG_HTT; else if (cpu_logical > 1) cg_flags = CG_FLAG_SMT; else cg_flags = 0; if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { printf("WARNING: Non-uniform processors.\n"); printf("WARNING: Using suboptimal topology.\n"); return (smp_topo_none()); } /* * No multi-core or hyper-threaded. */ if (cpu_logical * cpu_cores == 1) return (smp_topo_none()); /* * Only HTT no multi-core. */ if (cpu_logical > 1 && cpu_cores == 1) return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags)); /* * Only multi-core no HTT. */ if (cpu_cores > 1 && cpu_logical == 1) return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags)); /* * Both HTT and multi-core. */ return (smp_topo_2level(CG_SHARE_L2, cpu_cores, CG_SHARE_L1, cpu_logical, cg_flags)); } /* * Calculate usable address in base memory for AP trampoline code. */ u_int mp_bootaddress(u_int basemem) { bootMP_size = mptramp_end - mptramp_start; boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */ if (((basemem * 1024) - boot_address) < bootMP_size) boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ /* 3 levels of page table pages */ mptramp_pagetables = boot_address - (PAGE_SIZE * 3); return mptramp_pagetables; } void cpu_add(u_int apic_id, char boot_cpu) { if (apic_id > MAX_APIC_ID) { panic("SMP: APIC ID %d too high", apic_id); return; } KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", apic_id)); cpu_info[apic_id].cpu_present = 1; if (boot_cpu) { KASSERT(boot_cpu_id == -1, ("CPU %d claims to be BSP, but CPU %d already is", apic_id, boot_cpu_id)); boot_cpu_id = apic_id; cpu_info[apic_id].cpu_bsp = 1; } if (mp_ncpus < MAXCPU) { mp_ncpus++; mp_maxid = mp_ncpus - 1; } if (bootverbose) printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : "AP"); } void cpu_mp_setmaxid(void) { /* * mp_maxid should be already set by calls to cpu_add(). * Just sanity check its value here. */ if (mp_ncpus == 0) KASSERT(mp_maxid == 0, ("%s: mp_ncpus is zero, but mp_maxid is not", __func__)); else if (mp_ncpus == 1) mp_maxid = 0; else KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } int cpu_mp_probe(void) { /* * Always record BSP in CPU map so that the mbuf init code works * correctly. */ CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup * the variables to represent a system with a single CPU * with an id of 0. */ mp_ncpus = 1; return (0); } /* At least one CPU was found. */ if (mp_ncpus == 1) { /* * One CPU was found, so this must be a UP system with * an I/O APIC. */ mp_maxid = 0; return (0); } /* At least two CPUs were found. */ return (1); } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. 
*/ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. */ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); set_interrupt_apic_ids(); } /* * Print various information about the SMP system hardware and setup. */ void cpu_mp_announce(void) { const char *hyperthread; int i; printf("FreeBSD/SMP: %d package(s) x %d core(s)", mp_ncpus / (cpu_cores * cpu_logical), cpu_cores); if (hyperthreading_cpus > 1) printf(" x %d HTT threads", cpu_logical); else if (cpu_logical > 1) printf(" x %d SMT threads", cpu_logical); printf("\n"); /* List active CPUs first. */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1; i < mp_ncpus; i++) { if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread, cpu_apic_ids[i]); } /* List disabled CPUs last. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled) continue; if (cpu_info[i].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread, i); } } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr, cr0; u_int cpuid; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; /* Set by the startup code for us to use */ cpu = bootAP; /* Init tss */ common_tss[cpu] = common_tss[0]; common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */ common_tss[cpu].tss_iobase = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE; common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; /* The NMI stack runs on IST2. 
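 *
 * Sketch of the page laid out below (one page per AP; the nmi_pcpu sits at
 * the very top and the stack grows down from just beneath it):
 *
 *      np          = (struct nmi_pcpu *)&nmi_stack[PAGE_SIZE] - 1;
 *      tss_ist2    = (long)np;            IST2 stack pointer
 *      np->np_pcpu = (register_t)pc;      saved per-CPU pointer
 *
 * An NMI can arrive before swapgs has run, so the handler recovers its
 * pcpu pointer from np_pcpu at the top of the IST2 stack instead of
 * trusting the current %gs base.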
*/ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist2 = (long) np; /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1)) ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]); } ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]); ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; ap_gdt.rd_base = (long) &gdt[NGDT * cpu]; lgdt(&ap_gdt); /* does magic intra-segment return */ /* Get per-cpu data */ pc = &__pcpu[cpu]; /* prime data page for it to use */ pcpu_init(pc, cpu, sizeof(struct pcpu)); dpcpu_init(dpcpu, cpu); pc->pc_apic_id = cpu_apic_ids[cpu]; pc->pc_prvspace = pc; pc->pc_curthread = 0; pc->pc_tssp = &common_tss[cpu]; pc->pc_commontssp = &common_tss[cpu]; pc->pc_rsp0 = 0; pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]; pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL]; pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GUSERLDT_SEL]; /* Save the per-cpu pointer for use by the NMI handler. */ np->np_pcpu = (register_t) pc; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ lidt(&r_idt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); /* Disable local APIC just to be sure. */ lapic_disable(); /* signal our startup to the BSP. */ mp_naps++; /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); /* Initialize the PAT MSR. */ pmap_init_pat(); /* set up CPU registers and state */ cpu_setregs(); /* set up SSE/NX registers */ initializecpu(); /* set up FPU state on the AP */ fpuinit(); +#ifdef XENHVM + /* register vcpu_info area */ + xen_hvm_init_cpu(); +#endif + /* A quick check from sanity claus */ cpuid = PCPU_GET(cpuid); if (PCPU_GET(apic_id) != lapic_id()) { printf("SMP: cpuid = %d\n", cpuid); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); panic("cpuid mismatch! boom!!"); } /* Initialize curthread. */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); mca_init(); mtx_lock_spin(&ap_boot_mtx); /* Init local apic for irq's */ lapic_setup(1); /* Set memory range attributes for this CPU to match the BSP */ mem_range_AP_init(); smp_cpus++; CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); printf("SMP: AP CPU #%d Launched!\n", cpuid); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 
2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) CPU_SET(cpuid, &logical_cpus_mask); if (bootverbose) lapic_dump("AP"); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); smp_active = 1; /* historic */ } /* * Enable global pages TLB extension * This also implicitly flushes the TLB */ load_cr4(rcr4() | CR4_PGE); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); mtx_unlock_spin(&ap_boot_mtx); /* Wait until all the AP's are up. */ while (smp_started == 0) ia32_pause(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } /******************************************************************* * local functions and data */ /* * We tell the I/O APIC code about all the CPUs we want to receive * interrupts. If we don't want certain CPUs to receive IRQs we * can simply not tell the I/O APIC code about them in this function. * We also do not tell it about the BSP since it tells itself about * the BSP internally to work with UP kernels and on UP machines. */ static void set_interrupt_apic_ids(void) { u_int i, apic_id; for (i = 0; i < MAXCPU; i++) { apic_id = cpu_apic_ids[i]; if (apic_id == -1) continue; if (cpu_info[apic_id].cpu_bsp) continue; if (cpu_info[apic_id].cpu_disabled) continue; /* Don't let hyperthreads service interrupts. */ if (hyperthreading_cpus > 1 && apic_id % hyperthreading_cpus != 0) continue; intr_add_cpu(i); } } /* * Assign logical CPU IDs to local APICs. */ static void assign_cpu_ids(void) { u_int i; TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", &hyperthreading_allowed); /* Check for explicitly disabled CPUs. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) continue; if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; /* * Don't use HT CPU if it has been disabled by a * tunable. */ if (hyperthreading_allowed == 0) { cpu_info[i].cpu_disabled = 1; continue; } } /* Don't use this CPU if it has been disabled by a tunable. */ if (resource_disabled("lapic", i)) { cpu_info[i].cpu_disabled = 1; continue; } } if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { hyperthreading_cpus = 0; cpu_logical = 1; } /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. * * To minimize confusion for userland, we attempt to number * CPUs such that all threads and cores in a package are * grouped together. For now we assume that the BSP is always * the first thread in a package and just start adding APs * starting with the BSP's APIC ID. */ mp_ncpus = 1; cpu_apic_ids[0] = boot_cpu_id; apic_cpuids[boot_cpu_id] = 0; for (i = boot_cpu_id + 1; i != boot_cpu_id; i == MAX_APIC_ID ? 
i = 0 : i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || cpu_info[i].cpu_disabled) continue; if (mp_ncpus < MAXCPU) { cpu_apic_ids[mp_ncpus] = i; apic_cpuids[i] = mp_ncpus; mp_ncpus++; } else cpu_info[i].cpu_disabled = 1; } KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } /* * start each AP in our list */ static int start_all_aps(void) { vm_offset_t va = boot_address + KERNBASE; u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; int apic_id, cpu, i; u_char mpbiosreason; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* install the AP 1st level boot code */ pmap_kenter(va, boot_address); pmap_invalidate_page(kernel_pmap, va); bcopy(mptramp_start, (void *)va, bootMP_size); /* Locate the page tables, they'll be below the trampoline */ pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE); pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { /* Each slot of the level 4 pages points to the same level 3 page */ pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); pt4[i] |= PG_V | PG_RW | PG_U; /* Each slot of the level 3 pages points to the same level 2 page */ pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); pt3[i] |= PG_V | PG_RW | PG_U; /* The level 2 page slots are mapped with 2MB pages for 1GB. */ pt2[i] = i * (2 * 1024 * 1024); pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; } /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, KSTACK_PAGES * PAGE_SIZE, M_WAITOK | M_ZERO); doublefault_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; /* attempt to start the Application Processor */ if (!start_ap(apic_id)) { /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); /* number of APs actually started */ return mp_naps; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. 
*/ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } #ifdef COUNT_XINVLTLB_HITS u_int xhits_gbl[MAXCPU]; u_int xhits_pg[MAXCPU]; u_int xhits_rng[MAXCPU]; static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, sizeof(xhits_gbl), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, sizeof(xhits_pg), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, sizeof(xhits_rng), "IU", ""); u_int ipi_global; u_int ipi_page; u_int ipi_range; u_int ipi_range_size; SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, 0, ""); u_int ipi_masked_global; u_int ipi_masked_page; u_int ipi_masked_range; u_int ipi_masked_range_size; SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, &ipi_masked_global, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, &ipi_masked_page, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, &ipi_masked_range, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ /* * Init and startup IPI. */ void ipi_startup(int apic_id, int vector) { /* * first we do an INIT IPI: this INIT IPI might be run, resetting * and running the target CPU. OR this INIT IPI might be latched (P5 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be * ignored. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); lapic_ipi_wait(-1); DELAY(10000); /* wait ~10mS */ /* * next we do a STARTUP IPI: the previous INIT IPI might still be * latched, (P5 bug) this 1st STARTUP would then terminate * immediately, and the previously started INIT IPI would continue. OR * the previous INIT IPI has already run. and this STARTUP IPI will * run. OR the previous INIT IPI was ignored. and this STARTUP IPI * will run. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ /* * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is * recognized after hardware RESET or INIT IPI. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ } /* * Send an IPI to specified CPU handling the bitmap logic. 
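 *
 * Sketch of the bitmap path taken below for the small IPIs (IPI_PREEMPT,
 * IPI_AST, IPI_HARDCLOCK), which all share one vector:
 *
 *      bitmap = 1 << ipi;
 *      atomically OR bitmap into cpu_ipi_pending[cpu];
 *      if the word was already non-zero, return;     a delivery is in flight
 *      lapic_ipi_vectored(IPI_BITMAP_VECTOR, cpu_apic_ids[cpu]);
 *
 * At most one IPI_BITMAP_VECTOR interrupt per destination CPU is therefore
 * outstanding for the whole group, which keeps within the two-entry APIC
 * receive FIFO budget described in apicvar.h.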
*/ static void ipi_send_cpu(int cpu, u_int ipi) { u_int bitmap, old_pending, new_pending; KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; do { old_pending = cpu_ipi_pending[cpu]; new_pending = old_pending | bitmap; } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending)); if (old_pending) return; } lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); } /* * Flush the TLB on all other CPU's */ static void smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) return; /* no other cpus */ if (!(read_rflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); ipi_all_but_self(vector); while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } static void smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; if (CPU_ISFULLSET(&mask)) { if (othercpus < 1) return; } else { CPU_CLR(PCPU_GET(cpuid), &mask); if (CPU_EMPTY(&mask)) return; } if (!(read_rflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); if (CPU_ISFULLSET(&mask)) { ncpu = othercpus; ipi_all_but_self(vector); } else { ncpu = 0; while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, vector); ipi_send_cpu(cpu, vector); ncpu++; } } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } void smp_cache_flush(void) { if (smp_started) smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); } void smp_invltlb(void) { if (smp_started) { smp_tlb_shootdown(IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_global++; #endif } } void smp_invlpg(vm_offset_t addr) { if (smp_started) { smp_tlb_shootdown(IPI_INVLPG, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_page++; #endif } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_range++; ipi_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void smp_masked_invltlb(cpuset_t mask) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_global++; #endif } } void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_page++; #endif } } void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_masked_range++; ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void ipi_bitmap_handler(struct trapframe frame) { struct trapframe *oldframe; struct thread *td; int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; critical_enter(); td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = &frame; ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); if (ipi_bitmap & (1 << IPI_PREEMPT)) { #ifdef COUNT_IPIS (*ipi_preempt_counts[cpu])++; #endif sched_preempt(td); } if (ipi_bitmap & (1 << IPI_AST)) { #ifdef COUNT_IPIS (*ipi_ast_counts[cpu])++; #endif /* 
Nothing to do for AST */ } if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { #ifdef COUNT_IPIS (*ipi_hardclock_counts[cpu])++; #endif hardclockintr(); } td->td_intr_frame = oldframe; td->td_intr_nesting_level--; critical_exit(); } /* * send an IPI to a set of cpus. */ void ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } /* * send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } /* * send an IPI to all CPUs EXCEPT myself */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(other_cpus, ipi); return; } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int ipi_nmi_handler() { u_int cpuid; /* * As long as there is not a simple way to know about a NMI's * source, if the bitmask for the current CPU is present in * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) return (1); CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); cpustop_handler(); return (0); } /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. */ void cpustop_handler(void) { u_int cpu; cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); #ifdef DDB amd64_db_resume_dbreg(); #endif if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); cpustop_restartfunc = NULL; } } /* * Handle an IPI_SUSPEND by saving our current context and spinning until we * are resumed. */ void cpususpend_handler(void) { u_int cpu; cpu = PCPU_GET(cpuid); if (savectx(susppcbs[cpu])) { ctx_fpusave(susppcbs[cpu]->pcb_fpususpend); wbinvd(); CPU_SET_ATOMIC(cpu, &suspended_cpus); } else { pmap_init_pat(); initializecpu(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); /* Indicate that we are resumed */ CPU_CLR_ATOMIC(cpu, &suspended_cpus); } /* Wait for resume */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); /* Resume MCA and local APIC */ mca_resume(); lapic_setup(0); CPU_CLR_ATOMIC(cpu, &started_cpus); } /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. 
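 *
 * Handshake sketch; the AP side lives in init_secondary() above:
 *
 *      BSP (release_aps)                     AP (init_secondary)
 *      aps_ready = 1;              ----->    while (!aps_ready) ia32_pause();
 *                                            per-CPU setup, smp_cpus++;
 *      while (smp_started == 0)    <-----    last AP sets smp_started = 1
 *              ia32_pause();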
*/ static void release_aps(void *dummy __unused) { if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); while (smp_started == 0) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. */ static void mp_ipi_intrcnt(void *dummy) { char buf[64]; int i; CPU_FOREACH(i) { snprintf(buf, sizeof(buf), "cpu%d:invltlb", i); intrcnt_add(buf, &ipi_invltlb_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlrng", i); intrcnt_add(buf, &ipi_invlrng_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlpg", i); intrcnt_add(buf, &ipi_invlpg_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlcache", i); intrcnt_add(buf, &ipi_invlcache_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:preempt", i); intrcnt_add(buf, &ipi_preempt_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:ast", i); intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); #endif diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h index ae2f5b90791e..9cd4c95aff8d 100644 --- a/sys/amd64/include/apicvar.h +++ b/sys/amd64/include/apicvar.h @@ -1,232 +1,233 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_APICVAR_H_ #define _MACHINE_APICVAR_H_ #include /* * Local && I/O APIC variable definitions. 
*/ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define MAX_APIC_ID 0xfe #define APIC_ID_ALL 0xff /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 6) /* IPIs handled by IPI_BITMAPED_VECTOR (XXX ups is there a better place?) */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */ #define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. 
(Does not use the APIC's interrupt fifo) */ #define APIC_SPURIOUS_INT 255 #define LVT_LINT0 0 #define LVT_LINT1 1 #define LVT_TIMER 2 #define LVT_ERROR 3 #define LVT_PMC 4 #define LVT_THERMAL 5 #define LVT_CMCI 6 #define LVT_MAX LVT_CMCI #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI /* * An APIC enumerator is a psuedo bus driver that enumerates APIC's including * CPU's and I/O APIC's. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint); extern vm_paddr_t lapic_paddr; extern int apic_cpuids[]; u_int apic_alloc_vector(u_int apic_id, u_int irq); u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); void apic_disable_vector(u_int apic_id, u_int vector); void apic_enable_vector(u_int apic_id, u_int vector); void apic_free_vector(u_int apic_id, u_int vector, u_int irq); u_int apic_idt_to_irq(u_int apic_id, u_int vector); void apic_register_enumerator(struct apic_enumerator *enumerator); u_int apic_cpuid(u_int apic_id); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); void lapic_create(u_int apic_id, int boot_cpu); void lapic_disable(void); void lapic_disable_pmc(void); void lapic_dump(const char *str); void lapic_enable_cmc(void); int lapic_enable_pmc(void); void lapic_eoi(void); int lapic_id(void); void lapic_init(vm_paddr_t addr); int lapic_intr_pending(u_int vector); void lapic_ipi_raw(register_t icrlo, u_int dest); void lapic_ipi_vectored(u_int vector, int dest); int lapic_ipi_wait(int delay); void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); void lapic_reenable_pmc(void); void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode); int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(int boot); +void xen_intr_handle_upcall(struct trapframe *frame); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h index 8671605f8aed..c8b454d6d783 100644 --- a/sys/amd64/include/intr_machdep.h +++ b/sys/amd64/include/intr_machdep.h @@ -1,174 +1,186 @@ /*- * Copyright (c) 2003 
John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid - * confusion since 255 is used in PCI to indicate an invalid IRQ. + * IRQ values from 256 to 767 are used by MSI. When running under the Xen + * Hypervisor, IRQ values from 768 to 4863 are available for binding to + * event channel events. We leave 255 unused to avoid confusion since 255 is + * used in PCI to indicate an invalid IRQ. */ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) +#ifdef XENHVM +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT \ + (FIRST_MSI_INT + NUM_MSI_INTS) +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#else +#define NUM_EVTCHN_INTS 0 +#endif +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 8 counters for each CPU for IPI counters for SMP. 
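 *
 * Worked example of the sizing this feeds (using the values from the IRQ
 * layout comment above: 256 values below FIRST_MSI_INT for ISA and PCI
 * intline IRQs, 512 MSI IRQs and, under XENHVM, the 768..4863 event
 * channel range, i.e. NR_EVENT_CHANNELS == 4096):
 *
 *      NUM_IO_INTS          = 256 + 512 + 4096 = 4864
 *      FIRST_EVTCHN_INT     = 768,  LAST_EVTCHN_INT = 4863
 *      INTRCNT_COUNT (SMP)  = 1 + 2 * 4864 + (1 + 8) * MAXCPU
 *
 * Without XENHVM the event channel term is zero and NUM_IO_INTS stays at
 * 768.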
*/ #ifdef SMP #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 8) * MAXCPU) #else #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif #ifndef LOCORE typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; }; struct trapframe; /* * The following data structure holds per-cpu data, and is placed just * above the top of the space used for the NMI stack. */ struct nmi_pcpu { register_t np_pcpu; register_t __padding; /* pad to 16 bytes */ }; extern struct mtx icu_lock; extern int elcr_found; #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. */ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(void); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int *irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__MACHINE_INTR_MACHDEP_H__ */ diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index ba4c61870797..1c83c2a625ee 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -1,266 +1,256 @@ /*- * Copyright (c) Peter Wemm * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #ifndef _SYS_CDEFS_H_ #error "sys/cdefs.h is a prerequisite for this file" #endif #if defined(XEN) || defined(XENHVM) #ifndef NR_VIRQS #define NR_VIRQS 24 #endif #ifndef NR_IPIS #define NR_IPIS 2 #endif #endif -#ifdef XENHVM -#define PCPU_XEN_FIELDS \ - ; \ - unsigned int pc_last_processed_l1i; \ - unsigned int pc_last_processed_l2i -#else -#define PCPU_XEN_FIELDS -#endif - /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. 
* The reason for doing it via a struct is so that an array of pointers * to each CPU's data can be set up for things like "check curproc on all * other processors" */ #define PCPU_MD_FIELDS \ char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \ struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \ register_t pc_rsp0; \ register_t pc_scratch_rsp; /* User %rsp in syscall */ \ u_int pc_apic_id; \ u_int pc_acpi_id; /* ACPI CPU id */ \ /* Pointer to the CPU %fs descriptor */ \ struct user_segment_descriptor *pc_fs32p; \ /* Pointer to the CPU %gs descriptor */ \ struct user_segment_descriptor *pc_gs32p; \ /* Pointer to the CPU LDT descriptor */ \ struct system_segment_descriptor *pc_ldt; \ /* Pointer to the CPU TSS descriptor */ \ struct system_segment_descriptor *pc_tss; \ - u_int pc_cmci_mask /* MCx banks for CMCI */ \ - PCPU_XEN_FIELDS; \ + u_int pc_cmci_mask; /* MCx banks for CMCI */ \ uint64_t pc_dbreg[16]; /* ddb debugging regs */ \ int pc_dbreg_cmd; /* ddb debugging reg cmd */ \ char __pad[161] /* be divisor of PAGE_SIZE \ after cache alignment */ #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 #ifdef _KERNEL #ifdef lint extern struct pcpu *pcpup; #define PCPU_GET(member) (pcpup->pc_ ## member) #define PCPU_ADD(member, val) (pcpup->pc_ ## member += (val)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&pcpup->pc_ ## member) #define PCPU_SET(member, val) (pcpup->pc_ ## member = (val)) #elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) /* * Evaluates to the byte offset of the per-cpu variable name. */ #define __pcpu_offset(name) \ __offsetof(struct pcpu, name) /* * Evaluates to the type of the per-cpu variable name. */ #define __pcpu_type(name) \ __typeof(((struct pcpu *)0)->name) /* * Evaluates to the address of the per-cpu variable name. */ #define __PCPU_PTR(name) __extension__ ({ \ __pcpu_type(name) *__p; \ \ __asm __volatile("movq %%gs:%1,%0; addq %2,%0" \ : "=r" (__p) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ "i" (__pcpu_offset(name))); \ \ __p; \ }) /* * Evaluates to the value of the per-cpu variable name. */ #define __PCPU_GET(name) __extension__ ({ \ __pcpu_type(name) __res; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ if (sizeof(__res) == 1 || sizeof(__res) == 2 || \ sizeof(__res) == 4 || sizeof(__res) == 8) { \ __asm __volatile("mov %%gs:%1,%0" \ : "=r" (__s) \ : "m" (*(struct __s *)(__pcpu_offset(name)))); \ *(struct __s *)(void *)&__res = __s; \ } else { \ __res = *__PCPU_PTR(name); \ } \ __res; \ }) /* * Adds the value to the per-cpu counter name. The implementation * must be atomic with respect to interrupts. */ #define __PCPU_ADD(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4 || sizeof(__val) == 8) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("add %1,%%gs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else \ *__PCPU_PTR(name) += __val; \ } while (0) /* * Increments the value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. 
*/ #define __PCPU_INC(name) do { \ CTASSERT(sizeof(__pcpu_type(name)) == 1 || \ sizeof(__pcpu_type(name)) == 2 || \ sizeof(__pcpu_type(name)) == 4 || \ sizeof(__pcpu_type(name)) == 8); \ if (sizeof(__pcpu_type(name)) == 1) { \ __asm __volatile("incb %%gs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 2) { \ __asm __volatile("incw %%gs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 4) { \ __asm __volatile("incl %%gs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 8) { \ __asm __volatile("incq %%gs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } \ } while (0) /* * Sets the value of the per-cpu variable name to value val. */ #define __PCPU_SET(name, val) { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4 || sizeof(__val) == 8) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("mov %1,%%gs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else { \ *__PCPU_PTR(name) = __val; \ } \ } #define PCPU_GET(member) __PCPU_GET(pc_ ## member) #define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) #define PCPU_INC(member) __PCPU_INC(pc_ ## member) #define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) #define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) #define OFFSETOF_CURTHREAD 0 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnull-dereference" #endif static __inline __pure2 struct thread * __curthread(void) { struct thread *td; __asm("movq %%gs:%1,%0" : "=r" (td) : "m" (*(char *)OFFSETOF_CURTHREAD)); return (td); } #ifdef __clang__ #pragma clang diagnostic pop #endif #define curthread (__curthread()) #define OFFSETOF_CURPCB 32 static __inline __pure2 struct pcb * __curpcb(void) { struct pcb *pcb; __asm("movq %%gs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB)); return (pcb); } #define curpcb (__curpcb()) #define IS_BSP() (PCPU_GET(cpuid) == 0) #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */ #error "this file needs to be ported to your compiler" #endif /* lint, etc. */ #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */ diff --git a/sys/amd64/include/xen/hypercall.h b/sys/amd64/include/xen/hypercall.h index 50fa376ff90b..a1b2a5cdeb3a 100644 --- a/sys/amd64/include/xen/hypercall.h +++ b/sys/amd64/include/xen/hypercall.h @@ -1,415 +1,417 @@ /****************************************************************************** * hypercall.h * - * Linux-specific hypervisor handling. + * FreeBSD-specific hypervisor handling. 
* * Copyright (c) 2002-2004, K A Fraser * * 64-bit updates: * Benjamin Liu * Jun Nakajima * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. + * + * $FreeBSD$ */ #ifndef __MACHINE_XEN_HYPERCALL_H__ #define __MACHINE_XEN_HYPERCALL_H__ #include #ifndef __XEN_HYPERVISOR_H__ # error "please don't include this file directly" #endif #define __STR(x) #x #define STR(x) __STR(x) #define ENOXENSYS 38 #define CONFIG_XEN_COMPAT 0x030002 #define __must_check #ifdef XEN #define HYPERCALL_STR(name) \ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)" #else #define HYPERCALL_STR(name) \ "mov $("STR(__HYPERVISOR_##name)" * 32),%%eax; "\ "add hypercall_stubs(%%rip),%%rax; " \ "call *%%rax" #endif #define _hypercall0(type, name) \ ({ \ type __res; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res) \ : \ : "memory" ); \ __res; \ }) #define _hypercall1(type, name, a1) \ ({ \ type __res; \ long __ign1; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1) \ : "1" ((long)(a1)) \ : "memory" ); \ __res; \ }) #define _hypercall2(type, name, a1, a2) \ ({ \ type __res; \ long __ign1, __ign2; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ : "1" ((long)(a1)), "2" ((long)(a2)) \ : "memory" ); \ __res; \ }) #define _hypercall3(type, name, a1, a2, a3) \ ({ \ type __res; \ long __ign1, __ign2, __ign3; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ "=d" (__ign3) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)) \ : "memory" ); \ __res; \ }) #define _hypercall4(type, name, a1, a2, a3, a4) \ ({ \ type __res; \ long __ign1, __ign2, __ign3; \ register long __arg4 __asm__("r10") = (long)(a4); \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ "=d" (__ign3), "+r" (__arg4) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)) \ : "memory" ); \ __res; \ }) #define _hypercall5(type, name, a1, a2, a3, a4, a5) \ ({ \ type __res; \ long __ign1, __ign2, __ign3; \ register long __arg4 __asm__("r10") = (long)(a4); \ register long __arg5 __asm__("r8") = (long)(a5); \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ "=d" (__ign3), "+r" (__arg4), 
"+r" (__arg5) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)) \ : "memory" ); \ __res; \ }) static inline int __must_check HYPERVISOR_set_trap_table( const trap_info_t *table) { return _hypercall1(int, set_trap_table, table); } static inline int __must_check HYPERVISOR_mmu_update( mmu_update_t *req, unsigned int count, unsigned int *success_count, domid_t domid) { return _hypercall4(int, mmu_update, req, count, success_count, domid); } static inline int __must_check HYPERVISOR_mmuext_op( struct mmuext_op *op, unsigned int count, unsigned int *success_count, domid_t domid) { return _hypercall4(int, mmuext_op, op, count, success_count, domid); } static inline int __must_check HYPERVISOR_set_gdt( unsigned long *frame_list, unsigned int entries) { return _hypercall2(int, set_gdt, frame_list, entries); } static inline int __must_check HYPERVISOR_stack_switch( unsigned long ss, unsigned long esp) { return _hypercall2(int, stack_switch, ss, esp); } static inline int __must_check HYPERVISOR_set_callbacks( unsigned long event_address, unsigned long failsafe_address, unsigned long syscall_address) { return _hypercall3(int, set_callbacks, event_address, failsafe_address, syscall_address); } static inline int HYPERVISOR_fpu_taskswitch( int set) { return _hypercall1(int, fpu_taskswitch, set); } static inline int __must_check HYPERVISOR_sched_op_compat( int cmd, unsigned long arg) { return _hypercall2(int, sched_op_compat, cmd, arg); } static inline int __must_check HYPERVISOR_sched_op( int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); } static inline long __must_check HYPERVISOR_set_timer_op( uint64_t timeout) { return _hypercall1(long, set_timer_op, timeout); } static inline int __must_check HYPERVISOR_platform_op( struct xen_platform_op *platform_op) { platform_op->interface_version = XENPF_INTERFACE_VERSION; return _hypercall1(int, platform_op, platform_op); } static inline int __must_check HYPERVISOR_set_debugreg( unsigned int reg, unsigned long value) { return _hypercall2(int, set_debugreg, reg, value); } static inline unsigned long __must_check HYPERVISOR_get_debugreg( unsigned int reg) { return _hypercall1(unsigned long, get_debugreg, reg); } static inline int __must_check HYPERVISOR_update_descriptor( unsigned long ma, unsigned long word) { return _hypercall2(int, update_descriptor, ma, word); } static inline int __must_check HYPERVISOR_memory_op( unsigned int cmd, void *arg) { return _hypercall2(int, memory_op, cmd, arg); } static inline int __must_check HYPERVISOR_multicall( multicall_entry_t *call_list, unsigned int nr_calls) { return _hypercall2(int, multicall, call_list, nr_calls); } static inline int __must_check HYPERVISOR_update_va_mapping( unsigned long va, uint64_t new_val, unsigned long flags) { return _hypercall3(int, update_va_mapping, va, new_val, flags); } static inline int __must_check HYPERVISOR_event_channel_op( int cmd, void *arg) { int rc = _hypercall2(int, event_channel_op, cmd, arg); #if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOXENSYS)) { + if (__predict_false(rc == -ENOXENSYS)) { struct evtchn_op op; op.cmd = cmd; memcpy(&op.u, arg, sizeof(op.u)); rc = _hypercall1(int, event_channel_op_compat, &op); memcpy(arg, &op.u, sizeof(op.u)); } #endif return rc; } static inline int __must_check HYPERVISOR_xen_version( int cmd, void *arg) { return _hypercall2(int, xen_version, cmd, arg); } static inline int __must_check HYPERVISOR_console_io( int cmd, unsigned int count, char *str) { return _hypercall3(int, console_io, cmd, count, str); } 
static inline int __must_check HYPERVISOR_physdev_op( int cmd, void *arg) { int rc = _hypercall2(int, physdev_op, cmd, arg); #if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOXENSYS)) { + if (__predict_false(rc == -ENOXENSYS)) { struct physdev_op op; op.cmd = cmd; memcpy(&op.u, arg, sizeof(op.u)); rc = _hypercall1(int, physdev_op_compat, &op); memcpy(arg, &op.u, sizeof(op.u)); } #endif return rc; } static inline int __must_check HYPERVISOR_grant_table_op( unsigned int cmd, void *uop, unsigned int count) { return _hypercall3(int, grant_table_op, cmd, uop, count); } static inline int __must_check HYPERVISOR_update_va_mapping_otherdomain( unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid) { return _hypercall4(int, update_va_mapping_otherdomain, va, new_val, flags, domid); } static inline int __must_check HYPERVISOR_vm_assist( unsigned int cmd, unsigned int type) { return _hypercall2(int, vm_assist, cmd, type); } static inline int __must_check HYPERVISOR_vcpu_op( int cmd, unsigned int vcpuid, void *extra_args) { return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } static inline int __must_check HYPERVISOR_set_segment_base( int reg, unsigned long value) { return _hypercall2(int, set_segment_base, reg, value); } static inline int __must_check HYPERVISOR_suspend( unsigned long srec) { struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_suspend }; int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, &sched_shutdown, srec); #if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOXENSYS) rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, SHUTDOWN_suspend, srec); #endif return rc; } #if CONFIG_XEN_COMPAT <= 0x030002 static inline int HYPERVISOR_nmi_op( unsigned long op, void *arg) { return _hypercall2(int, nmi_op, op, arg); } #endif #ifndef CONFIG_XEN static inline unsigned long __must_check HYPERVISOR_hvm_op( int op, void *arg) { return _hypercall2(unsigned long, hvm_op, op, arg); } #endif static inline int __must_check HYPERVISOR_callback_op( int cmd, const void *arg) { return _hypercall2(int, callback_op, cmd, arg); } static inline int __must_check HYPERVISOR_xenoprof_op( int op, void *arg) { return _hypercall2(int, xenoprof_op, op, arg); } static inline int __must_check HYPERVISOR_kexec_op( unsigned long op, void *args) { return _hypercall2(int, kexec_op, op, args); } #undef __must_check #endif /* __MACHINE_XEN_HYPERCALL_H__ */ diff --git a/sys/amd64/include/xen/xen-os.h b/sys/amd64/include/xen/xen-os.h index 89743237ff90..ee498b9d09ac 100644 --- a/sys/amd64/include/xen/xen-os.h +++ b/sys/amd64/include/xen/xen-os.h @@ -1,299 +1,132 @@ /****************************************************************************** - * os.h + * amd64/xen/xen-os.h * - * random collection of macros and definition + * Random collection of macros and definition + * + * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ -#ifndef _XEN_OS_H_ -#define _XEN_OS_H_ +#ifndef _MACHINE_XEN_XEN_OS_H_ +#define _MACHINE_XEN_XEN_OS_H_ #ifdef PAE #define CONFIG_X86_PAE #endif -#ifdef LOCORE -#define __ASSEMBLY__ -#endif - -#if !defined(__XEN_INTERFACE_VERSION__) -#define __XEN_INTERFACE_VERSION__ 0x00030208 -#endif - -#define GRANT_REF_INVALID 0xffffffff - -#include - /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ -/* Force a proper event-channel callback from Xen. */ -void force_evtchn_callback(void); - -extern int gdtset; - -extern shared_info_t *HYPERVISOR_shared_info; - /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ static inline void rep_nop(void) { __asm__ __volatile__ ( "rep;nop" : : : "memory" ); } #define cpu_relax() rep_nop() -/* crude memory allocator for memory allocation early in - * boot - */ -void *bootmem_alloc(unsigned int size); -void bootmem_free(void *ptr, unsigned int size); - -void printk(const char *fmt, ...); - -/* some function prototypes */ -void trap_init(void); - -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) - -#ifndef XENHVM - -/* - * STI/CLI equivalents. These basically set and clear the virtual - * event_enable flag in the shared_info structure. Note that when - * the enable bit is set, there may be pending events to be handled. - * We may therefore call into do_hypervisor_callback() directly. - */ - -#define __cli() \ -do { \ - vcpu_info_t *_vcpu; \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ - _vcpu->evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - -#define __sti() \ -do { \ - vcpu_info_t *_vcpu; \ - barrier(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ - _vcpu->evtchn_upcall_mask = 0; \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ -} while (0) - -#define __restore_flags(x) \ -do { \ - vcpu_info_t *_vcpu; \ - barrier(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ - if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ - } \ -} while (0) - -/* - * Add critical_{enter, exit}? 
- * - */ -#define __save_and_cli(x) \ -do { \ - vcpu_info_t *_vcpu; \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ - (x) = _vcpu->evtchn_upcall_mask; \ - _vcpu->evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) -#define save_and_cli(x) __save_and_cli(x) - -#define local_irq_save(x) __save_and_cli(x) -#define local_irq_restore(x) __restore_flags(x) -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - -#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));} -#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } -#define spin_lock_irqsave mtx_lock_irqsave -#define spin_unlock_irqrestore mtx_unlock_irqrestore - -#else -#endif - -#ifndef xen_mb -#define xen_mb() mb() -#endif -#ifndef xen_rmb -#define xen_rmb() rmb() -#endif -#ifndef xen_wmb -#define xen_wmb() wmb() -#endif -#ifdef SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - - /* This is a barrier for the compiler only, NOT the processor! */ #define barrier() __asm__ __volatile__("": : :"memory") #define LOCK_PREFIX "" #define LOCK "" #define ADDR (*(volatile long *) addr) -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { volatile int counter; } atomic_t; - - - -#define xen_xchg(ptr,v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((volatile struct __xchg_dummy *)(x)) -static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, - int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; -} /** * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static __inline int test_and_clear_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( LOCK_PREFIX "btrl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"Ir" (nr) : "memory"); return oldbit; } static __inline int constant_test_bit(int nr, const volatile void * addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } static __inline int variable_test_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( "btl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit) :"m" (ADDR),"Ir" (nr)); return oldbit; } #define test_bit(nr,addr) \ (__builtin_constant_p(nr) ? 
\ constant_test_bit((nr),(addr)) : \ variable_test_bit((nr),(addr))) - /** * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * This function is atomic and may not be reordered. See __set_bit() * if you do not require the atomic guarantees. * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __inline__ void set_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btsl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } /** * clear_bit - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and may not be reordered. However, it does * not contain a memory barrier, so if it is used for locking purposes, * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ static __inline__ void clear_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btrl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } -/** - * atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_inc(atomic_t *v) -{ - __asm__ __volatile__( - LOCK "incl %0" - :"=m" (v->counter) - :"m" (v->counter)); -} - - -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) - #endif /* !__ASSEMBLY__ */ -#endif /* _OS_H_ */ +#endif /* _MACHINE_XEN_XEN_OS_H_ */ diff --git a/sys/conf/files b/sys/conf/files index ec59771573ae..8fda4772420a 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,3833 +1,3831 @@ # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. 
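# For illustration only, a hypothetical entry (all names below are made up)
# written to those limitations: each quoted compile-with command stays on a
# single logical line, and only the first dependency line is relied upon.
#example_gen.h	optional example				\
#	dependency	"$S/tools/example2h.awk $S/dev/example/example.def" \
#	compile-with	"${AWK} -f $S/tools/example2h.awk $S/dev/example/example.def" \
#	no-obj no-implicit-rule before-depend			\
#	clean		"example_gen.h"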
# acpi_quirks.h optional acpi \ dependency "$S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \ compile-with "${AWK} -f $S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \ no-obj no-implicit-rule before-depend \ clean "acpi_quirks.h" aicasm optional ahc | ahd \ dependency "$S/dev/aic7xxx/aicasm/*.[chyl]" \ compile-with "CC='${CC}' ${MAKE} -f $S/dev/aic7xxx/aicasm/Makefile MAKESRCPATH=$S/dev/aic7xxx/aicasm" \ no-obj no-implicit-rule \ clean "aicasm* y.tab.h" aic7xxx_seq.h optional ahc \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic7xxx_seq.h -r aic7xxx_reg.h -p aic7xxx_reg_print.c -i $S/dev/aic7xxx/aic7xxx_osm.h $S/dev/aic7xxx/aic7xxx.seq" \ no-obj no-implicit-rule before-depend local \ clean "aic7xxx_seq.h" \ dependency "$S/dev/aic7xxx/aic7xxx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic7xxx_reg.h optional ahc \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic7xxx_seq.h -r aic7xxx_reg.h -p aic7xxx_reg_print.c -i $S/dev/aic7xxx/aic7xxx_osm.h $S/dev/aic7xxx/aic7xxx.seq" \ no-obj no-implicit-rule before-depend local \ clean "aic7xxx_reg.h" \ dependency "$S/dev/aic7xxx/aic7xxx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic7xxx_reg_print.c optional ahc \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic7xxx_seq.h -r aic7xxx_reg.h -p aic7xxx_reg_print.c -i $S/dev/aic7xxx/aic7xxx_osm.h $S/dev/aic7xxx/aic7xxx.seq" \ no-obj no-implicit-rule local \ clean "aic7xxx_reg_print.c" \ dependency "$S/dev/aic7xxx/aic7xxx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic7xxx_reg_print.o optional ahc ahc_reg_pretty_print \ compile-with "${NORMAL_C}" \ no-implicit-rule local aic79xx_seq.h optional ahd pci \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic79xx_seq.h -r aic79xx_reg.h -p aic79xx_reg_print.c -i $S/dev/aic7xxx/aic79xx_osm.h $S/dev/aic7xxx/aic79xx.seq" \ no-obj no-implicit-rule before-depend local \ clean "aic79xx_seq.h" \ dependency "$S/dev/aic7xxx/aic79xx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic79xx_reg.h optional ahd pci \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic79xx_seq.h -r aic79xx_reg.h -p aic79xx_reg_print.c -i $S/dev/aic7xxx/aic79xx_osm.h $S/dev/aic7xxx/aic79xx.seq" \ no-obj no-implicit-rule before-depend local \ clean "aic79xx_reg.h" \ dependency "$S/dev/aic7xxx/aic79xx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic79xx_reg_print.c optional ahd pci \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic79xx_seq.h -r aic79xx_reg.h -p aic79xx_reg_print.c -i $S/dev/aic7xxx/aic79xx_osm.h $S/dev/aic7xxx/aic79xx.seq" \ no-obj no-implicit-rule local \ clean "aic79xx_reg_print.c" \ dependency "$S/dev/aic7xxx/aic79xx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic79xx_reg_print.o optional ahd pci ahd_reg_pretty_print \ compile-with "${NORMAL_C}" \ no-implicit-rule local # # The 'fdt_dtb_file' target covers an actual DTB file name, which is derived # from the specified source (DTS) file: .dts -> .dtb # fdt_dtb_file optional fdt \ compile-with "if [ -f $S/boot/fdt/dts/${FDT_DTS_FILE} ]; then dtc -O dtb -o ${FDT_DTS_FILE:R}.dtb -b 0 -p 1024 $S/boot/fdt/dts/${FDT_DTS_FILE}; fi" \ no-obj no-implicit-rule before-depend \ clean "${FDT_DTS_FILE:R}.dtb" fdt_static_dtb.h optional fdt fdt_dtb_static \ compile-with "sh $S/tools/fdt/make_dtbh.sh ${FDT_DTS_FILE} ." 
\ no-obj no-implicit-rule before-depend \ clean "fdt_static_dtb.h" feeder_eq_gen.h optional sound \ dependency "$S/tools/sound/feeder_eq_mkfilter.awk" \ compile-with "${AWK} -f $S/tools/sound/feeder_eq_mkfilter.awk -- ${FEEDER_EQ_PRESETS} > feeder_eq_gen.h" \ no-obj no-implicit-rule before-depend \ clean "feeder_eq_gen.h" feeder_rate_gen.h optional sound \ dependency "$S/tools/sound/feeder_rate_mkfilter.awk" \ compile-with "${AWK} -f $S/tools/sound/feeder_rate_mkfilter.awk -- ${FEEDER_RATE_PRESETS} > feeder_rate_gen.h" \ no-obj no-implicit-rule before-depend \ clean "feeder_rate_gen.h" snd_fxdiv_gen.h optional sound \ dependency "$S/tools/sound/snd_fxdiv_gen.awk" \ compile-with "${AWK} -f $S/tools/sound/snd_fxdiv_gen.awk -- > snd_fxdiv_gen.h" \ no-obj no-implicit-rule before-depend \ clean "snd_fxdiv_gen.h" miidevs.h optional miibus | mii \ dependency "$S/tools/miidevs2h.awk $S/dev/mii/miidevs" \ compile-with "${AWK} -f $S/tools/miidevs2h.awk $S/dev/mii/miidevs" \ no-obj no-implicit-rule before-depend \ clean "miidevs.h" pccarddevs.h standard \ dependency "$S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \ compile-with "${AWK} -f $S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \ no-obj no-implicit-rule before-depend \ clean "pccarddevs.h" teken_state.h optional sc \ dependency "$S/teken/gensequences $S/teken/sequences" \ compile-with "${AWK} -f $S/teken/gensequences $S/teken/sequences > teken_state.h" \ no-obj no-implicit-rule before-depend \ clean "teken_state.h" usbdevs.h optional usb \ dependency "$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \ compile-with "${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -h" \ no-obj no-implicit-rule before-depend \ clean "usbdevs.h" usbdevs_data.h optional usb \ dependency "$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \ compile-with "${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -d" \ no-obj no-implicit-rule before-depend \ clean "usbdevs_data.h" cam/cam.c optional scbus cam/cam_compat.c optional scbus cam/cam_periph.c optional scbus cam/cam_queue.c optional scbus cam/cam_sim.c optional scbus cam/cam_xpt.c optional scbus cam/ata/ata_all.c optional scbus cam/ata/ata_xpt.c optional scbus cam/ata/ata_pmp.c optional scbus cam/scsi/scsi_xpt.c optional scbus cam/scsi/scsi_all.c optional scbus cam/scsi/scsi_cd.c optional cd cam/scsi/scsi_ch.c optional ch cam/ata/ata_da.c optional ada | da cam/ctl/ctl.c optional ctl cam/ctl/ctl_backend.c optional ctl cam/ctl/ctl_backend_block.c optional ctl cam/ctl/ctl_backend_ramdisk.c optional ctl cam/ctl/ctl_cmd_table.c optional ctl cam/ctl/ctl_frontend.c optional ctl cam/ctl/ctl_frontend_cam_sim.c optional ctl cam/ctl/ctl_frontend_internal.c optional ctl cam/ctl/ctl_mem_pool.c optional ctl cam/ctl/ctl_scsi_all.c optional ctl cam/ctl/ctl_error.c optional ctl cam/ctl/ctl_util.c optional ctl cam/ctl/scsi_ctl.c optional ctl cam/scsi/scsi_da.c optional da cam/scsi/scsi_low.c optional ct | ncv | nsp | stg cam/scsi/scsi_pass.c optional pass cam/scsi/scsi_pt.c optional pt cam/scsi/scsi_sa.c optional sa cam/scsi/scsi_enc.c optional ses cam/scsi/scsi_enc_ses.c optional ses cam/scsi/scsi_enc_safte.c optional ses cam/scsi/scsi_sg.c optional sg cam/scsi/scsi_targ_bh.c optional targbh cam/scsi/scsi_target.c optional targ cam/scsi/smp_all.c optional scbus # shared between zfs and dtrace cddl/compat/opensolaris/kern/opensolaris.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_cmn_err.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kmem.c optional zfs compile-with 
"${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_misc.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_sunddi.c optional zfs compile-with "${ZFS_C}" # zfs specific cddl/compat/opensolaris/kern/opensolaris_acl.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kobj.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kstat.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_lookup.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_policy.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_string.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_sysevent.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_taskq.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_uio.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_vfs.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_vm.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_zone.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/acl/acl_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/avl/avl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/fnvpair.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/nvpair.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/nvpair_alloc_fixed.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/unicode/u8_textprep.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfeature_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_comutil.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_deleg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zpool_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zprop_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/gfs.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/vnode.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/ddt_zap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c 
optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c optional zfs compile-with "${ZFS_C}" \ warning "kernel contains CDDL licensed ZFS filesystem" cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/gzip.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/lzjb.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/sha256.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/unique.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c optional zfs compile-with "${ZFS_C}" 
cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_debug.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zle.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/callb.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/fm.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/list.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c optional zfs compile-with "${ZFS_C}" 
cddl/contrib/opensolaris/uts/common/zmod/adler32.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/deflate.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inffast.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inflate.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inftrees.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/opensolaris_crc32.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/trees.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zmod.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zmod_subr.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zutil.c optional zfs compile-with "${ZFS_C}" compat/freebsd32/freebsd32_capability.c optional compat_freebsd32 compat/freebsd32/freebsd32_ioctl.c optional compat_freebsd32 compat/freebsd32/freebsd32_misc.c optional compat_freebsd32 compat/freebsd32/freebsd32_syscalls.c optional compat_freebsd32 compat/freebsd32/freebsd32_sysent.c optional compat_freebsd32 contrib/altq/altq/altq_cbq.c optional altq contrib/altq/altq/altq_cdnr.c optional altq contrib/altq/altq/altq_hfsc.c optional altq contrib/altq/altq/altq_priq.c optional altq contrib/altq/altq/altq_red.c optional altq contrib/altq/altq/altq_rio.c optional altq contrib/altq/altq/altq_rmclass.c optional altq contrib/altq/altq/altq_subr.c optional altq contrib/dev/acpica/components/debugger/dbcmds.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbconvert.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbdisply.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbexec.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbfileio.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbhistry.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbinput.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbmethod.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbnames.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbstats.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbutils.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbxface.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmbuffer.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmdeferred.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmnames.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmopcode.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmobject.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrc.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcl.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcl2.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcs.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmutils.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmwalk.c optional acpi acpi_debug contrib/dev/acpica/components/dispatcher/dsargs.c optional acpi contrib/dev/acpica/components/dispatcher/dscontrol.c optional acpi contrib/dev/acpica/components/dispatcher/dsfield.c optional acpi 
contrib/dev/acpica/components/dispatcher/dsinit.c optional acpi contrib/dev/acpica/components/dispatcher/dsmethod.c optional acpi contrib/dev/acpica/components/dispatcher/dsmthdat.c optional acpi contrib/dev/acpica/components/dispatcher/dsobject.c optional acpi contrib/dev/acpica/components/dispatcher/dsopcode.c optional acpi contrib/dev/acpica/components/dispatcher/dsutils.c optional acpi contrib/dev/acpica/components/dispatcher/dswexec.c optional acpi contrib/dev/acpica/components/dispatcher/dswload.c optional acpi contrib/dev/acpica/components/dispatcher/dswload2.c optional acpi contrib/dev/acpica/components/dispatcher/dswscope.c optional acpi contrib/dev/acpica/components/dispatcher/dswstate.c optional acpi contrib/dev/acpica/components/events/evevent.c optional acpi contrib/dev/acpica/components/events/evglock.c optional acpi contrib/dev/acpica/components/events/evgpe.c optional acpi contrib/dev/acpica/components/events/evgpeblk.c optional acpi contrib/dev/acpica/components/events/evgpeinit.c optional acpi contrib/dev/acpica/components/events/evgpeutil.c optional acpi contrib/dev/acpica/components/events/evhandler.c optional acpi contrib/dev/acpica/components/events/evmisc.c optional acpi contrib/dev/acpica/components/events/evregion.c optional acpi contrib/dev/acpica/components/events/evrgnini.c optional acpi contrib/dev/acpica/components/events/evsci.c optional acpi contrib/dev/acpica/components/events/evxface.c optional acpi contrib/dev/acpica/components/events/evxfevnt.c optional acpi contrib/dev/acpica/components/events/evxfgpe.c optional acpi contrib/dev/acpica/components/events/evxfregn.c optional acpi contrib/dev/acpica/components/executer/exconfig.c optional acpi contrib/dev/acpica/components/executer/exconvrt.c optional acpi contrib/dev/acpica/components/executer/excreate.c optional acpi contrib/dev/acpica/components/executer/exdebug.c optional acpi contrib/dev/acpica/components/executer/exdump.c optional acpi contrib/dev/acpica/components/executer/exfield.c optional acpi contrib/dev/acpica/components/executer/exfldio.c optional acpi contrib/dev/acpica/components/executer/exmisc.c optional acpi contrib/dev/acpica/components/executer/exmutex.c optional acpi contrib/dev/acpica/components/executer/exnames.c optional acpi contrib/dev/acpica/components/executer/exoparg1.c optional acpi contrib/dev/acpica/components/executer/exoparg2.c optional acpi contrib/dev/acpica/components/executer/exoparg3.c optional acpi contrib/dev/acpica/components/executer/exoparg6.c optional acpi contrib/dev/acpica/components/executer/exprep.c optional acpi contrib/dev/acpica/components/executer/exregion.c optional acpi contrib/dev/acpica/components/executer/exresnte.c optional acpi contrib/dev/acpica/components/executer/exresolv.c optional acpi contrib/dev/acpica/components/executer/exresop.c optional acpi contrib/dev/acpica/components/executer/exstore.c optional acpi contrib/dev/acpica/components/executer/exstoren.c optional acpi contrib/dev/acpica/components/executer/exstorob.c optional acpi contrib/dev/acpica/components/executer/exsystem.c optional acpi contrib/dev/acpica/components/executer/exutils.c optional acpi contrib/dev/acpica/components/hardware/hwacpi.c optional acpi contrib/dev/acpica/components/hardware/hwesleep.c optional acpi contrib/dev/acpica/components/hardware/hwgpe.c optional acpi contrib/dev/acpica/components/hardware/hwpci.c optional acpi contrib/dev/acpica/components/hardware/hwregs.c optional acpi contrib/dev/acpica/components/hardware/hwsleep.c optional acpi 
contrib/dev/acpica/components/hardware/hwtimer.c optional acpi contrib/dev/acpica/components/hardware/hwvalid.c optional acpi contrib/dev/acpica/components/hardware/hwxface.c optional acpi contrib/dev/acpica/components/hardware/hwxfsleep.c optional acpi contrib/dev/acpica/components/namespace/nsaccess.c optional acpi contrib/dev/acpica/components/namespace/nsalloc.c optional acpi contrib/dev/acpica/components/namespace/nsarguments.c optional acpi contrib/dev/acpica/components/namespace/nsconvert.c optional acpi contrib/dev/acpica/components/namespace/nsdump.c optional acpi contrib/dev/acpica/components/namespace/nseval.c optional acpi contrib/dev/acpica/components/namespace/nsinit.c optional acpi contrib/dev/acpica/components/namespace/nsload.c optional acpi contrib/dev/acpica/components/namespace/nsnames.c optional acpi contrib/dev/acpica/components/namespace/nsobject.c optional acpi contrib/dev/acpica/components/namespace/nsparse.c optional acpi contrib/dev/acpica/components/namespace/nspredef.c optional acpi contrib/dev/acpica/components/namespace/nsprepkg.c optional acpi contrib/dev/acpica/components/namespace/nsrepair.c optional acpi contrib/dev/acpica/components/namespace/nsrepair2.c optional acpi contrib/dev/acpica/components/namespace/nssearch.c optional acpi contrib/dev/acpica/components/namespace/nsutils.c optional acpi contrib/dev/acpica/components/namespace/nswalk.c optional acpi contrib/dev/acpica/components/namespace/nsxfeval.c optional acpi contrib/dev/acpica/components/namespace/nsxfname.c optional acpi contrib/dev/acpica/components/namespace/nsxfobj.c optional acpi contrib/dev/acpica/components/parser/psargs.c optional acpi contrib/dev/acpica/components/parser/psloop.c optional acpi contrib/dev/acpica/components/parser/psobject.c optional acpi contrib/dev/acpica/components/parser/psopcode.c optional acpi contrib/dev/acpica/components/parser/psopinfo.c optional acpi contrib/dev/acpica/components/parser/psparse.c optional acpi contrib/dev/acpica/components/parser/psscope.c optional acpi contrib/dev/acpica/components/parser/pstree.c optional acpi contrib/dev/acpica/components/parser/psutils.c optional acpi contrib/dev/acpica/components/parser/pswalk.c optional acpi contrib/dev/acpica/components/parser/psxface.c optional acpi contrib/dev/acpica/components/resources/rsaddr.c optional acpi contrib/dev/acpica/components/resources/rscalc.c optional acpi contrib/dev/acpica/components/resources/rscreate.c optional acpi contrib/dev/acpica/components/resources/rsdump.c optional acpi contrib/dev/acpica/components/resources/rsdumpinfo.c optional acpi contrib/dev/acpica/components/resources/rsinfo.c optional acpi contrib/dev/acpica/components/resources/rsio.c optional acpi contrib/dev/acpica/components/resources/rsirq.c optional acpi contrib/dev/acpica/components/resources/rslist.c optional acpi contrib/dev/acpica/components/resources/rsmemory.c optional acpi contrib/dev/acpica/components/resources/rsmisc.c optional acpi contrib/dev/acpica/components/resources/rsserial.c optional acpi contrib/dev/acpica/components/resources/rsutils.c optional acpi contrib/dev/acpica/components/resources/rsxface.c optional acpi contrib/dev/acpica/components/tables/tbfadt.c optional acpi contrib/dev/acpica/components/tables/tbfind.c optional acpi contrib/dev/acpica/components/tables/tbinstal.c optional acpi contrib/dev/acpica/components/tables/tbprint.c optional acpi contrib/dev/acpica/components/tables/tbutils.c optional acpi contrib/dev/acpica/components/tables/tbxface.c optional acpi 
contrib/dev/acpica/components/tables/tbxfload.c	optional acpi
contrib/dev/acpica/components/tables/tbxfroot.c	optional acpi
contrib/dev/acpica/components/utilities/utaddress.c	optional acpi
contrib/dev/acpica/components/utilities/utalloc.c	optional acpi
contrib/dev/acpica/components/utilities/utbuffer.c	optional acpi
contrib/dev/acpica/components/utilities/utcache.c	optional acpi
contrib/dev/acpica/components/utilities/utcopy.c	optional acpi
contrib/dev/acpica/components/utilities/utdebug.c	optional acpi
contrib/dev/acpica/components/utilities/utdecode.c	optional acpi
contrib/dev/acpica/components/utilities/utdelete.c	optional acpi
contrib/dev/acpica/components/utilities/uterror.c	optional acpi
contrib/dev/acpica/components/utilities/uteval.c	optional acpi
contrib/dev/acpica/components/utilities/utexcep.c	optional acpi
contrib/dev/acpica/components/utilities/utglobal.c	optional acpi
contrib/dev/acpica/components/utilities/utids.c	optional acpi
contrib/dev/acpica/components/utilities/utinit.c	optional acpi
contrib/dev/acpica/components/utilities/utlock.c	optional acpi
contrib/dev/acpica/components/utilities/utmath.c	optional acpi
contrib/dev/acpica/components/utilities/utmisc.c	optional acpi
contrib/dev/acpica/components/utilities/utmutex.c	optional acpi
contrib/dev/acpica/components/utilities/utobject.c	optional acpi
contrib/dev/acpica/components/utilities/utosi.c	optional acpi
contrib/dev/acpica/components/utilities/utownerid.c	optional acpi
contrib/dev/acpica/components/utilities/utpredef.c	optional acpi
contrib/dev/acpica/components/utilities/utresrc.c	optional acpi
contrib/dev/acpica/components/utilities/utstate.c	optional acpi
contrib/dev/acpica/components/utilities/utstring.c	optional acpi
contrib/dev/acpica/components/utilities/utxface.c	optional acpi
contrib/dev/acpica/components/utilities/utxferror.c	optional acpi
contrib/dev/acpica/components/utilities/utxfinit.c	optional acpi
#contrib/dev/acpica/components/utilities/utxfmutex.c	optional acpi
contrib/ipfilter/netinet/fil.c	optional ipfilter inet \
	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_auth.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_fil_freebsd.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_frag.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_log.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_nat.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_proxy.c	optional ipfilter inet \
	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_state.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_lookup.c	optional ipfilter inet \
	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-error -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_pool.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_htable.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_sync.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/mlfk_ipl.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/libfdt/fdt.c	optional fdt
contrib/libfdt/fdt_ro.c	optional fdt
contrib/libfdt/fdt_rw.c	optional fdt
contrib/libfdt/fdt_strerror.c	optional fdt
contrib/libfdt/fdt_sw.c	optional fdt
contrib/libfdt/fdt_wip.c	optional fdt
contrib/ngatm/netnatm/api/cc_conn.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C_NOWERROR} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_data.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_dump.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_port.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_sig.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_user.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/unisap.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/misc/straddr.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/misc/unimsg_common.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/msg/traffic.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/msg/uni_ie.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/msg/uni_msg.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/saal/saal_sscfu.c	optional ngatm_sscfu \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/saal/saal_sscop.c	optional ngatm_sscop \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_call.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_coord.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_party.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_print.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_reset.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_uni.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_unimsgcpy.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_verify.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
crypto/blowfish/bf_ecb.c	optional ipsec
crypto/blowfish/bf_skey.c	optional crypto | ipsec
crypto/camellia/camellia.c	optional crypto | ipsec
crypto/camellia/camellia-api.c	optional crypto | ipsec
crypto/des/des_ecb.c	optional crypto | ipsec | netsmb
crypto/des/des_setkey.c	optional crypto | ipsec | netsmb
crypto/rc4/rc4.c	optional netgraph_mppc_encryption | kgssapi
crypto/rijndael/rijndael-alg-fst.c	optional crypto | geom_bde | \
	ipsec | random | wlan_ccmp
crypto/rijndael/rijndael-api-fst.c	optional geom_bde | random
crypto/rijndael/rijndael-api.c	optional crypto | ipsec | wlan_ccmp
crypto/sha1.c	optional carp | crypto | ipsec | \
	netgraph_mppc_encryption | sctp
crypto/sha2/sha2.c	optional crypto | geom_bde | ipsec | random | \
	sctp | zfs
crypto/siphash/siphash.c	optional inet | inet6
crypto/siphash/siphash_test.c	optional inet | inet6
ddb/db_access.c	optional ddb
ddb/db_break.c	optional ddb
ddb/db_capture.c	optional ddb
ddb/db_command.c	optional ddb
ddb/db_examine.c	optional ddb
ddb/db_expr.c	optional ddb
ddb/db_input.c	optional ddb
ddb/db_lex.c optional ddb ddb/db_main.c optional ddb ddb/db_output.c optional ddb ddb/db_print.c optional ddb ddb/db_ps.c optional ddb ddb/db_run.c optional ddb ddb/db_script.c optional ddb ddb/db_sym.c optional ddb ddb/db_thread.c optional ddb ddb/db_textdump.c optional ddb ddb/db_variables.c optional ddb ddb/db_watch.c optional ddb ddb/db_write_cmd.c optional ddb #dev/dpt/dpt_control.c optional dpt dev/aac/aac.c optional aac dev/aac/aac_cam.c optional aacp aac dev/aac/aac_debug.c optional aac dev/aac/aac_disk.c optional aac dev/aac/aac_linux.c optional aac compat_linux dev/aac/aac_pci.c optional aac pci dev/aacraid/aacraid.c optional aacraid dev/aacraid/aacraid_cam.c optional aacraid scbus dev/aacraid/aacraid_debug.c optional aacraid dev/aacraid/aacraid_linux.c optional aacraid compat_linux dev/aacraid/aacraid_pci.c optional aacraid pci dev/acpi_support/acpi_wmi.c optional acpi_wmi acpi dev/acpi_support/acpi_asus.c optional acpi_asus acpi dev/acpi_support/acpi_asus_wmi.c optional acpi_asus_wmi acpi dev/acpi_support/acpi_fujitsu.c optional acpi_fujitsu acpi dev/acpi_support/acpi_hp.c optional acpi_hp acpi dev/acpi_support/acpi_ibm.c optional acpi_ibm acpi dev/acpi_support/acpi_panasonic.c optional acpi_panasonic acpi dev/acpi_support/acpi_sony.c optional acpi_sony acpi dev/acpi_support/acpi_toshiba.c optional acpi_toshiba acpi dev/acpi_support/atk0110.c optional aibs acpi dev/acpica/Osd/OsdDebug.c optional acpi dev/acpica/Osd/OsdHardware.c optional acpi dev/acpica/Osd/OsdInterrupt.c optional acpi dev/acpica/Osd/OsdMemory.c optional acpi dev/acpica/Osd/OsdSchedule.c optional acpi dev/acpica/Osd/OsdStream.c optional acpi dev/acpica/Osd/OsdSynch.c optional acpi dev/acpica/Osd/OsdTable.c optional acpi dev/acpica/acpi.c optional acpi dev/acpica/acpi_acad.c optional acpi dev/acpica/acpi_battery.c optional acpi dev/acpica/acpi_button.c optional acpi dev/acpica/acpi_cmbat.c optional acpi dev/acpica/acpi_cpu.c optional acpi dev/acpica/acpi_ec.c optional acpi dev/acpica/acpi_hpet.c optional acpi dev/acpica/acpi_isab.c optional acpi isa dev/acpica/acpi_lid.c optional acpi dev/acpica/acpi_package.c optional acpi dev/acpica/acpi_pci.c optional acpi pci dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/acpica/acpi_pcib_acpi.c optional acpi pci dev/acpica/acpi_pcib_pci.c optional acpi pci dev/acpica/acpi_perf.c optional acpi dev/acpica/acpi_powerres.c optional acpi dev/acpica/acpi_quirk.c optional acpi dev/acpica/acpi_resource.c optional acpi dev/acpica/acpi_smbat.c optional acpi dev/acpica/acpi_thermal.c optional acpi dev/acpica/acpi_throttle.c optional acpi dev/acpica/acpi_timer.c optional acpi dev/acpica/acpi_video.c optional acpi_video acpi dev/acpica/acpi_dock.c optional acpi_dock acpi dev/adlink/adlink.c optional adlink dev/advansys/adv_eisa.c optional adv eisa dev/advansys/adv_pci.c optional adv pci dev/advansys/advansys.c optional adv dev/advansys/advlib.c optional adv dev/advansys/advmcode.c optional adv dev/advansys/adw_pci.c optional adw pci dev/advansys/adwcam.c optional adw dev/advansys/adwlib.c optional adw dev/advansys/adwmcode.c optional adw dev/ae/if_ae.c optional ae pci dev/age/if_age.c optional age pci dev/agp/agp.c optional agp pci dev/agp/agp_if.m optional agp pci dev/aha/aha.c optional aha dev/aha/aha_isa.c optional aha isa dev/aha/aha_mca.c optional aha mca dev/ahb/ahb.c optional ahb eisa dev/ahci/ahci.c optional ahci pci dev/ahci/ahciem.c optional ahci pci dev/aic/aic.c optional aic dev/aic/aic_pccard.c optional aic pccard 
dev/aic7xxx/ahc_eisa.c optional ahc eisa dev/aic7xxx/ahc_isa.c optional ahc isa dev/aic7xxx/ahc_pci.c optional ahc pci \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/aic7xxx/ahd_pci.c optional ahd pci \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/aic7xxx/aic7770.c optional ahc dev/aic7xxx/aic79xx.c optional ahd pci dev/aic7xxx/aic79xx_osm.c optional ahd pci dev/aic7xxx/aic79xx_pci.c optional ahd pci dev/aic7xxx/aic7xxx.c optional ahc dev/aic7xxx/aic7xxx_93cx6.c optional ahc dev/aic7xxx/aic7xxx_osm.c optional ahc dev/aic7xxx/aic7xxx_pci.c optional ahc pci dev/alc/if_alc.c optional alc pci dev/ale/if_ale.c optional ale pci dev/altera/avgen/altera_avgen.c optional altera_avgen dev/altera/avgen/altera_avgen_fdt.c optional altera_avgen fdt dev/altera/avgen/altera_avgen_nexus.c optional altera_avgen dev/altera/sdcard/altera_sdcard.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_disk.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_io.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_fdt.c optional altera_sdcard fdt dev/altera/sdcard/altera_sdcard_nexus.c optional altera_sdcard dev/amr/amr.c optional amr dev/amr/amr_cam.c optional amrp amr dev/amr/amr_disk.c optional amr dev/amr/amr_linux.c optional amr compat_linux dev/amr/amr_pci.c optional amr pci dev/an/if_an.c optional an dev/an/if_an_isa.c optional an isa dev/an/if_an_pccard.c optional an pccard dev/an/if_an_pci.c optional an pci dev/asr/asr.c optional asr pci \ compile-with "${NORMAL_C} ${NO_WARRAY_BOUNDS}" # dev/ata/ata_if.m optional ata | atacore dev/ata/ata-all.c optional ata | atacore dev/ata/ata-dma.c optional ata | atacore dev/ata/ata-lowlevel.c optional ata | atacore dev/ata/ata-sata.c optional ata | atacore dev/ata/ata-card.c optional ata pccard | atapccard dev/ata/ata-cbus.c optional ata pc98 | atapc98 dev/ata/ata-isa.c optional ata isa | ataisa dev/ata/ata-pci.c optional ata pci | atapci dev/ata/chipsets/ata-ahci.c optional ata pci | ataahci | ataacerlabs | \ ataati | ataintel | atajmicron | \ atavia | atanvidia dev/ata/chipsets/ata-acard.c optional ata pci | ataacard dev/ata/chipsets/ata-acerlabs.c optional ata pci | ataacerlabs dev/ata/chipsets/ata-adaptec.c optional ata pci | ataadaptec dev/ata/chipsets/ata-amd.c optional ata pci | ataamd dev/ata/chipsets/ata-ati.c optional ata pci | ataati dev/ata/chipsets/ata-cenatek.c optional ata pci | atacenatek dev/ata/chipsets/ata-cypress.c optional ata pci | atacypress dev/ata/chipsets/ata-cyrix.c optional ata pci | atacyrix dev/ata/chipsets/ata-highpoint.c optional ata pci | atahighpoint dev/ata/chipsets/ata-intel.c optional ata pci | ataintel dev/ata/chipsets/ata-ite.c optional ata pci | ataite dev/ata/chipsets/ata-jmicron.c optional ata pci | atajmicron dev/ata/chipsets/ata-marvell.c optional ata pci | atamarvell | ataadaptec dev/ata/chipsets/ata-micron.c optional ata pci | atamicron dev/ata/chipsets/ata-national.c optional ata pci | atanational dev/ata/chipsets/ata-netcell.c optional ata pci | atanetcell dev/ata/chipsets/ata-nvidia.c optional ata pci | atanvidia dev/ata/chipsets/ata-promise.c optional ata pci | atapromise dev/ata/chipsets/ata-serverworks.c optional ata pci | ataserverworks dev/ata/chipsets/ata-siliconimage.c optional ata pci | atasiliconimage | ataati dev/ata/chipsets/ata-sis.c optional ata pci | atasis dev/ata/chipsets/ata-via.c optional ata pci | atavia # dev/ath/if_ath_pci.c optional ath_pci pci \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/if_ath_ahb.c optional ath_ahb \ compile-with "${NORMAL_C} 
-I$S/dev/ath" # dev/ath/if_ath.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_beacon.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_btcoex.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_debug.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_keycache.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_led.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_lna_div.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx_edma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx_ht.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tdma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_sysctl.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_rx.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_rx_edma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_spectral.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ah_osdep.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/ath_hal/ah.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v1.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v3.c optional ath_hal | ath_ar5211 | ath_ar5212 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v14.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v4k.c \ optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_9287.c \ optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_regdomain.c optional ath \ compile-with "${NORMAL_C} ${NO_WSHIFT_COUNT_NEGATIVE} ${NO_WSHIFT_COUNT_OVERFLOW} -I$S/dev/ath" # ar5210 dev/ath/ath_hal/ar5210/ar5210_attach.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_beacon.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_interrupts.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_keycache.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_misc.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_phy.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_power.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_recv.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_reset.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_xmit.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5211 dev/ath/ath_hal/ar5211/ar5211_attach.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_beacon.c 
optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_interrupts.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_keycache.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_misc.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_phy.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_power.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_recv.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_reset.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_xmit.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5212 dev/ath/ath_hal/ar5212/ar5212_ani.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_attach.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_beacon.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_eeprom.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_gpio.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_interrupts.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_keycache.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_misc.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_phy.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_power.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_recv.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_reset.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" 
dev/ath/ath_hal/ar5212/ar5212_rfgain.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_xmit.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5416 (depends on ar5212) dev/ath/ath_hal/ar5416/ar5416_ani.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_attach.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_beacon.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_btcoex.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_iq.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcgain.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcdc.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_eeprom.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_gpio.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_interrupts.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_keycache.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_misc.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_phy.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_power.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_radar.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_recv.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 
\ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_reset.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_spectral.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_xmit.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9130 (depends upon ar5416) - also requires AH_SUPPORT_AR9130 # # Since this is an embedded MAC SoC, there's no need to compile it into the # default HAL. dev/ath/ath_hal/ar9001/ar9130_attach.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9001/ar9130_phy.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9001/ar9130_eeprom.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9160 (depends on ar5416) dev/ath/ath_hal/ar9001/ar9160_attach.c optional ath_hal | ath_ar9160 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9280 (depends on ar5416) dev/ath/ath_hal/ar9002/ar9280_attach.c optional ath_hal | ath_ar9280 | \ ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9280_olc.c optional ath_hal | ath_ar9280 | \ ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9285 (depends on ar5416 and ar9280) dev/ath/ath_hal/ar9002/ar9285_attach.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_btcoex.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_reset.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_cal.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_phy.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_diversity.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9287 (depends on ar5416) dev/ath/ath_hal/ar9002/ar9287_attach.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_reset.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_cal.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_olc.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9300 contrib/dev/ath/ath_hal/ar9300/ar9300_ani.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_attach.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" 
contrib/dev/ath/ath_hal/ar9300/ar9300_eeprom.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WCONSTANT_CONVERSION}" contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_interrupts.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_keycache.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_mci.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_paprd.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_phy.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_power.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_radar.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_radio.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_recv.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_recv_ds.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WSOMETIMES_UNINITIALIZED}" contrib/dev/ath/ath_hal/ar9300/ar9300_stub.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_stub_funcs.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_timer.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_xmit_ds.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" # rf backends dev/ath/ath_hal/ar5212/ar2316.c optional ath_rf2316 \ compile-with "${NORMAL_C} -I$S/dev/ath 
-I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2317.c optional ath_rf2317 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2413.c optional ath_hal | ath_rf2413 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2425.c optional ath_hal | ath_rf2425 | ath_rf2417 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5111.c optional ath_hal | ath_rf5111 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5112.c optional ath_hal | ath_rf5112 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5413.c optional ath_hal | ath_rf5413 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar2133.c optional ath_hal | ath_ar5416 | \ ath_ar9130 | ath_ar9160 | ath_ar9280 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9280.c optional ath_hal | ath_ar9280 | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ath rate control algorithms dev/ath/ath_rate/amrr/amrr.c optional ath_rate_amrr \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/onoe/onoe.c optional ath_rate_onoe \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/sample/sample.c optional ath_rate_sample \ compile-with "${NORMAL_C} -I$S/dev/ath" # ath DFS modules dev/ath/ath_dfs/null/dfs_null.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/bce/if_bce.c optional bce dev/bfe/if_bfe.c optional bfe dev/bge/if_bge.c optional bge dev/bktr/bktr_audio.c optional bktr pci dev/bktr/bktr_card.c optional bktr pci dev/bktr/bktr_core.c optional bktr pci dev/bktr/bktr_i2c.c optional bktr pci smbus dev/bktr/bktr_os.c optional bktr pci dev/bktr/bktr_tuner.c optional bktr pci dev/bktr/msp34xx.c optional bktr pci dev/buslogic/bt.c optional bt dev/buslogic/bt_eisa.c optional bt eisa dev/buslogic/bt_isa.c optional bt isa dev/buslogic/bt_mca.c optional bt mca dev/buslogic/bt_pci.c optional bt pci dev/bwi/bwimac.c optional bwi dev/bwi/bwiphy.c optional bwi dev/bwi/bwirf.c optional bwi dev/bwi/if_bwi.c optional bwi dev/bwi/if_bwi_pci.c optional bwi pci # XXX Work around clang warning, until maintainer approves fix. 
dev/bwn/if_bwn.c optional bwn siba_bwn \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/bxe/if_bxe.c optional bxe dev/bxe/bxe_link.c optional bxe dev/cardbus/cardbus.c optional cardbus dev/cardbus/cardbus_cis.c optional cardbus dev/cardbus/cardbus_device.c optional cardbus dev/cas/if_cas.c optional cas dev/cfi/cfi_bus_fdt.c optional cfi fdt dev/cfi/cfi_bus_nexus.c optional cfi dev/cfi/cfi_core.c optional cfi dev/cfi/cfi_dev.c optional cfi dev/cfi/cfi_disk.c optional cfid dev/ciss/ciss.c optional ciss dev/cm/smc90cx6.c optional cm dev/cmx/cmx.c optional cmx dev/cmx/cmx_pccard.c optional cmx pccard dev/cpufreq/ichss.c optional cpufreq dev/cs/if_cs.c optional cs dev/cs/if_cs_isa.c optional cs isa dev/cs/if_cs_pccard.c optional cs pccard dev/cxgb/cxgb_main.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_sge.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mc5.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc7323.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc8211.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_ael1002.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_aq100x.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mv88e1xxx.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_xgmac.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_t3_hw.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_tn1010.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/sys/uipc_mvec.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgbe/t4_main.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_sge.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_l2t.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_tracer.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/common/t4_hw.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" t4fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk t4fw_cfg.fw:t4fw_cfg t4fw_cfg_uwire.fw:t4fw_cfg_uwire t4fw.fw:t4fw -mt4fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t4fw_cfg.c" t4fw_cfg.fwo optional cxgbe \ dependency "t4fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw_cfg.fwo" t4fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t4fw_cfg.fw" t4fw_cfg_uwire.fwo optional cxgbe \ dependency "t4fw_cfg_uwire.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw_cfg_uwire.fwo" t4fw_cfg_uwire.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw_cfg_uwire.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t4fw_cfg_uwire.fw" t4fw.fwo optional cxgbe \ dependency "t4fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw.fwo" t4fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw-1.8.11.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t4fw.fw" t5fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk 
t5fw_cfg.fw:t5fw_cfg t5fw.fw:t5fw -mt5fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t5fw_cfg.c" t5fw_cfg.fwo optional cxgbe \ dependency "t5fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t5fw_cfg.fwo" t5fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t5fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t5fw_cfg.fw" t5fw.fwo optional cxgbe \ dependency "t5fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t5fw.fwo" t5fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t5fw-1.8.22.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t5fw.fw" dev/cy/cy.c optional cy dev/cy/cy_isa.c optional cy isa dev/cy/cy_pci.c optional cy pci dev/dc/if_dc.c optional dc pci dev/dc/dcphy.c optional dc pci dev/dc/pnphy.c optional dc pci dev/dcons/dcons.c optional dcons dev/dcons/dcons_crom.c optional dcons_crom dev/dcons/dcons_os.c optional dcons dev/de/if_de.c optional de pci dev/digi/CX.c optional digi_CX dev/digi/CX_PCI.c optional digi_CX_PCI dev/digi/EPCX.c optional digi_EPCX dev/digi/EPCX_PCI.c optional digi_EPCX_PCI dev/digi/Xe.c optional digi_Xe dev/digi/Xem.c optional digi_Xem dev/digi/Xr.c optional digi_Xr dev/digi/digi.c optional digi dev/digi/digi_isa.c optional digi isa dev/digi/digi_pci.c optional digi pci dev/dpt/dpt_eisa.c optional dpt eisa dev/dpt/dpt_pci.c optional dpt pci dev/dpt/dpt_scsi.c optional dpt dev/drm/ati_pcigart.c optional drm dev/drm/drm_agpsupport.c optional drm dev/drm/drm_auth.c optional drm dev/drm/drm_bufs.c optional drm dev/drm/drm_context.c optional drm dev/drm/drm_dma.c optional drm dev/drm/drm_drawable.c optional drm dev/drm/drm_drv.c optional drm dev/drm/drm_fops.c optional drm dev/drm/drm_hashtab.c optional drm dev/drm/drm_ioctl.c optional drm dev/drm/drm_irq.c optional drm dev/drm/drm_lock.c optional drm dev/drm/drm_memory.c optional drm dev/drm/drm_mm.c optional drm dev/drm/drm_pci.c optional drm dev/drm/drm_scatter.c optional drm dev/drm/drm_sman.c optional drm dev/drm/drm_sysctl.c optional drm dev/drm/drm_vm.c optional drm dev/drm/i915_dma.c optional i915drm dev/drm/i915_drv.c optional i915drm dev/drm/i915_irq.c optional i915drm dev/drm/i915_mem.c optional i915drm dev/drm/i915_suspend.c optional i915drm dev/drm/mach64_dma.c optional mach64drm dev/drm/mach64_drv.c optional mach64drm dev/drm/mach64_irq.c optional mach64drm dev/drm/mach64_state.c optional mach64drm dev/drm/mga_dma.c optional mgadrm dev/drm/mga_drv.c optional mgadrm dev/drm/mga_irq.c optional mgadrm dev/drm/mga_state.c optional mgadrm dev/drm/mga_warp.c optional mgadrm dev/drm/r128_cce.c optional r128drm \ compile-with "${NORMAL_C} ${NO_WUNUSED_VALUE} ${NO_WCONSTANT_CONVERSION}" dev/drm/r128_drv.c optional r128drm dev/drm/r128_irq.c optional r128drm dev/drm/r128_state.c optional r128drm \ compile-with "${NORMAL_C} ${NO_WUNUSED_VALUE}" dev/drm/r300_cmdbuf.c optional radeondrm dev/drm/r600_blit.c optional radeondrm dev/drm/r600_cp.c optional radeondrm \ compile-with "${NORMAL_C} ${NO_WUNUSED_VALUE} ${NO_WCONSTANT_CONVERSION}" dev/drm/radeon_cp.c optional radeondrm \ compile-with "${NORMAL_C} ${NO_WUNUSED_VALUE} ${NO_WCONSTANT_CONVERSION}" dev/drm/radeon_cs.c optional radeondrm dev/drm/radeon_drv.c optional radeondrm dev/drm/radeon_irq.c optional radeondrm dev/drm/radeon_mem.c optional radeondrm dev/drm/radeon_state.c optional radeondrm dev/drm/savage_bci.c optional savagedrm dev/drm/savage_drv.c optional savagedrm dev/drm/savage_state.c optional savagedrm 
dev/drm/sis_drv.c optional sisdrm dev/drm/sis_ds.c optional sisdrm dev/drm/sis_mm.c optional sisdrm dev/drm/tdfx_drv.c optional tdfxdrm dev/drm/via_dma.c optional viadrm dev/drm/via_dmablit.c optional viadrm dev/drm/via_drv.c optional viadrm dev/drm/via_irq.c optional viadrm dev/drm/via_map.c optional viadrm dev/drm/via_mm.c optional viadrm dev/drm/via_verifier.c optional viadrm dev/drm/via_video.c optional viadrm dev/ed/if_ed.c optional ed dev/ed/if_ed_novell.c optional ed dev/ed/if_ed_rtl80x9.c optional ed dev/ed/if_ed_pccard.c optional ed pccard dev/ed/if_ed_pci.c optional ed pci dev/eisa/eisa_if.m standard dev/eisa/eisaconf.c optional eisa dev/e1000/if_em.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/if_lem.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/if_igb.c optional igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_80003es2lan.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82540.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82541.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82542.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82543.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82571.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82575.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_ich8lan.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_i210.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_api.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_mac.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_manage.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_nvm.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_phy.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_vf.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_mbx.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_osdep.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/et/if_et.c optional et dev/en/if_en_pci.c optional en pci dev/en/midway.c optional en dev/ep/if_ep.c optional ep dev/ep/if_ep_eisa.c optional ep eisa dev/ep/if_ep_isa.c optional ep isa dev/ep/if_ep_mca.c optional ep mca dev/ep/if_ep_pccard.c optional ep pccard dev/esp/esp_pci.c optional esp pci dev/esp/ncr53c9x.c optional esp dev/etherswitch/arswitch/arswitch.c optional arswitch dev/etherswitch/arswitch/arswitch_reg.c optional arswitch dev/etherswitch/arswitch/arswitch_phy.c optional arswitch dev/etherswitch/arswitch/arswitch_8216.c optional arswitch dev/etherswitch/arswitch/arswitch_8226.c optional arswitch dev/etherswitch/arswitch/arswitch_8316.c optional arswitch dev/etherswitch/arswitch/arswitch_7240.c optional arswitch dev/etherswitch/arswitch/arswitch_vlans.c optional arswitch dev/etherswitch/etherswitch.c optional etherswitch dev/etherswitch/etherswitch_if.m optional etherswitch dev/etherswitch/ip17x/ip17x.c optional ip17x dev/etherswitch/ip17x/ip175c.c optional ip17x dev/etherswitch/ip17x/ip175d.c optional ip17x dev/etherswitch/ip17x/ip17x_phy.c optional ip17x dev/etherswitch/ip17x/ip17x_vlans.c optional ip17x dev/etherswitch/mdio_if.m optional miiproxy dev/etherswitch/mdio.c optional 
miiproxy dev/etherswitch/miiproxy.c optional miiproxy dev/etherswitch/rtl8366/rtl8366rb.c optional rtl8366rb dev/etherswitch/ukswitch/ukswitch.c optional ukswitch dev/ex/if_ex.c optional ex dev/ex/if_ex_isa.c optional ex isa dev/ex/if_ex_pccard.c optional ex pccard dev/exca/exca.c optional cbb dev/fatm/if_fatm.c optional fatm pci dev/fb/splash.c optional splash dev/fdt/fdt_common.c optional fdt dev/fdt/fdt_pci.c optional fdt pci dev/fdt/fdt_slicer.c optional fdt cfi | fdt nand dev/fdt/fdt_static_dtb.S optional fdt fdt_dtb_static \ dependency "$S/boot/fdt/dts/${FDT_DTS_FILE}" dev/fdt/fdtbus.c optional fdt dev/fdt/simplebus.c optional fdt dev/fe/if_fe.c optional fe dev/fe/if_fe_pccard.c optional fe pccard dev/filemon/filemon.c optional filemon dev/firewire/firewire.c optional firewire dev/firewire/fwcrom.c optional firewire dev/firewire/fwdev.c optional firewire dev/firewire/fwdma.c optional firewire dev/firewire/fwmem.c optional firewire dev/firewire/fwohci.c optional firewire dev/firewire/fwohci_pci.c optional firewire pci dev/firewire/if_fwe.c optional fwe dev/firewire/if_fwip.c optional fwip dev/firewire/sbp.c optional sbp dev/firewire/sbp_targ.c optional sbp_targ dev/flash/at45d.c optional at45d dev/flash/mx25l.c optional mx25l dev/fxp/if_fxp.c optional fxp dev/fxp/inphy.c optional fxp dev/gem/if_gem.c optional gem dev/gem/if_gem_pci.c optional gem pci dev/gem/if_gem_sbus.c optional gem sbus dev/gpio/gpiobus.c optional gpio \ dependency "gpiobus_if.h" dev/gpio/gpioc.c optional gpio \ dependency "gpio_if.h" dev/gpio/gpioiic.c optional gpioiic dev/gpio/gpioled.c optional gpioled dev/gpio/gpio_if.m optional gpio dev/gpio/gpiobus_if.m optional gpio dev/hatm/if_hatm.c optional hatm pci dev/hatm/if_hatm_intr.c optional hatm pci dev/hatm/if_hatm_ioctl.c optional hatm pci dev/hatm/if_hatm_rx.c optional hatm pci dev/hatm/if_hatm_tx.c optional hatm pci dev/hifn/hifn7751.c optional hifn dev/hme/if_hme.c optional hme dev/hme/if_hme_pci.c optional hme pci dev/hme/if_hme_sbus.c optional hme sbus dev/hptiop/hptiop.c optional hptiop scbus dev/hwpmc/hwpmc_logging.c optional hwpmc dev/hwpmc/hwpmc_mod.c optional hwpmc dev/hwpmc/hwpmc_soft.c optional hwpmc dev/ichsmb/ichsmb.c optional ichsmb dev/ichsmb/ichsmb_pci.c optional ichsmb pci dev/ida/ida.c optional ida dev/ida/ida_disk.c optional ida dev/ida/ida_eisa.c optional ida eisa dev/ida/ida_pci.c optional ida pci dev/ie/if_ie.c optional ie isa nowerror dev/ie/if_ie_isa.c optional ie isa dev/ieee488/ibfoo.c optional pcii | tnt4882 dev/ieee488/pcii.c optional pcii dev/ieee488/tnt4882.c optional tnt4882 dev/ieee488/upd7210.c optional pcii | tnt4882 dev/iicbus/ad7418.c optional ad7418 dev/iicbus/ds133x.c optional ds133x dev/iicbus/ds1374.c optional ds1374 dev/iicbus/ds1672.c optional ds1672 dev/iicbus/icee.c optional icee dev/iicbus/if_ic.c optional ic dev/iicbus/iic.c optional iic dev/iicbus/iicbb.c optional iicbb dev/iicbus/iicbb_if.m optional iicbb dev/iicbus/iicbus.c optional iicbus dev/iicbus/iicbus_if.m optional iicbus dev/iicbus/iiconf.c optional iicbus dev/iicbus/iicsmb.c optional iicsmb \ dependency "iicbus_if.h" dev/iicbus/iicoc.c optional iicoc dev/iicbus/pcf8563.c optional pcf8563 dev/iicbus/s35390a.c optional s35390a dev/iir/iir.c optional iir dev/iir/iir_ctrl.c optional iir dev/iir/iir_pci.c optional iir pci # XXX Work around clang warning, until maintainer approves fix. 
dev/ips/ips.c optional ips \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/ips/ips_commands.c optional ips dev/ips/ips_disk.c optional ips dev/ips/ips_ioctl.c optional ips dev/ips/ips_pci.c optional ips pci dev/ipw/if_ipw.c optional ipw ipwbssfw.c optional ipwbssfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_bss.fw:ipw_bss:130 -lintel_ipw -mipw_bss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwbssfw.c" ipw_bss.fwo optional ipwbssfw | ipwfw \ dependency "ipw_bss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_bss.fwo" ipw_bss.fw optional ipwbssfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_bss.fw" ipwibssfw.c optional ipwibssfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_ibss.fw:ipw_ibss:130 -lintel_ipw -mipw_ibss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwibssfw.c" ipw_ibss.fwo optional ipwibssfw | ipwfw \ dependency "ipw_ibss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_ibss.fwo" ipw_ibss.fw optional ipwibssfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3-i.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_ibss.fw" ipwmonitorfw.c optional ipwmonitorfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_monitor.fw:ipw_monitor:130 -lintel_ipw -mipw_monitor -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwmonitorfw.c" ipw_monitor.fwo optional ipwmonitorfw | ipwfw \ dependency "ipw_monitor.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_monitor.fwo" ipw_monitor.fw optional ipwmonitorfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3-p.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_monitor.fw" dev/iscsi_initiator/iscsi.c optional iscsi_initiator scbus dev/iscsi_initiator/iscsi_subr.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_cam.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_soc.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_sm.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_subr.c optional iscsi_initiator scbus dev/isf/isf.c optional isf dev/isf/isf_fdt.c optional isf fdt dev/isf/isf_nexus.c optional isf dev/isp/isp.c optional isp dev/isp/isp_freebsd.c optional isp dev/isp/isp_library.c optional isp dev/isp/isp_pci.c optional isp pci dev/isp/isp_sbus.c optional isp sbus dev/isp/isp_target.c optional isp dev/ispfw/ispfw.c optional ispfw dev/iwi/if_iwi.c optional iwi iwibssfw.c optional iwibssfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_bss.fw:iwi_bss:300 -lintel_iwi -miwi_bss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwibssfw.c" iwi_bss.fwo optional iwibssfw | iwifw \ dependency "iwi_bss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_bss.fwo" iwi_bss.fw optional iwibssfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-bss.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_bss.fw" iwiibssfw.c optional iwiibssfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_ibss.fw:iwi_ibss:300 -lintel_iwi -miwi_ibss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwiibssfw.c" iwi_ibss.fwo optional iwiibssfw | iwifw \ dependency "iwi_ibss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_ibss.fwo" iwi_ibss.fw optional iwiibssfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-ibss.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj 
no-implicit-rule \ clean "iwi_ibss.fw" iwimonitorfw.c optional iwimonitorfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_monitor.fw:iwi_monitor:300 -lintel_iwi -miwi_monitor -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwimonitorfw.c" iwi_monitor.fwo optional iwimonitorfw | iwifw \ dependency "iwi_monitor.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_monitor.fwo" iwi_monitor.fw optional iwimonitorfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-sniffer.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_monitor.fw" dev/iwn/if_iwn.c optional iwn iwn1000fw.c optional iwn1000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn1000.fw:iwn1000fw -miwn1000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn1000fw.c" iwn1000fw.fwo optional iwn1000fw | iwnfw \ dependency "iwn1000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn1000fw.fwo" iwn1000.fw optional iwn1000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-1000-39.31.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn1000.fw" iwn4965fw.c optional iwn4965fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn4965.fw:iwn4965fw -miwn4965fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn4965fw.c" iwn4965fw.fwo optional iwn4965fw | iwnfw \ dependency "iwn4965.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn4965fw.fwo" iwn4965.fw optional iwn4965fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-4965-228.61.2.24.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn4965.fw" iwn5000fw.c optional iwn5000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn5000.fw:iwn5000fw -miwn5000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn5000fw.c" iwn5000fw.fwo optional iwn5000fw | iwnfw \ dependency "iwn5000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn5000fw.fwo" iwn5000.fw optional iwn5000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-5000-8.83.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn5000.fw" iwn5150fw.c optional iwn5150fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn5150.fw:iwn5150fw -miwn5150fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn5150fw.c" iwn5150fw.fwo optional iwn5150fw | iwnfw \ dependency "iwn5150.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn5150fw.fwo" iwn5150.fw optional iwn5150fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-5150-8.24.2.2.fw.uu"\ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn5150.fw" iwn6000fw.c optional iwn6000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000.fw:iwn6000fw -miwn6000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000fw.c" iwn6000fw.fwo optional iwn6000fw | iwnfw \ dependency "iwn6000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000fw.fwo" iwn6000.fw optional iwn6000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000-9.221.4.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000.fw" iwn6000g2afw.c optional iwn6000g2afw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000g2a.fw:iwn6000g2afw -miwn6000g2afw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000g2afw.c" iwn6000g2afw.fwo optional iwn6000g2afw | iwnfw \ dependency "iwn6000g2a.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean 
"iwn6000g2afw.fwo" iwn6000g2a.fw optional iwn6000g2afw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000g2a-17.168.5.2.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000g2a.fw" iwn6000g2bfw.c optional iwn6000g2bfw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000g2b.fw:iwn6000g2bfw -miwn6000g2bfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000g2bfw.c" iwn6000g2bfw.fwo optional iwn6000g2bfw | iwnfw \ dependency "iwn6000g2b.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000g2bfw.fwo" iwn6000g2b.fw optional iwn6000g2bfw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000g2b-17.168.5.2.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000g2b.fw" iwn6050fw.c optional iwn6050fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6050.fw:iwn6050fw -miwn6050fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6050fw.c" iwn6050fw.fwo optional iwn6050fw | iwnfw \ dependency "iwn6050.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6050fw.fwo" iwn6050.fw optional iwn6050fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6050-41.28.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6050.fw" dev/ixgb/if_ixgb.c optional ixgb dev/ixgb/ixgb_ee.c optional ixgb dev/ixgb/ixgb_hw.c optional ixgb dev/ixgbe/ixgbe.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP -DIXGBE_FDIR" dev/ixgbe/ixv.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_phy.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_api.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_common.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_mbx.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_vf.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_82598.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_82599.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_x540.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82598.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82599.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/jme/if_jme.c optional jme pci dev/joy/joy.c optional joy dev/joy/joy_isa.c optional joy isa dev/joy/joy_pccard.c optional joy pccard dev/kbdmux/kbdmux.c optional kbdmux dev/ksyms/ksyms.c optional ksyms dev/le/am7990.c optional le dev/le/am79900.c optional le dev/le/if_le_pci.c optional le pci dev/le/lance.c optional le dev/led/led.c standard dev/lge/if_lge.c optional lge dev/lmc/if_lmc.c optional lmc dev/malo/if_malo.c optional malo dev/malo/if_malohal.c optional malo dev/malo/if_malo_pci.c optional malo pci dev/mc146818/mc146818.c optional mc146818 dev/mca/mca_bus.c optional mca dev/mcd/mcd.c optional mcd isa nowerror dev/mcd/mcd_isa.c optional mcd isa nowerror dev/md/md.c optional md dev/mem/memdev.c optional mem dev/mem/memutil.c optional mem dev/mfi/mfi.c optional mfi dev/mfi/mfi_debug.c optional mfi dev/mfi/mfi_pci.c optional mfi pci dev/mfi/mfi_disk.c optional mfi dev/mfi/mfi_syspd.c optional mfi dev/mfi/mfi_tbolt.c optional mfi dev/mfi/mfi_linux.c optional mfi compat_linux dev/mfi/mfi_cam.c optional mfip 
scbus dev/mii/acphy.c optional miibus | acphy dev/mii/amphy.c optional miibus | amphy dev/mii/atphy.c optional miibus | atphy dev/mii/axphy.c optional miibus | axphy dev/mii/bmtphy.c optional miibus | bmtphy dev/mii/brgphy.c optional miibus | brgphy dev/mii/ciphy.c optional miibus | ciphy dev/mii/e1000phy.c optional miibus | e1000phy dev/mii/gentbi.c optional miibus | gentbi dev/mii/icsphy.c optional miibus | icsphy dev/mii/ip1000phy.c optional miibus | ip1000phy dev/mii/jmphy.c optional miibus | jmphy dev/mii/lxtphy.c optional miibus | lxtphy dev/mii/mii.c optional miibus | mii dev/mii/mii_bitbang.c optional miibus | mii_bitbang dev/mii/mii_physubr.c optional miibus | mii dev/mii/miibus_if.m optional miibus | mii dev/mii/mlphy.c optional miibus | mlphy dev/mii/nsgphy.c optional miibus | nsgphy dev/mii/nsphy.c optional miibus | nsphy dev/mii/nsphyter.c optional miibus | nsphyter dev/mii/pnaphy.c optional miibus | pnaphy dev/mii/qsphy.c optional miibus | qsphy dev/mii/rdcphy.c optional miibus | rdcphy dev/mii/rgephy.c optional miibus | rgephy dev/mii/rlphy.c optional miibus | rlphy dev/mii/rlswitch.c optional rlswitch dev/mii/smcphy.c optional miibus | smcphy dev/mii/smscphy.c optional miibus | smscphy dev/mii/tdkphy.c optional miibus | tdkphy dev/mii/tlphy.c optional miibus | tlphy dev/mii/truephy.c optional miibus | truephy dev/mii/ukphy.c optional miibus | mii dev/mii/ukphy_subr.c optional miibus | mii dev/mii/xmphy.c optional miibus | xmphy dev/mk48txx/mk48txx.c optional mk48txx dev/mlx/mlx.c optional mlx dev/mlx/mlx_disk.c optional mlx dev/mlx/mlx_pci.c optional mlx pci dev/mly/mly.c optional mly dev/mmc/mmc.c optional mmc dev/mmc/mmcbr_if.m standard dev/mmc/mmcbus_if.m standard dev/mmc/mmcsd.c optional mmcsd dev/mn/if_mn.c optional mn pci dev/mps/mps.c optional mps dev/mps/mps_config.c optional mps # XXX Work around clang warning, until maintainer approves fix. 
dev/mps/mps_mapping.c optional mps \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/mps/mps_pci.c optional mps pci dev/mps/mps_sas.c optional mps \ compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}" dev/mps/mps_sas_lsi.c optional mps dev/mps/mps_table.c optional mps dev/mps/mps_user.c optional mps dev/mpt/mpt.c optional mpt dev/mpt/mpt_cam.c optional mpt dev/mpt/mpt_debug.c optional mpt dev/mpt/mpt_pci.c optional mpt pci dev/mpt/mpt_raid.c optional mpt dev/mpt/mpt_user.c optional mpt dev/msk/if_msk.c optional msk dev/mvs/mvs.c optional mvs dev/mvs/mvs_if.m optional mvs dev/mvs/mvs_pci.c optional mvs pci dev/mwl/if_mwl.c optional mwl dev/mwl/if_mwl_pci.c optional mwl pci dev/mwl/mwlhal.c optional mwl mwlfw.c optional mwlfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk mw88W8363.fw:mw88W8363fw mwlboot.fw:mwlboot -mmwl -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "mwlfw.c" mw88W8363.fwo optional mwlfw \ dependency "mw88W8363.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "mw88W8363.fwo" mw88W8363.fw optional mwlfw \ dependency "$S/contrib/dev/mwl/mw88W8363.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "mw88W8363.fw" mwlboot.fwo optional mwlfw \ dependency "mwlboot.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "mwlboot.fwo" mwlboot.fw optional mwlfw \ dependency "$S/contrib/dev/mwl/mwlboot.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "mwlboot.fw" dev/mxge/if_mxge.c optional mxge pci dev/mxge/mxge_eth_z8e.c optional mxge pci dev/mxge/mxge_ethp_z8e.c optional mxge pci dev/mxge/mxge_rss_eth_z8e.c optional mxge pci dev/mxge/mxge_rss_ethp_z8e.c optional mxge pci dev/my/if_my.c optional my dev/nand/nand.c optional nand dev/nand/nand_bbt.c optional nand dev/nand/nand_cdev.c optional nand dev/nand/nand_generic.c optional nand dev/nand/nand_geom.c optional nand dev/nand/nand_id.c optional nand dev/nand/nandbus.c optional nand dev/nand/nandbus_if.m optional nand dev/nand/nand_if.m optional nand dev/nand/nandsim.c optional nandsim nand dev/nand/nandsim_chip.c optional nandsim nand dev/nand/nandsim_ctrl.c optional nandsim nand dev/nand/nandsim_log.c optional nandsim nand dev/nand/nandsim_swap.c optional nandsim nand dev/nand/nfc_if.m optional nand dev/ncv/ncr53c500.c optional ncv dev/ncv/ncr53c500_pccard.c optional ncv pccard dev/netmap/netmap.c optional netmap dev/nge/if_nge.c optional nge dev/nxge/if_nxge.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-device.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-mm.c optional nxge dev/nxge/xgehal/xge-queue.c optional nxge dev/nxge/xgehal/xgehal-driver.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-ring.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-channel.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-fifo.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-stats.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-config.c optional nxge dev/nxge/xgehal/xgehal-mgmt.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nmdm/nmdm.c optional nmdm dev/nsp/nsp.c optional nsp dev/nsp/nsp_pccard.c optional nsp pccard dev/null/null.c standard dev/oce/oce_hw.c optional oce pci dev/oce/oce_if.c optional oce pci dev/oce/oce_mbox.c optional oce pci dev/oce/oce_queue.c optional 
oce pci dev/oce/oce_sysctl.c optional oce pci dev/oce/oce_util.c optional oce pci dev/ofw/ofw_bus_if.m optional fdt dev/ofw/ofw_bus_subr.c optional fdt dev/ofw/ofw_fdt.c optional fdt dev/ofw/ofw_if.m optional fdt dev/ofw/openfirm.c optional fdt dev/ofw/openfirmio.c optional fdt dev/patm/if_patm.c optional patm pci dev/patm/if_patm_attach.c optional patm pci dev/patm/if_patm_intr.c optional patm pci dev/patm/if_patm_ioctl.c optional patm pci dev/patm/if_patm_rtables.c optional patm pci dev/patm/if_patm_rx.c optional patm pci dev/patm/if_patm_tx.c optional patm pci dev/pbio/pbio.c optional pbio isa dev/pccard/card_if.m standard dev/pccard/pccard.c optional pccard dev/pccard/pccard_cis.c optional pccard dev/pccard/pccard_cis_quirks.c optional pccard dev/pccard/pccard_device.c optional pccard dev/pccard/power_if.m standard dev/pccbb/pccbb.c optional cbb dev/pccbb/pccbb_isa.c optional cbb isa dev/pccbb/pccbb_pci.c optional cbb pci dev/pcf/pcf.c optional pcf dev/pci/eisa_pci.c optional pci eisa dev/pci/fixup_pci.c optional pci dev/pci/hostb_pci.c optional pci dev/pci/ignore_pci.c optional pci dev/pci/isa_pci.c optional pci isa dev/pci/pci.c optional pci dev/pci/pci_if.m standard dev/pci/pci_pci.c optional pci dev/pci/pci_subr.c optional pci dev/pci/pci_user.c optional pci dev/pci/pcib_if.m standard dev/pci/vga_pci.c optional pci dev/pcn/if_pcn.c optional pcn pci dev/pdq/if_fea.c optional fea eisa dev/pdq/if_fpa.c optional fpa pci dev/pdq/pdq.c optional nowerror fea eisa | fpa pci dev/pdq/pdq_ifsubr.c optional nowerror fea eisa | fpa pci dev/ppbus/if_plip.c optional plip dev/ppbus/immio.c optional vpo dev/ppbus/lpbb.c optional lpbb dev/ppbus/lpt.c optional lpt dev/ppbus/pcfclock.c optional pcfclock dev/ppbus/ppb_1284.c optional ppbus dev/ppbus/ppb_base.c optional ppbus dev/ppbus/ppb_msq.c optional ppbus dev/ppbus/ppbconf.c optional ppbus dev/ppbus/ppbus_if.m optional ppbus dev/ppbus/ppi.c optional ppi dev/ppbus/pps.c optional pps dev/ppbus/vpo.c optional vpo dev/ppbus/vpoio.c optional vpo dev/ppc/ppc.c optional ppc dev/ppc/ppc_acpi.c optional ppc acpi dev/ppc/ppc_isa.c optional ppc isa dev/ppc/ppc_pci.c optional ppc pci dev/ppc/ppc_puc.c optional ppc puc dev/pst/pst-iop.c optional pst dev/pst/pst-pci.c optional pst pci dev/pst/pst-raid.c optional pst dev/pty/pty.c optional pty dev/puc/puc.c optional puc dev/puc/puc_cfg.c optional puc dev/puc/puc_pccard.c optional puc pccard dev/puc/puc_pci.c optional puc pci dev/puc/pucdata.c optional puc pci dev/quicc/quicc_core.c optional quicc dev/ral/rt2560.c optional ral dev/ral/rt2661.c optional ral dev/ral/rt2860.c optional ral dev/ral/if_ral_pci.c optional ral pci rt2561fw.c optional rt2561fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2561.fw:rt2561fw -mrt2561 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2561fw.c" rt2561fw.fwo optional rt2561fw | ralfw \ dependency "rt2561.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2561fw.fwo" rt2561.fw optional rt2561fw | ralfw \ dependency "$S/contrib/dev/ral/rt2561.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2561.fw" rt2561sfw.c optional rt2561sfw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2561s.fw:rt2561sfw -mrt2561s -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2561sfw.c" rt2561sfw.fwo optional rt2561sfw | ralfw \ dependency "rt2561s.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2561sfw.fwo" rt2561s.fw optional rt2561sfw | ralfw \ dependency "$S/contrib/dev/ral/rt2561s.fw.uu" \ 
compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2561s.fw" rt2661fw.c optional rt2661fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2661.fw:rt2661fw -mrt2661 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2661fw.c" rt2661fw.fwo optional rt2661fw | ralfw \ dependency "rt2661.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2661fw.fwo" rt2661.fw optional rt2661fw | ralfw \ dependency "$S/contrib/dev/ral/rt2661.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2661.fw" rt2860fw.c optional rt2860fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2860.fw:rt2860fw -mrt2860 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2860fw.c" rt2860fw.fwo optional rt2860fw | ralfw \ dependency "rt2860.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2860fw.fwo" rt2860.fw optional rt2860fw | ralfw \ dependency "$S/contrib/dev/ral/rt2860.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2860.fw" dev/random/harvest.c standard dev/random/hash.c optional random dev/random/probe.c optional random dev/random/random_adaptors.c standard dev/random/randomdev.c optional random dev/random/randomdev_soft.c optional random dev/random/yarrow.c optional random dev/rc/rc.c optional rc dev/re/if_re.c optional re dev/rndtest/rndtest.c optional rndtest dev/rp/rp.c optional rp dev/rp/rp_isa.c optional rp isa dev/rp/rp_pci.c optional rp pci dev/safe/safe.c optional safe dev/scc/scc_if.m optional scc dev/scc/scc_bfe_ebus.c optional scc ebus dev/scc/scc_bfe_quicc.c optional scc quicc dev/scc/scc_bfe_sbus.c optional scc fhc | scc sbus dev/scc/scc_core.c optional scc dev/scc/scc_dev_quicc.c optional scc quicc dev/scc/scc_dev_sab82532.c optional scc dev/scc/scc_dev_z8530.c optional scc dev/scd/scd.c optional scd isa dev/scd/scd_isa.c optional scd isa dev/sdhci/sdhci.c optional sdhci dev/sdhci/sdhci_if.m optional sdhci dev/sdhci/sdhci_pci.c optional sdhci pci dev/sf/if_sf.c optional sf pci dev/sge/if_sge.c optional sge pci dev/si/si.c optional si dev/si/si2_z280.c optional si dev/si/si3_t225.c optional si dev/si/si_eisa.c optional si eisa dev/si/si_isa.c optional si isa dev/si/si_pci.c optional si pci dev/siba/siba.c optional siba dev/siba/siba_bwn.c optional siba_bwn pci dev/siba/siba_cc.c optional siba dev/siba/siba_core.c optional siba | siba_bwn pci dev/siba/siba_pcib.c optional siba pci dev/siis/siis.c optional siis pci dev/sis/if_sis.c optional sis pci dev/sk/if_sk.c optional sk pci dev/smbus/smb.c optional smb dev/smbus/smbconf.c optional smbus dev/smbus/smbus.c optional smbus dev/smbus/smbus_if.m optional smbus dev/smc/if_smc.c optional smc dev/sn/if_sn.c optional sn dev/sn/if_sn_isa.c optional sn isa dev/sn/if_sn_pccard.c optional sn pccard dev/snp/snp.c optional snp dev/sound/clone.c optional sound dev/sound/unit.c optional sound dev/sound/isa/ad1816.c optional snd_ad1816 isa dev/sound/isa/ess.c optional snd_ess isa dev/sound/isa/gusc.c optional snd_gusc isa dev/sound/isa/mss.c optional snd_mss isa dev/sound/isa/sb16.c optional snd_sb16 isa dev/sound/isa/sb8.c optional snd_sb8 isa dev/sound/isa/sbc.c optional snd_sbc isa dev/sound/isa/sndbuf_dma.c optional sound isa dev/sound/pci/als4000.c optional snd_als4000 pci dev/sound/pci/atiixp.c optional snd_atiixp pci dev/sound/pci/cmi.c optional snd_cmi pci dev/sound/pci/cs4281.c optional snd_cs4281 pci dev/sound/pci/csa.c optional snd_csa pci dev/sound/pci/csapcm.c optional snd_csa pci dev/sound/pci/ds1.c optional snd_ds1 pci 
dev/sound/pci/emu10k1.c optional snd_emu10k1 pci dev/sound/pci/emu10kx.c optional snd_emu10kx pci dev/sound/pci/emu10kx-pcm.c optional snd_emu10kx pci dev/sound/pci/emu10kx-midi.c optional snd_emu10kx pci dev/sound/pci/envy24.c optional snd_envy24 pci dev/sound/pci/envy24ht.c optional snd_envy24ht pci dev/sound/pci/es137x.c optional snd_es137x pci dev/sound/pci/fm801.c optional snd_fm801 pci dev/sound/pci/ich.c optional snd_ich pci dev/sound/pci/maestro.c optional snd_maestro pci dev/sound/pci/maestro3.c optional snd_maestro3 pci dev/sound/pci/neomagic.c optional snd_neomagic pci dev/sound/pci/solo.c optional snd_solo pci dev/sound/pci/spicds.c optional snd_spicds pci dev/sound/pci/t4dwave.c optional snd_t4dwave pci dev/sound/pci/via8233.c optional snd_via8233 pci dev/sound/pci/via82c686.c optional snd_via82c686 pci dev/sound/pci/vibes.c optional snd_vibes pci dev/sound/pci/hda/hdaa.c optional snd_hda pci dev/sound/pci/hda/hdaa_patches.c optional snd_hda pci dev/sound/pci/hda/hdac.c optional snd_hda pci dev/sound/pci/hda/hdac_if.m optional snd_hda pci dev/sound/pci/hda/hdacc.c optional snd_hda pci dev/sound/pci/hdspe.c optional snd_hdspe pci dev/sound/pci/hdspe-pcm.c optional snd_hdspe pci dev/sound/pcm/ac97.c optional sound dev/sound/pcm/ac97_if.m optional sound dev/sound/pcm/ac97_patch.c optional sound dev/sound/pcm/buffer.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/channel.c optional sound dev/sound/pcm/channel_if.m optional sound dev/sound/pcm/dsp.c optional sound dev/sound/pcm/feeder.c optional sound dev/sound/pcm/feeder_chain.c optional sound dev/sound/pcm/feeder_eq.c optional sound \ dependency "feeder_eq_gen.h" \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_if.m optional sound dev/sound/pcm/feeder_format.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_matrix.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_mixer.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_rate.c optional sound \ dependency "feeder_rate_gen.h" \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_volume.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/mixer.c optional sound dev/sound/pcm/mixer_if.m optional sound dev/sound/pcm/sndstat.c optional sound dev/sound/pcm/sound.c optional sound dev/sound/pcm/vchan.c optional sound dev/sound/usb/uaudio.c optional snd_uaudio usb dev/sound/usb/uaudio_pcm.c optional snd_uaudio usb dev/sound/midi/midi.c optional sound dev/sound/midi/mpu401.c optional sound dev/sound/midi/mpu_if.m optional sound dev/sound/midi/mpufoi_if.m optional sound dev/sound/midi/sequencer.c optional sound dev/sound/midi/synth_if.m optional sound dev/spibus/spibus.c optional spibus \ dependency "spibus_if.h" dev/spibus/spibus_if.m optional spibus dev/ste/if_ste.c optional ste pci dev/stg/tmc18c30.c optional stg dev/stg/tmc18c30_isa.c optional stg isa dev/stg/tmc18c30_pccard.c optional stg pccard dev/stg/tmc18c30_pci.c optional stg pci dev/stg/tmc18c30_subr.c optional stg dev/stge/if_stge.c optional stge dev/streams/streams.c optional streams dev/sym/sym_hipd.c optional sym \ dependency "$S/dev/sym/sym_{conf,defs}.h" dev/syscons/blank/blank_saver.c optional blank_saver dev/syscons/daemon/daemon_saver.c optional daemon_saver dev/syscons/dragon/dragon_saver.c optional dragon_saver dev/syscons/fade/fade_saver.c optional fade_saver dev/syscons/fire/fire_saver.c optional fire_saver dev/syscons/green/green_saver.c optional green_saver dev/syscons/logo/logo.c optional logo_saver dev/syscons/logo/logo_saver.c optional 
logo_saver dev/syscons/rain/rain_saver.c optional rain_saver dev/syscons/schistory.c optional sc dev/syscons/scmouse.c optional sc dev/syscons/scterm.c optional sc dev/syscons/scvidctl.c optional sc dev/syscons/snake/snake_saver.c optional snake_saver dev/syscons/star/star_saver.c optional star_saver dev/syscons/syscons.c optional sc dev/syscons/sysmouse.c optional sc dev/syscons/warp/warp_saver.c optional warp_saver dev/tdfx/tdfx_linux.c optional tdfx_linux tdfx compat_linux dev/tdfx/tdfx_pci.c optional tdfx pci dev/ti/if_ti.c optional ti pci dev/tl/if_tl.c optional tl pci dev/trm/trm.c optional trm dev/twa/tw_cl_init.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_intr.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_io.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_misc.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_osl_cam.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_osl_freebsd.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twe/twe.c optional twe dev/twe/twe_freebsd.c optional twe dev/tws/tws.c optional tws dev/tws/tws_cam.c optional tws dev/tws/tws_hdm.c optional tws dev/tws/tws_services.c optional tws dev/tws/tws_user.c optional tws dev/tx/if_tx.c optional tx dev/txp/if_txp.c optional txp dev/uart/uart_bus_acpi.c optional uart acpi #dev/uart/uart_bus_cbus.c optional uart cbus dev/uart/uart_bus_ebus.c optional uart ebus dev/uart/uart_bus_fdt.c optional uart fdt dev/uart/uart_bus_isa.c optional uart isa dev/uart/uart_bus_pccard.c optional uart pccard dev/uart/uart_bus_pci.c optional uart pci dev/uart/uart_bus_puc.c optional uart puc dev/uart/uart_bus_scc.c optional uart scc dev/uart/uart_core.c optional uart dev/uart/uart_dbg.c optional uart gdb dev/uart/uart_dev_ns8250.c optional uart uart_ns8250 dev/uart/uart_dev_pl011.c optional uart pl011 dev/uart/uart_dev_quicc.c optional uart quicc dev/uart/uart_dev_sab82532.c optional uart uart_sab82532 dev/uart/uart_dev_sab82532.c optional uart scc dev/uart/uart_dev_z8530.c optional uart uart_z8530 dev/uart/uart_dev_z8530.c optional uart scc dev/uart/uart_if.m optional uart dev/uart/uart_subr.c optional uart dev/uart/uart_tty.c optional uart dev/ubsec/ubsec.c optional ubsec # # USB controller drivers # dev/usb/controller/at91dci.c optional at91dci dev/usb/controller/at91dci_atmelarm.c optional at91dci at91rm9200 dev/usb/controller/musb_otg.c optional musb dev/usb/controller/musb_otg_atmelarm.c optional musb at91rm9200 dev/usb/controller/dwc_otg.c optional dwcotg dev/usb/controller/ehci.c optional ehci dev/usb/controller/ehci_pci.c optional ehci pci dev/usb/controller/ohci.c optional ohci dev/usb/controller/ohci_atmelarm.c optional ohci at91rm9200 dev/usb/controller/ohci_pci.c optional ohci pci dev/usb/controller/uhci.c optional uhci dev/usb/controller/uhci_pci.c optional uhci pci dev/usb/controller/xhci.c optional xhci dev/usb/controller/xhci_pci.c optional xhci pci dev/usb/controller/uss820dci.c optional uss820dci dev/usb/controller/uss820dci_atmelarm.c optional uss820dci at91rm9200 dev/usb/controller/usb_controller.c optional usb # # USB storage drivers # dev/usb/storage/umass.c optional umass dev/usb/storage/urio.c optional urio dev/usb/storage/ustorage_fs.c optional usfs # # USB core # dev/usb/usb_busdma.c optional usb dev/usb/usb_compat_linux.c optional usb dev/usb/usb_core.c optional usb dev/usb/usb_debug.c optional usb dev/usb/usb_dev.c optional usb dev/usb/usb_device.c optional usb dev/usb/usb_dynamic.c 
optional usb dev/usb/usb_error.c optional usb dev/usb/usb_generic.c optional usb dev/usb/usb_handle_request.c optional usb dev/usb/usb_hid.c optional usb dev/usb/usb_hub.c optional usb dev/usb/usb_if.m optional usb dev/usb/usb_lookup.c optional usb dev/usb/usb_mbuf.c optional usb dev/usb/usb_msctest.c optional usb dev/usb/usb_parse.c optional usb dev/usb/usb_pf.c optional usb dev/usb/usb_process.c optional usb dev/usb/usb_request.c optional usb dev/usb/usb_transfer.c optional usb dev/usb/usb_util.c optional usb # # USB network drivers # dev/usb/net/if_aue.c optional aue dev/usb/net/if_axe.c optional axe dev/usb/net/if_cdce.c optional cdce dev/usb/net/if_cue.c optional cue dev/usb/net/if_ipheth.c optional ipheth dev/usb/net/if_kue.c optional kue dev/usb/net/if_mos.c optional mos dev/usb/net/if_rue.c optional rue dev/usb/net/if_smsc.c optional smsc dev/usb/net/if_udav.c optional udav dev/usb/net/if_usie.c optional usie dev/usb/net/ruephy.c optional rue dev/usb/net/usb_ethernet.c optional aue | axe | cdce | cue | kue | mos | \ rue | smsc | udav | ipheth dev/usb/net/uhso.c optional uhso # # USB WLAN drivers # dev/usb/wlan/if_rsu.c optional rsu rsu-rtl8712fw.c optional rsu-rtl8712fw | rsufw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rsu-rtl8712fw.fw:rsu-rtl8712fw:120 -mrsu-rtl8712fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rsu-rtl8712fw.c" rsu-rtl8712fw.fwo optional rsu-rtl8712fw | rsufw \ dependency "rsu-rtl8712fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rsu-rtl8712fw.fwo" rsu-rtl8712fw.fw optional rsu-rtl8712.fw | rsufw \ dependency "$S/contrib/dev/rsu/rsu-rtl8712fw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rsu-rtl8712fw.fw" dev/usb/wlan/if_rum.c optional rum dev/usb/wlan/if_run.c optional run runfw.c optional runfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk run.fw:runfw -mrunfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "runfw.c" runfw.fwo optional runfw \ dependency "run.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "runfw.fwo" run.fw optional runfw \ dependency "$S/contrib/dev/run/rt2870.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "run.fw" dev/usb/wlan/if_uath.c optional uath dev/usb/wlan/if_upgt.c optional upgt dev/usb/wlan/if_ural.c optional ural dev/usb/wlan/if_urtw.c optional urtw dev/usb/wlan/if_urtwn.c optional urtwn urtwn-rtl8192cfwT.c optional urtwn-rtl8192cfwT | urtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwT.fw:urtwn-rtl8192cfwT:111 -murtwn-rtl8192cfwT -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "urtwn-rtl8192cfwT.c" urtwn-rtl8192cfwT.fwo optional urtwn-rtl8192cfwT | urtwnfw \ dependency "urtwn-rtl8192cfwT.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "urtwn-rtl8192cfwT.fwo" urtwn-rtl8192cfwT.fw optional urtwn-rtl8192cfwT | urtwnfw \ dependency "$S/contrib/dev/urtwn/urtwn-rtl8192cfwT.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "urtwn-rtl8192cfwT.fw" urtwn-rtl8192cfwU.c optional urtwn-rtl8192cfwU | urtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwU.fw:urtwn-rtl8192cfwU:111 -murtwn-rtl8192cfwU -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "urtwn-rtl8192cfwU.c" urtwn-rtl8192cfwU.fwo optional urtwn-rtl8192cfwU | urtwnfw \ dependency "urtwn-rtl8192cfwU.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "urtwn-rtl8192cfwU.fwo" urtwn-rtl8192cfwU.fw optional urtwn-rtl8192cfwU | urtwnfw \ dependency 
"$S/contrib/dev/urtwn/urtwn-rtl8192cfwU.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "urtwn-rtl8192cfwU.fw" dev/usb/wlan/if_zyd.c optional zyd # # USB serial and parallel port drivers # dev/usb/serial/u3g.c optional u3g dev/usb/serial/uark.c optional uark dev/usb/serial/ubsa.c optional ubsa dev/usb/serial/ubser.c optional ubser dev/usb/serial/uchcom.c optional uchcom dev/usb/serial/ucycom.c optional ucycom dev/usb/serial/ufoma.c optional ufoma dev/usb/serial/uftdi.c optional uftdi dev/usb/serial/ugensa.c optional ugensa dev/usb/serial/uipaq.c optional uipaq dev/usb/serial/ulpt.c optional ulpt dev/usb/serial/umcs.c optional umcs dev/usb/serial/umct.c optional umct dev/usb/serial/umodem.c optional umodem dev/usb/serial/umoscom.c optional umoscom dev/usb/serial/uplcom.c optional uplcom dev/usb/serial/uslcom.c optional uslcom dev/usb/serial/uvisor.c optional uvisor dev/usb/serial/uvscom.c optional uvscom dev/usb/serial/usb_serial.c optional ucom | u3g | uark | ubsa | ubser | \ uchcom | ucycom | ufoma | uftdi | \ ugensa | uipaq | umcs | umct | \ umodem | umoscom | uplcom | usie | \ uslcom | uvisor | uvscom # # USB misc drivers # dev/usb/misc/ufm.c optional ufm dev/usb/misc/udbp.c optional udbp # # USB input drivers # dev/usb/input/atp.c optional atp dev/usb/input/uep.c optional uep dev/usb/input/uhid.c optional uhid dev/usb/input/ukbd.c optional ukbd dev/usb/input/ums.c optional ums # # USB quirks # dev/usb/quirk/usb_quirk.c optional usb # # USB templates # dev/usb/template/usb_template.c optional usb_template dev/usb/template/usb_template_audio.c optional usb_template dev/usb/template/usb_template_cdce.c optional usb_template dev/usb/template/usb_template_kbd.c optional usb_template dev/usb/template/usb_template_modem.c optional usb_template dev/usb/template/usb_template_mouse.c optional usb_template dev/usb/template/usb_template_msc.c optional usb_template dev/usb/template/usb_template_mtp.c optional usb_template # # USB END # dev/utopia/idtphy.c optional utopia dev/utopia/suni.c optional utopia dev/utopia/utopia.c optional utopia dev/vge/if_vge.c optional vge dev/vkbd/vkbd.c optional vkbd dev/vr/if_vr.c optional vr pci dev/vte/if_vte.c optional vte pci dev/vx/if_vx.c optional vx dev/vx/if_vx_eisa.c optional vx eisa dev/vx/if_vx_pci.c optional vx pci dev/vxge/vxge.c optional vxge dev/vxge/vxgehal/vxgehal-ifmsg.c optional vxge dev/vxge/vxgehal/vxgehal-mrpcim.c optional vxge dev/vxge/vxgehal/vxge-queue.c optional vxge dev/vxge/vxgehal/vxgehal-ring.c optional vxge dev/vxge/vxgehal/vxgehal-swapper.c optional vxge dev/vxge/vxgehal/vxgehal-mgmt.c optional vxge dev/vxge/vxgehal/vxgehal-srpcim.c optional vxge dev/vxge/vxgehal/vxgehal-config.c optional vxge dev/vxge/vxgehal/vxgehal-blockpool.c optional vxge dev/vxge/vxgehal/vxgehal-doorbells.c optional vxge dev/vxge/vxgehal/vxgehal-mgmtaux.c optional vxge dev/vxge/vxgehal/vxgehal-device.c optional vxge dev/vxge/vxgehal/vxgehal-mm.c optional vxge dev/vxge/vxgehal/vxgehal-driver.c optional vxge dev/vxge/vxgehal/vxgehal-virtualpath.c optional vxge dev/vxge/vxgehal/vxgehal-channel.c optional vxge dev/vxge/vxgehal/vxgehal-fifo.c optional vxge dev/watchdog/watchdog.c standard dev/wb/if_wb.c optional wb pci dev/wds/wd7000.c optional wds isa dev/wi/if_wi.c optional wi dev/wi/if_wi_pccard.c optional wi pccard dev/wi/if_wi_pci.c optional wi pci dev/wl/if_wl.c optional wl isa dev/wpi/if_wpi.c optional wpi pci wpifw.c optional wpifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk wpi.fw:wpifw:153229 -mwpi -c${.TARGET}" \ no-implicit-rule 
before-depend local \ clean "wpifw.c" wpifw.fwo optional wpifw \ dependency "wpi.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "wpifw.fwo" wpi.fw optional wpifw \ dependency "$S/contrib/dev/wpi/iwlwifi-3945-15.32.2.9.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "wpi.fw" dev/xe/if_xe.c optional xe dev/xe/if_xe_pccard.c optional xe pccard dev/xen/balloon/balloon.c optional xen | xenhvm dev/xen/blkfront/blkfront.c optional xen | xenhvm dev/xen/blkback/blkback.c optional xen | xenhvm dev/xen/console/console.c optional xen dev/xen/console/xencons_ring.c optional xen dev/xen/control/control.c optional xen | xenhvm dev/xen/netback/netback.c optional xen | xenhvm dev/xen/netfront/netfront.c optional xen | xenhvm dev/xen/xenpci/xenpci.c optional xenpci -dev/xen/xenpci/evtchn.c optional xenpci dev/xl/if_xl.c optional xl pci dev/xl/xlphy.c optional xl pci fs/deadfs/dead_vnops.c standard fs/devfs/devfs_devs.c standard fs/devfs/devfs_dir.c standard fs/devfs/devfs_rule.c standard fs/devfs/devfs_vfsops.c standard fs/devfs/devfs_vnops.c standard fs/fdescfs/fdesc_vfsops.c optional fdescfs fs/fdescfs/fdesc_vnops.c optional fdescfs fs/fifofs/fifo_vnops.c standard fs/fuse/fuse_device.c optional fuse fs/fuse/fuse_file.c optional fuse fs/fuse/fuse_internal.c optional fuse fs/fuse/fuse_io.c optional fuse fs/fuse/fuse_ipc.c optional fuse fs/fuse/fuse_main.c optional fuse fs/fuse/fuse_node.c optional fuse fs/fuse/fuse_vfsops.c optional fuse fs/fuse/fuse_vnops.c optional fuse fs/msdosfs/msdosfs_conv.c optional msdosfs fs/msdosfs/msdosfs_denode.c optional msdosfs fs/msdosfs/msdosfs_fat.c optional msdosfs fs/msdosfs/msdosfs_fileno.c optional msdosfs fs/msdosfs/msdosfs_iconv.c optional msdosfs_iconv fs/msdosfs/msdosfs_lookup.c optional msdosfs fs/msdosfs/msdosfs_vfsops.c optional msdosfs fs/msdosfs/msdosfs_vnops.c optional msdosfs fs/nandfs/bmap.c optional nandfs fs/nandfs/nandfs_alloc.c optional nandfs fs/nandfs/nandfs_bmap.c optional nandfs fs/nandfs/nandfs_buffer.c optional nandfs fs/nandfs/nandfs_cleaner.c optional nandfs fs/nandfs/nandfs_cpfile.c optional nandfs fs/nandfs/nandfs_dat.c optional nandfs fs/nandfs/nandfs_dir.c optional nandfs fs/nandfs/nandfs_ifile.c optional nandfs fs/nandfs/nandfs_segment.c optional nandfs fs/nandfs/nandfs_subr.c optional nandfs fs/nandfs/nandfs_sufile.c optional nandfs fs/nandfs/nandfs_vfsops.c optional nandfs fs/nandfs/nandfs_vnops.c optional nandfs fs/nfs/nfs_commonkrpc.c optional nfscl | nfsd fs/nfs/nfs_commonsubs.c optional nfscl | nfsd fs/nfs/nfs_commonport.c optional nfscl | nfsd fs/nfs/nfs_commonacl.c optional nfscl | nfsd fs/nfsclient/nfs_clcomsubs.c optional nfscl fs/nfsclient/nfs_clsubs.c optional nfscl fs/nfsclient/nfs_clstate.c optional nfscl fs/nfsclient/nfs_clkrpc.c optional nfscl fs/nfsclient/nfs_clrpcops.c optional nfscl fs/nfsclient/nfs_clvnops.c optional nfscl fs/nfsclient/nfs_clnode.c optional nfscl fs/nfsclient/nfs_clvfsops.c optional nfscl fs/nfsclient/nfs_clport.c optional nfscl fs/nfsclient/nfs_clbio.c optional nfscl fs/nfsclient/nfs_clnfsiod.c optional nfscl fs/nfsserver/nfs_fha_new.c optional nfsd inet fs/nfsserver/nfs_nfsdsocket.c optional nfsd inet fs/nfsserver/nfs_nfsdsubs.c optional nfsd inet fs/nfsserver/nfs_nfsdstate.c optional nfsd inet fs/nfsserver/nfs_nfsdkrpc.c optional nfsd inet fs/nfsserver/nfs_nfsdserv.c optional nfsd inet fs/nfsserver/nfs_nfsdport.c optional nfsd inet fs/nfsserver/nfs_nfsdcache.c optional nfsd inet fs/nullfs/null_subr.c optional nullfs fs/nullfs/null_vfsops.c optional nullfs 
fs/nullfs/null_vnops.c optional nullfs fs/procfs/procfs.c optional procfs fs/procfs/procfs_ctl.c optional procfs fs/procfs/procfs_dbregs.c optional procfs fs/procfs/procfs_fpregs.c optional procfs fs/procfs/procfs_ioctl.c optional procfs fs/procfs/procfs_map.c optional procfs fs/procfs/procfs_mem.c optional procfs fs/procfs/procfs_note.c optional procfs fs/procfs/procfs_osrel.c optional procfs fs/procfs/procfs_regs.c optional procfs fs/procfs/procfs_rlimit.c optional procfs fs/procfs/procfs_status.c optional procfs fs/procfs/procfs_type.c optional procfs fs/pseudofs/pseudofs.c optional pseudofs fs/pseudofs/pseudofs_fileno.c optional pseudofs fs/pseudofs/pseudofs_vncache.c optional pseudofs fs/pseudofs/pseudofs_vnops.c optional pseudofs fs/smbfs/smbfs_io.c optional smbfs fs/smbfs/smbfs_node.c optional smbfs fs/smbfs/smbfs_smb.c optional smbfs fs/smbfs/smbfs_subr.c optional smbfs fs/smbfs/smbfs_vfsops.c optional smbfs fs/smbfs/smbfs_vnops.c optional smbfs fs/udf/osta.c optional udf fs/udf/udf_iconv.c optional udf_iconv fs/udf/udf_vfsops.c optional udf fs/udf/udf_vnops.c optional udf fs/unionfs/union_subr.c optional unionfs fs/unionfs/union_vfsops.c optional unionfs fs/unionfs/union_vnops.c optional unionfs fs/tmpfs/tmpfs_vnops.c optional tmpfs fs/tmpfs/tmpfs_fifoops.c optional tmpfs fs/tmpfs/tmpfs_vfsops.c optional tmpfs fs/tmpfs/tmpfs_subr.c optional tmpfs gdb/gdb_cons.c optional gdb gdb/gdb_main.c optional gdb gdb/gdb_packet.c optional gdb geom/bde/g_bde.c optional geom_bde geom/bde/g_bde_crypt.c optional geom_bde geom/bde/g_bde_lock.c optional geom_bde geom/bde/g_bde_work.c optional geom_bde geom/cache/g_cache.c optional geom_cache geom/concat/g_concat.c optional geom_concat geom/eli/g_eli.c optional geom_eli geom/eli/g_eli_crypto.c optional geom_eli geom/eli/g_eli_ctl.c optional geom_eli geom/eli/g_eli_integrity.c optional geom_eli geom/eli/g_eli_key.c optional geom_eli geom/eli/g_eli_key_cache.c optional geom_eli geom/eli/g_eli_privacy.c optional geom_eli geom/eli/pkcs5v2.c optional geom_eli geom/gate/g_gate.c optional geom_gate geom/geom_aes.c optional geom_aes geom/geom_bsd.c optional geom_bsd geom/geom_bsd_enc.c optional geom_bsd geom/geom_ccd.c optional ccd | geom_ccd geom/geom_ctl.c standard geom/geom_dev.c standard geom/geom_disk.c standard geom/geom_dump.c standard geom/geom_event.c standard geom/geom_fox.c optional geom_fox geom/geom_flashmap.c optional fdt cfi | fdt nand geom/geom_io.c standard geom/geom_kern.c standard geom/geom_map.c optional geom_map geom/geom_mbr.c optional geom_mbr geom/geom_mbr_enc.c optional geom_mbr geom/geom_pc98.c optional geom_pc98 geom/geom_pc98_enc.c optional geom_pc98 geom/geom_redboot.c optional geom_redboot geom/geom_slice.c standard geom/geom_subr.c standard geom/geom_sunlabel.c optional geom_sunlabel geom/geom_sunlabel_enc.c optional geom_sunlabel geom/geom_vfs.c standard geom/geom_vol_ffs.c optional geom_vol geom/journal/g_journal.c optional geom_journal geom/journal/g_journal_ufs.c optional geom_journal geom/label/g_label.c optional geom_label geom/label/g_label_ext2fs.c optional geom_label geom/label/g_label_iso9660.c optional geom_label geom/label/g_label_msdosfs.c optional geom_label geom/label/g_label_ntfs.c optional geom_label geom/label/g_label_reiserfs.c optional geom_label geom/label/g_label_ufs.c optional geom_label geom/label/g_label_gpt.c optional geom_label geom/label/g_label_disk_ident.c optional geom_label geom/linux_lvm/g_linux_lvm.c optional geom_linux_lvm geom/mirror/g_mirror.c optional geom_mirror geom/mirror/g_mirror_ctl.c 
optional geom_mirror geom/mountver/g_mountver.c optional geom_mountver geom/multipath/g_multipath.c optional geom_multipath geom/nop/g_nop.c optional geom_nop geom/part/g_part.c standard geom/part/g_part_if.m standard geom/part/g_part_apm.c optional geom_part_apm geom/part/g_part_bsd.c optional geom_part_bsd geom/part/g_part_ebr.c optional geom_part_ebr geom/part/g_part_gpt.c optional geom_part_gpt geom/part/g_part_ldm.c optional geom_part_ldm geom/part/g_part_mbr.c optional geom_part_mbr geom/part/g_part_pc98.c optional geom_part_pc98 geom/part/g_part_vtoc8.c optional geom_part_vtoc8 geom/raid/g_raid.c optional geom_raid geom/raid/g_raid_ctl.c optional geom_raid geom/raid/g_raid_md_if.m optional geom_raid geom/raid/g_raid_tr_if.m optional geom_raid geom/raid/md_ddf.c optional geom_raid geom/raid/md_intel.c optional geom_raid geom/raid/md_jmicron.c optional geom_raid geom/raid/md_nvidia.c optional geom_raid geom/raid/md_promise.c optional geom_raid geom/raid/md_sii.c optional geom_raid geom/raid/tr_concat.c optional geom_raid geom/raid/tr_raid0.c optional geom_raid geom/raid/tr_raid1.c optional geom_raid geom/raid/tr_raid1e.c optional geom_raid geom/raid/tr_raid5.c optional geom_raid geom/raid3/g_raid3.c optional geom_raid3 geom/raid3/g_raid3_ctl.c optional geom_raid3 geom/shsec/g_shsec.c optional geom_shsec geom/stripe/g_stripe.c optional geom_stripe geom/uncompress/g_uncompress.c optional geom_uncompress contrib/xz-embedded/freebsd/xz_malloc.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_crc32.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_bcj.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_lzma2.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_stream.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" geom/uzip/g_uzip.c optional geom_uzip geom/virstor/binstream.c optional geom_virstor geom/virstor/g_virstor.c optional geom_virstor geom/virstor/g_virstor_md.c optional geom_virstor geom/zero/g_zero.c optional geom_zero fs/ext2fs/ext2_alloc.c optional ext2fs fs/ext2fs/ext2_balloc.c optional ext2fs fs/ext2fs/ext2_bmap.c optional ext2fs fs/ext2fs/ext2_extents.c optional ext2fs fs/ext2fs/ext2_inode.c optional ext2fs fs/ext2fs/ext2_inode_cnv.c optional ext2fs fs/ext2fs/ext2_hash.c optional ext2fs fs/ext2fs/ext2_htree.c optional ext2fs fs/ext2fs/ext2_lookup.c optional ext2fs fs/ext2fs/ext2_subr.c optional ext2fs fs/ext2fs/ext2_vfsops.c optional ext2fs fs/ext2fs/ext2_vnops.c optional ext2fs gnu/fs/reiserfs/reiserfs_hashes.c optional reiserfs \ warning "kernel contains GPL contaminated ReiserFS filesystem" gnu/fs/reiserfs/reiserfs_inode.c optional reiserfs gnu/fs/reiserfs/reiserfs_item_ops.c optional reiserfs gnu/fs/reiserfs/reiserfs_namei.c 
optional reiserfs gnu/fs/reiserfs/reiserfs_prints.c optional reiserfs gnu/fs/reiserfs/reiserfs_stree.c optional reiserfs gnu/fs/reiserfs/reiserfs_vfsops.c optional reiserfs gnu/fs/reiserfs/reiserfs_vnops.c optional reiserfs # isa/isa_if.m standard isa/isa_common.c optional isa isa/isahint.c optional isa isa/pnp.c optional isa isapnp isa/pnpparse.c optional isa isapnp fs/cd9660/cd9660_bmap.c optional cd9660 fs/cd9660/cd9660_lookup.c optional cd9660 fs/cd9660/cd9660_node.c optional cd9660 fs/cd9660/cd9660_rrip.c optional cd9660 fs/cd9660/cd9660_util.c optional cd9660 fs/cd9660/cd9660_vfsops.c optional cd9660 fs/cd9660/cd9660_vnops.c optional cd9660 fs/cd9660/cd9660_iconv.c optional cd9660_iconv kern/bus_if.m standard kern/clock_if.m standard kern/cpufreq_if.m standard kern/device_if.m standard kern/imgact_elf.c standard kern/imgact_elf32.c optional compat_freebsd32 kern/imgact_shell.c standard kern/inflate.c optional gzip kern/init_main.c standard kern/init_sysent.c standard kern/ksched.c optional _kposix_priority_scheduling kern/kern_acct.c standard kern/kern_alq.c optional alq kern/kern_clock.c standard kern/kern_condvar.c standard kern/kern_conf.c standard kern/kern_cons.c standard kern/kern_cpu.c standard kern/kern_cpuset.c standard kern/kern_context.c standard kern/kern_descrip.c standard kern/kern_dtrace.c optional kdtrace_hooks kern/kern_environment.c standard kern/kern_et.c standard kern/kern_event.c standard kern/kern_exec.c standard kern/kern_exit.c standard kern/kern_fail.c standard kern/kern_ffclock.c standard kern/kern_fork.c standard kern/kern_gzio.c optional gzio kern/kern_hhook.c standard kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard kern/kern_khelp.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr kern/kern_ktrace.c standard kern/kern_linker.c standard kern/kern_lock.c standard kern/kern_lockf.c standard kern/kern_lockstat.c optional kdtrace_hooks kern/kern_loginclass.c standard kern/kern_malloc.c standard kern/kern_mbuf.c standard kern/kern_mib.c standard kern/kern_module.c standard kern/kern_mtxpool.c standard kern/kern_mutex.c standard kern/kern_ntptime.c standard kern/kern_osd.c standard kern/kern_physio.c standard kern/kern_pmc.c standard kern/kern_poll.c optional device_polling kern/kern_priv.c standard kern/kern_proc.c standard kern/kern_prot.c standard kern/kern_racct.c standard kern/kern_rangelock.c standard kern/kern_rctl.c standard kern/kern_resource.c standard kern/kern_rmlock.c standard kern/kern_rwlock.c standard kern/kern_sdt.c optional kdtrace_hooks kern/kern_sema.c standard kern/kern_sharedpage.c standard kern/kern_shutdown.c standard kern/kern_sig.c standard kern/kern_switch.c standard kern/kern_sx.c standard kern/kern_synch.c standard kern/kern_syscalls.c standard kern/kern_sysctl.c standard kern/kern_tc.c standard kern/kern_thr.c standard kern/kern_thread.c standard kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_umtx.c standard kern/kern_uuid.c standard kern/kern_xxx.c standard kern/link_elf.c standard kern/linker_if.m standard kern/md4c.c optional netsmb kern/md5c.c standard kern/p1003_1b.c standard kern/posix4_mib.c standard kern/sched_4bsd.c optional sched_4bsd kern/sched_ule.c optional sched_ule kern/serdev_if.m standard kern/stack_protector.c standard \ compile-with "${NORMAL_C:N-fstack-protector*}" kern/subr_acl_nfs4.c optional ufs_acl | zfs kern/subr_acl_posix1e.c optional ufs_acl kern/subr_autoconf.c standard kern/subr_blist.c standard kern/subr_bus.c standard kern/subr_bus_dma.c 
standard kern/subr_bufring.c standard kern/subr_clock.c standard kern/subr_counter.c standard kern/subr_devstat.c standard kern/subr_disk.c standard kern/subr_eventhandler.c standard kern/subr_fattime.c standard kern/subr_firmware.c optional firmware kern/subr_hash.c standard kern/subr_hints.c standard kern/subr_kdb.c standard kern/subr_kobj.c standard kern/subr_lock.c standard kern/subr_log.c standard kern/subr_mbpool.c optional libmbpool kern/subr_mchain.c optional libmchain kern/subr_module.c standard kern/subr_msgbuf.c standard kern/subr_param.c standard kern/subr_pcpu.c standard kern/subr_pctrie.c standard kern/subr_power.c standard kern/subr_prf.c standard kern/subr_prof.c standard kern/subr_rman.c standard kern/subr_rtc.c standard kern/subr_sbuf.c standard kern/subr_scanf.c standard kern/subr_sglist.c standard kern/subr_sleepqueue.c standard kern/subr_smp.c standard kern/subr_stack.c optional ddb | stack | ktr kern/subr_taskqueue.c standard kern/subr_trap.c standard kern/subr_turnstile.c standard kern/subr_uio.c standard kern/subr_unit.c standard kern/subr_vmem.c standard kern/subr_witness.c optional witness kern/sys_capability.c standard kern/sys_generic.c standard kern/sys_pipe.c standard kern/sys_procdesc.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/syscalls.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg kern/sysv_sem.c optional sysvsem kern/sysv_shm.c optional sysvshm kern/tty.c standard kern/tty_compat.c optional compat_43tty kern/tty_info.c standard kern/tty_inq.c standard kern/tty_outq.c standard kern/tty_pts.c standard kern/tty_tty.c standard kern/tty_ttydisc.c standard kern/uipc_accf.c optional inet kern/uipc_cow.c optional socket_send_cow kern/uipc_debug.c optional ddb kern/uipc_domain.c standard kern/uipc_mbuf.c standard kern/uipc_mbuf2.c standard kern/uipc_mqueue.c optional p1003_1b_mqueue kern/uipc_sem.c optional p1003_1b_semaphores kern/uipc_shm.c standard kern/uipc_sockbuf.c standard kern/uipc_socket.c standard kern/uipc_syscalls.c standard kern/uipc_usrreq.c standard kern/vfs_acl.c standard kern/vfs_aio.c optional vfs_aio kern/vfs_bio.c standard kern/vfs_cache.c standard kern/vfs_cluster.c standard kern/vfs_default.c standard kern/vfs_export.c standard kern/vfs_extattr.c standard kern/vfs_hash.c standard kern/vfs_init.c standard kern/vfs_lookup.c standard kern/vfs_mount.c standard kern/vfs_mountroot.c standard kern/vfs_subr.c standard kern/vfs_syscalls.c standard kern/vfs_vnops.c standard # # Kernel GSS-API # gssd.h optional kgssapi \ dependency "$S/kgssapi/gssd.x" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -hM $S/kgssapi/gssd.x | grep -v pthread.h > gssd.h" \ no-obj no-implicit-rule before-depend local \ clean "gssd.h" gssd_xdr.c optional kgssapi \ dependency "$S/kgssapi/gssd.x gssd.h" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -c $S/kgssapi/gssd.x -o gssd_xdr.c" \ no-implicit-rule before-depend local \ clean "gssd_xdr.c" gssd_clnt.c optional kgssapi \ dependency "$S/kgssapi/gssd.x gssd.h" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -lM $S/kgssapi/gssd.x | grep -v string.h > gssd_clnt.c" \ no-implicit-rule before-depend local \ clean "gssd_clnt.c" kgssapi/gss_accept_sec_context.c optional kgssapi kgssapi/gss_add_oid_set_member.c optional kgssapi kgssapi/gss_acquire_cred.c optional kgssapi kgssapi/gss_canonicalize_name.c optional kgssapi kgssapi/gss_create_empty_oid_set.c optional kgssapi kgssapi/gss_delete_sec_context.c optional kgssapi kgssapi/gss_display_status.c optional kgssapi kgssapi/gss_export_name.c optional 
kgssapi kgssapi/gss_get_mic.c optional kgssapi kgssapi/gss_init_sec_context.c optional kgssapi kgssapi/gss_impl.c optional kgssapi kgssapi/gss_import_name.c optional kgssapi kgssapi/gss_names.c optional kgssapi kgssapi/gss_pname_to_uid.c optional kgssapi kgssapi/gss_release_buffer.c optional kgssapi kgssapi/gss_release_cred.c optional kgssapi kgssapi/gss_release_name.c optional kgssapi kgssapi/gss_release_oid_set.c optional kgssapi kgssapi/gss_set_cred_option.c optional kgssapi kgssapi/gss_test_oid_set_member.c optional kgssapi kgssapi/gss_unwrap.c optional kgssapi kgssapi/gss_verify_mic.c optional kgssapi kgssapi/gss_wrap.c optional kgssapi kgssapi/gss_wrap_size_limit.c optional kgssapi kgssapi/gssd_prot.c optional kgssapi kgssapi/krb5/krb5_mech.c optional kgssapi kgssapi/krb5/kcrypto.c optional kgssapi kgssapi/krb5/kcrypto_aes.c optional kgssapi kgssapi/krb5/kcrypto_arcfour.c optional kgssapi kgssapi/krb5/kcrypto_des.c optional kgssapi kgssapi/krb5/kcrypto_des3.c optional kgssapi kgssapi/kgss_if.m optional kgssapi kgssapi/gsstest.c optional kgssapi_debug # These files in libkern/ are those needed by all architectures. Some # of the files in libkern/ are only needed on some architectures, e.g., # libkern/divdi3.c is needed by i386 but not alpha. Also, some of these # routines may be optimized for a particular platform. In either case, # the file should be moved to conf/files.<arch> from here. # libkern/arc4random.c standard libkern/bcd.c standard libkern/bsearch.c standard libkern/crc32.c standard libkern/flsll.c standard libkern/fnmatch.c standard libkern/iconv.c optional libiconv libkern/iconv_converter_if.m optional libiconv libkern/iconv_ucs.c optional libiconv libkern/iconv_xlat.c optional libiconv libkern/iconv_xlat16.c optional libiconv libkern/inet_aton.c standard libkern/inet_ntoa.c standard libkern/inet_ntop.c standard libkern/inet_pton.c standard libkern/jenkins_hash.c standard libkern/mcount.c optional profiling-routine libkern/memcchr.c standard libkern/memchr.c optional fdt libkern/memcmp.c standard libkern/qsort.c standard libkern/qsort_r.c standard libkern/random.c standard libkern/scanc.c standard libkern/strcasecmp.c standard libkern/strcat.c standard libkern/strchr.c standard libkern/strcmp.c standard libkern/strcpy.c standard libkern/strcspn.c standard libkern/strdup.c standard libkern/strlcat.c standard libkern/strlcpy.c standard libkern/strlen.c standard libkern/strncmp.c standard libkern/strncpy.c standard libkern/strnlen.c standard libkern/strrchr.c standard libkern/strsep.c standard libkern/strspn.c standard libkern/strstr.c standard libkern/strtol.c standard libkern/strtoq.c standard libkern/strtoul.c standard libkern/strtouq.c standard libkern/strvalid.c standard net/bpf.c standard net/bpf_buffer.c optional bpf net/bpf_jitter.c optional bpf_jitter net/bpf_filter.c optional bpf | netgraph_bpf net/bpf_zerocopy.c optional bpf net/bridgestp.c optional bridge | if_bridge net/flowtable.c optional flowtable inet | flowtable inet6 net/ieee8023ad_lacp.c optional lagg net/if.c standard net/if_arcsubr.c optional arcnet net/if_atmsubr.c optional atm net/if_bridge.c optional bridge inet | if_bridge inet net/if_clone.c standard net/if_dead.c standard net/if_debug.c optional ddb net/if_disc.c optional disc net/if_edsc.c optional edsc net/if_ef.c optional ef net/if_enc.c optional enc ipsec inet | enc ipsec inet6 net/if_epair.c optional epair net/if_ethersubr.c optional ether net/if_faith.c optional faith net/if_fddisubr.c optional fddi net/if_fwsubr.c optional fwip net/if_gif.c
optional gif | netgraph_gif net/if_gre.c optional gre inet net/if_iso88025subr.c optional token net/if_lagg.c optional lagg net/if_loop.c optional loop net/if_llatbl.c standard net/if_media.c standard net/if_mib.c standard net/if_spppfr.c optional sppp | netgraph_sppp net/if_spppsubr.c optional sppp | netgraph_sppp net/if_stf.c optional stf inet inet6 net/if_tun.c optional tun net/if_tap.c optional tap net/if_vlan.c optional vlan net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression net/netisr.c standard net/pfil.c optional ether | inet net/radix.c standard net/radix_mpath.c standard net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard net/rtsock.c standard net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp net/vnet.c optional vimage net/zlib.c optional crypto | geom_uzip | ipsec | \ mxge | netgraph_deflate | \ ddb_ctf | gzio | geom_uncompress net80211/ieee80211.c optional wlan net80211/ieee80211_acl.c optional wlan wlan_acl net80211/ieee80211_action.c optional wlan net80211/ieee80211_ageq.c optional wlan net80211/ieee80211_adhoc.c optional wlan net80211/ieee80211_ageq.c optional wlan net80211/ieee80211_amrr.c optional wlan | wlan_amrr net80211/ieee80211_crypto.c optional wlan net80211/ieee80211_crypto_ccmp.c optional wlan wlan_ccmp net80211/ieee80211_crypto_none.c optional wlan net80211/ieee80211_crypto_tkip.c optional wlan wlan_tkip net80211/ieee80211_crypto_wep.c optional wlan wlan_wep net80211/ieee80211_ddb.c optional wlan ddb net80211/ieee80211_dfs.c optional wlan net80211/ieee80211_freebsd.c optional wlan net80211/ieee80211_hostap.c optional wlan net80211/ieee80211_ht.c optional wlan net80211/ieee80211_hwmp.c optional wlan ieee80211_support_mesh net80211/ieee80211_input.c optional wlan net80211/ieee80211_ioctl.c optional wlan net80211/ieee80211_mesh.c optional wlan ieee80211_support_mesh net80211/ieee80211_monitor.c optional wlan net80211/ieee80211_node.c optional wlan net80211/ieee80211_output.c optional wlan net80211/ieee80211_phy.c optional wlan net80211/ieee80211_power.c optional wlan net80211/ieee80211_proto.c optional wlan net80211/ieee80211_radiotap.c optional wlan net80211/ieee80211_ratectl.c optional wlan net80211/ieee80211_ratectl_none.c optional wlan net80211/ieee80211_regdomain.c optional wlan net80211/ieee80211_rssadapt.c optional wlan wlan_rssadapt net80211/ieee80211_scan.c optional wlan net80211/ieee80211_scan_sta.c optional wlan net80211/ieee80211_sta.c optional wlan net80211/ieee80211_superg.c optional wlan ieee80211_support_superg net80211/ieee80211_tdma.c optional wlan ieee80211_support_tdma net80211/ieee80211_wds.c optional wlan net80211/ieee80211_xauth.c optional wlan wlan_xauth net80211/ieee80211_alq.c optional wlan ieee80211_alq netatalk/aarp.c optional netatalk netatalk/at_control.c optional netatalk netatalk/at_proto.c optional netatalk netatalk/at_rmx.c optional netatalk netatalk/ddp_input.c optional netatalk netatalk/ddp_output.c optional netatalk netatalk/ddp_pcb.c optional netatalk netatalk/ddp_usrreq.c optional netatalk netgraph/atm/ccatm/ng_ccatm.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/ng_atm.c optional ngatm_atm netgraph/atm/ngatmbase.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/sscfu/ng_sscfu.c optional ngatm_sscfu \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/sscop/ng_sscop.c optional ngatm_sscop \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/uni/ng_uni.c optional 
ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/bluetooth/common/ng_bluetooth.c optional netgraph_bluetooth netgraph/bluetooth/drivers/bt3c/ng_bt3c_pccard.c optional netgraph_bluetooth_bt3c netgraph/bluetooth/drivers/h4/ng_h4.c optional netgraph_bluetooth_h4 netgraph/bluetooth/drivers/ubt/ng_ubt.c optional netgraph_bluetooth_ubt usb netgraph/bluetooth/drivers/ubtbcmfw/ubtbcmfw.c optional netgraph_bluetooth_ubtbcmfw usb netgraph/bluetooth/hci/ng_hci_cmds.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_evnt.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_main.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_misc.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_ulpi.c optional netgraph_bluetooth_hci netgraph/bluetooth/l2cap/ng_l2cap_cmds.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_evnt.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_llpi.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_main.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_misc.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/socket/ng_btsocket.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_hci_raw.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_l2cap.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_rfcomm.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_sco.c optional netgraph_bluetooth_socket netgraph/netflow/netflow.c optional netgraph_netflow netgraph/netflow/netflow_v9.c optional netgraph_netflow netgraph/netflow/ng_netflow.c optional netgraph_netflow netgraph/ng_UI.c optional netgraph_UI netgraph/ng_async.c optional netgraph_async netgraph/ng_atmllc.c optional netgraph_atmllc netgraph/ng_base.c optional netgraph netgraph/ng_bpf.c optional netgraph_bpf netgraph/ng_bridge.c optional netgraph_bridge netgraph/ng_car.c optional netgraph_car netgraph/ng_cisco.c optional netgraph_cisco netgraph/ng_deflate.c optional netgraph_deflate netgraph/ng_device.c optional netgraph_device netgraph/ng_echo.c optional netgraph_echo netgraph/ng_eiface.c optional netgraph_eiface netgraph/ng_ether.c optional netgraph_ether netgraph/ng_ether_echo.c optional netgraph_ether_echo netgraph/ng_fec.c optional netgraph_fec netgraph/ng_frame_relay.c optional netgraph_frame_relay netgraph/ng_gif.c optional netgraph_gif netgraph/ng_gif_demux.c optional netgraph_gif_demux netgraph/ng_hole.c optional netgraph_hole netgraph/ng_iface.c optional netgraph_iface netgraph/ng_ip_input.c optional netgraph_ip_input netgraph/ng_ipfw.c optional netgraph_ipfw inet ipfirewall netgraph/ng_ksocket.c optional netgraph_ksocket netgraph/ng_l2tp.c optional netgraph_l2tp netgraph/ng_lmi.c optional netgraph_lmi netgraph/ng_mppc.c optional netgraph_mppc_compression | \ netgraph_mppc_encryption netgraph/ng_nat.c optional netgraph_nat inet libalias netgraph/ng_one2many.c optional netgraph_one2many netgraph/ng_parse.c optional netgraph netgraph/ng_patch.c optional netgraph_patch netgraph/ng_pipe.c optional netgraph_pipe netgraph/ng_ppp.c optional netgraph_ppp netgraph/ng_pppoe.c optional netgraph_pppoe netgraph/ng_pptpgre.c optional netgraph_pptpgre netgraph/ng_pred1.c optional netgraph_pred1 netgraph/ng_rfc1490.c optional netgraph_rfc1490 
netgraph/ng_socket.c optional netgraph_socket netgraph/ng_split.c optional netgraph_split netgraph/ng_sppp.c optional netgraph_sppp netgraph/ng_tag.c optional netgraph_tag netgraph/ng_tcpmss.c optional netgraph_tcpmss netgraph/ng_tee.c optional netgraph_tee netgraph/ng_tty.c optional netgraph_tty netgraph/ng_vjc.c optional netgraph_vjc netgraph/ng_vlan.c optional netgraph_vlan netinet/accf_data.c optional accept_filter_data inet netinet/accf_dns.c optional accept_filter_dns inet netinet/accf_http.c optional accept_filter_http inet netinet/if_atm.c optional atm netinet/if_ether.c optional inet ether netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_debug.c optional inet ddb netinet/in_kdtrace.c optional inet | inet6 netinet/ip_carp.c optional inet carp | inet6 carp netinet/in_gif.c optional gif inet | netgraph_gif inet netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet netinet/in_mcast.c optional inet netinet/in_pcb.c optional inet | inet6 netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup netinet/in_proto.c optional inet | inet6 netinet/in_rmx.c optional inet netinet/ip_divert.c optional inet ipdivert ipfirewall netinet/ip_ecn.c optional inet | inet6 netinet/ip_encap.c optional inet | inet6 netinet/ip_fastfwd.c optional inet netinet/ip_icmp.c optional inet | inet6 netinet/ip_input.c optional inet netinet/ip_ipsec.c optional inet ipsec netinet/ip_mroute.c optional mrouting inet netinet/ip_options.c optional inet netinet/ip_output.c optional inet netinet/raw_ip.c optional inet | inet6 netinet/cc/cc.c optional inet | inet6 netinet/cc/cc_newreno.c optional inet | inet6 netinet/sctp_asconf.c optional inet sctp | inet6 sctp netinet/sctp_auth.c optional inet sctp | inet6 sctp netinet/sctp_bsd_addr.c optional inet sctp | inet6 sctp netinet/sctp_cc_functions.c optional inet sctp | inet6 sctp netinet/sctp_crc32.c optional inet sctp | inet6 sctp netinet/sctp_indata.c optional inet sctp | inet6 sctp netinet/sctp_input.c optional inet sctp | inet6 sctp netinet/sctp_output.c optional inet sctp | inet6 sctp netinet/sctp_pcb.c optional inet sctp | inet6 sctp netinet/sctp_peeloff.c optional inet sctp | inet6 sctp netinet/sctp_ss_functions.c optional inet sctp | inet6 sctp netinet/sctp_sysctl.c optional inet sctp | inet6 sctp netinet/sctp_timer.c optional inet sctp | inet6 sctp netinet/sctp_usrreq.c optional inet sctp | inet6 sctp netinet/sctputil.c optional inet sctp | inet6 sctp netinet/tcp_debug.c optional tcpdebug netinet/tcp_hostcache.c optional inet | inet6 netinet/tcp_input.c optional inet | inet6 netinet/tcp_lro.c optional inet | inet6 netinet/tcp_output.c optional inet | inet6 netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6 netinet/tcp_reass.c optional inet | inet6 netinet/tcp_sack.c optional inet | inet6 netinet/tcp_subr.c optional inet | inet6 netinet/tcp_syncache.c optional inet | inet6 netinet/tcp_timer.c optional inet | inet6 netinet/tcp_timewait.c optional inet | inet6 netinet/tcp_usrreq.c optional inet | inet6 netinet/udp_usrreq.c optional inet | inet6 netinet/libalias/alias.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_db.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_mod.c optional libalias | netgraph_nat netinet/libalias/alias_proxy.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_util.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_sctp.c optional libalias inet | netgraph_nat inet netinet6/dest6.c optional inet6 netinet6/frag6.c optional inet6 
netinet6/icmp6.c optional inet6 netinet6/in6.c optional inet6 netinet6/in6_cksum.c optional inet6 netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6 netinet6/in6_ifattach.c optional inet6 netinet6/in6_mcast.c optional inet6 netinet6/in6_pcb.c optional inet6 netinet6/in6_pcbgroup.c optional inet6 pcbgroup netinet6/in6_proto.c optional inet6 netinet6/in6_rmx.c optional inet6 netinet6/in6_src.c optional inet6 netinet6/ip6_forward.c optional inet6 netinet6/ip6_id.c optional inet6 netinet6/ip6_input.c optional inet6 netinet6/ip6_mroute.c optional mrouting inet6 netinet6/ip6_output.c optional inet6 netinet6/ip6_ipsec.c optional inet6 ipsec netinet6/mld6.c optional inet6 netinet6/nd6.c optional inet6 netinet6/nd6_nbr.c optional inet6 netinet6/nd6_rtr.c optional inet6 netinet6/raw_ip6.c optional inet6 netinet6/route6.c optional inet6 netinet6/scope6.c optional inet6 netinet6/sctp6_usrreq.c optional inet6 sctp netinet6/udp6_usrreq.c optional inet6 netipsec/ipsec.c optional ipsec inet | ipsec inet6 netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 netipsec/key.c optional ipsec inet | ipsec inet6 netipsec/key_debug.c optional ipsec inet | ipsec inet6 netipsec/keysock.c optional ipsec inet | ipsec inet6 netipsec/xform_ah.c optional ipsec inet | ipsec inet6 netipsec/xform_esp.c optional ipsec inet | ipsec inet6 netipsec/xform_ipcomp.c optional ipsec inet | ipsec inet6 netipsec/xform_ipip.c optional ipsec inet | ipsec inet6 netipsec/xform_tcp.c optional ipsec inet tcp_signature | \ ipsec inet6 tcp_signature netipx/ipx.c optional ipx netipx/ipx_cksum.c optional ipx netipx/ipx_input.c optional ipx netipx/ipx_outputfl.c optional ipx netipx/ipx_pcb.c optional ipx netipx/ipx_proto.c optional ipx netipx/ipx_usrreq.c optional ipx netipx/spx_debug.c optional ipx netipx/spx_reass.c optional ipx netipx/spx_usrreq.c optional ipx netnatm/natm.c optional natm netnatm/natm_pcb.c optional natm netnatm/natm_proto.c optional natm netpfil/ipfw/dn_heap.c optional inet dummynet netpfil/ipfw/dn_sched_fifo.c optional inet dummynet netpfil/ipfw/dn_sched_prio.c optional inet dummynet netpfil/ipfw/dn_sched_qfq.c optional inet dummynet netpfil/ipfw/dn_sched_rr.c optional inet dummynet netpfil/ipfw/dn_sched_wf2q.c optional inet dummynet netpfil/ipfw/ip_dummynet.c optional inet dummynet netpfil/ipfw/ip_dn_io.c optional inet dummynet netpfil/ipfw/ip_dn_glue.c optional inet dummynet netpfil/ipfw/ip_fw2.c optional inet ipfirewall netpfil/ipfw/ip_fw_dynamic.c optional inet ipfirewall netpfil/ipfw/ip_fw_log.c optional inet ipfirewall netpfil/ipfw/ip_fw_pfil.c optional inet ipfirewall netpfil/ipfw/ip_fw_sockopt.c optional inet ipfirewall netpfil/ipfw/ip_fw_table.c optional inet ipfirewall netpfil/ipfw/ip_fw_nat.c optional inet ipfirewall_nat netpfil/pf/if_pflog.c optional pflog pf inet netpfil/pf/if_pfsync.c optional pfsync pf inet netpfil/pf/pf.c optional pf inet netpfil/pf/pf_if.c optional pf inet netpfil/pf/pf_ioctl.c optional pf inet netpfil/pf/pf_lb.c optional pf inet netpfil/pf/pf_norm.c optional pf inet netpfil/pf/pf_osfp.c optional pf inet netpfil/pf/pf_ruleset.c optional pf inet netpfil/pf/pf_table.c optional pf inet netpfil/pf/in4_cksum.c optional pf inet netsmb/smb_conn.c optional netsmb netsmb/smb_crypt.c optional netsmb netsmb/smb_dev.c optional netsmb netsmb/smb_iod.c optional netsmb netsmb/smb_rq.c optional netsmb netsmb/smb_smb.c optional netsmb netsmb/smb_subr.c optional netsmb 
netsmb/smb_trantcp.c optional netsmb netsmb/smb_usr.c optional netsmb nfs/bootp_subr.c optional bootp nfsclient | bootp nfscl nfs/krpc_subr.c optional bootp nfsclient | bootp nfscl nfs/nfs_common.c optional nfsclient | nfsserver nfs/nfs_diskless.c optional nfsclient nfs_root | nfscl nfs_root nfs/nfs_fha.c optional nfsserver | nfsd nfs/nfs_lock.c optional nfsclient | nfscl | nfslockd | nfsd nfsclient/nfs_bio.c optional nfsclient nfsclient/nfs_node.c optional nfsclient nfsclient/nfs_krpc.c optional nfsclient nfsclient/nfs_subs.c optional nfsclient nfsclient/nfs_nfsiod.c optional nfsclient nfsclient/nfs_vfsops.c optional nfsclient nfsclient/nfs_vnops.c optional nfsclient nfsserver/nfs_fha_old.c optional nfsserver nfsserver/nfs_serv.c optional nfsserver nfsserver/nfs_srvkrpc.c optional nfsserver nfsserver/nfs_srvsubs.c optional nfsserver nfs/nfs_nfssvc.c optional nfsserver | nfscl | nfsd nlm/nlm_advlock.c optional nfslockd | nfsd nlm/nlm_prot_clnt.c optional nfslockd | nfsd nlm/nlm_prot_impl.c optional nfslockd | nfsd nlm/nlm_prot_server.c optional nfslockd | nfsd nlm/nlm_prot_svc.c optional nfslockd | nfsd nlm/nlm_prot_xdr.c optional nfslockd | nfsd nlm/sm_inter_xdr.c optional nfslockd | nfsd # OpenFabrics Enterprise Distribution (Infiniband) ofed/include/linux/linux_compat.c optional ofed \ no-depend compile-with "${OFED_C}" ofed/include/linux/linux_idr.c optional ofed \ no-depend compile-with "${OFED_C}" ofed/include/linux/linux_radix.c optional ofed \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/core/addr.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/agent.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cache.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" # XXX Mad.c must be ordered before cm.c for sysinit sets to occur in # the correct order. 
ofed/drivers/infiniband/core/mad.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cma.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/device.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/fmr_pool.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/iwcm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/local_sa.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/mad_rmpp.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/multicast.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/notice.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/packer.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sa_query.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/smi.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sysfs.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ucm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ucma.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ud_header.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/umem.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/user_mad.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_cmd.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_main.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_marshall.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/verbs.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" #ofed/drivers/infiniband/ulp/ipoib/ipoib_fs.c optional ipoib \ # no-depend \ # compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c optional ipoib \ no-depend \ 
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" #ofed/drivers/infiniband/ulp/ipoib/ipoib_vlan.c optional ipoib \ # no-depend \ # compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_main.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_rx.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_cma.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_tx.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/hw/mlx4/ah.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/cq.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/doorbell.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mad.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/main.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mr.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/qp.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/srq.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/wc.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/net/mlx4/alloc.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/catas.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/cmd.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/cq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/eq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/fw.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/icm.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/intf.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" 
\ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/main.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/mcg.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/mr.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/pd.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/port.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/profile.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/qp.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/reset.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/sense.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/srq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/xrcd.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_cq.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_frag.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_main.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_netdev.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_port.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_resources.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_rx.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_tx.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/infiniband/hw/mthca/mthca_allocator.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_av.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_catas.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cmd.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_eq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mad.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_main.c optional mthca \ no-depend compile-with "${OFED_C}" 
ofed/drivers/infiniband/hw/mthca/mthca_mcg.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_memfree.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mr.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_pd.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_profile.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_provider.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_qp.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_reset.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_srq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_uar.c optional mthca \ no-depend compile-with "${OFED_C}" # crypto support opencrypto/cast.c optional crypto | ipsec opencrypto/criov.c optional crypto opencrypto/crypto.c optional crypto opencrypto/cryptodev.c optional cryptodev opencrypto/cryptodev_if.m optional crypto opencrypto/cryptosoft.c optional crypto opencrypto/deflate.c optional crypto opencrypto/rmd160.c optional crypto | ipsec opencrypto/skipjack.c optional crypto opencrypto/xform.c optional crypto pci/alpm.c optional alpm pci pci/amdpm.c optional amdpm pci | nfpm pci pci/amdsmb.c optional amdsmb pci pci/if_rl.c optional rl pci pci/intpm.c optional intpm pci pci/ncr.c optional ncr pci pci/nfsmb.c optional nfsmb pci pci/viapm.c optional viapm pci rpc/auth_none.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/auth_unix.c optional krpc | nfslockd | nfsclient | nfscl | nfsd rpc/authunix_prot.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/clnt_dg.c optional krpc | nfslockd | nfsclient | nfscl | nfsd rpc/clnt_rc.c optional krpc | nfslockd | nfsclient | nfscl | nfsd rpc/clnt_vc.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/getnetconfig.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/replay.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpc_callmsg.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpc_generic.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpc_prot.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpcb_clnt.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpcb_prot.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/svc.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/svc_auth.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/svc_auth_unix.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/svc_dg.c optional krpc | nfslockd | nfsserver | nfscl | nfsd rpc/svc_generic.c optional krpc | nfslockd | nfsserver | nfscl | nfsd rpc/svc_vc.c optional krpc | nfslockd | nfsserver | nfscl | nfsd rpc/rpcsec_gss/rpcsec_gss.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_conf.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_misc.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_prot.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/svc_rpcsec_gss.c optional krpc kgssapi | 
nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi security/audit/audit.c optional audit security/audit/audit_arg.c optional audit security/audit/audit_bsm.c optional audit security/audit/audit_bsm_klib.c optional audit security/audit/audit_pipe.c optional audit security/audit/audit_syscalls.c standard security/audit/audit_trigger.c optional audit security/audit/audit_worker.c optional audit security/audit/bsm_domain.c optional audit security/audit/bsm_errno.c optional audit security/audit/bsm_fcntl.c optional audit security/audit/bsm_socket_type.c optional audit security/audit/bsm_token.c optional audit security/mac/mac_atalk.c optional mac netatalk security/mac/mac_audit.c optional mac audit security/mac/mac_cred.c optional mac security/mac/mac_framework.c optional mac security/mac/mac_inet.c optional mac inet | mac inet6 security/mac/mac_inet6.c optional mac inet6 security/mac/mac_label.c optional mac security/mac/mac_net.c optional mac security/mac/mac_pipe.c optional mac security/mac/mac_posix_sem.c optional mac security/mac/mac_posix_shm.c optional mac security/mac/mac_priv.c optional mac security/mac/mac_process.c optional mac security/mac/mac_socket.c optional mac security/mac/mac_syscalls.c standard security/mac/mac_system.c optional mac security/mac/mac_sysv_msg.c optional mac security/mac/mac_sysv_sem.c optional mac security/mac/mac_sysv_shm.c optional mac security/mac/mac_vfs.c optional mac security/mac_biba/mac_biba.c optional mac_biba security/mac_bsdextended/mac_bsdextended.c optional mac_bsdextended security/mac_bsdextended/ugidfw_system.c optional mac_bsdextended security/mac_bsdextended/ugidfw_vnode.c optional mac_bsdextended security/mac_ifoff/mac_ifoff.c optional mac_ifoff security/mac_lomac/mac_lomac.c optional mac_lomac security/mac_mls/mac_mls.c optional mac_mls security/mac_none/mac_none.c optional mac_none security/mac_partition/mac_partition.c optional mac_partition security/mac_portacl/mac_portacl.c optional mac_portacl security/mac_seeotheruids/mac_seeotheruids.c optional mac_seeotheruids security/mac_stub/mac_stub.c optional mac_stub security/mac_test/mac_test.c optional mac_test teken/teken.c optional sc ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_inode.c optional ffs ufs/ffs/ffs_snapshot.c optional ffs ufs/ffs/ffs_softdep.c optional ffs ufs/ffs/ffs_subr.c optional ffs ufs/ffs/ffs_tables.c optional ffs ufs/ffs/ffs_vfsops.c optional ffs ufs/ffs/ffs_vnops.c optional ffs ufs/ffs/ffs_rawread.c optional directio ufs/ffs/ffs_suspend.c optional ffs ufs/ufs/ufs_acl.c optional ffs ufs/ufs/ufs_bmap.c optional ffs ufs/ufs/ufs_dirhash.c optional ffs ufs/ufs/ufs_extattr.c optional ffs ufs/ufs/ufs_gjournal.c optional ffs UFS_GJOURNAL ufs/ufs/ufs_inode.c optional ffs ufs/ufs/ufs_lookup.c optional ffs ufs/ufs/ufs_quota.c optional ffs ufs/ufs/ufs_vfsops.c optional ffs ufs/ufs/ufs_vnops.c optional ffs vm/default_pager.c standard vm/device_pager.c standard vm/phys_pager.c standard vm/redzone.c optional DEBUG_REDZONE vm/sg_pager.c standard vm/swap_pager.c standard vm/uma_core.c standard vm/uma_dbg.c standard vm/memguard.c optional DEBUG_MEMGUARD vm/vm_fault.c standard vm/vm_glue.c standard vm/vm_init.c standard vm/vm_kern.c standard vm/vm_map.c standard vm/vm_meter.c standard vm/vm_mmap.c standard vm/vm_object.c standard vm/vm_page.c standard vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_phys.c standard vm/vm_radix.c standard vm/vm_reserv.c standard vm/vm_unix.c standard vm/vm_zeroidle.c standard vm/vnode_pager.c standard xen/gnttab.c 
	optional xen | xenhvm
xen/features.c	optional xen | xenhvm
-xen/evtchn/evtchn.c	optional xen
xen/evtchn/evtchn_dev.c	optional xen | xenhvm
xen/xenbus/xenbus_if.m	optional xen | xenhvm
xen/xenbus/xenbus.c	optional xen | xenhvm
xen/xenbus/xenbusb_if.m	optional xen | xenhvm
xen/xenbus/xenbusb.c	optional xen | xenhvm
xen/xenbus/xenbusb_front.c	optional xen | xenhvm
xen/xenbus/xenbusb_back.c	optional xen | xenhvm
xen/xenstore/xenstore.c	optional xen | xenhvm
xen/xenstore/xenstore_dev.c	optional xen | xenhvm
xdr/xdr.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_array.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_mbuf.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_mem.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_reference.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_sizeof.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 1d5fa769b46e..6d35d1f678db 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -1,533 +1,535 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
# $FreeBSD$
#
# The long compile-with and dependency lines are required because of
# limitations in config: backslash-newline doesn't work in strings, and
# dependency lines other than the first are silently ignored.
#
#
linux32_genassym.o	optional compat_linux32 \
	dependency "$S/amd64/linux32/linux32_genassym.c" \
	compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \
	no-obj no-implicit-rule \
	clean "linux32_genassym.o"
#
linux32_assym.h	optional compat_linux32 \
	dependency "$S/kern/genassym.sh linux32_genassym.o" \
	compile-with "sh $S/kern/genassym.sh linux32_genassym.o > ${.TARGET}" \
	no-obj no-implicit-rule before-depend \
	clean "linux32_assym.h"
#
ia32_genassym.o	standard \
	dependency "$S/compat/ia32/ia32_genassym.c" \
	compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \
	no-obj no-implicit-rule \
	clean "ia32_genassym.o"
#
ia32_assym.h	standard \
	dependency "$S/kern/genassym.sh ia32_genassym.o" \
	compile-with "env NM='${NM}' sh $S/kern/genassym.sh ia32_genassym.o > ${.TARGET}" \
	no-obj no-implicit-rule before-depend \
	clean "ia32_assym.h"
#
font.h	optional sc_dflt_font \
	compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \
	no-obj no-implicit-rule before-depend \
	clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
#
atkbdmap.h	optional atkbd_dflt_keymap \
	compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \
	no-obj no-implicit-rule before-depend \
	clean "atkbdmap.h"
#
ukbdmap.h	optional ukbd_dflt_keymap \
	compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \
	no-obj no-implicit-rule before-depend \
	clean
"ukbdmap.h" # nvenetlib.o optional nve pci \ dependency "$S/contrib/dev/nve/amd64/nvenetlib.o.bz2.uu" \ compile-with "uudecode $S/contrib/dev/nve/amd64/nvenetlib.o.bz2.uu ; bzip2 -df nvenetlib.o.bz2" \ no-implicit-rule # os+%DIKED-nve.h optional nve pci \ dependency "$S/contrib/dev/nve/os.h" \ compile-with "sed -e 's/^.*#include.*phy\.h.*$$//' $S/contrib/dev/nve/os.h > os+%DIKED-nve.h" \ no-implicit-rule no-obj before-depend \ clean "os+%DIKED-nve.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/amd64-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/amd64-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/amd64-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/amd64-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/amd64-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/amd64-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ no-implicit-rule # amd64/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # amd64/amd64/amd64_mem.c optional mem #amd64/amd64/apic_vector.S standard amd64/amd64/atomic.c standard amd64/amd64/autoconf.c standard amd64/amd64/bios.c standard amd64/amd64/bpf_jit_machdep.c optional bpf_jitter amd64/amd64/cpu_switch.S standard amd64/amd64/db_disasm.c optional ddb amd64/amd64/db_interface.c optional ddb amd64/amd64/db_trace.c optional ddb amd64/amd64/elf_machdep.c standard amd64/amd64/exception.S standard amd64/amd64/fpu.c standard amd64/amd64/gdb_machdep.c optional gdb amd64/amd64/identcpu.c standard amd64/amd64/in_cksum.c optional inet | inet6 amd64/amd64/initcpu.c standard amd64/amd64/io.c optional io amd64/amd64/locore.S standard no-obj amd64/amd64/machdep.c standard amd64/amd64/mem.c optional mem amd64/amd64/minidump_machdep.c standard amd64/amd64/mp_machdep.c optional smp amd64/amd64/mp_watchdog.c optional mp_watchdog smp amd64/amd64/mpboot.S optional smp amd64/amd64/pmap.c standard amd64/amd64/prof_machdep.c optional profiling-routine amd64/amd64/ptrace_machdep.c standard amd64/amd64/sigtramp.S standard amd64/amd64/stack_machdep.c optional ddb | stack amd64/amd64/support.S standard amd64/amd64/sys_machdep.c standard amd64/amd64/trap.c standard amd64/amd64/uio_machdep.c standard amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard amd64/pci/pci_cfgreg.c optional pci cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" crypto/aesni/aesencdec_amd64.S optional aesni crypto/aesni/aeskeys_amd64.S 
optional aesni crypto/aesni/aesni.c optional aesni crypto/aesni/aesni_wrap.c optional aesni crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/acpica/acpi_if.m standard dev/acpi_support/acpi_wmi_if.m standard dev/agp/agp_amd64.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_via.c optional agp dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/coretemp/coretemp.c optional coretemp dev/cpuctl/cpuctl.c optional cpuctl dev/dpms/dpms.c optional dpms # There are no systems with isa slots, so all ed isa entries should go.. dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux32 dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fdt/fdt_x86.c optional fdt dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/kbd/kbd.c optional atkbd | sc | ukbd dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_hw/ntb_hw.c optional if_ntb ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nve/if_nve.c optional nve pci dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme 
dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/random/ivy.c optional random rdrand_rng dev/random/nehemiah.c optional random padlock_rng dev/qlxge/qls_dbg.c optional qlxge pci dev/qlxge/qls_dump.c optional qlxge pci dev/qlxge/qls_hw.c optional qlxge pci dev/qlxge/qls_ioctl.c optional qlxge pci dev/qlxge/qls_isr.c optional qlxge pci dev/qlxge/qls_os.c optional qlxge pci dev/qlxgb/qla_dbg.c optional qlxgb pci dev/qlxgb/qla_hw.c optional qlxgb pci dev/qlxgb/qla_ioctl.c optional qlxgb pci dev/qlxgb/qla_isr.c optional qlxgb pci dev/qlxgb/qla_misc.c optional qlxgb pci dev/qlxgb/qla_os.c optional qlxgb pci dev/qlxgbe/ql_dbg.c optional qlxgbe pci dev/qlxgbe/ql_hw.c optional qlxgbe pci dev/qlxgbe/ql_ioctl.c optional qlxgbe pci dev/qlxgbe/ql_isr.c optional qlxgbe pci dev/qlxgbe/ql_misc.c optional qlxgbe pci dev/qlxgbe/ql_os.c optional qlxgbe pci dev/qlxgbe/ql_reset.c optional qlxgbe pci dev/sfxge/common/efx_bootcfg.c optional sfxge inet pci dev/sfxge/common/efx_ev.c optional sfxge inet pci dev/sfxge/common/efx_filter.c optional sfxge inet pci dev/sfxge/common/efx_intr.c optional sfxge inet pci dev/sfxge/common/efx_mac.c optional sfxge inet pci dev/sfxge/common/efx_mcdi.c optional sfxge inet pci dev/sfxge/common/efx_mon.c optional sfxge inet pci dev/sfxge/common/efx_nic.c optional sfxge inet pci dev/sfxge/common/efx_nvram.c optional sfxge inet pci dev/sfxge/common/efx_phy.c optional sfxge inet pci dev/sfxge/common/efx_port.c optional sfxge inet pci dev/sfxge/common/efx_rx.c optional sfxge inet pci dev/sfxge/common/efx_sram.c optional sfxge inet pci dev/sfxge/common/efx_tx.c optional sfxge inet pci dev/sfxge/common/efx_vpd.c optional sfxge inet pci dev/sfxge/common/efx_wol.c optional sfxge inet pci dev/sfxge/common/siena_mac.c optional sfxge inet pci dev/sfxge/common/siena_mon.c optional sfxge inet pci dev/sfxge/common/siena_nic.c optional sfxge inet pci dev/sfxge/common/siena_nvram.c optional sfxge inet pci dev/sfxge/common/siena_phy.c optional sfxge inet pci dev/sfxge/common/siena_sram.c optional sfxge inet pci dev/sfxge/common/siena_vpd.c optional sfxge inet pci dev/sfxge/sfxge.c optional sfxge inet pci dev/sfxge/sfxge_dma.c optional sfxge inet pci dev/sfxge/sfxge_ev.c optional sfxge inet pci dev/sfxge/sfxge_intr.c optional sfxge inet pci dev/sfxge/sfxge_mcdi.c optional sfxge inet pci dev/sfxge/sfxge_port.c optional sfxge inet pci dev/sfxge/sfxge_rx.c optional sfxge inet pci dev/sfxge/sfxge_tx.c optional sfxge inet pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional wpi dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional 
isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c 
optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci dev/virtio/virtio.c optional virtio dev/virtio/virtqueue.c optional virtio dev/virtio/virtio_bus_if.m optional virtio dev/virtio/virtio_if.m optional virtio dev/virtio/pci/virtio_pci.c optional virtio_pci dev/virtio/network/if_vtnet.c optional vtnet dev/virtio/block/virtio_blk.c optional virtio_blk dev/virtio/balloon/virtio_balloon.c optional virtio_balloon dev/virtio/scsi/virtio_scsi.c optional virtio_scsi isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/link_elf_obj.c standard # # IA32 binary support # #amd64/ia32/ia32_exception.S optional compat_freebsd32 amd64/ia32/ia32_reg.c optional compat_freebsd32 amd64/ia32/ia32_signal.c optional compat_freebsd32 amd64/ia32/ia32_sigtramp.S optional compat_freebsd32 amd64/ia32/ia32_syscall.c optional compat_freebsd32 amd64/ia32/ia32_misc.c optional compat_freebsd32 compat/ia32/ia32_sysvec.c optional compat_freebsd32 compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs # # Linux/i386 binary support # amd64/linux32/linux32_dummy.c optional compat_linux32 amd64/linux32/linux32_locore.s optional compat_linux32 \ dependency "linux32_assym.h" amd64/linux32/linux32_machdep.c optional compat_linux32 amd64/linux32/linux32_support.s optional compat_linux32 \ dependency "linux32_assym.h" amd64/linux32/linux32_sysent.c optional compat_linux32 amd64/linux32/linux32_sysvec.c optional compat_linux32 compat/linux/linux_emul.c optional compat_linux32 compat/linux/linux_file.c optional compat_linux32 compat/linux/linux_fork.c optional compat_linux32 compat/linux/linux_futex.c optional compat_linux32 compat/linux/linux_getcwd.c optional compat_linux32 compat/linux/linux_ioctl.c optional compat_linux32 compat/linux/linux_ipc.c optional compat_linux32 compat/linux/linux_mib.c optional compat_linux32 compat/linux/linux_misc.c optional compat_linux32 
compat/linux/linux_signal.c	optional compat_linux32
compat/linux/linux_socket.c	optional compat_linux32
compat/linux/linux_stats.c	optional compat_linux32
compat/linux/linux_sysctl.c	optional compat_linux32
compat/linux/linux_time.c	optional compat_linux32
compat/linux/linux_uid16.c	optional compat_linux32
compat/linux/linux_util.c	optional compat_linux32
dev/amr/amr_linux.c	optional compat_linux32 amr
dev/mfi/mfi_linux.c	optional compat_linux32 mfi
#
# Windows NDIS driver support
#
compat/ndis/kern_ndis.c	optional ndisapi pci
compat/ndis/kern_windrv.c	optional ndisapi pci
compat/ndis/subr_hal.c	optional ndisapi pci
compat/ndis/subr_ndis.c	optional ndisapi pci
compat/ndis/subr_ntoskrnl.c	optional ndisapi pci
compat/ndis/subr_pe.c	optional ndisapi pci
compat/ndis/subr_usbd.c	optional ndisapi pci
compat/ndis/winx64_wrap.S	optional ndisapi pci
#
libkern/memmove.c	standard
libkern/memset.c	standard
#
# x86 real mode BIOS emulator, required by atkbdc/dpms/vesa
#
compat/x86bios/x86bios.c	optional x86bios | atkbd | dpms | vesa
contrib/x86emu/x86emu.c	optional x86bios | atkbd | dpms | vesa
#
# bvm console
#
dev/bvm/bvm_console.c	optional bvmconsole
dev/bvm/bvm_dbg.c	optional bvmdebug
#
# x86 shared code between IA32, AMD64 and PC98 architectures
#
x86/acpica/OsdEnvironment.c	optional acpi
x86/acpica/acpi_apm.c	optional acpi
x86/acpica/acpi_wakeup.c	optional acpi
x86/acpica/madt.c	optional acpi
x86/acpica/srat.c	optional acpi
x86/bios/smbios.c	optional smbios
x86/bios/vpd.c	optional vpd
x86/cpufreq/powernow.c	optional cpufreq
x86/cpufreq/est.c	optional cpufreq
x86/cpufreq/hwpstate.c	optional cpufreq
x86/cpufreq/p4tcc.c	optional cpufreq
x86/isa/atpic.c	optional atpic isa
x86/isa/atrtc.c	standard
x86/isa/clock.c	standard
x86/isa/elcr.c	optional atpic isa | mptable
x86/isa/isa.c	standard
x86/isa/isa_dma.c	standard
x86/isa/nmi.c	standard
x86/isa/orm.c	optional isa
x86/pci/pci_bus.c	optional pci
x86/pci/qpi.c	optional pci
x86/x86/busdma_machdep.c	standard
x86/x86/dump_machdep.c	standard
x86/x86/fdt_machdep.c	optional fdt
x86/x86/intr_machdep.c	standard
x86/x86/io_apic.c	standard
x86/x86/legacy.c	standard
x86/x86/local_apic.c	standard
x86/x86/mca.c	standard
x86/x86/mptable.c	optional mptable
x86/x86/mptable_pci.c	optional mptable pci
x86/x86/msi.c	optional pci
x86/x86/nexus.c	standard
x86/x86/tsc.c	standard
+x86/xen/hvm.c	optional xenhvm
+x86/xen/xen_intr.c	optional xen | xenhvm
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 4f8f8ed16a18..0613337132c3 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -1,570 +1,572 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
# $FreeBSD$
#
# The long compile-with and dependency lines are required because of
# limitations in config: backslash-newline doesn't work in strings, and
# dependency lines other than the first are silently ignored.
# linux_genassym.o optional compat_linux \ dependency "$S/i386/linux/linux_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux_genassym.o" # linux_assym.h optional compat_linux \ dependency "$S/kern/genassym.sh linux_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux_assym.h" # svr4_genassym.o optional compat_svr4 \ dependency "$S/i386/svr4/svr4_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "svr4_genassym.o" # svr4_assym.h optional compat_svr4 \ dependency "$S/kern/genassym.sh svr4_genassym.o" \ compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "svr4_assym.h" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # nvenetlib.o optional nve pci \ dependency "$S/contrib/dev/nve/i386/nvenetlib.o.bz2.uu" \ compile-with "uudecode $S/contrib/dev/nve/i386/nvenetlib.o.bz2.uu ; bzip2 -df nvenetlib.o.bz2" \ no-implicit-rule # os+%DIKED-nve.h optional nve pci \ dependency "$S/contrib/dev/nve/os.h" \ compile-with "sed -e 's/^.*#include.*phy\.h.*$$//' $S/contrib/dev/nve/os.h > os+%DIKED-nve.h" \ no-implicit-rule no-obj before-depend \ clean "os+%DIKED-nve.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/i386-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/i386-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ no-implicit-rule # cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux compat/linux/linux_futex.c optional compat_linux 
compat/linux/linux_getcwd.c optional compat_linux compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux compat/linux/linux_sysctl.c optional compat_linux compat/linux/linux_time.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci compat/ndis/subr_ndis.c optional ndisapi pci compat/ndis/subr_ntoskrnl.c optional ndisapi pci compat/ndis/subr_pe.c optional ndisapi pci compat/ndis/subr_usbd.c optional ndisapi pci compat/ndis/winx32_wrap.S optional ndisapi pci compat/svr4/imgact_svr4.c optional compat_svr4 compat/svr4/svr4_fcntl.c optional compat_svr4 compat/svr4/svr4_filio.c optional compat_svr4 compat/svr4/svr4_ioctl.c optional compat_svr4 compat/svr4/svr4_ipc.c optional compat_svr4 compat/svr4/svr4_misc.c optional compat_svr4 compat/svr4/svr4_resource.c optional compat_svr4 compat/svr4/svr4_signal.c optional compat_svr4 compat/svr4/svr4_socket.c optional compat_svr4 compat/svr4/svr4_sockio.c optional compat_svr4 compat/svr4/svr4_stat.c optional compat_svr4 compat/svr4/svr4_stream.c optional compat_svr4 compat/svr4/svr4_syscallnames.c optional compat_svr4 compat/svr4/svr4_sysent.c optional compat_svr4 compat/svr4/svr4_sysvec.c optional compat_svr4 compat/svr4/svr4_termios.c optional compat_svr4 bf_enc.o optional crypto | ipsec \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule crypto/aesni/aesencdec_i386.S optional aesni crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni crypto/aesni/aesni_wrap.c optional aesni crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/advansys/adv_isa.c optional adv isa dev/agp/agp_ali.c optional agp dev/agp/agp_amd.c optional agp dev/agp/agp_amd64.c optional agp dev/agp/agp_ati.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_nvidia.c optional agp dev/agp/agp_sis.c optional agp dev/agp/agp_via.c optional agp dev/aic/aic_isa.c optional aic isa dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/ce/ceddk.c optional ce dev/ce/if_ce.c optional ce dev/ce/tau32-ddk.c optional ce \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/cm/if_cm_isa.c optional cm isa dev/coretemp/coretemp.c optional coretemp dev/cp/cpddk.c optional cp dev/cp/if_cp.c optional cp dev/cpuctl/cpuctl.c optional cpuctl dev/ctau/ctau.c optional ctau dev/ctau/ctddk.c optional ctau dev/ctau/if_ct.c optional ctau dev/cx/csigma.c optional cx dev/cx/cxddk.c optional cx 
dev/cx/if_cx.c optional cx dev/dpms/dpms.c optional dpms dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fdt/fdt_x86.c optional fdt dev/fe/if_fe_isa.c optional fe isa dev/glxiic/glxiic.c optional glxiic dev/glxsb/glxsb.c optional glxsb dev/glxsb/glxsb_hash.c optional glxsb dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_pentium.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_ppro.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux dev/kbd/kbd.c optional atkbd | sc | ukbd dev/le/if_le_isa.c optional le isa dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev dev/mse/mse.c optional mse dev/mse/mse_isa.c optional mse isa dev/nfe/if_nfe.c optional nfe pci dev/nvd/nvd.c optional nvd nvme dev/nve/if_nve.c optional nve pci dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/pcf/pcf_isa.c optional pcf dev/random/ivy.c optional random rdrand_rng dev/random/nehemiah.c optional random padlock_rng dev/sbni/if_sbni.c optional sbni dev/sbni/if_sbni_isa.c optional sbni isa dev/sbni/if_sbni_pci.c optional sbni pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional 
tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/acpica/acpi_if.m standard dev/acpi_support/acpi_wmi_if.m standard dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional wpi dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci 
dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci dev/virtio/virtio.c optional virtio dev/virtio/virtqueue.c optional virtio dev/virtio/virtio_bus_if.m optional virtio dev/virtio/virtio_if.m optional virtio dev/virtio/pci/virtio_pci.c optional virtio_pci dev/virtio/network/if_vtnet.c optional vtnet dev/virtio/block/virtio_blk.c optional virtio_blk dev/virtio/balloon/virtio_balloon.c optional virtio_balloon dev/virtio/scsi/virtio_scsi.c optional virtio_scsi i386/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/i386/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > 
${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # i386/bios/apm.c optional apm i386/bios/mca_machdep.c optional mca i386/bios/smapi.c optional smapi i386/bios/smapi_bios.S optional smapi #i386/i386/apic_vector.s optional apic i386/i386/atomic.c standard \ compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" i386/i386/autoconf.c standard i386/i386/bios.c optional native i386/i386/bioscall.s optional native i386/i386/bpf_jit_machdep.c optional bpf_jitter i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris i386/i386/elf_machdep.c standard i386/i386/exception.s optional native i386/xen/exception.s optional xen i386/i386/gdb_machdep.c optional gdb i386/i386/geode.c optional cpu_geode i386/i386/i686_mem.c optional mem i386/i386/identcpu.c standard i386/i386/in_cksum.c optional inet | inet6 i386/i386/initcpu.c standard i386/i386/io.c optional io i386/i386/k6_mem.c optional mem i386/i386/locore.s optional native no-obj i386/xen/locore.s optional xen no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard i386/xen/xen_machdep.c optional xen i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard i386/i386/mp_clock.c optional smp i386/i386/mp_machdep.c optional native smp i386/xen/mp_machdep.c optional xen smp i386/i386/mp_watchdog.c optional mp_watchdog smp i386/i386/mpboot.s optional smp native i386/xen/mptable.c optional apic xen i386/i386/perfmon.c optional perfmon i386/i386/pmap.c optional native i386/xen/pmap.c optional xen i386/i386/ptrace_machdep.c standard i386/i386/stack_machdep.c optional ddb | stack i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard i386/ibcs2/ibcs2_errno.c optional ibcs2 i386/ibcs2/ibcs2_fcntl.c optional ibcs2 i386/ibcs2/ibcs2_ioctl.c optional ibcs2 i386/ibcs2/ibcs2_ipc.c optional ibcs2 i386/ibcs2/ibcs2_isc.c optional ibcs2 i386/ibcs2/ibcs2_isc_sysent.c optional ibcs2 i386/ibcs2/ibcs2_misc.c optional ibcs2 i386/ibcs2/ibcs2_msg.c optional ibcs2 i386/ibcs2/ibcs2_other.c optional ibcs2 i386/ibcs2/ibcs2_signal.c optional ibcs2 i386/ibcs2/ibcs2_socksys.c optional ibcs2 i386/ibcs2/ibcs2_stat.c optional ibcs2 i386/ibcs2/ibcs2_sysent.c optional ibcs2 i386/ibcs2/ibcs2_sysi86.c optional ibcs2 i386/ibcs2/ibcs2_sysvec.c optional ibcs2 i386/ibcs2/ibcs2_util.c optional ibcs2 i386/ibcs2/ibcs2_xenix.c optional ibcs2 i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2 i386/ibcs2/imgact_coff.c optional ibcs2 i386/xen/clock.c optional xen i386/xen/xen_clock_util.c optional xen i386/xen/xen_rtc.c optional xen i386/isa/elink.c optional ep | ie i386/isa/npx.c optional npx i386/isa/pmtimer.c optional pmtimer i386/isa/prof_machdep.c optional profiling-routine i386/isa/spic.c optional spic i386/linux/imgact_linux.c optional compat_linux i386/linux/linux_dummy.c optional compat_linux i386/linux/linux_locore.s optional compat_linux \ dependency "linux_assym.h" i386/linux/linux_machdep.c optional compat_linux i386/linux/linux_ptrace.c optional compat_linux i386/linux/linux_support.s optional compat_linux \ dependency "linux_assym.h" i386/linux/linux_sysent.c optional compat_linux i386/linux/linux_sysvec.c optional compat_linux i386/pci/pci_cfgreg.c optional pci i386/pci/pci_pir.c optional pci i386/svr4/svr4_locore.s optional compat_svr4 \ 
dependency "svr4_assym.h" \ warning "COMPAT_SVR4 is broken and should be avoided" i386/svr4/svr4_machdep.c optional compat_svr4 # isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip libkern/divdi3.c standard libkern/ffsl.c standard libkern/flsl.c standard libkern/memmove.c standard libkern/memset.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard i386/xbox/xbox.c optional xbox i386/xbox/xboxfb.c optional xboxfb dev/fb/boot_font.c optional xboxfb i386/xbox/pic16l.s optional xbox # # x86 real mode BIOS support, required by atkbdc/dpms/vesa # compat/x86bios/x86bios.c optional x86bios | atkbd | dpms | vesa # # x86 shared code between IA32, AMD64 and PC98 architectures # x86/acpica/OsdEnvironment.c optional acpi x86/acpica/acpi_apm.c optional acpi x86/acpica/acpi_wakeup.c optional acpi x86/acpica/madt.c optional acpi apic x86/acpica/srat.c optional acpi x86/bios/smbios.c optional smbios x86/bios/vpd.c optional vpd x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/smist.c optional cpufreq x86/isa/atpic.c optional atpic x86/isa/atrtc.c optional native x86/isa/clock.c optional native x86/isa/elcr.c optional atpic | apic native x86/isa/isa.c optional isa x86/isa/isa_dma.c optional isa x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/intr_machdep.c standard x86/x86/io_apic.c optional apic x86/x86/legacy.c optional native x86/x86/local_apic.c optional apic x86/x86/mca.c standard x86/x86/mptable.c optional apic native x86/x86/mptable_pci.c optional apic native pci x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/tsc.c standard +x86/xen/hvm.c optional xenhvm +x86/xen/xen_intr.c optional xen | xenhvm diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c index d4f94ab9ee00..9021abb1172d 100644 --- a/sys/dev/xen/balloon/balloon.c +++ b/sys/dev/xen/balloon/balloon.c @@ -1,571 +1,572 @@ /****************************************************************************** * balloon.c * * Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include -#include -#include -#include +#include +#include + +#include #include +#include #include -#include -#include +#include static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); struct mtx balloon_mutex; /* * Protects atomic reservation decrease/increase against concurrent increases. * Also protects non-atomic updates of current_pages and driver_pages, and * balloon lists. */ struct mtx balloon_lock; /* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; #define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0])) struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ unsigned long current_pages; unsigned long target_pages; /* We may hit the hard limit in Xen. If we do then we remember it. */ unsigned long hard_limit; /* * Drivers may alter the memory reservation independently, but they * must inform the balloon driver so we avoid hitting the hard limit. */ unsigned long driver_pages; /* Number of pages in high- and low-memory balloons. */ unsigned long balloon_low; unsigned long balloon_high; }; static struct balloon_stats balloon_stats; #define bs balloon_stats SYSCTL_DECL(_dev_xen); static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, &bs.current_pages, 0, "Current allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, &bs.target_pages, 0, "Target allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, &bs.driver_pages, 0, "Driver pages"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, &bs.hard_limit, 0, "Xen hard limit"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, &bs.balloon_low, 0, "Low-mem balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, &bs.balloon_high, 0, "High-mem balloon"); struct balloon_entry { vm_page_t page; STAILQ_ENTRY(balloon_entry) list; }; /* List of ballooned pages, threaded through the mem_map array. */ static STAILQ_HEAD(,balloon_entry) ballooned_pages; /* Main work function, always executed in process context. */ static void balloon_process(void *unused); #define IPRINTK(fmt, args...) \ printk(KERN_INFO "xen_mem: " fmt, ##args) #define WPRINTK(fmt, args...) \ printk(KERN_WARNING "xen_mem: " fmt, ##args) /* balloon_append: add the given page to the balloon. */ static void balloon_append(vm_page_t page) { struct balloon_entry *entry; entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK); entry->page = page; STAILQ_INSERT_HEAD(&ballooned_pages, entry, list); bs.balloon_low++; } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. 
*/ static vm_page_t balloon_retrieve(void) { vm_page_t page; struct balloon_entry *entry; if (STAILQ_EMPTY(&ballooned_pages)) return NULL; entry = STAILQ_FIRST(&ballooned_pages); STAILQ_REMOVE_HEAD(&ballooned_pages, list); page = entry->page; free(entry, M_BALLOON); bs.balloon_low--; return page; } static unsigned long current_target(void) { unsigned long target = min(bs.target_pages, bs.hard_limit); if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) target = bs.current_pages + bs.balloon_low + bs.balloon_high; return target; } static unsigned long minimum_target(void) { #ifdef XENHVM #define max_pfn physmem #else #define max_pfn HYPERVISOR_shared_info->arch.max_pfn #endif unsigned long min_pages, curr_pages = current_target(); #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) /* Simple continuous piecewiese linear function: * max MiB -> min MiB gradient * 0 0 * 16 16 * 32 24 * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) * 8192 552 (1/32) * 32768 1320 * 131072 4392 */ if (max_pfn < MB2PAGES(128)) min_pages = MB2PAGES(8) + (max_pfn >> 1); else if (max_pfn < MB2PAGES(512)) min_pages = MB2PAGES(40) + (max_pfn >> 2); else if (max_pfn < MB2PAGES(2048)) min_pages = MB2PAGES(104) + (max_pfn >> 3); else min_pages = MB2PAGES(296) + (max_pfn >> 5); #undef MB2PAGES /* Don't enforce growth */ return min(min_pages, curr_pages); #ifndef CONFIG_XEN #undef max_pfn #endif } static int increase_reservation(unsigned long nr_pages) { unsigned long pfn, i; struct balloon_entry *entry; vm_page_t page; long rc; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); mtx_lock(&balloon_lock); for (entry = STAILQ_FIRST(&ballooned_pages), i = 0; i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) { KASSERT(entry, ("ballooned_pages list corrupt")); page = entry->page; frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, &reservation); if (rc < nr_pages) { if (rc > 0) { int ret; /* We hit the Xen hard limit: reprobe. */ reservation.nr_extents = rc; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); } if (rc >= 0) bs.hard_limit = (bs.current_pages + rc - bs.driver_pages); goto out; } for (i = 0; i < nr_pages; i++) { page = balloon_retrieve(); KASSERT(page, ("balloon_retrieve failed")); pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); KASSERT((xen_feature(XENFEAT_auto_translated_physmap) || !phys_to_machine_mapping_valid(pfn)), ("auto translated physmap but mapping is valid")); set_phys_to_machine(pfn, frame_list[i]); #if 0 #ifndef XENHVM /* Link back into the page tables if not highmem. */ if (pfn < max_low_pfn) { int ret; ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), pfn_pte_ma(frame_list[i], PAGE_KERNEL), 0); PASSING(ret == 0, ("HYPERVISOR_update_va_mapping failed")); } #endif #endif /* Relinquish the page back to the allocator. 
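As a side note on the sizing logic earlier in this file: the piecewise-linear floor computed by minimum_target() is easy to sanity-check in isolation. The following stand-alone sketch (not part of the driver; it assumes 4 KiB pages and a hypothetical 1 GiB guest) reproduces that arithmetic and prints a floor of 59392 pages, i.e. 232 MiB, which matches the gradient table in the comment.

#include <stdio.h>

#define PAGE_SHIFT 12                                /* assumption: 4 KiB pages */
#define MB2PAGES(mb) ((unsigned long)(mb) << (20 - PAGE_SHIFT))

/* Stand-alone copy of the piecewise-linear floor used by minimum_target(). */
static unsigned long
balloon_floor_pages(unsigned long max_pfn)
{
        unsigned long min_pages;

        if (max_pfn < MB2PAGES(128))
                min_pages = MB2PAGES(8) + (max_pfn >> 1);
        else if (max_pfn < MB2PAGES(512))
                min_pages = MB2PAGES(40) + (max_pfn >> 2);
        else if (max_pfn < MB2PAGES(2048))
                min_pages = MB2PAGES(104) + (max_pfn >> 3);
        else
                min_pages = MB2PAGES(296) + (max_pfn >> 5);
        return (min_pages);
}

int
main(void)
{
        unsigned long max_pfn = MB2PAGES(1024);      /* hypothetical 1 GiB guest */

        printf("floor = %lu pages (%lu MiB)\n",
            balloon_floor_pages(max_pfn),
            balloon_floor_pages(max_pfn) >> (20 - PAGE_SHIFT));
        return (0);
}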
*/ vm_page_unwire(page, 0); vm_page_free(page); } bs.current_pages += nr_pages; //totalram_pages = bs.current_pages; out: mtx_unlock(&balloon_lock); return 0; } static int decrease_reservation(unsigned long nr_pages) { unsigned long pfn, i; vm_page_t page; int need_sleep = 0; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); for (i = 0; i < nr_pages; i++) { if ((page = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { nr_pages = i; need_sleep = 1; break; } pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); frame_list[i] = PFNTOMFN(pfn); #if 0 if (!PageHighMem(page)) { v = phys_to_virt(pfn << PAGE_SHIFT); scrub_pages(v, 1); #ifdef CONFIG_XEN ret = HYPERVISOR_update_va_mapping( (unsigned long)v, __pte_ma(0), 0); BUG_ON(ret); #endif } #endif #ifdef CONFIG_XEN_SCRUB_PAGES else { v = kmap(page); scrub_pages(v, 1); kunmap(page); } #endif } #ifdef CONFIG_XEN /* Ensure that ballooned highmem pages don't have kmaps. */ kmap_flush_unused(); flush_tlb_all(); #endif mtx_lock(&balloon_lock); /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = MFNTOPFN(frame_list[i]); set_phys_to_machine(pfn, INVALID_P2M_ENTRY); balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT)); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); bs.current_pages -= nr_pages; //totalram_pages = bs.current_pages; mtx_unlock(&balloon_lock); return (need_sleep); } /* * We avoid multiple worker processes conflicting via the balloon mutex. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ static void balloon_process(void *unused) { int need_sleep = 0; long credit; mtx_lock(&balloon_mutex); for (;;) { int sleep_time; do { credit = current_target() - bs.current_pages; if (credit > 0) need_sleep = (increase_reservation(credit) != 0); if (credit < 0) need_sleep = (decrease_reservation(-credit) != 0); } while ((credit != 0) && !need_sleep); /* Schedule more work if there is some still to be done. */ if (current_target() != bs.current_pages) sleep_time = hz; else sleep_time = 0; msleep(balloon_process, &balloon_mutex, 0, "balloon", sleep_time); } mtx_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ static void set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ bs.hard_limit = ~0UL; bs.target_pages = max(target, minimum_target()); wakeup(balloon_process); } static struct xs_watch target_watch = { .node = "memory/target" }; /* React to a change in the target key */ static void watch_target(struct xs_watch *watch, const char **vec, unsigned int len) { unsigned long long new_target; int err; err = xs_scanf(XST_NIL, "memory", "target", NULL, "%llu", &new_target); if (err) { /* This is ok (for domain0 at least) - so just return */ return; } /* The given memory/target value is in KiB, so it needs converting to pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 
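The shift by PAGE_SHIFT - 10 used by set_new_target() below is worth a quick check. A minimal sketch, assuming 4 KiB pages (PAGE_SHIFT == 12) and a hypothetical memory/target of 512 MiB expressed in KiB:

#include <stdio.h>

#define PAGE_SHIFT 12                                /* assumption: 4 KiB pages */

int
main(void)
{
        unsigned long long target_kib = 524288ULL;   /* hypothetical memory/target: 512 MiB in KiB */

        /* 2^10 bytes per KiB, 2^PAGE_SHIFT bytes per page, so shift by PAGE_SHIFT - 10. */
        unsigned long target_pages = (unsigned long)(target_kib >> (PAGE_SHIFT - 10));

        printf("%llu KiB -> %lu pages\n", target_kib, target_pages);
        return (0);
}

With these assumptions the program prints 131072 pages, i.e. exactly 512 MiB worth of 4 KiB pages.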
*/ set_new_target(new_target >> (PAGE_SHIFT - 10)); } static void balloon_init_watcher(void *arg) { int err; if (!is_running_on_xen()) return; err = xs_register_watch(&target_watch); if (err) printf("Failed to set balloon watcher\n"); } SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init_watcher, NULL); static void balloon_init(void *arg) { #ifndef XENHVM vm_page_t page; unsigned long pfn; #define max_pfn HYPERVISOR_shared_info->arch.max_pfn #endif if (!is_running_on_xen()) return; mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF); mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); #ifndef XENHVM bs.current_pages = min(xen_start_info->nr_pages, max_pfn); #else bs.current_pages = physmem; #endif bs.target_pages = bs.current_pages; bs.balloon_low = 0; bs.balloon_high = 0; bs.driver_pages = 0UL; bs.hard_limit = ~0UL; kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); #ifndef XENHVM /* Initialise the balloon with excess memory space. */ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); balloon_append(page); } #undef max_pfn #endif target_watch.callback = watch_target; return; } SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL); void balloon_update_driver_allowance(long delta); void balloon_update_driver_allowance(long delta) { mtx_lock(&balloon_lock); bs.driver_pages += delta; mtx_unlock(&balloon_lock); } #if 0 static int dealloc_pte_fn( pte_t *pte, struct page *pte_page, unsigned long addr, void *data) { unsigned long mfn = pte_mfn(*pte); int ret; struct xen_memory_reservation reservation = { .extent_start = &mfn, .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF }; set_pte_at(&init_mm, addr, pte, __pte_ma(0)); set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == 1, ("HYPERVISOR_memory_op failed")); return 0; } #endif #if 0 vm_page_t balloon_alloc_empty_page_range(unsigned long nr_pages) { vm_page_t pages; int i, rc; unsigned long *mfn_list; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4) if (pages == NULL) return NULL; mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK); for (i = 0; i < nr_pages; i++) { mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT); PFNTOMFN(i) = INVALID_P2M_ENTRY; reservation.extent_start = mfn_list; reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed")); } current_pages -= nr_pages; wakeup(balloon_process); return pages; } void balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages) { unsigned long i; for (i = 0; i < nr_pages; i++) balloon_append(page + i); wakeup(balloon_process); } #endif diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c index 4208702c4016..33f6fafc6d27 100644 --- a/sys/dev/xen/blkback/blkback.c +++ b/sys/dev/xen/blkback/blkback.c @@ -1,3981 +1,3981 @@ /*- * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * Ken Merry (Spectra Logic Corporation) */ #include __FBSDID("$FreeBSD$"); /** * \file blkback.c * * \brief Device driver supporting the vending of block storage from * a FreeBSD domain to other domains. */ #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include +#include #include -#include #include #include #include #include #include /*--------------------------- Compile-time Tunables --------------------------*/ /** * The maximum number of outstanding request blocks (request headers plus * additional segment blocks) we will allow in a negotiated block-front/back * communication channel. */ #define XBB_MAX_REQUESTS 256 /** * \brief Define to force all I/O to be performed on memory owned by the * backend device, with a copy-in/out to the remote domain's memory. * * \note This option is currently required when this driver's domain is * operating in HVM mode on a system using an IOMMU. * * This driver uses Xen's grant table API to gain access to the memory of * the remote domains it serves. When our domain is operating in PV mode, * the grant table mechanism directly updates our domain's page table entries * to point to the physical pages of the remote domain. This scheme guarantees * that blkback and the backing devices it uses can safely perform DMA * operations to satisfy requests. In HVM mode, Xen may use a HW IOMMU to * insure that our domain cannot DMA to pages owned by another domain. As * of Xen 4.0, IOMMU mappings for HVM guests are not updated via the grant * table API. For this reason, in HVM mode, we must bounce all requests into * memory that is mapped into our domain at domain startup and thus has * valid IOMMU mappings. */ #define XBB_USE_BOUNCE_BUFFERS /** * \brief Define to enable rudimentary request logging to the console. */ #undef XBB_DEBUG /*---------------------------------- Macros ----------------------------------*/ /** * Custom malloc type for all driver allocations. */ static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data"); #ifdef XBB_DEBUG #define DPRINTF(fmt, args...) \ printf("xbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) 
do {} while(0) #endif /** * The maximum mapped region size per request we will allow in a negotiated * block-front/back communication channel. */ #define XBB_MAX_REQUEST_SIZE \ MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) /** * The maximum number of segments (within a request header and accompanying * segment blocks) per request we will allow in a negotiated block-front/back * communication channel. */ #define XBB_MAX_SEGMENTS_PER_REQUEST \ (MIN(UIO_MAXIOV, \ MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1))) /** * The maximum number of shared memory ring pages we will allow in a * negotiated block-front/back communication channel. Allow enough * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd. */ #define XBB_MAX_RING_PAGES \ BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \ * XBB_MAX_REQUESTS) /** * The maximum number of ring pages that we can allow per request list. * We limit this to the maximum number of segments per request, because * that is already a reasonable number of segments to aggregate. This * number should never be smaller than XBB_MAX_SEGMENTS_PER_REQUEST, * because that would leave situations where we can't dispatch even one * large request. */ #define XBB_MAX_SEGMENTS_PER_REQLIST XBB_MAX_SEGMENTS_PER_REQUEST /*--------------------------- Forward Declarations ---------------------------*/ struct xbb_softc; struct xbb_xen_req; static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) __attribute__((format(printf, 3, 4))); static int xbb_shutdown(struct xbb_softc *xbb); static int xbb_detach(device_t dev); /*------------------------------ Data Structures -----------------------------*/ STAILQ_HEAD(xbb_xen_req_list, xbb_xen_req); typedef enum { XBB_REQLIST_NONE = 0x00, XBB_REQLIST_MAPPED = 0x01 } xbb_reqlist_flags; struct xbb_xen_reqlist { /** * Back reference to the parent block back instance for this * request. Used during bio_done handling. */ struct xbb_softc *xbb; /** * BLKIF_OP code for this request. */ int operation; /** * Set to BLKIF_RSP_* to indicate request status. * * This field allows an error status to be recorded even if the * delivery of this status must be deferred. Deferred reporting * is necessary, for example, when an error is detected during * completion processing of one bio when other bios for this * request are still outstanding. */ int status; /** * Number of 512 byte sectors not transferred. */ int residual_512b_sectors; /** * Starting sector number of the first request in the list. */ off_t starting_sector_number; /** * If we're going to coalesce, the next contiguous sector would be * this one. */ off_t next_contig_sector; /** * Number of child requests in the list. */ int num_children; /** * Number of I/O requests dispatched to the backend. */ int pendcnt; /** * Total number of segments for requests in the list. */ int nr_segments; /** * Flags for this particular request list. */ xbb_reqlist_flags flags; /** * Kernel virtual address space reserved for this request * list structure and used to map the remote domain's pages for * this I/O, into our domain's address space. */ uint8_t *kva; /** * Base, psuedo-physical address, corresponding to the start * of this request's kva region. */ uint64_t gnt_base; #ifdef XBB_USE_BOUNCE_BUFFERS /** * Pre-allocated domain local memory used to proxy remote * domain memory during I/O operations. */ uint8_t *bounce; #endif /** * Array of grant handles (one per page) used to map this request. 
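To make the negotiated limits defined above concrete, the sketch below plugs in commonly used values; the numeric defines here are illustrative assumptions, not the kernel's authoritative ones. With 4 KiB pages, a 128 KiB MAXPHYS, and 11 blkif segments per request, XBB_MAX_REQUEST_SIZE evaluates to 45056 bytes and XBB_MAX_SEGMENTS_PER_REQUEST to 11.

#include <stdio.h>

/* Hypothetical values for illustration only; the real ones come from kernel headers. */
#define PAGE_SIZE                       4096
#define MAXPHYS                         (128 * 1024)
#define UIO_MAXIOV                      1024
#define BLKIF_MAX_SEGMENTS_PER_REQUEST  11
#define MIN(a, b)                       ((a) < (b) ? (a) : (b))

#define XBB_MAX_REQUEST_SIZE \
        MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE)
#define XBB_MAX_SEGMENTS_PER_REQUEST \
        (MIN(UIO_MAXIOV, \
             MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \
                 (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1)))

int
main(void)
{
        printf("XBB_MAX_REQUEST_SIZE         = %d bytes\n", XBB_MAX_REQUEST_SIZE);
        printf("XBB_MAX_SEGMENTS_PER_REQUEST = %d\n", XBB_MAX_SEGMENTS_PER_REQUEST);
        return (0);
}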
*/ grant_handle_t *gnt_handles; /** * Device statistics request ordering type (ordered or simple). */ devstat_tag_type ds_tag_type; /** * Device statistics request type (read, write, no_data). */ devstat_trans_flags ds_trans_type; /** * The start time for this request. */ struct bintime ds_t0; /** * Linked list of contiguous requests with the same operation type. */ struct xbb_xen_req_list contig_req_list; /** * Linked list links used to aggregate idle requests in the * request list free pool (xbb->reqlist_free_stailq) and pending * requests waiting for execution (xbb->reqlist_pending_stailq). */ STAILQ_ENTRY(xbb_xen_reqlist) links; }; STAILQ_HEAD(xbb_xen_reqlist_list, xbb_xen_reqlist); /** * \brief Object tracking an in-flight I/O from a Xen VBD consumer. */ struct xbb_xen_req { /** * Linked list links used to aggregate requests into a reqlist * and to store them in the request free pool. */ STAILQ_ENTRY(xbb_xen_req) links; /** * The remote domain's identifier for this I/O request. */ uint64_t id; /** * The number of pages currently mapped for this request. */ int nr_pages; /** * The number of 512 byte sectors comprising this requests. */ int nr_512b_sectors; /** * The number of struct bio requests still outstanding for this * request on the backend device. This field is only used for * device (rather than file) backed I/O. */ int pendcnt; /** * BLKIF_OP code for this request. */ int operation; /** * Storage used for non-native ring requests. */ blkif_request_t ring_req_storage; /** * Pointer to the Xen request in the ring. */ blkif_request_t *ring_req; /** * Consumer index for this request. */ RING_IDX req_ring_idx; /** * The start time for this request. */ struct bintime ds_t0; /** * Pointer back to our parent request list. */ struct xbb_xen_reqlist *reqlist; }; SLIST_HEAD(xbb_xen_req_slist, xbb_xen_req); /** * \brief Configuration data for the shared memory request ring * used to communicate with the front-end client of this * this driver. */ struct xbb_ring_config { /** KVA address where ring memory is mapped. */ vm_offset_t va; /** The pseudo-physical address where ring memory is mapped.*/ uint64_t gnt_addr; /** * Grant table handles, one per-ring page, returned by the * hyperpervisor upon mapping of the ring and required to * unmap it when a connection is torn down. */ grant_handle_t handle[XBB_MAX_RING_PAGES]; /** * The device bus address returned by the hypervisor when * mapping the ring and required to unmap it when a connection * is torn down. */ uint64_t bus_addr[XBB_MAX_RING_PAGES]; /** The number of ring pages mapped for the current connection. */ u_int ring_pages; /** * The grant references, one per-ring page, supplied by the * front-end, allowing us to reference the ring pages in the * front-end's domain and to map these pages into our own domain. */ grant_ref_t ring_ref[XBB_MAX_RING_PAGES]; /** The interrupt driven even channel used to signal ring events. */ evtchn_port_t evtchn; }; /** * Per-instance connection state flags. */ typedef enum { /** * The front-end requested a read-only mount of the * back-end device/file. */ XBBF_READ_ONLY = 0x01, /** Communication with the front-end has been established. */ XBBF_RING_CONNECTED = 0x02, /** * Front-end requests exist in the ring and are waiting for * xbb_xen_req objects to free up. */ XBBF_RESOURCE_SHORTAGE = 0x04, /** Connection teardown in progress. */ XBBF_SHUTDOWN = 0x08, /** A thread is already performing shutdown processing. */ XBBF_IN_SHUTDOWN = 0x10 } xbb_flag_t; /** Backend device type. 
*/ typedef enum { /** Backend type unknown. */ XBB_TYPE_NONE = 0x00, /** * Backend type disk (access via cdev switch * strategy routine). */ XBB_TYPE_DISK = 0x01, /** Backend type file (access vnode operations.). */ XBB_TYPE_FILE = 0x02 } xbb_type; /** * \brief Structure used to memoize information about a per-request * scatter-gather list. * * The chief benefit of using this data structure is it avoids having * to reparse the possibly discontiguous S/G list in the original * request. Due to the way that the mapping of the memory backing an * I/O transaction is handled by Xen, a second pass is unavoidable. * At least this way the second walk is a simple array traversal. * * \note A single Scatter/Gather element in the block interface covers * at most 1 machine page. In this context a sector (blkif * nomenclature, not what I'd choose) is a 512b aligned unit * of mapping within the machine page referenced by an S/G * element. */ struct xbb_sg { /** The number of 512b data chunks mapped in this S/G element. */ int16_t nsect; /** * The index (0 based) of the first 512b data chunk mapped * in this S/G element. */ uint8_t first_sect; /** * The index (0 based) of the last 512b data chunk mapped * in this S/G element. */ uint8_t last_sect; }; /** * Character device backend specific configuration data. */ struct xbb_dev_data { /** Cdev used for device backend access. */ struct cdev *cdev; /** Cdev switch used for device backend access. */ struct cdevsw *csw; /** Used to hold a reference on opened cdev backend devices. */ int dev_ref; }; /** * File backend specific configuration data. */ struct xbb_file_data { /** Credentials to use for vnode backed (file based) I/O. */ struct ucred *cred; /** * \brief Array of io vectors used to process file based I/O. * * Only a single file based request is outstanding per-xbb instance, * so we only need one of these. */ struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; #ifdef XBB_USE_BOUNCE_BUFFERS /** * \brief Array of io vectors used to handle bouncing of file reads. * * Vnode operations are free to modify uio data during their * exectuion. In the case of a read with bounce buffering active, * we need some of the data from the original uio in order to * bounce-out the read data. This array serves as the temporary * storage for this saved data. */ struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; /** * \brief Array of memoized bounce buffer kva offsets used * in the file based backend. * * Due to the way that the mapping of the memory backing an * I/O transaction is handled by Xen, a second pass through * the request sg elements is unavoidable. We memoize the computed * bounce address here to reduce the cost of the second walk. */ void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQLIST]; #endif /* XBB_USE_BOUNCE_BUFFERS */ }; /** * Collection of backend type specific data. */ union xbb_backend_data { struct xbb_dev_data dev; struct xbb_file_data file; }; /** * Function signature of backend specific I/O handlers. */ typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int flags); /** * Per-instance configuration data. */ struct xbb_softc { /** * Task-queue used to process I/O requests. */ struct taskqueue *io_taskqueue; /** * Single "run the request queue" task enqueued * on io_taskqueue. */ struct task io_task; /** Device type for this instance. */ xbb_type device_type; /** NewBus device corresponding to this instance. */ device_t dev; /** Backend specific dispatch routine for this instance. 
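The struct xbb_sg memoization described above reduces each S/G element to three small indices. A tiny stand-alone example (hypothetical values) shows how first_sect and nsect translate back into a byte offset and length within one 4 KiB page:

#include <stdio.h>
#include <stdint.h>

/* Minimal stand-in for struct xbb_sg, for illustration only. */
struct sg_example {
        int16_t nsect;          /* number of 512b chunks mapped */
        uint8_t first_sect;     /* first 512b chunk within the page */
        uint8_t last_sect;      /* last 512b chunk within the page */
};

int
main(void)
{
        /* Hypothetical element: 512b chunks 2 through 5 of one 4 KiB page. */
        struct sg_example sg = { .nsect = 4, .first_sect = 2, .last_sect = 5 };

        printf("byte offset %u, length %u within the page\n",
            sg.first_sect * 512U, sg.nsect * 512U);
        return (0);
}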
*/ xbb_dispatch_t dispatch_io; /** The number of requests outstanding on the backend device/file. */ int active_request_count; /** Free pool of request tracking structures. */ struct xbb_xen_req_list request_free_stailq; /** Array, sized at connection time, of request tracking structures. */ struct xbb_xen_req *requests; /** Free pool of request list structures. */ struct xbb_xen_reqlist_list reqlist_free_stailq; /** List of pending request lists awaiting execution. */ struct xbb_xen_reqlist_list reqlist_pending_stailq; /** Array, sized at connection time, of request list structures. */ struct xbb_xen_reqlist *request_lists; /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. */ vm_offset_t kva; /** Psuedo-physical address corresponding to kva. */ uint64_t gnt_base_addr; /** The size of the global kva pool. */ int kva_size; /** The size of the KVA area used for request lists. */ int reqlist_kva_size; /** The number of pages of KVA used for request lists */ int reqlist_kva_pages; /** Bitmap of free KVA pages */ bitstr_t *kva_free; /** * \brief Cached value of the front-end's domain id. * * This value is used at once for each mapped page in * a transaction. We cache it to avoid incuring the * cost of an ivar access every time this is needed. */ domid_t otherend_id; /** * \brief The blkif protocol abi in effect. * * There are situations where the back and front ends can * have a different, native abi (e.g. intel x86_64 and * 32bit x86 domains on the same machine). The back-end * always accomodates the front-end's native abi. That * value is pulled from the XenStore and recorded here. */ int abi; /** * \brief The maximum number of requests and request lists allowed * to be in flight at a time. * * This value is negotiated via the XenStore. */ u_int max_requests; /** * \brief The maximum number of segments (1 page per segment) * that can be mapped by a request. * * This value is negotiated via the XenStore. */ u_int max_request_segments; /** * \brief Maximum number of segments per request list. * * This value is derived from and will generally be larger than * max_request_segments. */ u_int max_reqlist_segments; /** * The maximum size of any request to this back-end * device. * * This value is negotiated via the XenStore. */ u_int max_request_size; /** * The maximum size of any request list. This is derived directly * from max_reqlist_segments. */ u_int max_reqlist_size; /** Various configuration and state bit flags. */ xbb_flag_t flags; /** Ring mapping and interrupt configuration data. */ struct xbb_ring_config ring_config; /** Runtime, cross-abi safe, structures for ring access. */ blkif_back_rings_t rings; /** IRQ mapping for the communication ring event channel. */ - int irq; + xen_intr_handle_t xen_intr_handle; /** * \brief Backend access mode flags (e.g. write, or read-only). * * This value is passed to us by the front-end via the XenStore. */ char *dev_mode; /** * \brief Backend device type (e.g. "disk", "cdrom", "floppy"). * * This value is passed to us by the front-end via the XenStore. * Currently unused. */ char *dev_type; /** * \brief Backend device/file identifier. * * This value is passed to us by the front-end via the XenStore. * We expect this to be a POSIX path indicating the file or * device to open. */ char *dev_name; /** * Vnode corresponding to the backend device node or file * we are acessing. */ struct vnode *vn; union xbb_backend_data backend; /** The native sector size of the backend. */ u_int sector_size; /** log2 of sector_size. 
*/ u_int sector_size_shift; /** Size in bytes of the backend device or file. */ off_t media_size; /** * \brief media_size expressed in terms of the backend native * sector size. * * (e.g. xbb->media_size >> xbb->sector_size_shift). */ uint64_t media_num_sectors; /** * \brief Array of memoized scatter gather data computed during the * conversion of blkif ring requests to internal xbb_xen_req * structures. * * Ring processing is serialized so we only need one of these. */ struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQLIST]; /** * Temporary grant table map used in xbb_dispatch_io(). When * XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping this on the * stack could cause a stack overflow. */ struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQLIST]; /** Mutex protecting per-instance data. */ struct mtx lock; #ifdef XENHVM /** * Resource representing allocated physical address space * associated with our per-instance kva region. */ struct resource *pseudo_phys_res; /** Resource id for allocated physical address space. */ int pseudo_phys_res_id; #endif /** * I/O statistics from BlockBack dispatch down. These are * coalesced requests, and we start them right before execution. */ struct devstat *xbb_stats; /** * I/O statistics coming into BlockBack. These are the requests as * we get them from BlockFront. They are started as soon as we * receive a request, and completed when the I/O is complete. */ struct devstat *xbb_stats_in; /** Disable sending flush to the backend */ int disable_flush; /** Send a real flush for every N flush requests */ int flush_interval; /** Count of flush requests in the interval */ int flush_count; /** Don't coalesce requests if this is set */ int no_coalesce_reqs; /** Number of requests we have received */ uint64_t reqs_received; /** Number of requests we have completed*/ uint64_t reqs_completed; /** How many forced dispatches (i.e. without coalescing) have happend */ uint64_t forced_dispatch; /** How many normal dispatches have happend */ uint64_t normal_dispatch; /** How many total dispatches have happend */ uint64_t total_dispatch; /** How many times we have run out of KVA */ uint64_t kva_shortages; /** How many times we have run out of request structures */ uint64_t request_shortages; }; /*---------------------------- Request Processing ----------------------------*/ /** * Allocate an internal transaction tracking structure from the free pool. * * \param xbb Per-instance xbb configuration structure. * * \return On success, a pointer to the allocated xbb_xen_req structure. * Otherwise NULL. */ static inline struct xbb_xen_req * xbb_get_req(struct xbb_softc *xbb) { struct xbb_xen_req *req; req = NULL; mtx_assert(&xbb->lock, MA_OWNED); if ((req = STAILQ_FIRST(&xbb->request_free_stailq)) != NULL) { STAILQ_REMOVE_HEAD(&xbb->request_free_stailq, links); xbb->active_request_count++; } return (req); } /** * Return an allocated transaction tracking structure to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req The request structure to free. */ static inline void xbb_release_req(struct xbb_softc *xbb, struct xbb_xen_req *req) { mtx_assert(&xbb->lock, MA_OWNED); STAILQ_INSERT_HEAD(&xbb->request_free_stailq, req, links); xbb->active_request_count--; KASSERT(xbb->active_request_count >= 0, ("xbb_release_req: negative active count")); } /** * Return an xbb_xen_req_list of allocated xbb_xen_reqs to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req_list The list of requests to free. 
* \param nreqs The number of items in the list. */ static inline void xbb_release_reqs(struct xbb_softc *xbb, struct xbb_xen_req_list *req_list, int nreqs) { mtx_assert(&xbb->lock, MA_OWNED); STAILQ_CONCAT(&xbb->request_free_stailq, req_list); xbb->active_request_count -= nreqs; KASSERT(xbb->active_request_count >= 0, ("xbb_release_reqs: negative active count")); } /** * Given a page index and 512b sector offset within that page, * calculate an offset into a request's kva region. * * \param reqlist The request structure whose kva region will be accessed. * \param pagenr The page index used to compute the kva offset. * \param sector The 512b sector index used to compute the page relative * kva offset. * * \return The computed global KVA offset. */ static inline uint8_t * xbb_reqlist_vaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { return (reqlist->kva + (PAGE_SIZE * pagenr) + (sector << 9)); } #ifdef XBB_USE_BOUNCE_BUFFERS /** * Given a page index and 512b sector offset within that page, * calculate an offset into a request's local bounce memory region. * * \param reqlist The request structure whose bounce region will be accessed. * \param pagenr The page index used to compute the bounce offset. * \param sector The 512b sector index used to compute the page relative * bounce offset. * * \return The computed global bounce buffer address. */ static inline uint8_t * xbb_reqlist_bounce_addr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { return (reqlist->bounce + (PAGE_SIZE * pagenr) + (sector << 9)); } #endif /** * Given a page number and 512b sector offset within that page, * calculate an offset into the request's memory region that the * underlying backend device/file should use for I/O. * * \param reqlist The request structure whose I/O region will be accessed. * \param pagenr The page index used to compute the I/O offset. * \param sector The 512b sector index used to compute the page relative * I/O offset. * * \return The computed global I/O address. * * Depending on configuration, this will either be a local bounce buffer * or a pointer to the memory mapped in from the front-end domain for * this request. */ static inline uint8_t * xbb_reqlist_ioaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { #ifdef XBB_USE_BOUNCE_BUFFERS return (xbb_reqlist_bounce_addr(reqlist, pagenr, sector)); #else return (xbb_reqlist_vaddr(reqlist, pagenr, sector)); #endif } /** * Given a page index and 512b sector offset within that page, calculate * an offset into the local psuedo-physical address space used to map a * front-end's request data into a request. * * \param reqlist The request list structure whose pseudo-physical region * will be accessed. * \param pagenr The page index used to compute the pseudo-physical offset. * \param sector The 512b sector index used to compute the page relative * pseudo-physical offset. * * \return The computed global pseudo-phsyical address. * * Depending on configuration, this will either be a local bounce buffer * or a pointer to the memory mapped in from the front-end domain for * this request. */ static inline uintptr_t xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { struct xbb_softc *xbb; xbb = reqlist->xbb; return ((uintptr_t)(xbb->gnt_base_addr + (uintptr_t)(reqlist->kva - xbb->kva) + (PAGE_SIZE * pagenr) + (sector << 9))); } /** * Get Kernel Virtual Address space for mapping requests. * * \param xbb Per-instance xbb configuration structure. * \param nr_pages Number of pages needed. 
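The kva, bounce, and pseudo-physical helpers above all share one offset computation: a page-sized stride per mapped page plus a 512-byte stride per sector. A minimal sketch of just that arithmetic, with hypothetical indices and 4 KiB pages assumed:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096                               /* assumption: 4 KiB pages */

/* Offset-only version of xbb_reqlist_vaddr(): page index plus 512b sector index. */
static uintptr_t
reqlist_offset(int pagenr, int sector)
{
        return ((uintptr_t)PAGE_SIZE * pagenr + ((uintptr_t)sector << 9));
}

int
main(void)
{
        /* Hypothetical request: third mapped page, fourth 512b chunk within it. */
        printf("offset = %lu bytes\n", (unsigned long)reqlist_offset(2, 3));
        return (0);
}

With those values the offset is 2 * 4096 + 3 * 512 = 9728 bytes from the start of the request's kva (or bounce) region.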
* \param check_only If set, check for free KVA but don't allocate it. * \param have_lock If set, xbb lock is already held. * * \return On success, a pointer to the allocated KVA region. Otherwise NULL. * * Note: This should be unnecessary once we have either chaining or * scatter/gather support for struct bio. At that point we'll be able to * put multiple addresses and lengths in one bio/bio chain and won't need * to map everything into one virtual segment. */ static uint8_t * xbb_get_kva(struct xbb_softc *xbb, int nr_pages) { intptr_t first_clear; intptr_t num_clear; uint8_t *free_kva; int i; KASSERT(nr_pages != 0, ("xbb_get_kva of zero length")); first_clear = 0; free_kva = NULL; mtx_lock(&xbb->lock); /* * Look for the first available page. If there are none, we're done. */ bit_ffc(xbb->kva_free, xbb->reqlist_kva_pages, &first_clear); if (first_clear == -1) goto bailout; /* * Starting at the first available page, look for consecutive free * pages that will satisfy the user's request. */ for (i = first_clear, num_clear = 0; i < xbb->reqlist_kva_pages; i++) { /* * If this is true, the page is used, so we have to reset * the number of clear pages and the first clear page * (since it pointed to a region with an insufficient number * of clear pages). */ if (bit_test(xbb->kva_free, i)) { num_clear = 0; first_clear = -1; continue; } if (first_clear == -1) first_clear = i; /* * If this is true, we've found a large enough free region * to satisfy the request. */ if (++num_clear == nr_pages) { bit_nset(xbb->kva_free, first_clear, first_clear + nr_pages - 1); free_kva = xbb->kva + (uint8_t *)(first_clear * PAGE_SIZE); KASSERT(free_kva >= (uint8_t *)xbb->kva && free_kva + (nr_pages * PAGE_SIZE) <= (uint8_t *)xbb->ring_config.va, ("Free KVA %p len %d out of range, " "kva = %#jx, ring VA = %#jx\n", free_kva, nr_pages * PAGE_SIZE, (uintmax_t)xbb->kva, (uintmax_t)xbb->ring_config.va)); break; } } bailout: if (free_kva == NULL) { xbb->flags |= XBBF_RESOURCE_SHORTAGE; xbb->kva_shortages++; } mtx_unlock(&xbb->lock); return (free_kva); } /** * Free allocated KVA. * * \param xbb Per-instance xbb configuration structure. * \param kva_ptr Pointer to allocated KVA region. * \param nr_pages Number of pages in the KVA region. */ static void xbb_free_kva(struct xbb_softc *xbb, uint8_t *kva_ptr, int nr_pages) { intptr_t start_page; mtx_assert(&xbb->lock, MA_OWNED); start_page = (intptr_t)(kva_ptr - xbb->kva) >> PAGE_SHIFT; bit_nclear(xbb->kva_free, start_page, start_page + nr_pages - 1); } /** * Unmap the front-end pages associated with this I/O request. * * \param req The request structure to unmap. */ static void xbb_unmap_reqlist(struct xbb_xen_reqlist *reqlist) { struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQLIST]; u_int i; u_int invcount; int error; invcount = 0; for (i = 0; i < reqlist->nr_segments; i++) { if (reqlist->gnt_handles[i] == GRANT_REF_INVALID) continue; unmap[invcount].host_addr = xbb_get_gntaddr(reqlist, i, 0); unmap[invcount].dev_bus_addr = 0; unmap[invcount].handle = reqlist->gnt_handles[i]; reqlist->gnt_handles[i] = GRANT_REF_INVALID; invcount++; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, invcount); KASSERT(error == 0, ("Grant table operation failed")); } /** * Allocate an internal transaction tracking structure from the free pool. * * \param xbb Per-instance xbb configuration structure. * * \return On success, a pointer to the allocated xbb_xen_reqlist structure. * Otherwise NULL. 
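The KVA allocator in xbb_get_kva() above is a first-fit scan for a run of clear bits. The stand-alone sketch below mirrors that walk using a plain bool array in place of the kernel's bitstring(3) macros (bit_ffc/bit_test/bit_nset); the pool layout is hypothetical:

#include <stdbool.h>
#include <stdio.h>

/*
 * First-fit search for nr_pages consecutive clear (free) slots. Returns the
 * index of the first slot of the run and marks it allocated, or returns -1
 * if no run is large enough.
 */
static int
first_fit(bool *used, int total, int nr_pages)
{
        int first_clear = -1;
        int num_clear = 0;
        int i;

        for (i = 0; i < total; i++) {
                if (used[i]) {
                        /* Run broken: forget any partial run seen so far. */
                        num_clear = 0;
                        first_clear = -1;
                        continue;
                }
                if (first_clear == -1)
                        first_clear = i;
                if (++num_clear == nr_pages) {
                        /* Mark the run as allocated, as bit_nset() would. */
                        for (i = first_clear; i < first_clear + nr_pages; i++)
                                used[i] = true;
                        return (first_clear);
                }
        }
        return (-1);
}

int
main(void)
{
        /* Hypothetical 8-page pool with pages 0 and 3 already in use. */
        bool used[8] = { true, false, false, true, false, false, false, false };

        printf("3-page run starts at page %d\n", first_fit(used, 8, 3));
        return (0);
}

Here the scan skips the run of length two at pages 1-2 and settles on pages 4-6, printing 4.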
*/ static inline struct xbb_xen_reqlist * xbb_get_reqlist(struct xbb_softc *xbb) { struct xbb_xen_reqlist *reqlist; reqlist = NULL; mtx_assert(&xbb->lock, MA_OWNED); if ((reqlist = STAILQ_FIRST(&xbb->reqlist_free_stailq)) != NULL) { STAILQ_REMOVE_HEAD(&xbb->reqlist_free_stailq, links); reqlist->flags = XBB_REQLIST_NONE; reqlist->kva = NULL; reqlist->status = BLKIF_RSP_OKAY; reqlist->residual_512b_sectors = 0; reqlist->num_children = 0; reqlist->nr_segments = 0; STAILQ_INIT(&reqlist->contig_req_list); } return (reqlist); } /** * Return an allocated transaction tracking structure to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req The request list structure to free. * \param wakeup If set, wakeup the work thread if freeing this reqlist * during a resource shortage condition. */ static inline void xbb_release_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int wakeup) { mtx_lock(&xbb->lock); if (wakeup) { wakeup = xbb->flags & XBBF_RESOURCE_SHORTAGE; xbb->flags &= ~XBBF_RESOURCE_SHORTAGE; } if (reqlist->kva != NULL) xbb_free_kva(xbb, reqlist->kva, reqlist->nr_segments); xbb_release_reqs(xbb, &reqlist->contig_req_list, reqlist->num_children); STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); if ((xbb->flags & XBBF_SHUTDOWN) != 0) { /* * Shutdown is in progress. See if we can * progress further now that one more request * has completed and been returned to the * free pool. */ xbb_shutdown(xbb); } mtx_unlock(&xbb->lock); if (wakeup != 0) taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); } /** * Request resources and do basic request setup. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Pointer to reqlist pointer. * \param ring_req Pointer to a block ring request. * \param ring_index The ring index of this request. * * \return 0 for success, non-zero for failure. */ static int xbb_get_resources(struct xbb_softc *xbb, struct xbb_xen_reqlist **reqlist, blkif_request_t *ring_req, RING_IDX ring_idx) { struct xbb_xen_reqlist *nreqlist; struct xbb_xen_req *nreq; nreqlist = NULL; nreq = NULL; mtx_lock(&xbb->lock); /* * We don't allow new resources to be allocated if we're in the * process of shutting down. */ if ((xbb->flags & XBBF_SHUTDOWN) != 0) { mtx_unlock(&xbb->lock); return (1); } /* * Allocate a reqlist if the caller doesn't have one already. */ if (*reqlist == NULL) { nreqlist = xbb_get_reqlist(xbb); if (nreqlist == NULL) goto bailout_error; } /* We always allocate a request. */ nreq = xbb_get_req(xbb); if (nreq == NULL) goto bailout_error; mtx_unlock(&xbb->lock); if (*reqlist == NULL) { *reqlist = nreqlist; nreqlist->operation = ring_req->operation; nreqlist->starting_sector_number = ring_req->sector_number; STAILQ_INSERT_TAIL(&xbb->reqlist_pending_stailq, nreqlist, links); } nreq->reqlist = *reqlist; nreq->req_ring_idx = ring_idx; if (xbb->abi != BLKIF_PROTOCOL_NATIVE) { bcopy(ring_req, &nreq->ring_req_storage, sizeof(*ring_req)); nreq->ring_req = &nreq->ring_req_storage; } else { nreq->ring_req = ring_req; } binuptime(&nreq->ds_t0); devstat_start_transaction(xbb->xbb_stats_in, &nreq->ds_t0); STAILQ_INSERT_TAIL(&(*reqlist)->contig_req_list, nreq, links); (*reqlist)->num_children++; (*reqlist)->nr_segments += ring_req->nr_segments; return (0); bailout_error: /* * We're out of resources, so set the shortage flag. The next time * a request is released, we'll try waking up the work thread to * see if we can allocate more resources. 
*/ xbb->flags |= XBBF_RESOURCE_SHORTAGE; xbb->request_shortages++; if (nreq != NULL) xbb_release_req(xbb, nreq); mtx_unlock(&xbb->lock); if (nreqlist != NULL) xbb_release_reqlist(xbb, nreqlist, /*wakeup*/ 0); return (1); } /** * Create and transmit a response to a blkif request. * * \param xbb Per-instance xbb configuration structure. * \param req The request structure to which to respond. * \param status The status code to report. See BLKIF_RSP_* * in sys/xen/interface/io/blkif.h. */ static void xbb_send_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) { blkif_response_t *resp; int more_to_do; int notify; more_to_do = 0; /* * Place on the response ring for the relevant domain. * For now, only the spacing between entries is different * in the different ABIs, not the response entry layout. */ mtx_lock(&xbb->lock); switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: resp = RING_GET_RESPONSE(&xbb->rings.native, xbb->rings.native.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_32: resp = (blkif_response_t *) RING_GET_RESPONSE(&xbb->rings.x86_32, xbb->rings.x86_32.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_64: resp = (blkif_response_t *) RING_GET_RESPONSE(&xbb->rings.x86_64, xbb->rings.x86_64.rsp_prod_pvt); break; default: panic("Unexpected blkif protocol ABI."); } resp->id = req->id; resp->operation = req->operation; resp->status = status; xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages); RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, notify); if (xbb->rings.common.rsp_prod_pvt == xbb->rings.common.req_cons) { /* * Tail check for pending requests. Allows frontend to avoid * notifications if requests are already in flight (lower * overheads and promotes batching). */ RING_FINAL_CHECK_FOR_REQUESTS(&xbb->rings.common, more_to_do); } else if (RING_HAS_UNCONSUMED_REQUESTS(&xbb->rings.common)) { more_to_do = 1; } xbb->reqs_completed++; mtx_unlock(&xbb->lock); if (more_to_do) taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); if (notify) - notify_remote_via_irq(xbb->irq); + xen_intr_signal(xbb->xen_intr_handle); } /** * Complete a request list. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. */ static void xbb_complete_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) { struct xbb_xen_req *nreq; off_t sectors_sent; sectors_sent = 0; if (reqlist->flags & XBB_REQLIST_MAPPED) xbb_unmap_reqlist(reqlist); /* * All I/O is done, send the response. A lock should not be * necessary here because the request list is complete, and * therefore this is the only context accessing this request * right now. The functions we call do their own locking if * necessary. */ STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { off_t cur_sectors_sent; xbb_send_response(xbb, nreq, reqlist->status); /* We don't report bytes sent if there is an error. */ if (reqlist->status == BLKIF_RSP_OKAY) cur_sectors_sent = nreq->nr_512b_sectors; else cur_sectors_sent = 0; sectors_sent += cur_sectors_sent; devstat_end_transaction(xbb->xbb_stats_in, /*bytes*/cur_sectors_sent << 9, reqlist->ds_tag_type, reqlist->ds_trans_type, /*now*/NULL, /*then*/&nreq->ds_t0); } /* * Take out any sectors not sent. If we wind up negative (which * might happen if an error is reported as well as a residual), just * report 0 sectors sent. 
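 *
 * All of this accounting is done in 512-byte sectors and converted to
 * bytes with a shift of 9.  As a worked example (numbers purely
 * illustrative):
 *
 *	sectors_sent = 24;		// summed over the child requests
 *	residual     = 32;		// residual exceeds what was sent
 *	sectors_sent -= residual;	// -8
 *	if (sectors_sent < 0)
 *		sectors_sent = 0;	// clamp: report nothing sent
 *	bytes = sectors_sent << 9;	// 0 bytes reported to devstat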
*/ sectors_sent -= reqlist->residual_512b_sectors; if (sectors_sent < 0) sectors_sent = 0; devstat_end_transaction(xbb->xbb_stats, /*bytes*/ sectors_sent << 9, reqlist->ds_tag_type, reqlist->ds_trans_type, /*now*/NULL, /*then*/&reqlist->ds_t0); xbb_release_reqlist(xbb, reqlist, /*wakeup*/ 1); } /** * Completion handler for buffer I/O requests issued by the device * backend driver. * * \param bio The buffer I/O request on which to perform completion * processing. */ static void xbb_bio_done(struct bio *bio) { struct xbb_softc *xbb; struct xbb_xen_reqlist *reqlist; reqlist = bio->bio_caller1; xbb = reqlist->xbb; reqlist->residual_512b_sectors += bio->bio_resid >> 9; /* * This is a bit imprecise. With aggregated I/O a single * request list can contain multiple front-end requests and * a multiple bios may point to a single request. By carefully * walking the request list, we could map residuals and errors * back to the original front-end request, but the interface * isn't sufficiently rich for us to properly report the error. * So, we just treat the entire request list as having failed if an * error occurs on any part. And, if an error occurs, we treat * the amount of data transferred as 0. * * For residuals, we report it on the overall aggregated device, * but not on the individual requests, since we don't currently * do the work to determine which front-end request to which the * residual applies. */ if (bio->bio_error) { DPRINTF("BIO returned error %d for operation on device %s\n", bio->bio_error, xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; if (bio->bio_error == ENXIO && xenbus_get_state(xbb->dev) == XenbusStateConnected) { /* * Backend device has disappeared. Signal the * front-end that we (the device proxy) want to * go away. */ xenbus_set_state(xbb->dev, XenbusStateClosing); } } #ifdef XBB_USE_BOUNCE_BUFFERS if (bio->bio_cmd == BIO_READ) { vm_offset_t kva_offset; kva_offset = (vm_offset_t)bio->bio_data - (vm_offset_t)reqlist->bounce; memcpy((uint8_t *)reqlist->kva + kva_offset, bio->bio_data, bio->bio_bcount); } #endif /* XBB_USE_BOUNCE_BUFFERS */ /* * Decrement the pending count for the request list. When we're * done with the requests, send status back for all of them. */ if (atomic_fetchadd_int(&reqlist->pendcnt, -1) == 1) xbb_complete_reqlist(xbb, reqlist); g_destroy_bio(bio); } /** * Parse a blkif request into an internal request structure and send * it to the backend for processing. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. * * \return On success, 0. For resource shortages, non-zero. * * This routine performs the backend common aspects of request parsing * including compiling an internal request structure, parsing the S/G * list and any secondary ring requests in which they may reside, and * the mapping of front-end I/O pages into our domain. */ static int xbb_dispatch_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) { struct xbb_sg *xbb_sg; struct gnttab_map_grant_ref *map; struct blkif_request_segment *sg; struct blkif_request_segment *last_block_sg; struct xbb_xen_req *nreq; u_int nseg; u_int seg_idx; u_int block_segs; int nr_sects; int total_sects; int operation; uint8_t bio_flags; int error; reqlist->ds_tag_type = DEVSTAT_TAG_SIMPLE; bio_flags = 0; total_sects = 0; nr_sects = 0; /* * First determine whether we have enough free KVA to satisfy this * request list. If not, tell xbb_run_queue() so it can go to * sleep until we have more KVA. 
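 *
 * xbb_get_kva() implements that check as a first-fit scan over a
 * bitstring(3) bitmap with one bit per page of reqlist KVA.  Stripped
 * of locking and the driver's bookkeeping, the scan reduces to the
 * following (hypothetical standalone form):
 *
 *	#include <sys/bitstring.h>
 *
 *	bitstr_t *map;		// set bit == page in use
 *	int	  map_pages;	// pages covered by the bitmap
 *
 *	// Claim the first run of n clear bits; return its index or -1.
 *	static int
 *	claim_free_run(int n)
 *	{
 *		int first, i, run;
 *
 *		bit_ffc(map, map_pages, &first);
 *		if (first == -1)
 *			return (-1);
 *		for (i = first, run = 0; i < map_pages; i++) {
 *			if (bit_test(map, i)) {
 *				run = 0;
 *				first = -1;
 *				continue;
 *			}
 *			if (first == -1)
 *				first = i;
 *			if (++run == n) {
 *				bit_nset(map, first, first + n - 1);
 *				return (first);
 *			}
 *		}
 *		return (-1);
 *	}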
*/ reqlist->kva = NULL; if (reqlist->nr_segments != 0) { reqlist->kva = xbb_get_kva(xbb, reqlist->nr_segments); if (reqlist->kva == NULL) { /* * If we're out of KVA, return ENOMEM. */ return (ENOMEM); } } binuptime(&reqlist->ds_t0); devstat_start_transaction(xbb->xbb_stats, &reqlist->ds_t0); switch (reqlist->operation) { case BLKIF_OP_WRITE_BARRIER: bio_flags |= BIO_ORDERED; reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; /* FALLTHROUGH */ case BLKIF_OP_WRITE: operation = BIO_WRITE; reqlist->ds_trans_type = DEVSTAT_WRITE; if ((xbb->flags & XBBF_READ_ONLY) != 0) { DPRINTF("Attempt to write to read only device %s\n", xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } break; case BLKIF_OP_READ: operation = BIO_READ; reqlist->ds_trans_type = DEVSTAT_READ; break; case BLKIF_OP_FLUSH_DISKCACHE: /* * If this is true, the user has requested that we disable * flush support. So we just complete the requests * successfully. */ if (xbb->disable_flush != 0) { goto send_response; } /* * The user has requested that we only send a real flush * for every N flush requests. So keep count, and either * complete the request immediately or queue it for the * backend. */ if (xbb->flush_interval != 0) { if (++(xbb->flush_count) < xbb->flush_interval) { goto send_response; } else xbb->flush_count = 0; } operation = BIO_FLUSH; reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; reqlist->ds_trans_type = DEVSTAT_NO_DATA; goto do_dispatch; /*NOTREACHED*/ default: DPRINTF("error: unknown block io operation [%d]\n", reqlist->operation); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } reqlist->xbb = xbb; xbb_sg = xbb->xbb_sgs; map = xbb->maps; seg_idx = 0; STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { blkif_request_t *ring_req; RING_IDX req_ring_idx; u_int req_seg_idx; ring_req = nreq->ring_req; req_ring_idx = nreq->req_ring_idx; nr_sects = 0; nseg = ring_req->nr_segments; nreq->id = ring_req->id; nreq->nr_pages = nseg; nreq->nr_512b_sectors = 0; req_seg_idx = 0; sg = NULL; /* Check that number of segments is sane. */ - if (unlikely(nseg == 0) - || unlikely(nseg > xbb->max_request_segments)) { + if (__predict_false(nseg == 0) + || __predict_false(nseg > xbb->max_request_segments)) { DPRINTF("Bad number of segments in request (%d)\n", nseg); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } block_segs = MIN(nreq->nr_pages, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); sg = ring_req->seg; last_block_sg = sg + block_segs; while (1) { while (sg < last_block_sg) { KASSERT(seg_idx < XBB_MAX_SEGMENTS_PER_REQLIST, ("seg_idx %d is too large, max " "segs %d\n", seg_idx, XBB_MAX_SEGMENTS_PER_REQLIST)); xbb_sg->first_sect = sg->first_sect; xbb_sg->last_sect = sg->last_sect; xbb_sg->nsect = (int8_t)(sg->last_sect - sg->first_sect + 1); if ((sg->last_sect >= (PAGE_SIZE >> 9)) || (xbb_sg->nsect <= 0)) { reqlist->status = BLKIF_RSP_ERROR; goto send_response; } nr_sects += xbb_sg->nsect; map->host_addr = xbb_get_gntaddr(reqlist, seg_idx, /*sector*/0); KASSERT(map->host_addr + PAGE_SIZE <= xbb->ring_config.gnt_addr, ("Host address %#jx len %d overlaps " "ring address %#jx\n", (uintmax_t)map->host_addr, PAGE_SIZE, (uintmax_t)xbb->ring_config.gnt_addr)); map->flags = GNTMAP_host_map; map->ref = sg->gref; map->dom = xbb->otherend_id; if (operation == BIO_WRITE) map->flags |= GNTMAP_readonly; sg++; map++; xbb_sg++; seg_idx++; req_seg_idx++; } block_segs = MIN(nseg - req_seg_idx, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); if (block_segs == 0) break; /* * Fetch the next request block full of SG elements. 
* For now, only the spacing between entries is * different in the different ABIs, not the sg entry * layout. */ req_ring_idx++; switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.native, req_ring_idx); break; case BLKIF_PROTOCOL_X86_32: { sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_32, req_ring_idx); break; } case BLKIF_PROTOCOL_X86_64: { sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_64, req_ring_idx); break; } default: panic("Unexpected blkif protocol ABI."); /* NOTREACHED */ } last_block_sg = sg + block_segs; } /* Convert to the disk's sector size */ nreq->nr_512b_sectors = nr_sects; nr_sects = (nr_sects << 9) >> xbb->sector_size_shift; total_sects += nr_sects; if ((nreq->nr_512b_sectors & ((xbb->sector_size >> 9) - 1)) != 0) { device_printf(xbb->dev, "%s: I/O size (%d) is not " "a multiple of the backing store sector " "size (%d)\n", __func__, nreq->nr_512b_sectors << 9, xbb->sector_size); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } } error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, xbb->maps, reqlist->nr_segments); if (error != 0) panic("Grant table operation failed (%d)", error); reqlist->flags |= XBB_REQLIST_MAPPED; for (seg_idx = 0, map = xbb->maps; seg_idx < reqlist->nr_segments; seg_idx++, map++){ - if (unlikely(map->status != 0)) { + if (__predict_false(map->status != 0)) { DPRINTF("invalid buffer -- could not remap " "it (%d)\n", map->status); DPRINTF("Mapping(%d): Host Addr 0x%lx, flags " "0x%x ref 0x%x, dom %d\n", seg_idx, map->host_addr, map->flags, map->ref, map->dom); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } reqlist->gnt_handles[seg_idx] = map->handle; } if (reqlist->starting_sector_number + total_sects > xbb->media_num_sectors) { DPRINTF("%s of [%" PRIu64 ",%" PRIu64 "] " "extends past end of device %s\n", operation == BIO_READ ? "read" : "write", reqlist->starting_sector_number, reqlist->starting_sector_number + total_sects, xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } do_dispatch: error = xbb->dispatch_io(xbb, reqlist, operation, bio_flags); if (error != 0) { reqlist->status = BLKIF_RSP_ERROR; goto send_response; } return (0); send_response: xbb_complete_reqlist(xbb, reqlist); return (0); } static __inline int xbb_count_sects(blkif_request_t *ring_req) { int i; int cur_size = 0; for (i = 0; i < ring_req->nr_segments; i++) { int nsect; nsect = (int8_t)(ring_req->seg[i].last_sect - ring_req->seg[i].first_sect + 1); if (nsect <= 0) break; cur_size += nsect; } return (cur_size); } /** * Process incoming requests from the shared communication ring in response * to a signal on the ring's event channel. * * \param context Callback argument registerd during task initialization - * the xbb_softc for this instance. * \param pending The number of taskqueue_enqueue events that have * occurred since this handler was last run. */ static void xbb_run_queue(void *context, int pending) { struct xbb_softc *xbb; blkif_back_rings_t *rings; RING_IDX rp; uint64_t cur_sector; int cur_operation; struct xbb_xen_reqlist *reqlist; xbb = (struct xbb_softc *)context; rings = &xbb->rings; /* * Work gather and dispatch loop. Note that we have a bias here * towards gathering I/O sent by blockfront. We first gather up * everything in the ring, as long as we have resources. Then we * dispatch one request, and then attempt to gather up any * additional requests that have come in while we were dispatching * the request. 
* * This allows us to get a clearer picture (via devstat) of how * many requests blockfront is queueing to us at any given time. */ for (;;) { int retval; /* * Initialize reqlist to the last element in the pending * queue, if there is one. This allows us to add more * requests to that request list, if we have room. */ reqlist = STAILQ_LAST(&xbb->reqlist_pending_stailq, xbb_xen_reqlist, links); if (reqlist != NULL) { cur_sector = reqlist->next_contig_sector; cur_operation = reqlist->operation; } else { cur_operation = 0; cur_sector = 0; } /* * Cache req_prod to avoid accessing a cache line shared * with the frontend. */ rp = rings->common.sring->req_prod; /* Ensure we see queued requests up to 'rp'. */ rmb(); /** * Run so long as there is work to consume and the generation * of a response will not overflow the ring. * * @note There's a 1 to 1 relationship between requests and * responses, so an overflow should never occur. This * test is to protect our domain from digesting bogus * data. Shouldn't we log this? */ while (rings->common.req_cons != rp && RING_REQUEST_CONS_OVERFLOW(&rings->common, rings->common.req_cons) == 0){ blkif_request_t ring_req_storage; blkif_request_t *ring_req; int cur_size; switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: ring_req = RING_GET_REQUEST(&xbb->rings.native, rings->common.req_cons); break; case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_request *ring_req32; ring_req32 = RING_GET_REQUEST( &xbb->rings.x86_32, rings->common.req_cons); blkif_get_x86_32_req(&ring_req_storage, ring_req32); ring_req = &ring_req_storage; break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_request *ring_req64; ring_req64 =RING_GET_REQUEST(&xbb->rings.x86_64, rings->common.req_cons); blkif_get_x86_64_req(&ring_req_storage, ring_req64); ring_req = &ring_req_storage; break; } default: panic("Unexpected blkif protocol ABI."); /* NOTREACHED */ } /* * Check for situations that would require closing * off this I/O for further coalescing: * - Coalescing is turned off. * - Current I/O is out of sequence with the previous * I/O. * - Coalesced I/O would be too large. */ if ((reqlist != NULL) && ((xbb->no_coalesce_reqs != 0) || ((xbb->no_coalesce_reqs == 0) && ((ring_req->sector_number != cur_sector) || (ring_req->operation != cur_operation) || ((ring_req->nr_segments + reqlist->nr_segments) > xbb->max_reqlist_segments))))) { reqlist = NULL; } /* * Grab and check for all resources in one shot. * If we can't get all of the resources we need, * the shortage is noted and the thread will get * woken up when more resources are available. */ retval = xbb_get_resources(xbb, &reqlist, ring_req, xbb->rings.common.req_cons); if (retval != 0) { /* * Resource shortage has been recorded. * We'll be scheduled to run once a request * object frees up due to a completion. */ break; } /* * Signify that we can overwrite this request with * a response by incrementing our consumer index. * The response won't be generated until after * we've already consumed all necessary data out * of the version of the request in the ring buffer * (for native mode). We must update the consumer * index before issueing back-end I/O so there is * no possibility that it will complete and a * response be generated before we make room in * the queue for that response. 
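 *
 * Taken together, this loop is the standard Xen shared-ring consumer
 * idiom: snapshot the producer index once, issue a read barrier, then
 * consume and advance req_cons before acting on each entry.
 * Condensed (native ABI only, bookkeeping omitted):
 *
 *	rp = rings->common.sring->req_prod;
 *	rmb();			// see all requests published up to rp
 *	while (rings->common.req_cons != rp &&
 *	       RING_REQUEST_CONS_OVERFLOW(&rings->common,
 *	       rings->common.req_cons) == 0) {
 *		ring_req = RING_GET_REQUEST(&xbb->rings.native,
 *		    rings->common.req_cons);
 *		rings->common.req_cons +=
 *		    BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments);
 *		// ... gather the request for later dispatch ...
 *	}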
*/ xbb->rings.common.req_cons += BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments); xbb->reqs_received++; cur_size = xbb_count_sects(ring_req); cur_sector = ring_req->sector_number + cur_size; reqlist->next_contig_sector = cur_sector; cur_operation = ring_req->operation; } /* Check for I/O to dispatch */ reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); if (reqlist == NULL) { /* * We're out of work to do, put the task queue to * sleep. */ break; } /* * Grab the first request off the queue and attempt * to dispatch it. */ STAILQ_REMOVE_HEAD(&xbb->reqlist_pending_stailq, links); retval = xbb_dispatch_io(xbb, reqlist); if (retval != 0) { /* * xbb_dispatch_io() returns non-zero only when * there is a resource shortage. If that's the * case, re-queue this request on the head of the * queue, and go to sleep until we have more * resources. */ STAILQ_INSERT_HEAD(&xbb->reqlist_pending_stailq, reqlist, links); break; } else { /* * If we still have anything on the queue after * removing the head entry, that is because we * met one of the criteria to create a new * request list (outlined above), and we'll call * that a forced dispatch for statistical purposes. * * Otherwise, if there is only one element on the * queue, we coalesced everything available on * the ring and we'll call that a normal dispatch. */ reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); if (reqlist != NULL) xbb->forced_dispatch++; else xbb->normal_dispatch++; xbb->total_dispatch++; } } } /** * Interrupt handler bound to the shared ring's event channel. * * \param arg Callback argument registerd during event channel * binding - the xbb_softc for this instance. */ -static void -xbb_intr(void *arg) +static int +xbb_filter(void *arg) { struct xbb_softc *xbb; - /* Defer to kernel thread. */ + /* Defer to taskqueue thread. */ xbb = (struct xbb_softc *)arg; taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); + + return (FILTER_HANDLED); } SDT_PROVIDER_DEFINE(xbb); SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, flush, "int"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, read, "int", "uint64_t", "uint64_t"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, write, "int", "uint64_t", "uint64_t"); /*----------------------------- Backend Handlers -----------------------------*/ /** * Backend handler for character device access. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. * \param operation BIO_* I/O operation code. * \param bio_flags Additional bio_flag data to pass to any generated * bios (e.g. BIO_ORDERED).. * * \return 0 for success, errno codes for failure. 
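 *
 * At its core the device dispatch path simply translates each
 * contiguous run of mapped segments into a struct bio and hands it to
 * the cdev's strategy routine, letting the completion callback do the
 * accounting.  A minimal single-segment sketch (offset, kva and length
 * stand in for the values computed below; error handling omitted):
 *
 *	struct bio *bio;
 *
 *	bio = g_new_bio();
 *	bio->bio_cmd     = BIO_READ;
 *	bio->bio_dev     = dev_data->cdev;
 *	bio->bio_offset  = offset;		// byte offset into device
 *	bio->bio_data    = kva;			// mapped front-end pages
 *	bio->bio_length  = length;
 *	bio->bio_bcount  = bio->bio_length;
 *	bio->bio_pblkno  = offset >> xbb->sector_size_shift;
 *	bio->bio_done    = xbb_bio_done;	// completion callback
 *	bio->bio_caller1 = reqlist;
 *	(*dev_data->csw->d_strategy)(bio);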
*/ static int xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int bio_flags) { struct xbb_dev_data *dev_data; struct bio *bios[XBB_MAX_SEGMENTS_PER_REQLIST]; struct xbb_xen_req *nreq; off_t bio_offset; struct bio *bio; struct xbb_sg *xbb_sg; u_int nbio; u_int bio_idx; u_int nseg; u_int seg_idx; int error; dev_data = &xbb->backend.dev; bio_offset = (off_t)reqlist->starting_sector_number << xbb->sector_size_shift; error = 0; nbio = 0; bio_idx = 0; if (operation == BIO_FLUSH) { nreq = STAILQ_FIRST(&reqlist->contig_req_list); bio = g_new_bio(); - if (unlikely(bio == NULL)) { + if (__predict_false(bio == NULL)) { DPRINTF("Unable to allocate bio for BIO_FLUSH\n"); error = ENOMEM; return (error); } bio->bio_cmd = BIO_FLUSH; bio->bio_flags |= BIO_ORDERED; bio->bio_dev = dev_data->cdev; bio->bio_offset = 0; bio->bio_data = 0; bio->bio_done = xbb_bio_done; bio->bio_caller1 = nreq; bio->bio_pblkno = 0; nreq->pendcnt = 1; SDT_PROBE1(xbb, kernel, xbb_dispatch_dev, flush, device_get_unit(xbb->dev)); (*dev_data->csw->d_strategy)(bio); return (0); } xbb_sg = xbb->xbb_sgs; bio = NULL; nseg = reqlist->nr_segments; for (seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { /* * KVA will not be contiguous, so any additional * I/O will need to be represented in a new bio. */ if ((bio != NULL) && (xbb_sg->first_sect != 0)) { if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { printf("%s: Discontiguous I/O request " "from domain %d ends on " "non-sector boundary\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } bio = NULL; } if (bio == NULL) { /* * Make sure that the start of this bio is * aligned to a device sector. */ if ((bio_offset & (xbb->sector_size - 1)) != 0){ printf("%s: Misaligned I/O request " "from domain %d\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } bio = bios[nbio++] = g_new_bio(); - if (unlikely(bio == NULL)) { + if (__predict_false(bio == NULL)) { error = ENOMEM; goto fail_free_bios; } bio->bio_cmd = operation; bio->bio_flags |= bio_flags; bio->bio_dev = dev_data->cdev; bio->bio_offset = bio_offset; bio->bio_data = xbb_reqlist_ioaddr(reqlist, seg_idx, xbb_sg->first_sect); bio->bio_done = xbb_bio_done; bio->bio_caller1 = reqlist; bio->bio_pblkno = bio_offset >> xbb->sector_size_shift; } bio->bio_length += xbb_sg->nsect << 9; bio->bio_bcount = bio->bio_length; bio_offset += xbb_sg->nsect << 9; if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) { if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { printf("%s: Discontiguous I/O request " "from domain %d ends on " "non-sector boundary\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } /* * KVA will not be contiguous, so any additional * I/O will need to be represented in a new bio. 
*/ bio = NULL; } } reqlist->pendcnt = nbio; for (bio_idx = 0; bio_idx < nbio; bio_idx++) { #ifdef XBB_USE_BOUNCE_BUFFERS vm_offset_t kva_offset; kva_offset = (vm_offset_t)bios[bio_idx]->bio_data - (vm_offset_t)reqlist->bounce; if (operation == BIO_WRITE) { memcpy(bios[bio_idx]->bio_data, (uint8_t *)reqlist->kva + kva_offset, bios[bio_idx]->bio_bcount); } #endif if (operation == BIO_READ) { SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, read, device_get_unit(xbb->dev), bios[bio_idx]->bio_offset, bios[bio_idx]->bio_length); } else if (operation == BIO_WRITE) { SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, write, device_get_unit(xbb->dev), bios[bio_idx]->bio_offset, bios[bio_idx]->bio_length); } (*dev_data->csw->d_strategy)(bios[bio_idx]); } return (error); fail_free_bios: for (bio_idx = 0; bio_idx < (nbio-1); bio_idx++) g_destroy_bio(bios[bio_idx]); return (error); } SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, flush, "int"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, read, "int", "uint64_t", "uint64_t"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, write, "int", "uint64_t", "uint64_t"); /** * Backend handler for file access. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list. * \param operation BIO_* I/O operation code. * \param flags Additional bio_flag data to pass to any generated bios * (e.g. BIO_ORDERED).. * * \return 0 for success, errno codes for failure. */ static int xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int flags) { struct xbb_file_data *file_data; u_int seg_idx; u_int nseg; off_t sectors_sent; struct uio xuio; struct xbb_sg *xbb_sg; struct iovec *xiovec; #ifdef XBB_USE_BOUNCE_BUFFERS void **p_vaddr; int saved_uio_iovcnt; #endif /* XBB_USE_BOUNCE_BUFFERS */ int error; file_data = &xbb->backend.file; sectors_sent = 0; error = 0; bzero(&xuio, sizeof(xuio)); switch (operation) { case BIO_READ: xuio.uio_rw = UIO_READ; break; case BIO_WRITE: xuio.uio_rw = UIO_WRITE; break; case BIO_FLUSH: { struct mount *mountpoint; SDT_PROBE1(xbb, kernel, xbb_dispatch_file, flush, device_get_unit(xbb->dev)); (void) vn_start_write(xbb->vn, &mountpoint, V_WAIT); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(xbb->vn, MNT_WAIT, curthread); VOP_UNLOCK(xbb->vn, 0); vn_finished_write(mountpoint); goto bailout_send_response; /* NOTREACHED */ } default: panic("invalid operation %d", operation); /* NOTREACHED */ } xuio.uio_offset = (vm_offset_t)reqlist->starting_sector_number << xbb->sector_size_shift; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = file_data->xiovecs; xuio.uio_iovcnt = 0; xbb_sg = xbb->xbb_sgs; nseg = reqlist->nr_segments; for (xiovec = NULL, seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { /* * If the first sector is not 0, the KVA will * not be contiguous and we'll need to go on * to another segment. */ if (xbb_sg->first_sect != 0) xiovec = NULL; if (xiovec == NULL) { xiovec = &file_data->xiovecs[xuio.uio_iovcnt]; xiovec->iov_base = xbb_reqlist_ioaddr(reqlist, seg_idx, xbb_sg->first_sect); #ifdef XBB_USE_BOUNCE_BUFFERS /* * Store the address of the incoming * buffer at this particular offset * as well, so we can do the copy * later without having to do more * work to recalculate this address. 
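 *
 * The copy referred to here happens on either side of the VOP call:
 * for a write the data is copied from the saved front-end address into
 * the bounce buffer before VOP_WRITE(), and for a read it is copied
 * back out after VOP_READ().  Per iovec, schematically:
 *
 *	if (operation == BIO_WRITE)	// before the VOP call
 *		memcpy(xiovec->iov_base, *p_vaddr, xiovec->iov_len);
 *	...
 *	if (operation == BIO_READ)	// after the VOP call
 *		memcpy(*p_vaddr, xiovec->iov_base, xiovec->iov_len);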
*/ p_vaddr = &file_data->xiovecs_vaddr[xuio.uio_iovcnt]; *p_vaddr = xbb_reqlist_vaddr(reqlist, seg_idx, xbb_sg->first_sect); #endif /* XBB_USE_BOUNCE_BUFFERS */ xiovec->iov_len = 0; xuio.uio_iovcnt++; } xiovec->iov_len += xbb_sg->nsect << 9; xuio.uio_resid += xbb_sg->nsect << 9; /* * If the last sector is not the full page * size count, the next segment will not be * contiguous in KVA and we need a new iovec. */ if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) xiovec = NULL; } xuio.uio_td = curthread; #ifdef XBB_USE_BOUNCE_BUFFERS saved_uio_iovcnt = xuio.uio_iovcnt; if (operation == BIO_WRITE) { /* Copy the write data to the local buffer. */ for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, xiovec = xuio.uio_iov; seg_idx < xuio.uio_iovcnt; seg_idx++, xiovec++, p_vaddr++) { memcpy(xiovec->iov_base, *p_vaddr, xiovec->iov_len); } } else { /* * We only need to save off the iovecs in the case of a * read, because the copy for the read happens after the * VOP_READ(). (The uio will get modified in that call * sequence.) */ memcpy(file_data->saved_xiovecs, xuio.uio_iov, xuio.uio_iovcnt * sizeof(xuio.uio_iov[0])); } #endif /* XBB_USE_BOUNCE_BUFFERS */ switch (operation) { case BIO_READ: SDT_PROBE3(xbb, kernel, xbb_dispatch_file, read, device_get_unit(xbb->dev), xuio.uio_offset, xuio.uio_resid); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); /* * UFS pays attention to IO_DIRECT for reads. If the * DIRECTIO option is configured into the kernel, it calls * ffs_rawread(). But that only works for single-segment * uios with user space addresses. In our case, with a * kernel uio, it still reads into the buffer cache, but it * will just try to release the buffer from the cache later * on in ffs_read(). * * ZFS does not pay attention to IO_DIRECT for reads. * * UFS does not pay attention to IO_SYNC for reads. * * ZFS pays attention to IO_SYNC (which translates into the * Solaris define FRSYNC for zfs_read()) for reads. It * attempts to sync the file before reading. * * So, to attempt to provide some barrier semantics in the * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. */ error = VOP_READ(xbb->vn, &xuio, (flags & BIO_ORDERED) ? (IO_DIRECT|IO_SYNC) : 0, file_data->cred); VOP_UNLOCK(xbb->vn, 0); break; case BIO_WRITE: { struct mount *mountpoint; SDT_PROBE3(xbb, kernel, xbb_dispatch_file, write, device_get_unit(xbb->dev), xuio.uio_offset, xuio.uio_resid); (void)vn_start_write(xbb->vn, &mountpoint, V_WAIT); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); /* * UFS pays attention to IO_DIRECT for writes. The write * is done asynchronously. (Normally the write would just * get put into cache. * * UFS pays attention to IO_SYNC for writes. It will * attempt to write the buffer out synchronously if that * flag is set. * * ZFS does not pay attention to IO_DIRECT for writes. * * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) * for writes. It will flush the transaction from the * cache before returning. * * So if we've got the BIO_ORDERED flag set, we want * IO_SYNC in either the UFS or ZFS case. */ error = VOP_WRITE(xbb->vn, &xuio, (flags & BIO_ORDERED) ? 
IO_SYNC : 0, file_data->cred); VOP_UNLOCK(xbb->vn, 0); vn_finished_write(mountpoint); break; } default: panic("invalid operation %d", operation); /* NOTREACHED */ } #ifdef XBB_USE_BOUNCE_BUFFERS /* We only need to copy here for read operations */ if (operation == BIO_READ) { for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, xiovec = file_data->saved_xiovecs; seg_idx < saved_uio_iovcnt; seg_idx++, xiovec++, p_vaddr++) { /* * Note that we have to use the copy of the * io vector we made above. uiomove() modifies * the uio and its referenced vector as uiomove * performs the copy, so we can't rely on any * state from the original uio. */ memcpy(*p_vaddr, xiovec->iov_base, xiovec->iov_len); } } #endif /* XBB_USE_BOUNCE_BUFFERS */ bailout_send_response: if (error != 0) reqlist->status = BLKIF_RSP_ERROR; xbb_complete_reqlist(xbb, reqlist); return (0); } /*--------------------------- Backend Configuration --------------------------*/ /** * Close and cleanup any backend device/file specific state for this * block back instance. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_close_backend(struct xbb_softc *xbb) { DROP_GIANT(); DPRINTF("closing dev=%s\n", xbb->dev_name); if (xbb->vn) { int flags = FREAD; if ((xbb->flags & XBBF_READ_ONLY) == 0) flags |= FWRITE; switch (xbb->device_type) { case XBB_TYPE_DISK: if (xbb->backend.dev.csw) { dev_relthread(xbb->backend.dev.cdev, xbb->backend.dev.dev_ref); xbb->backend.dev.csw = NULL; xbb->backend.dev.cdev = NULL; } break; case XBB_TYPE_FILE: break; case XBB_TYPE_NONE: default: panic("Unexpected backend type."); break; } (void)vn_close(xbb->vn, flags, NOCRED, curthread); xbb->vn = NULL; switch (xbb->device_type) { case XBB_TYPE_DISK: break; case XBB_TYPE_FILE: if (xbb->backend.file.cred != NULL) { crfree(xbb->backend.file.cred); xbb->backend.file.cred = NULL; } break; case XBB_TYPE_NONE: default: panic("Unexpected backend type."); break; } } PICKUP_GIANT(); } /** * Open a character device to be used for backend I/O. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. */ static int xbb_open_dev(struct xbb_softc *xbb) { struct vattr vattr; struct cdev *dev; struct cdevsw *devsw; int error; xbb->device_type = XBB_TYPE_DISK; xbb->dispatch_io = xbb_dispatch_dev; xbb->backend.dev.cdev = xbb->vn->v_rdev; xbb->backend.dev.csw = dev_refthread(xbb->backend.dev.cdev, &xbb->backend.dev.dev_ref); if (xbb->backend.dev.csw == NULL) panic("Unable to retrieve device switch"); error = VOP_GETATTR(xbb->vn, &vattr, NOCRED); if (error) { xenbus_dev_fatal(xbb->dev, error, "error getting " "vnode attributes for device %s", xbb->dev_name); return (error); } dev = xbb->vn->v_rdev; devsw = dev->si_devsw; if (!devsw->d_ioctl) { xenbus_dev_fatal(xbb->dev, ENODEV, "no d_ioctl for " "device %s!", xbb->dev_name); return (ENODEV); } error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&xbb->sector_size, FREAD, curthread); if (error) { xenbus_dev_fatal(xbb->dev, error, "error calling ioctl DIOCGSECTORSIZE " "for device %s", xbb->dev_name); return (error); } error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&xbb->media_size, FREAD, curthread); if (error) { xenbus_dev_fatal(xbb->dev, error, "error calling ioctl DIOCGMEDIASIZE " "for device %s", xbb->dev_name); return (error); } return (0); } /** * Open a file to be used for backend I/O. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. 
*/ static int xbb_open_file(struct xbb_softc *xbb) { struct xbb_file_data *file_data; struct vattr vattr; int error; file_data = &xbb->backend.file; xbb->device_type = XBB_TYPE_FILE; xbb->dispatch_io = xbb_dispatch_file; error = VOP_GETATTR(xbb->vn, &vattr, curthread->td_ucred); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "error calling VOP_GETATTR()" "for file %s", xbb->dev_name); return (error); } /* * Verify that we have the ability to upgrade to exclusive * access on this file so we can trap errors at open instead * of reporting them during first access. */ if (VOP_ISLOCKED(xbb->vn) != LK_EXCLUSIVE) { vn_lock(xbb->vn, LK_UPGRADE | LK_RETRY); if (xbb->vn->v_iflag & VI_DOOMED) { error = EBADF; xenbus_dev_fatal(xbb->dev, error, "error locking file %s", xbb->dev_name); return (error); } } file_data->cred = crhold(curthread->td_ucred); xbb->media_size = vattr.va_size; /* * XXX KDM vattr.va_blocksize may be larger than 512 bytes here. * With ZFS, it is 131072 bytes. Block sizes that large don't work * with disklabel and UFS on FreeBSD at least. Large block sizes * may not work with other OSes as well. So just export a sector * size of 512 bytes, which should work with any OS or * application. Since our backing is a file, any block size will * work fine for the backing store. */ #if 0 xbb->sector_size = vattr.va_blocksize; #endif xbb->sector_size = 512; /* * Sanity check. The media size has to be at least one * sector long. */ if (xbb->media_size < xbb->sector_size) { error = EINVAL; xenbus_dev_fatal(xbb->dev, error, "file %s size %ju < block size %u", xbb->dev_name, (uintmax_t)xbb->media_size, xbb->sector_size); } return (error); } /** * Open the backend provider for this connection. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. */ static int xbb_open_backend(struct xbb_softc *xbb) { struct nameidata nd; int flags; int error; flags = FREAD; error = 0; DPRINTF("opening dev=%s\n", xbb->dev_name); if (rootvnode == NULL) { xenbus_dev_fatal(xbb->dev, ENOENT, "Root file system not mounted"); return (ENOENT); } if ((xbb->flags & XBBF_READ_ONLY) == 0) flags |= FWRITE; if (!curthread->td_proc->p_fd->fd_cdir) { curthread->td_proc->p_fd->fd_cdir = rootvnode; VREF(rootvnode); } if (!curthread->td_proc->p_fd->fd_rdir) { curthread->td_proc->p_fd->fd_rdir = rootvnode; VREF(rootvnode); } if (!curthread->td_proc->p_fd->fd_jdir) { curthread->td_proc->p_fd->fd_jdir = rootvnode; VREF(rootvnode); } again: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, xbb->dev_name, curthread); error = vn_open(&nd, &flags, 0, NULL); if (error) { /* * This is the only reasonable guess we can make as far as * path if the user doesn't give us a fully qualified path. * If they want to specify a file, they need to specify the * full path. */ if (xbb->dev_name[0] != '/') { char *dev_path = "/dev/"; char *dev_name; /* Try adding device path at beginning of name */ dev_name = malloc(strlen(xbb->dev_name) + strlen(dev_path) + 1, M_XENBLOCKBACK, M_NOWAIT); if (dev_name) { sprintf(dev_name, "%s%s", dev_path, xbb->dev_name); free(xbb->dev_name, M_XENBLOCKBACK); xbb->dev_name = dev_name; goto again; } } xenbus_dev_fatal(xbb->dev, error, "error opening device %s", xbb->dev_name); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); xbb->vn = nd.ni_vp; /* We only support disks and files. 
*/ if (vn_isdisk(xbb->vn, &error)) { error = xbb_open_dev(xbb); } else if (xbb->vn->v_type == VREG) { error = xbb_open_file(xbb); } else { error = EINVAL; xenbus_dev_fatal(xbb->dev, error, "%s is not a disk " "or file", xbb->dev_name); } VOP_UNLOCK(xbb->vn, 0); if (error != 0) { xbb_close_backend(xbb); return (error); } xbb->sector_size_shift = fls(xbb->sector_size) - 1; xbb->media_num_sectors = xbb->media_size >> xbb->sector_size_shift; DPRINTF("opened %s=%s sector_size=%u media_size=%" PRId64 "\n", (xbb->device_type == XBB_TYPE_DISK) ? "dev" : "file", xbb->dev_name, xbb->sector_size, xbb->media_size); return (0); } /*------------------------ Inter-Domain Communication ------------------------*/ /** * Free dynamically allocated KVA or pseudo-physical address allocations. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_free_communication_mem(struct xbb_softc *xbb) { if (xbb->kva != 0) { #ifndef XENHVM kva_free(xbb->kva, xbb->kva_size); #else if (xbb->pseudo_phys_res != NULL) { bus_release_resource(xbb->dev, SYS_RES_MEMORY, xbb->pseudo_phys_res_id, xbb->pseudo_phys_res); xbb->pseudo_phys_res = NULL; } #endif } xbb->kva = 0; xbb->gnt_base_addr = 0; if (xbb->kva_free != NULL) { free(xbb->kva_free, M_XENBLOCKBACK); xbb->kva_free = NULL; } } /** * Cleanup all inter-domain communication mechanisms. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_disconnect(struct xbb_softc *xbb) { struct gnttab_unmap_grant_ref ops[XBB_MAX_RING_PAGES]; struct gnttab_unmap_grant_ref *op; u_int ring_idx; int error; DPRINTF("\n"); if ((xbb->flags & XBBF_RING_CONNECTED) == 0) return (0); - if (xbb->irq != 0) { - unbind_from_irqhandler(xbb->irq); - xbb->irq = 0; - } + xen_intr_unbind(&xbb->xen_intr_handle); mtx_unlock(&xbb->lock); taskqueue_drain(xbb->io_taskqueue, &xbb->io_task); mtx_lock(&xbb->lock); /* * No new interrupts can generate work, but we must wait * for all currently active requests to drain. */ if (xbb->active_request_count != 0) return (EAGAIN); for (ring_idx = 0, op = ops; ring_idx < xbb->ring_config.ring_pages; ring_idx++, op++) { op->host_addr = xbb->ring_config.gnt_addr + (ring_idx * PAGE_SIZE); op->dev_bus_addr = xbb->ring_config.bus_addr[ring_idx]; op->handle = xbb->ring_config.handle[ring_idx]; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, ops, xbb->ring_config.ring_pages); if (error != 0) panic("Grant table op failed (%d)", error); xbb_free_communication_mem(xbb); if (xbb->requests != NULL) { free(xbb->requests, M_XENBLOCKBACK); xbb->requests = NULL; } if (xbb->request_lists != NULL) { struct xbb_xen_reqlist *reqlist; int i; /* There is one request list for ever allocated request. */ for (i = 0, reqlist = xbb->request_lists; i < xbb->max_requests; i++, reqlist++){ #ifdef XBB_USE_BOUNCE_BUFFERS if (reqlist->bounce != NULL) { free(reqlist->bounce, M_XENBLOCKBACK); reqlist->bounce = NULL; } #endif if (reqlist->gnt_handles != NULL) { free(reqlist->gnt_handles, M_XENBLOCKBACK); reqlist->gnt_handles = NULL; } } free(xbb->request_lists, M_XENBLOCKBACK); xbb->request_lists = NULL; } xbb->flags &= ~XBBF_RING_CONNECTED; return (0); } /** * Map shared memory ring into domain local address space, initialize * ring control structures, and bind an interrupt to the event channel * used to notify us of ring changes. * * \param xbb Per-instance xbb configuration structure. 
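 *
 * Event channel notification goes through the xen_intr interface used
 * elsewhere in this change: bind the frontend's event channel to a
 * filter routine, signal the channel when responses are queued, and
 * unbind on disconnect.  In outline (mirroring the calls made below,
 * in xbb_send_response() and in xbb_disconnect()):
 *
 *	xen_intr_handle_t handle;
 *
 *	error = xen_intr_bind_remote_port(dev, otherend_id, evtchn,
 *	    xbb_filter, NULL, xbb, INTR_TYPE_BIO | INTR_MPSAFE, &handle);
 *	...
 *	xen_intr_signal(handle);	// notify the frontend
 *	...
 *	xen_intr_unbind(&handle);	// tear down on disconnect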
*/ static int xbb_connect_ring(struct xbb_softc *xbb) { struct gnttab_map_grant_ref gnts[XBB_MAX_RING_PAGES]; struct gnttab_map_grant_ref *gnt; u_int ring_idx; int error; if ((xbb->flags & XBBF_RING_CONNECTED) != 0) return (0); /* * Kva for our ring is at the tail of the region of kva allocated * by xbb_alloc_communication_mem(). */ xbb->ring_config.va = xbb->kva + (xbb->kva_size - (xbb->ring_config.ring_pages * PAGE_SIZE)); xbb->ring_config.gnt_addr = xbb->gnt_base_addr + (xbb->kva_size - (xbb->ring_config.ring_pages * PAGE_SIZE)); for (ring_idx = 0, gnt = gnts; ring_idx < xbb->ring_config.ring_pages; ring_idx++, gnt++) { gnt->host_addr = xbb->ring_config.gnt_addr + (ring_idx * PAGE_SIZE); gnt->flags = GNTMAP_host_map; gnt->ref = xbb->ring_config.ring_ref[ring_idx]; gnt->dom = xbb->otherend_id; } error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, gnts, xbb->ring_config.ring_pages); if (error) panic("blkback: Ring page grant table op failed (%d)", error); for (ring_idx = 0, gnt = gnts; ring_idx < xbb->ring_config.ring_pages; ring_idx++, gnt++) { if (gnt->status != 0) { xbb->ring_config.va = 0; xenbus_dev_fatal(xbb->dev, EACCES, "Ring shared page mapping failed. " "Status %d.", gnt->status); return (EACCES); } xbb->ring_config.handle[ring_idx] = gnt->handle; xbb->ring_config.bus_addr[ring_idx] = gnt->dev_bus_addr; } /* Initialize the ring based on ABI. */ switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: { blkif_sring_t *sring; sring = (blkif_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.native, sring, xbb->ring_config.ring_pages * PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_32: { blkif_x86_32_sring_t *sring_x86_32; sring_x86_32 = (blkif_x86_32_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.x86_32, sring_x86_32, xbb->ring_config.ring_pages * PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_64: { blkif_x86_64_sring_t *sring_x86_64; sring_x86_64 = (blkif_x86_64_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.x86_64, sring_x86_64, xbb->ring_config.ring_pages * PAGE_SIZE); break; } default: panic("Unexpected blkif protocol ABI."); } xbb->flags |= XBBF_RING_CONNECTED; - error = - bind_interdomain_evtchn_to_irqhandler(xbb->otherend_id, - xbb->ring_config.evtchn, - device_get_nameunit(xbb->dev), - xbb_intr, /*arg*/xbb, - INTR_TYPE_BIO | INTR_MPSAFE, - &xbb->irq); + error = xen_intr_bind_remote_port(xbb->dev, + xbb->otherend_id, + xbb->ring_config.evtchn, + xbb_filter, + /*ithread_handler*/NULL, + /*arg*/xbb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xbb->xen_intr_handle); if (error) { (void)xbb_disconnect(xbb); xenbus_dev_fatal(xbb->dev, error, "binding event channel"); return (error); } DPRINTF("rings connected!\n"); return 0; } /* Needed to make bit_alloc() macro work */ #define calloc(count, size) malloc((count)*(size), M_XENBLOCKBACK, \ M_NOWAIT|M_ZERO); /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our * communication ring. * * \param xbb Per-instance xbb configuration structure. * * These address spaces are used to dynamically map pages in the * front-end's domain into our own. 
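 *
 * The size of the region follows directly from the negotiated limits:
 * one page per possible in-flight request segment plus the shared ring
 * pages, i.e. (figures purely illustrative)
 *
 *	reqlist_kva_pages = max_requests * max_request_segments;
 *	kva_size          = (reqlist_kva_pages + ring_pages) * PAGE_SIZE;
 *
 * so 32 requests of 11 segments each with a one page ring would need
 * 353 pages of KVA (and, under XENHVM, the same amount of
 * pseudo-physical address space).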
*/ static int xbb_alloc_communication_mem(struct xbb_softc *xbb) { xbb->reqlist_kva_pages = xbb->max_requests * xbb->max_request_segments; xbb->reqlist_kva_size = xbb->reqlist_kva_pages * PAGE_SIZE; xbb->kva_size = xbb->reqlist_kva_size + (xbb->ring_config.ring_pages * PAGE_SIZE); xbb->kva_free = bit_alloc(xbb->reqlist_kva_pages); if (xbb->kva_free == NULL) return (ENOMEM); DPRINTF("%s: kva_size = %d, reqlist_kva_size = %d\n", device_get_nameunit(xbb->dev), xbb->kva_size, xbb->reqlist_kva_size); #ifndef XENHVM xbb->kva = kva_alloc(xbb->kva_size); if (xbb->kva == 0) return (ENOMEM); xbb->gnt_base_addr = xbb->kva; #else /* XENHVM */ /* * Reserve a range of pseudo physical memory that we can map * into kva. These pages will only be backed by machine * pages ("real memory") during the lifetime of front-end requests * via grant table operations. */ xbb->pseudo_phys_res_id = 0; xbb->pseudo_phys_res = bus_alloc_resource(xbb->dev, SYS_RES_MEMORY, &xbb->pseudo_phys_res_id, 0, ~0, xbb->kva_size, RF_ACTIVE); if (xbb->pseudo_phys_res == NULL) { xbb->kva = 0; return (ENOMEM); } xbb->kva = (vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res); xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res); #endif /* XENHVM */ DPRINTF("%s: kva: %#jx, gnt_base_addr: %#jx\n", device_get_nameunit(xbb->dev), (uintmax_t)xbb->kva, (uintmax_t)xbb->gnt_base_addr); return (0); } /** * Collect front-end information from the XenStore. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_collect_frontend_info(struct xbb_softc *xbb) { char protocol_abi[64]; const char *otherend_path; int error; u_int ring_idx; u_int ring_page_order; size_t ring_size; otherend_path = xenbus_get_otherend_path(xbb->dev); /* * Protocol defaults valid even if all negotiation fails. */ xbb->ring_config.ring_pages = 1; xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE; /* * Mandatory data (used in all versions of the protocol) first. */ error = xs_scanf(XST_NIL, otherend_path, "event-channel", NULL, "%" PRIu32, &xbb->ring_config.evtchn); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to retrieve event-channel information " "from frontend %s. Unable to connect.", xenbus_get_otherend_path(xbb->dev)); return (error); } /* * These fields are initialized to legacy protocol defaults * so we only need to fail if reading the updated value succeeds * and the new value is outside of its allowed range. * * \note xs_gather() returns on the first encountered error, so * we must use independant calls in order to guarantee * we don't miss information in a sparsly populated front-end * tree. * * \note xs_scanf() does not update variables for unmatched * fields. 
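 *
 * The optional fields therefore follow a "default, then override,
 * then validate" pattern: seed the variable with the legacy value,
 * ignore the return value of xs_scanf() for the optional key, and
 * range-check the result before using it.  For one hypothetical key
 * (LEGACY_DEFAULT and BACKEND_LIMIT are placeholders):
 *
 *	u_int value = LEGACY_DEFAULT;
 *
 *	(void)xs_scanf(XST_NIL, otherend_path, "optional-key", NULL,
 *		       "%u", &value);
 *	if (value > BACKEND_LIMIT)
 *		return (EINVAL);	// cannot connect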
*/ ring_page_order = 0; (void)xs_scanf(XST_NIL, otherend_path, "ring-page-order", NULL, "%u", &ring_page_order); xbb->ring_config.ring_pages = 1 << ring_page_order; (void)xs_scanf(XST_NIL, otherend_path, "num-ring-pages", NULL, "%u", &xbb->ring_config.ring_pages); ring_size = PAGE_SIZE * xbb->ring_config.ring_pages; xbb->max_requests = BLKIF_MAX_RING_REQUESTS(ring_size); (void)xs_scanf(XST_NIL, otherend_path, "max-requests", NULL, "%u", &xbb->max_requests); (void)xs_scanf(XST_NIL, otherend_path, "max-request-segments", NULL, "%u", &xbb->max_request_segments); (void)xs_scanf(XST_NIL, otherend_path, "max-request-size", NULL, "%u", &xbb->max_request_size); if (xbb->ring_config.ring_pages > XBB_MAX_RING_PAGES) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified ring-pages of %u " "exceeds backend limit of %zu. " "Unable to connect.", xbb->ring_config.ring_pages, XBB_MAX_RING_PAGES); return (EINVAL); } else if (xbb->max_requests > XBB_MAX_REQUESTS) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_requests of %u " "exceeds backend limit of %u. " "Unable to connect.", xbb->max_requests, XBB_MAX_REQUESTS); return (EINVAL); } else if (xbb->max_request_segments > XBB_MAX_SEGMENTS_PER_REQUEST) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_requests_segments " "of %u exceeds backend limit of %u. " "Unable to connect.", xbb->max_request_segments, XBB_MAX_SEGMENTS_PER_REQUEST); return (EINVAL); } else if (xbb->max_request_size > XBB_MAX_REQUEST_SIZE) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_request_size " "of %u exceeds backend limit of %u. " "Unable to connect.", xbb->max_request_size, XBB_MAX_REQUEST_SIZE); return (EINVAL); } if (xbb->ring_config.ring_pages == 1) { error = xs_gather(XST_NIL, otherend_path, "ring-ref", "%" PRIu32, &xbb->ring_config.ring_ref[0], NULL); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to retrieve ring information " "from frontend %s. Unable to " "connect.", xenbus_get_otherend_path(xbb->dev)); return (error); } } else { /* Multi-page ring format. */ for (ring_idx = 0; ring_idx < xbb->ring_config.ring_pages; ring_idx++) { char ring_ref_name[]= "ring_refXX"; snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", ring_idx); error = xs_scanf(XST_NIL, otherend_path, ring_ref_name, NULL, "%" PRIu32, &xbb->ring_config.ring_ref[ring_idx]); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Failed to retriev grant " "reference for page %u of " "shared ring. Unable " "to connect.", ring_idx); return (error); } } } error = xs_gather(XST_NIL, otherend_path, "protocol", "%63s", protocol_abi, NULL); if (error != 0 || !strcmp(protocol_abi, XEN_IO_PROTO_ABI_NATIVE)) { /* * Assume native if the frontend has not * published ABI data or it has published and * matches our own ABI. */ xbb->abi = BLKIF_PROTOCOL_NATIVE; } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_32)) { xbb->abi = BLKIF_PROTOCOL_X86_32; } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_64)) { xbb->abi = BLKIF_PROTOCOL_X86_64; } else { xenbus_dev_fatal(xbb->dev, EINVAL, "Unknown protocol ABI (%s) published by " "frontend. Unable to connect.", protocol_abi); return (EINVAL); } return (0); } /** * Allocate per-request data structures given request size and number * information negotiated with the front-end. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_alloc_requests(struct xbb_softc *xbb) { struct xbb_xen_req *req; struct xbb_xen_req *last_req; /* * Allocate request book keeping datastructures. 
*/ xbb->requests = malloc(xbb->max_requests * sizeof(*xbb->requests), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (xbb->requests == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request structures"); return (ENOMEM); } req = xbb->requests; last_req = &xbb->requests[xbb->max_requests - 1]; STAILQ_INIT(&xbb->request_free_stailq); while (req <= last_req) { STAILQ_INSERT_TAIL(&xbb->request_free_stailq, req, links); req++; } return (0); } static int xbb_alloc_request_lists(struct xbb_softc *xbb) { struct xbb_xen_reqlist *reqlist; int i; /* * If no requests can be merged, we need 1 request list per * in flight request. */ xbb->request_lists = malloc(xbb->max_requests * sizeof(*xbb->request_lists), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (xbb->request_lists == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request list structures"); return (ENOMEM); } STAILQ_INIT(&xbb->reqlist_free_stailq); STAILQ_INIT(&xbb->reqlist_pending_stailq); for (i = 0; i < xbb->max_requests; i++) { int seg; reqlist = &xbb->request_lists[i]; reqlist->xbb = xbb; #ifdef XBB_USE_BOUNCE_BUFFERS reqlist->bounce = malloc(xbb->max_reqlist_size, M_XENBLOCKBACK, M_NOWAIT); if (reqlist->bounce == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request " "bounce buffers"); return (ENOMEM); } #endif /* XBB_USE_BOUNCE_BUFFERS */ reqlist->gnt_handles = malloc(xbb->max_reqlist_segments * sizeof(*reqlist->gnt_handles), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (reqlist->gnt_handles == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request " "grant references"); return (ENOMEM); } for (seg = 0; seg < xbb->max_reqlist_segments; seg++) reqlist->gnt_handles[seg] = GRANT_REF_INVALID; STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); } return (0); } /** * Supply information about the physical device to the frontend * via XenBus. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_publish_backend_info(struct xbb_softc *xbb) { struct xs_transaction xst; const char *our_path; const char *leaf; int error; our_path = xenbus_get_node(xbb->dev); while (1) { error = xs_transaction_start(&xst); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Error publishing backend info " "(start transaction)"); return (error); } leaf = "sectors"; error = xs_printf(xst, our_path, leaf, "%"PRIu64, xbb->media_num_sectors); if (error != 0) break; /* XXX Support all VBD attributes here. */ leaf = "info"; error = xs_printf(xst, our_path, leaf, "%u", xbb->flags & XBBF_READ_ONLY ? VDISK_READONLY : 0); if (error != 0) break; leaf = "sector-size"; error = xs_printf(xst, our_path, leaf, "%u", xbb->sector_size); if (error != 0) break; error = xs_transaction_end(xst, 0); if (error == 0) { return (0); } else if (error != EAGAIN) { xenbus_dev_fatal(xbb->dev, error, "ending transaction"); return (error); } } xenbus_dev_fatal(xbb->dev, error, "writing %s/%s", our_path, leaf); xs_transaction_end(xst, 1); return (error); } /** * Connect to our blkfront peer now that it has completed publishing * its configuration into the XenStore. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_connect(struct xbb_softc *xbb) { int error; if (xenbus_get_state(xbb->dev) == XenbusStateConnected) return; if (xbb_collect_frontend_info(xbb) != 0) return; xbb->flags &= ~XBBF_SHUTDOWN; /* * We limit the maximum number of reqlist segments to the maximum * number of segments in the ring, or our absolute maximum, * whichever is smaller. 
*/ xbb->max_reqlist_segments = MIN(xbb->max_request_segments * xbb->max_requests, XBB_MAX_SEGMENTS_PER_REQLIST); /* * The maximum size is simply a function of the number of segments * we can handle. */ xbb->max_reqlist_size = xbb->max_reqlist_segments * PAGE_SIZE; /* Allocate resources whose size depends on front-end configuration. */ error = xbb_alloc_communication_mem(xbb); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to allocate communication memory"); return; } error = xbb_alloc_requests(xbb); if (error != 0) { /* Specific errors are reported by xbb_alloc_requests(). */ return; } error = xbb_alloc_request_lists(xbb); if (error != 0) { /* Specific errors are reported by xbb_alloc_request_lists(). */ return; } /* * Connect communication channel. */ error = xbb_connect_ring(xbb); if (error != 0) { /* Specific errors are reported by xbb_connect_ring(). */ return; } if (xbb_publish_backend_info(xbb) != 0) { /* * If we can't publish our data, we cannot participate * in this connection, and waiting for a front-end state * change will not help the situation. */ (void)xbb_disconnect(xbb); return; } /* Ready for I/O. */ xenbus_set_state(xbb->dev, XenbusStateConnected); } /*-------------------------- Device Teardown Support -------------------------*/ /** * Perform device shutdown functions. * * \param xbb Per-instance xbb configuration structure. * * Mark this instance as shutting down, wait for any active I/O on the * backend device/file to drain, disconnect from the front-end, and notify * any waiters (e.g. a thread invoking our detach method) that detach can * now proceed. */ static int xbb_shutdown(struct xbb_softc *xbb) { XenbusState frontState; int error; DPRINTF("\n"); /* * Due to the need to drop our mutex during some * xenbus operations, it is possible for two threads * to attempt to close out shutdown processing at * the same time. Tell the caller that hits this * race to try back later. */ if ((xbb->flags & XBBF_IN_SHUTDOWN) != 0) return (EAGAIN); xbb->flags |= XBBF_IN_SHUTDOWN; mtx_unlock(&xbb->lock); if (xenbus_get_state(xbb->dev) < XenbusStateClosing) xenbus_set_state(xbb->dev, XenbusStateClosing); frontState = xenbus_get_otherend_state(xbb->dev); mtx_lock(&xbb->lock); xbb->flags &= ~XBBF_IN_SHUTDOWN; /* The front can submit I/O until entering the closed state. */ if (frontState < XenbusStateClosed) return (EAGAIN); DPRINTF("\n"); /* Indicate shutdown is in progress. */ xbb->flags |= XBBF_SHUTDOWN; /* Disconnect from the front-end. */ error = xbb_disconnect(xbb); if (error != 0) { /* * Requests still outstanding. We'll be called again * once they complete. */ KASSERT(error == EAGAIN, ("%s: Unexpected xbb_disconnect() failure %d", __func__, error)); return (error); } DPRINTF("\n"); /* Indicate to xbb_detach() that is it safe to proceed. */ wakeup(xbb); return (0); } /** * Report an attach time error to the console and Xen, and cleanup * this instance by forcing immediate detach processing. * * \param xbb Per-instance xbb configuration structure. * \param err Errno describing the error. * \param fmt Printf style format and arguments */ static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) 
{ va_list ap; va_list ap_hotplug; va_start(ap, fmt); va_copy(ap_hotplug, ap); xs_vprintf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-error", fmt, ap_hotplug); va_end(ap_hotplug); xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-status", "error"); xenbus_dev_vfatal(xbb->dev, err, fmt, ap); va_end(ap); xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "online", "0"); xbb_detach(xbb->dev); } /*---------------------------- NewBus Entrypoints ----------------------------*/ /** * Inspect a XenBus device and claim it if is of the appropriate type. * * \param dev NewBus device object representing a candidate XenBus device. * * \return 0 for success, errno codes for failure. */ static int xbb_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vbd")) { device_set_desc(dev, "Backend Virtual Block Device"); device_quiet(dev); return (0); } return (ENXIO); } /** * Setup sysctl variables to control various Block Back parameters. * * \param xbb Xen Block Back softc. * */ static void xbb_setup_sysctl(struct xbb_softc *xbb) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; sysctl_ctx = device_get_sysctl_ctx(xbb->dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xbb->dev); if (sysctl_tree == NULL) return; SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_flush", CTLFLAG_RW, &xbb->disable_flush, 0, "fake the flush command"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "flush_interval", CTLFLAG_RW, &xbb->flush_interval, 0, "send a real flush for N flush requests"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "no_coalesce_reqs", CTLFLAG_RW, &xbb->no_coalesce_reqs,0, "Don't coalesce contiguous requests"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "reqs_received", CTLFLAG_RW, &xbb->reqs_received, "how many I/O requests we have received"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "reqs_completed", CTLFLAG_RW, &xbb->reqs_completed, "how many I/O requests have been completed"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "forced_dispatch", CTLFLAG_RW, &xbb->forced_dispatch, "how many I/O dispatches were forced"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "normal_dispatch", CTLFLAG_RW, &xbb->normal_dispatch, "how many I/O dispatches were normal"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "total_dispatch", CTLFLAG_RW, &xbb->total_dispatch, "total number of I/O dispatches"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "kva_shortages", CTLFLAG_RW, &xbb->kva_shortages, "how many times we have run out of KVA"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "request_shortages", CTLFLAG_RW, &xbb->request_shortages, "how many times we have run out of requests"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_requests", CTLFLAG_RD, &xbb->max_requests, 0, "maximum outstanding requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_request_segments", CTLFLAG_RD, &xbb->max_request_segments, 0, "maximum number of pages per requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_request_size", CTLFLAG_RD, &xbb->max_request_size, 0, "maximum size in bytes of a request (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "ring_pages", CTLFLAG_RD, &xbb->ring_config.ring_pages, 
0, "communication channel pages (negotiated)"); } /** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. */ static int xbb_attach(device_t dev) { struct xbb_softc *xbb; int error; u_int max_ring_page_order; DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); /* * Basic initialization. * After this block it is safe to call xbb_detach() * to clean up any allocated data for this instance. */ xbb = device_get_softc(dev); xbb->dev = dev; xbb->otherend_id = xenbus_get_otherend_id(dev); TASK_INIT(&xbb->io_task, /*priority*/0, xbb_run_queue, xbb); mtx_init(&xbb->lock, device_get_nameunit(dev), NULL, MTX_DEF); /* * Publish protocol capabilities for consumption by the * front-end. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "feature-barrier", "1"); if (error) { xbb_attach_failed(xbb, error, "writing %s/feature-barrier", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "feature-flush-cache", "1"); if (error) { xbb_attach_failed(xbb, error, "writing %s/feature-flush-cache", xenbus_get_node(xbb->dev)); return (error); } /* * Amazon EC2 client compatility. They refer to max-ring-pages * instead of to max-ring-page-order. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-ring-pages", "%zu", XBB_MAX_RING_PAGES); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-ring-pages", xenbus_get_node(xbb->dev)); return (error); } max_ring_page_order = flsl(XBB_MAX_RING_PAGES) - 1; error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-ring-page-order", "%u", max_ring_page_order); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-ring-page-order", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-requests", "%u", XBB_MAX_REQUESTS); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-requests", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-request-segments", "%u", XBB_MAX_SEGMENTS_PER_REQUEST); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-request-segments", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-request-size", "%u", XBB_MAX_REQUEST_SIZE); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-request-size", xenbus_get_node(xbb->dev)); return (error); } /* Collect physical device information. */ error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev), "device-type", NULL, &xbb->dev_type, NULL); if (error != 0) xbb->dev_type = NULL; error = xs_gather(XST_NIL, xenbus_get_node(dev), "mode", NULL, &xbb->dev_mode, "params", NULL, &xbb->dev_name, NULL); if (error != 0) { xbb_attach_failed(xbb, error, "reading backend fields at %s", xenbus_get_node(dev)); return (ENXIO); } /* Parse fopen style mode flags. */ if (strchr(xbb->dev_mode, 'w') == NULL) xbb->flags |= XBBF_READ_ONLY; /* * Verify the physical device is present and can support * the desired I/O mode. */ DROP_GIANT(); error = xbb_open_backend(xbb); PICKUP_GIANT(); if (error != 0) { xbb_attach_failed(xbb, error, "Unable to open %s", xbb->dev_name); return (ENXIO); } /* Use devstat(9) for recording statistics. 
*/ xbb->xbb_stats = devstat_new_entry("xbb", device_get_unit(xbb->dev), xbb->sector_size, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); xbb->xbb_stats_in = devstat_new_entry("xbbi", device_get_unit(xbb->dev), xbb->sector_size, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); /* * Setup sysctl variables. */ xbb_setup_sysctl(xbb); /* * Create a taskqueue for doing work that must occur from a * thread context. */ - xbb->io_taskqueue = taskqueue_create(device_get_nameunit(dev), M_NOWAIT, - taskqueue_thread_enqueue, - /*context*/&xbb->io_taskqueue); + xbb->io_taskqueue = taskqueue_create_fast(device_get_nameunit(dev), + M_NOWAIT, + taskqueue_thread_enqueue, + /*contxt*/&xbb->io_taskqueue); if (xbb->io_taskqueue == NULL) { xbb_attach_failed(xbb, error, "Unable to create taskqueue"); return (ENOMEM); } taskqueue_start_threads(&xbb->io_taskqueue, /*num threads*/1, /*priority*/PWAIT, /*thread name*/ "%s taskq", device_get_nameunit(dev)); /* Update hot-plug status to satisfy xend. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-status", "connected"); if (error) { xbb_attach_failed(xbb, error, "writing %s/hotplug-status", xenbus_get_node(xbb->dev)); return (error); } /* Tell the front end that we are ready to connect. */ xenbus_set_state(dev, XenbusStateInitWait); return (0); } /** * Detach from a block back device instance. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. * * \note A block back device may be detached at any time in its life-cycle, * including part way through the attach process. For this reason, * initialization order and the intialization state checks in this * routine must be carefully coupled so that attach time failures * are gracefully handled. */ static int xbb_detach(device_t dev) { struct xbb_softc *xbb; DPRINTF("\n"); xbb = device_get_softc(dev); mtx_lock(&xbb->lock); while (xbb_shutdown(xbb) == EAGAIN) { msleep(xbb, &xbb->lock, /*wakeup prio unchanged*/0, "xbb_shutdown", 0); } mtx_unlock(&xbb->lock); DPRINTF("\n"); if (xbb->io_taskqueue != NULL) taskqueue_free(xbb->io_taskqueue); if (xbb->xbb_stats != NULL) devstat_remove_entry(xbb->xbb_stats); if (xbb->xbb_stats_in != NULL) devstat_remove_entry(xbb->xbb_stats_in); xbb_close_backend(xbb); if (xbb->dev_mode != NULL) { free(xbb->dev_mode, M_XENBUS); xbb->dev_mode = NULL; } if (xbb->dev_type != NULL) { free(xbb->dev_type, M_XENBUS); xbb->dev_type = NULL; } if (xbb->dev_name != NULL) { free(xbb->dev_name, M_XENBUS); xbb->dev_name = NULL; } mtx_destroy(&xbb->lock); return (0); } /** * Prepare this block back device for suspension of this VM. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. */ static int xbb_suspend(device_t dev) { #ifdef NOT_YET struct xbb_softc *sc = device_get_softc(dev); /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xb_io_lock); sc->connected = BLKIF_STATE_SUSPENDED; mtx_unlock(&sc->xb_io_lock); #endif return (0); } /** * Perform any processing required to recover from a suspended state. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. */ static int xbb_resume(device_t dev) { return (0); } /** * Handle state changes expressed via the XenStore by our front-end peer. 
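The hunk above moves the block back driver's deferred-work queue to taskqueue_create_fast() while keeping the usual taskqueue_thread_enqueue/taskqueue_start_threads arrangement. The following is a minimal, hedged sketch of that taskqueue(9) pattern with hypothetical names (my_softc, my_task_fn); it is not the driver's code and assumes only the standard kernel KPI.

// Hedged sketch of the taskqueue(9) usage pattern shown in the hunk above.
// Names are hypothetical; error handling is trimmed to the essentials.
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/taskqueue.h>

struct my_softc {
        struct task              io_task;
        struct taskqueue        *io_tq;
};

static void
my_task_fn(void *context, int pending)
{
        // Deferred I/O work runs here in the taskqueue's thread context.
}

static int
my_setup_taskqueue(struct my_softc *sc, const char *name)
{
        TASK_INIT(&sc->io_task, 0, my_task_fn, sc);     // priority 0
        // taskqueue_thread_enqueue wants a pointer to the queue pointer.
        sc->io_tq = taskqueue_create_fast(name, M_NOWAIT,
            taskqueue_thread_enqueue, &sc->io_tq);
        if (sc->io_tq == NULL)
                return (ENOMEM);
        taskqueue_start_threads(&sc->io_tq, 1, PWAIT, "%s taskq", name);
        return (0);
}

static void
my_kick_taskqueue(struct my_softc *sc)
{
        taskqueue_enqueue(sc->io_tq, &sc->io_task);
}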
* * \param dev NewBus device object representing this Xen * Block Back instance. * \param frontend_state The new state of the front-end. * * \return 0 for success, errno codes for failure. */ static void xbb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xbb_softc *xbb = device_get_softc(dev); DPRINTF("frontend_state=%s, xbb_state=%s\n", xenbus_strstate(frontend_state), xenbus_strstate(xenbus_get_state(xbb->dev))); switch (frontend_state) { case XenbusStateInitialising: break; case XenbusStateInitialised: case XenbusStateConnected: xbb_connect(xbb); break; case XenbusStateClosing: case XenbusStateClosed: mtx_lock(&xbb->lock); xbb_shutdown(xbb); mtx_unlock(&xbb->lock); if (frontend_state == XenbusStateClosed) xenbus_set_state(xbb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xbb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xbb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xbb_probe), DEVMETHOD(device_attach, xbb_attach), DEVMETHOD(device_detach, xbb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xbb_suspend), DEVMETHOD(device_resume, xbb_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xbb_frontend_changed), { 0, 0 } }; static driver_t xbb_driver = { "xbbd", xbb_methods, sizeof(struct xbb_softc), }; devclass_t xbb_devclass; DRIVER_MODULE(xbbd, xenbusb_back, xbb_driver, xbb_devclass, 0, 0); diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c index 969c3d8caf1a..7d70f42ce1cb 100644 --- a/sys/dev/xen/blkfront/blkfront.c +++ b/sys/dev/xen/blkfront/blkfront.c @@ -1,1560 +1,1555 @@ /* * XenBSD block device driver * * Copyright (c) 2010-2013 Spectra Logic Corporation * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include -#include -#include - +#include #include #include -#include #include #include #include #include +#include +#include + #include #include #include "xenbus_if.h" /*--------------------------- Forward Declarations ---------------------------*/ static void xbd_closing(device_t); static void xbd_startio(struct xbd_softc *sc); /*---------------------------------- Macros ----------------------------------*/ #if 0 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) #else #define DPRINTK(fmt, args...) #endif #define XBD_SECTOR_SHFT 9 /*---------------------------- Global Static Data ----------------------------*/ static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data"); /*---------------------------- Command Processing ----------------------------*/ static void xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag) { if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0) return; sc->xbd_flags |= xbd_flag; sc->xbd_qfrozen_cnt++; } static void xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag) { if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0) return; if (sc->xbd_qfrozen_cnt == 0) panic("%s: Thaw with flag 0x%x while not frozen.", __func__, xbd_flag); sc->xbd_flags &= ~xbd_flag; sc->xbd_qfrozen_cnt--; } static void xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag) { if ((cm->cm_flags & XBDCF_FROZEN) != 0) return; cm->cm_flags |= XBDCF_FROZEN|cm_flag; xbd_freeze(sc, XBDF_NONE); } static void xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm) { if ((cm->cm_flags & XBDCF_FROZEN) == 0) return; cm->cm_flags &= ~XBDCF_FROZEN; xbd_thaw(sc, XBDF_NONE); } static inline void xbd_flush_requests(struct xbd_softc *sc) { int notify; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify); if (notify) - notify_remote_via_irq(sc->xbd_irq); + xen_intr_signal(sc->xen_intr_handle); } static void xbd_free_command(struct xbd_command *cm) { KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE, ("Freeing command that is still on queue %d.", cm->cm_flags & XBDCF_Q_MASK)); cm->cm_flags = XBDCF_INITIALIZER; cm->cm_bp = NULL; cm->cm_complete = NULL; xbd_enqueue_cm(cm, XBD_Q_FREE); xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE); } static void xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct xbd_softc *sc; struct xbd_command *cm; blkif_request_t *ring_req; struct blkif_request_segment *sg; struct blkif_request_segment *last_block_sg; grant_ref_t *sg_ref; vm_paddr_t buffer_ma; uint64_t fsect, lsect; int ref; int op; int block_segs; cm = arg; sc = cm->cm_sc; if (error) { printf("error %d in xbd_queue_cb\n", error); cm->cm_bp->bio_error = EIO; biodone(cm->cm_bp); xbd_free_command(cm); return; } /* Fill out a communications ring structure. 
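xbd_freeze() and xbd_thaw() above let several independent conditions (command shortage, grant shortage, single-stepped writes) each hold the request queue frozen at most once, with xbd_qfrozen_cnt counting the outstanding holds. The standalone model below, using assumed flag names, shows why a flag only contributes on its first freeze.

// Standalone model of the freeze/thaw accounting in xbd_freeze()/xbd_thaw().
// Flag values are assumed for the example; only the counting rule matters.
#include <assert.h>
#include <stdio.h>

#define F_NONE          0x0
#define F_CM_SHORTAGE   0x1
#define F_GNT_SHORTAGE  0x2

static unsigned int queue_flags;
static int qfrozen_cnt;

static void
freeze(unsigned int flag)
{
        // A named condition may only contribute one hold.
        if (flag != F_NONE && (queue_flags & flag) != 0)
                return;
        queue_flags |= flag;
        qfrozen_cnt++;
}

static void
thaw(unsigned int flag)
{
        // Thawing a condition that never froze the queue is a no-op.
        if (flag != F_NONE && (queue_flags & flag) == 0)
                return;
        assert(qfrozen_cnt > 0);
        queue_flags &= ~flag;
        qfrozen_cnt--;
}

int
main(void)
{
        freeze(F_CM_SHORTAGE);
        freeze(F_CM_SHORTAGE);          // second call does not double-count
        freeze(F_GNT_SHORTAGE);
        printf("holds after freezes: %d\n", qfrozen_cnt);       // 2
        thaw(F_CM_SHORTAGE);
        thaw(F_GNT_SHORTAGE);
        printf("holds after thaws: %d\n", qfrozen_cnt);         // 0
        return (0);
}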
*/ ring_req = RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); sc->xbd_ring.req_prod_pvt++; ring_req->id = cm->cm_id; ring_req->operation = cm->cm_operation; ring_req->sector_number = cm->cm_sector_number; ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; ring_req->nr_segments = nsegs; cm->cm_nseg = nsegs; block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); sg = ring_req->seg; last_block_sg = sg + block_segs; sg_ref = cm->cm_sg_refs; while (1) { while (sg < last_block_sg) { buffer_ma = segs->ds_addr; fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1; KASSERT(lsect <= 7, ("XEN disk driver data cannot " "cross a page boundary")); /* install a grant reference. */ ref = gnttab_claim_grant_reference(&cm->cm_gref_head); /* * GNTTAB_LIST_END == 0xffffffff, but it is private * to gnttab.c. */ KASSERT(ref != ~0, ("grant_reference failed")); gnttab_grant_foreign_access_ref( ref, xenbus_get_otherend_id(sc->xbd_dev), buffer_ma >> PAGE_SHIFT, ring_req->operation == BLKIF_OP_WRITE); *sg_ref = ref; *sg = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; sg++; sg_ref++; segs++; nsegs--; } block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); if (block_segs == 0) break; sg = BLKRING_GET_SEG_BLOCK(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); sc->xbd_ring.req_prod_pvt++; last_block_sg = sg + block_segs; } if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_PREREAD; else if (cm->cm_operation == BLKIF_OP_WRITE) op = BUS_DMASYNC_PREWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); gnttab_free_grant_references(cm->cm_gref_head); xbd_enqueue_cm(cm, XBD_Q_BUSY); /* * If bus dma had to asynchronously call us back to dispatch * this command, we are no longer executing in the context of * xbd_startio(). Thus we cannot rely on xbd_startio()'s call to * xbd_flush_requests() to publish this command to the backend * along with any other commands that it could batch. */ if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0) xbd_flush_requests(sc); return; } static int xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm) { int error; error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data, cm->cm_datalen, xbd_queue_cb, cm, 0); if (error == EINPROGRESS) { /* * Maintain queuing order by freezing the queue. The next * command may not require as many resources as the command * we just attempted to map, so we can't rely on bus dma * blocking for it too. 
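Each data segment handed to the backend above is described by a page grant plus the first and last 512-byte sector touched within that page; fsect and lsect fall directly out of the segment's physical address and length. A self-contained sketch of that arithmetic, assuming a 4 KiB page and arbitrary sample values, follows.

// Standalone illustration of the fsect/lsect computation in xbd_queue_cb().
// PAGE_SIZE is assumed to be 4 KiB; the sample address/length are arbitrary.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE    4096UL
#define EX_PAGE_MASK    (EX_PAGE_SIZE - 1)
#define EX_SECTOR_SHIFT 9               // 512-byte sectors

int
main(void)
{
        uint64_t seg_addr = 0x12345600; // physical address of the segment
        uint64_t seg_len  = 2048;       // bytes in this segment
        uint64_t fsect, lsect;

        fsect = (seg_addr & EX_PAGE_MASK) >> EX_SECTOR_SHIFT;
        lsect = fsect + (seg_len >> EX_SECTOR_SHIFT) - 1;

        // A segment may not cross its page, so lsect stays within the
        // eight sectors (0..7) that make up one 4 KiB page.
        assert(lsect <= 7);
        printf("page offset 0x%llx -> fsect %llu, lsect %llu\n",
            (unsigned long long)(seg_addr & EX_PAGE_MASK),
            (unsigned long long)fsect, (unsigned long long)lsect);
        return (0);
}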
*/ xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING); return (0); } return (error); } static void xbd_restart_queue_callback(void *arg) { struct xbd_softc *sc = arg; mtx_lock(&sc->xbd_io_lock); xbd_thaw(sc, XBDF_GNT_SHORTAGE); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); } static struct xbd_command * xbd_bio_command(struct xbd_softc *sc) { struct xbd_command *cm; struct bio *bp; - if (unlikely(sc->xbd_state != XBD_STATE_CONNECTED)) + if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED)) return (NULL); bp = xbd_dequeue_bio(sc); if (bp == NULL) return (NULL); if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) { xbd_freeze(sc, XBDF_CM_SHORTAGE); xbd_requeue_bio(sc, bp); return (NULL); } if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, &cm->cm_gref_head) != 0) { gnttab_request_free_callback(&sc->xbd_callback, xbd_restart_queue_callback, sc, sc->xbd_max_request_segments); xbd_freeze(sc, XBDF_GNT_SHORTAGE); xbd_requeue_bio(sc, bp); xbd_enqueue_cm(cm, XBD_Q_FREE); return (NULL); } cm->cm_bp = bp; cm->cm_data = bp->bio_data; cm->cm_datalen = bp->bio_bcount; cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno; switch (bp->bio_cmd) { case BIO_READ: cm->cm_operation = BLKIF_OP_READ; break; case BIO_WRITE: cm->cm_operation = BLKIF_OP_WRITE; if ((bp->bio_flags & BIO_ORDERED) != 0) { if ((sc->xbd_flags & XBDF_BARRIER) != 0) { cm->cm_operation = BLKIF_OP_WRITE_BARRIER; } else { /* * Single step this command. */ cm->cm_flags |= XBDCF_Q_FREEZE; if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Wait for in-flight requests to * finish. */ xbd_freeze(sc, XBDF_WAIT_IDLE); xbd_requeue_cm(cm, XBD_Q_READY); return (NULL); } } } break; case BIO_FLUSH: if ((sc->xbd_flags & XBDF_FLUSH) != 0) cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE; else if ((sc->xbd_flags & XBDF_BARRIER) != 0) cm->cm_operation = BLKIF_OP_WRITE_BARRIER; else panic("flush request, but no flush support available"); break; default: panic("unknown bio command %d", bp->bio_cmd); } return (cm); } /* * Dequeue buffers and place them in the shared communication ring. * Return when no more requests can be accepted or all buffers have * been queued. * * Signal XEN once the ring has been filled out. */ static void xbd_startio(struct xbd_softc *sc) { struct xbd_command *cm; int error, queued = 0; mtx_assert(&sc->xbd_io_lock, MA_OWNED); if (sc->xbd_state != XBD_STATE_CONNECTED) return; while (RING_FREE_REQUESTS(&sc->xbd_ring) >= sc->xbd_max_request_blocks) { if (sc->xbd_qfrozen_cnt != 0) break; cm = xbd_dequeue_cm(sc, XBD_Q_READY); if (cm == NULL) cm = xbd_bio_command(sc); if (cm == NULL) break; if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) { /* * Single step command. Future work is * held off until this command completes. 
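xbd_bio_command() above chooses the blkif operation from the bio command and the negotiated backend features: ordered writes become barriers only when the backend advertises feature-barrier, and BIO_FLUSH falls back to a write barrier when a real cache flush is unsupported. The compact standalone sketch below, with assumed constants, captures just that decision.

// Standalone sketch of the bio -> blkif operation selection in
// xbd_bio_command().  Constants are assumed; only the decisions matter.
#include <stdio.h>

enum bio_cmd  { EX_BIO_READ, EX_BIO_WRITE, EX_BIO_FLUSH };
enum blkif_op { EX_OP_READ, EX_OP_WRITE, EX_OP_WRITE_BARRIER, EX_OP_FLUSH };

#define EX_FEAT_BARRIER 0x1     // backend advertised feature-barrier
#define EX_FEAT_FLUSH   0x2     // backend advertised feature-flush-cache

static int
pick_op(enum bio_cmd cmd, int ordered, unsigned feats)
{
        switch (cmd) {
        case EX_BIO_READ:
                return (EX_OP_READ);
        case EX_BIO_WRITE:
                if (ordered && (feats & EX_FEAT_BARRIER))
                        return (EX_OP_WRITE_BARRIER);
                return (EX_OP_WRITE);   // without barriers the driver
                                        // single-steps ordered writes instead
        case EX_BIO_FLUSH:
                if (feats & EX_FEAT_FLUSH)
                        return (EX_OP_FLUSH);
                if (feats & EX_FEAT_BARRIER)
                        return (EX_OP_WRITE_BARRIER);
                return (-1);            // driver panics: flush cannot be honored
        }
        return (-1);
}

int
main(void)
{
        printf("ordered write, barrier only: %d\n",
            pick_op(EX_BIO_WRITE, 1, EX_FEAT_BARRIER));
        printf("flush, no flush support:     %d\n",
            pick_op(EX_BIO_FLUSH, 0, EX_FEAT_BARRIER));
        return (0);
}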
*/ xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE); } if ((error = xbd_queue_request(sc, cm)) != 0) { printf("xbd_queue_request returned %d\n", error); break; } queued++; } if (queued != 0) xbd_flush_requests(sc); } static void xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm) { struct bio *bp; bp = cm->cm_bp; - if (unlikely(cm->cm_status != BLKIF_RSP_OKAY)) { + if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) { disk_err(bp, "disk error" , -1, 0); printf(" status: %x\n", cm->cm_status); bp->bio_flags |= BIO_ERROR; } if (bp->bio_flags & BIO_ERROR) bp->bio_error = EIO; else bp->bio_resid = 0; xbd_free_command(cm); biodone(bp); } static int xbd_completion(struct xbd_command *cm) { gnttab_end_foreign_access_references(cm->cm_nseg, cm->cm_sg_refs); return (BLKIF_SEGS_TO_BLOCKS(cm->cm_nseg)); } static void xbd_int(void *xsc) { struct xbd_softc *sc = xsc; struct xbd_command *cm; blkif_response_t *bret; RING_IDX i, rp; int op; mtx_lock(&sc->xbd_io_lock); - if (unlikely(sc->xbd_state == XBD_STATE_DISCONNECTED)) { + if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) { mtx_unlock(&sc->xbd_io_lock); return; } again: rp = sc->xbd_ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ for (i = sc->xbd_ring.rsp_cons; i != rp;) { bret = RING_GET_RESPONSE(&sc->xbd_ring, i); cm = &sc->xbd_shadow[bret->id]; xbd_remove_cm(cm, XBD_Q_BUSY); i += xbd_completion(cm); if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_POSTREAD; else if (cm->cm_operation == BLKIF_OP_WRITE || cm->cm_operation == BLKIF_OP_WRITE_BARRIER) op = BUS_DMASYNC_POSTWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map); /* * Release any hold this command has on future command * dispatch. */ xbd_cm_thaw(sc, cm); /* * Directly call the i/o complete routine to save an * an indirection in the common case. */ cm->cm_status = bret->status; if (cm->cm_bp) xbd_bio_complete(sc, cm); else if (cm->cm_complete != NULL) cm->cm_complete(cm); else xbd_free_command(cm); } sc->xbd_ring.rsp_cons = i; if (i != sc->xbd_ring.req_prod_pvt) { int more_to_do; RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do); if (more_to_do) goto again; } else { sc->xbd_ring.sring->rsp_event = i + 1; } if (xbd_queue_length(sc, XBD_Q_BUSY) == 0) xbd_thaw(sc, XBDF_WAIT_IDLE); xbd_startio(sc); - if (unlikely(sc->xbd_state == XBD_STATE_SUSPENDED)) + if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED)) wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]); mtx_unlock(&sc->xbd_io_lock); } /*------------------------------- Dump Support -------------------------------*/ /** * Quiesce the disk writes for a dump file before allowing the next buffer. */ static void xbd_quiesce(struct xbd_softc *sc) { int mtd; // While there are outstanding requests while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd); if (mtd) { /* Recieved request completions, update queue. */ xbd_int(sc); } if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Still pending requests, wait for the disk i/o * to complete. 
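The interrupt handler above drains responses up to a snapshot of rsp_prod and then relies on RING_FINAL_CHECK_FOR_RESPONSES() to re-arm the event and catch any response published during the drain. The standalone model below, with a mock ring, walks through that re-check pattern; it is a simplification of the real ring macros.

// Standalone model of the response-consumption pattern in xbd_int():
// drain up to the producer index, then do a final check that re-arms the
// event and catches responses that arrived while draining.
#include <stdio.h>

struct ex_ring {
        unsigned rsp_prod;      // written by the "backend"
        unsigned rsp_cons;      // our consumer index
        unsigned rsp_event;     // backend notifies when rsp_prod reaches this
};

// Simplified RING_FINAL_CHECK_FOR_RESPONSES(): re-arm the event and report
// whether more responses slipped in before the re-arm took effect.
static int
final_check_for_responses(struct ex_ring *r)
{
        if (r->rsp_cons != r->rsp_prod)
                return (1);
        r->rsp_event = r->rsp_cons + 1;
        return (r->rsp_cons != r->rsp_prod);
}

static void
consume(struct ex_ring *r)
{
        int more;

        do {
                unsigned rp = r->rsp_prod;      // snapshot (rmb() in the driver)
                while (r->rsp_cons != rp) {
                        printf("handled response %u\n", r->rsp_cons);
                        r->rsp_cons++;
                }
                // Pretend the backend slipped one more response in here.
                if (r->rsp_prod == 3)
                        r->rsp_prod = 4;
                more = final_check_for_responses(r);
        } while (more);
}

int
main(void)
{
        struct ex_ring r = { .rsp_prod = 3, .rsp_cons = 0, .rsp_event = 1 };

        consume(&r);
        printf("rsp_event re-armed at %u\n", r.rsp_event);
        return (0);
}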
*/ HYPERVISOR_yield(); } } } /* Kernel dump function for a paravirtualized disk device */ static void xbd_dump_complete(struct xbd_command *cm) { xbd_enqueue_cm(cm, XBD_Q_COMPLETE); } static int xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct disk *dp = arg; struct xbd_softc *sc = dp->d_drv1; struct xbd_command *cm; size_t chunk; int sbp; int rc = 0; if (length <= 0) return (rc); xbd_quiesce(sc); /* All quiet on the western front. */ /* * If this lock is held, then this module is failing, and a * successful kernel dump is highly unlikely anyway. */ mtx_lock(&sc->xbd_io_lock); /* Split the 64KB block as needed */ for (sbp=0; length > 0; sbp++) { cm = xbd_dequeue_cm(sc, XBD_Q_FREE); if (cm == NULL) { mtx_unlock(&sc->xbd_io_lock); device_printf(sc->xbd_dev, "dump: no more commands?\n"); return (EBUSY); } if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, &cm->cm_gref_head) != 0) { xbd_free_command(cm); mtx_unlock(&sc->xbd_io_lock); device_printf(sc->xbd_dev, "no more grant allocs?\n"); return (EBUSY); } chunk = length > sc->xbd_max_request_size ? sc->xbd_max_request_size : length; cm->cm_data = virtual; cm->cm_datalen = chunk; cm->cm_operation = BLKIF_OP_WRITE; cm->cm_sector_number = offset / dp->d_sectorsize; cm->cm_complete = xbd_dump_complete; xbd_enqueue_cm(cm, XBD_Q_READY); length -= chunk; offset += chunk; virtual = (char *) virtual + chunk; } /* Tell DOM0 to do the I/O */ xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); /* Poll for the completion. */ xbd_quiesce(sc); /* All quite on the eastern front */ /* If there were any errors, bail out... */ while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) { if (cm->cm_status != BLKIF_RSP_OKAY) { device_printf(sc->xbd_dev, "Dump I/O failed at sector %jd\n", cm->cm_sector_number); rc = EIO; } xbd_free_command(cm); } return (rc); } /*----------------------------- Disk Entrypoints -----------------------------*/ static int xbd_open(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) { printf("xb%d: not found", sc->xbd_unit); return (ENXIO); } sc->xbd_flags |= XBDF_OPEN; sc->xbd_users++; return (0); } static int xbd_close(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); sc->xbd_flags &= ~XBDF_OPEN; if (--(sc->xbd_users) == 0) { /* * Check whether we have been instructed to close. We will * have ignored this request initially, as the device was * still mounted. */ if (xenbus_get_otherend_state(sc->xbd_dev) == XenbusStateClosing) xbd_closing(sc->xbd_dev); } return (0); } static int xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); return (ENOTTY); } /* * Read/write routine for a buffer. Finds the proper unit, place it on * the sortq and kick the controller. */ static void xbd_strategy(struct bio *bp) { struct xbd_softc *sc = bp->bio_disk->d_drv1; /* bogus disk? 
*/ if (sc == NULL) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; biodone(bp); return; } /* * Place it in the queue of disk activities for this disk */ mtx_lock(&sc->xbd_io_lock); xbd_enqueue_bio(sc, bp); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); return; } /*------------------------------ Ring Management -----------------------------*/ static int xbd_alloc_ring(struct xbd_softc *sc) { blkif_sring_t *sring; uintptr_t sring_page_addr; int error; int i; sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sring == NULL) { xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring"); return (ENOMEM); } SHARED_RING_INIT(sring); FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE); for (i = 0, sring_page_addr = (uintptr_t)sring; i < sc->xbd_ring_pages; i++, sring_page_addr += PAGE_SIZE) { error = xenbus_grant_ring(sc->xbd_dev, (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "granting ring_ref(%d)", i); return (error); } } if (sc->xbd_ring_pages == 1) { error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), "ring-ref", "%u", sc->xbd_ring_ref[0]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-ref", xenbus_get_node(sc->xbd_dev)); return (error); } } else { for (i = 0; i < sc->xbd_ring_pages; i++) { char ring_ref_name[]= "ring_refXX"; snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i); error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), ring_ref_name, "%u", sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/%s", xenbus_get_node(sc->xbd_dev), ring_ref_name); return (error); } } } - error = bind_listening_port_to_irqhandler( - xenbus_get_otherend_id(sc->xbd_dev), - "xbd", (driver_intr_t *)xbd_int, sc, - INTR_TYPE_BIO | INTR_MPSAFE, &sc->xbd_irq); + error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev, + xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc, + INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, - "bind_evtchn_to_irqhandler failed"); + "xen_intr_alloc_and_bind_local_port failed"); return (error); } return (0); } static void xbd_free_ring(struct xbd_softc *sc) { int i; if (sc->xbd_ring.sring == NULL) return; for (i = 0; i < sc->xbd_ring_pages; i++) { if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) { gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]); sc->xbd_ring_ref[i] = GRANT_REF_INVALID; } } free(sc->xbd_ring.sring, M_XENBLOCKFRONT); sc->xbd_ring.sring = NULL; } /*-------------------------- Initialization/Teardown -------------------------*/ static int xbd_feature_string(struct xbd_softc *sc, char *features, size_t len) { struct sbuf sb; int feature_cnt; sbuf_new(&sb, features, len, SBUF_FIXEDLEN); feature_cnt = 0; if ((sc->xbd_flags & XBDF_FLUSH) != 0) { sbuf_printf(&sb, "flush"); feature_cnt++; } if ((sc->xbd_flags & XBDF_BARRIER) != 0) { if (feature_cnt != 0) sbuf_printf(&sb, ", "); sbuf_printf(&sb, "write_barrier"); feature_cnt++; } (void) sbuf_finish(&sb); return (sbuf_len(&sb)); } static int xbd_sysctl_features(SYSCTL_HANDLER_ARGS) { char features[80]; struct xbd_softc *sc = arg1; int error; int len; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); len = xbd_feature_string(sc, features, sizeof(features)); /* len is -1 on error, which will make the SYSCTL_OUT a no-op. 
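xbd_feature_string() above uses sbuf(9) to build a comma-separated list of the negotiated features into a fixed buffer, and the sysctl handler exports it including the terminating NUL. A plain-libc sketch of the same construction, using snprintf() and assumed flag names, is shown here.

// Standalone equivalent of xbd_feature_string(): build a comma-separated
// feature list into a fixed-size buffer.  Flag names are assumed.
#include <stdio.h>
#include <string.h>

#define EX_FLUSH        0x1
#define EX_BARRIER      0x2

static int
feature_string(unsigned flags, char *buf, size_t len)
{
        size_t off = 0;

        buf[0] = '\0';
        if (flags & EX_FLUSH)
                off += snprintf(buf + off, len - off, "%s%s",
                    off ? ", " : "", "flush");
        if (flags & EX_BARRIER)
                off += snprintf(buf + off, len - off, "%s%s",
                    off ? ", " : "", "write_barrier");
        return ((int)off);              // length, like sbuf_len()
}

int
main(void)
{
        char features[80];
        int len;

        len = feature_string(EX_FLUSH | EX_BARRIER, features,
            sizeof(features));
        printf("features (%d bytes): %s\n", len, features);
        return (0);
}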
*/ return (SYSCTL_OUT(req, features, len + 1/*NUL*/)); } static void xbd_setup_sysctl(struct xbd_softc *xbd) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; struct sysctl_oid_list *children; sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev); if (sysctl_tree == NULL) return; children = SYSCTL_CHILDREN(sysctl_tree); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1, "maximum outstanding requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_request_segments", CTLFLAG_RD, &xbd->xbd_max_request_segments, 0, "maximum number of pages per requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0, "maximum size in bytes of a request (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0, "communication channel pages (negotiated)"); SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO, "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0, xbd_sysctl_features, "A", "protocol features (negotiated)"); } /* * Translate Linux major/minor to an appropriate name and unit * number. For HVM guests, this allows us to use the same drive names * with blkfront as the emulated drives, easing transition slightly. */ static void xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name) { static struct vdev_info { int major; int shift; int base; const char *name; } info[] = { {3, 6, 0, "ada"}, /* ide0 */ {22, 6, 2, "ada"}, /* ide1 */ {33, 6, 4, "ada"}, /* ide2 */ {34, 6, 6, "ada"}, /* ide3 */ {56, 6, 8, "ada"}, /* ide4 */ {57, 6, 10, "ada"}, /* ide5 */ {88, 6, 12, "ada"}, /* ide6 */ {89, 6, 14, "ada"}, /* ide7 */ {90, 6, 16, "ada"}, /* ide8 */ {91, 6, 18, "ada"}, /* ide9 */ {8, 4, 0, "da"}, /* scsi disk0 */ {65, 4, 16, "da"}, /* scsi disk1 */ {66, 4, 32, "da"}, /* scsi disk2 */ {67, 4, 48, "da"}, /* scsi disk3 */ {68, 4, 64, "da"}, /* scsi disk4 */ {69, 4, 80, "da"}, /* scsi disk5 */ {70, 4, 96, "da"}, /* scsi disk6 */ {71, 4, 112, "da"}, /* scsi disk7 */ {128, 4, 128, "da"}, /* scsi disk8 */ {129, 4, 144, "da"}, /* scsi disk9 */ {130, 4, 160, "da"}, /* scsi disk10 */ {131, 4, 176, "da"}, /* scsi disk11 */ {132, 4, 192, "da"}, /* scsi disk12 */ {133, 4, 208, "da"}, /* scsi disk13 */ {134, 4, 224, "da"}, /* scsi disk14 */ {135, 4, 240, "da"}, /* scsi disk15 */ {202, 4, 0, "xbd"}, /* xbd */ {0, 0, 0, NULL}, }; int major = vdevice >> 8; int minor = vdevice & 0xff; int i; if (vdevice & (1 << 28)) { *unit = (vdevice & ((1 << 28) - 1)) >> 8; *name = "xbd"; return; } for (i = 0; info[i].major; i++) { if (info[i].major == major) { *unit = info[i].base + (minor >> info[i].shift); *name = info[i].name; return; } } *unit = minor >> 4; *name = "xbd"; } int xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors, int vdevice, uint16_t vdisk_info, unsigned long sector_size) { char features[80]; int unit, error = 0; const char *name; xbd_vdevice_to_unit(vdevice, &unit, &name); sc->xbd_unit = unit; if (strcmp(name, "xbd") != 0) device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit); if (xbd_feature_string(sc, features, sizeof(features)) > 0) { device_printf(sc->xbd_dev, "features: %s\n", features); } sc->xbd_disk = disk_alloc(); sc->xbd_disk->d_unit = sc->xbd_unit; sc->xbd_disk->d_open = xbd_open; sc->xbd_disk->d_close = xbd_close; sc->xbd_disk->d_ioctl = xbd_ioctl; sc->xbd_disk->d_strategy = 
xbd_strategy; sc->xbd_disk->d_dump = xbd_dump; sc->xbd_disk->d_name = name; sc->xbd_disk->d_drv1 = sc; sc->xbd_disk->d_sectorsize = sector_size; sc->xbd_disk->d_mediasize = sectors * sector_size; sc->xbd_disk->d_maxsize = sc->xbd_max_request_size; sc->xbd_disk->d_flags = 0; if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) { sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE; device_printf(sc->xbd_dev, "synchronize cache commands enabled.\n"); } disk_create(sc->xbd_disk, DISK_VERSION); return error; } static void xbd_free(struct xbd_softc *sc) { int i; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_DISCONNECTED; mtx_unlock(&sc->xbd_io_lock); /* Free resources associated with old device channel. */ xbd_free_ring(sc); if (sc->xbd_shadow) { for (i = 0; i < sc->xbd_max_requests; i++) { struct xbd_command *cm; cm = &sc->xbd_shadow[i]; if (cm->cm_sg_refs != NULL) { free(cm->cm_sg_refs, M_XENBLOCKFRONT); cm->cm_sg_refs = NULL; } bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map); } free(sc->xbd_shadow, M_XENBLOCKFRONT); sc->xbd_shadow = NULL; bus_dma_tag_destroy(sc->xbd_io_dmat); xbd_initq_cm(sc, XBD_Q_FREE); xbd_initq_cm(sc, XBD_Q_READY); xbd_initq_cm(sc, XBD_Q_COMPLETE); } - if (sc->xbd_irq) { - unbind_from_irqhandler(sc->xbd_irq); - sc->xbd_irq = 0; - } + xen_intr_unbind(&sc->xen_intr_handle); + } /*--------------------------- State Change Handlers --------------------------*/ static void xbd_initialize(struct xbd_softc *sc) { const char *otherend_path; const char *node_path; uint32_t max_ring_page_order; int error; int i; if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) { /* Initialization has already been performed. */ return; } /* * Protocol defaults valid even if negotiation for a * setting fails. */ max_ring_page_order = 0; sc->xbd_ring_pages = 1; sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; sc->xbd_max_request_size = XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); sc->xbd_max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->xbd_max_request_segments); /* * Protocol negotiation. * * \note xs_gather() returns on the first encountered error, so * we must use independant calls in order to guarantee * we don't miss information in a sparsly populated back-end * tree. * * \note xs_scanf() does not update variables for unmatched * fields. */ otherend_path = xenbus_get_otherend_path(sc->xbd_dev); node_path = xenbus_get_node(sc->xbd_dev); /* Support both backend schemes for relaying ring page limits. 
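The xbd_vdevice_to_unit() translation a little earlier maps the Linux-style virtual-device number either through the extended scheme (bit 28 set, unit encoded directly) or through a table of legacy IDE/SCSI majors so the disk appears under a familiar ada or da name. A standalone sketch with a trimmed table makes both paths concrete.

// Standalone sketch of the xbd_vdevice_to_unit() decoding with a trimmed
// major table; the numbering follows the Linux block-device convention.
#include <stdio.h>

struct vdev_info {
        int major, shift, base;
        const char *name;
};

static const struct vdev_info table[] = {
        { 3, 6, 0, "ada" },     // ide0: hda/hdb
        { 22, 6, 2, "ada" },    // ide1: hdc/hdd
        { 8, 4, 0, "da" },      // scsi disk0
        { 202, 4, 0, "xbd" },   // xvd
        { 0, 0, 0, NULL },
};

static void
vdevice_to_unit(unsigned vdevice, int *unit, const char **name)
{
        int major = vdevice >> 8, minor = vdevice & 0xff, i;

        if (vdevice & (1u << 28)) {             // extended encoding
                *unit = (vdevice & ((1u << 28) - 1)) >> 8;
                *name = "xbd";
                return;
        }
        for (i = 0; table[i].major; i++) {
                if (table[i].major == major) {
                        *unit = table[i].base + (minor >> table[i].shift);
                        *name = table[i].name;
                        return;
                }
        }
        *unit = minor >> 4;                     // unknown major: fall back
        *name = "xbd";
}

int
main(void)
{
        const char *name;
        int unit;

        vdevice_to_unit((3u << 8) | 64, &unit, &name);  // Linux hdb
        printf("vdevice 0x%x -> %s%d\n", (3u << 8) | 64, name, unit);
        vdevice_to_unit((1u << 28) | (5u << 8), &unit, &name);
        printf("extended      -> %s%d\n", name, unit);
        return (0);
}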
*/ (void)xs_scanf(XST_NIL, otherend_path, "max-ring-page-order", NULL, "%" PRIu32, &max_ring_page_order); sc->xbd_ring_pages = 1 << max_ring_page_order; (void)xs_scanf(XST_NIL, otherend_path, "max-ring-pages", NULL, "%" PRIu32, &sc->xbd_ring_pages); if (sc->xbd_ring_pages < 1) sc->xbd_ring_pages = 1; sc->xbd_max_requests = BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE); (void)xs_scanf(XST_NIL, otherend_path, "max-requests", NULL, "%" PRIu32, &sc->xbd_max_requests); (void)xs_scanf(XST_NIL, otherend_path, "max-request-segments", NULL, "%" PRIu32, &sc->xbd_max_request_segments); (void)xs_scanf(XST_NIL, otherend_path, "max-request-size", NULL, "%" PRIu32, &sc->xbd_max_request_size); if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) { device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "limited to front-end limit of %zu.\n", sc->xbd_ring_pages, XBD_MAX_RING_PAGES); sc->xbd_ring_pages = XBD_MAX_RING_PAGES; } if (powerof2(sc->xbd_ring_pages) == 0) { uint32_t new_page_limit; new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1); device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "is not a power of 2. Limited to %u.\n", sc->xbd_ring_pages, new_page_limit); sc->xbd_ring_pages = new_page_limit; } if (sc->xbd_max_requests > XBD_MAX_REQUESTS) { device_printf(sc->xbd_dev, "Back-end specified max_requests of %u " "limited to front-end limit of %u.\n", sc->xbd_max_requests, XBD_MAX_REQUESTS); sc->xbd_max_requests = XBD_MAX_REQUESTS; } if (sc->xbd_max_request_segments > XBD_MAX_SEGMENTS_PER_REQUEST) { device_printf(sc->xbd_dev, "Back-end specified max_request_segments of %u " "limited to front-end limit of %u.\n", sc->xbd_max_request_segments, XBD_MAX_SEGMENTS_PER_REQUEST); sc->xbd_max_request_segments = XBD_MAX_SEGMENTS_PER_REQUEST; } if (sc->xbd_max_request_size > XBD_MAX_REQUEST_SIZE) { device_printf(sc->xbd_dev, "Back-end specified max_request_size of %u " "limited to front-end limit of %u.\n", sc->xbd_max_request_size, XBD_MAX_REQUEST_SIZE); sc->xbd_max_request_size = XBD_MAX_REQUEST_SIZE; } if (sc->xbd_max_request_size > XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments)) { device_printf(sc->xbd_dev, "Back-end specified max_request_size of %u " "limited to front-end limit of %u. (Too few segments.)\n", sc->xbd_max_request_size, XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments)); sc->xbd_max_request_size = XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); } sc->xbd_max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->xbd_max_request_segments); /* Allocate datastructures based on negotiated values. */ error = bus_dma_tag_create( bus_get_dma_tag(sc->xbd_dev), /* parent */ 512, PAGE_SIZE, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->xbd_max_request_size, sc->xbd_max_request_segments, PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->xbd_io_lock, /* lockarg */ &sc->xbd_io_dmat); if (error != 0) { xenbus_dev_fatal(sc->xbd_dev, error, "Cannot allocate parent DMA tag\n"); return; } /* Per-transaction data allocation. 
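The negotiation code above takes nothing from the backend on faith: ring pages, request count, segment count and request size are each clamped to the front end's compiled-in maxima, and a non-power-of-two ring-page count is rounded down using fls(). The self-contained sketch below shows the clamp and the rounding with an assumed limit.

// Standalone sketch of the ring-page sanitizing in xbd_initialize():
// clamp to the front-end limit and round down to a power of two via fls().
#include <stdio.h>

#define EX_MAX_RING_PAGES       8       // assumed front-end limit

static int
example_fls(unsigned v)
{
        int bit;

        for (bit = 0; v != 0; bit++)
                v >>= 1;
        return (bit);
}

static unsigned
sanitize_ring_pages(unsigned backend_pages)
{
        unsigned pages = backend_pages;

        if (pages < 1)
                pages = 1;
        if (pages > EX_MAX_RING_PAGES)
                pages = EX_MAX_RING_PAGES;
        if ((pages & (pages - 1)) != 0)         // not a power of two
                pages = 1u << (example_fls(pages) - 1);
        return (pages);
}

int
main(void)
{
        printf("backend 6  -> %u ring pages\n", sanitize_ring_pages(6));  // 4
        printf("backend 32 -> %u ring pages\n", sanitize_ring_pages(32)); // 8
        printf("backend 0  -> %u ring pages\n", sanitize_ring_pages(0));  // 1
        return (0);
}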
*/ sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sc->xbd_shadow == NULL) { bus_dma_tag_destroy(sc->xbd_io_dmat); xenbus_dev_fatal(sc->xbd_dev, error, "Cannot allocate request structures\n"); return; } for (i = 0; i < sc->xbd_max_requests; i++) { struct xbd_command *cm; cm = &sc->xbd_shadow[i]; cm->cm_sg_refs = malloc( sizeof(grant_ref_t) * sc->xbd_max_request_segments, M_XENBLOCKFRONT, M_NOWAIT); if (cm->cm_sg_refs == NULL) break; cm->cm_id = i; cm->cm_flags = XBDCF_INITIALIZER; cm->cm_sc = sc; if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) break; xbd_free_command(cm); } if (xbd_alloc_ring(sc) != 0) return; /* Support both backend schemes for relaying ring page limits. */ if (sc->xbd_ring_pages > 1) { error = xs_printf(XST_NIL, node_path, "num-ring-pages","%u", sc->xbd_ring_pages); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/num-ring-pages", node_path); return; } error = xs_printf(XST_NIL, node_path, "ring-page-order", "%u", fls(sc->xbd_ring_pages) - 1); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-page-order", node_path); return; } } error = xs_printf(XST_NIL, node_path, "max-requests","%u", sc->xbd_max_requests); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/max-requests", node_path); return; } error = xs_printf(XST_NIL, node_path, "max-request-segments","%u", sc->xbd_max_request_segments); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/max-request-segments", node_path); return; } error = xs_printf(XST_NIL, node_path, "max-request-size","%u", sc->xbd_max_request_size); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/max-request-size", node_path); return; } error = xs_printf(XST_NIL, node_path, "event-channel", - "%u", irq_to_evtchn_port(sc->xbd_irq)); + "%u", xen_intr_port(sc->xen_intr_handle)); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/event-channel", node_path); return; } error = xs_printf(XST_NIL, node_path, "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/protocol", node_path); return; } xenbus_set_state(sc->xbd_dev, XenbusStateInitialised); } /* * Invoked when the backend is finally 'ready' (and has published * the details about the physical device - #sectors, size, etc). 
*/ static void xbd_connect(struct xbd_softc *sc) { device_t dev = sc->xbd_dev; unsigned long sectors, sector_size; unsigned int binfo; int err, feature_barrier, feature_flush; if (sc->xbd_state == XBD_STATE_CONNECTED || sc->xbd_state == XBD_STATE_SUSPENDED) return; DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "sectors", "%lu", §ors, "info", "%u", &binfo, "sector-size", "%lu", §or_size, NULL); if (err) { xenbus_dev_fatal(dev, err, "reading backend fields at %s", xenbus_get_otherend_path(dev)); return; } err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-barrier", "%lu", &feature_barrier, NULL); if (err == 0 && feature_barrier != 0) sc->xbd_flags |= XBDF_BARRIER; err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-flush-cache", "%lu", &feature_flush, NULL); if (err == 0 && feature_flush != 0) sc->xbd_flags |= XBDF_FLUSH; if (sc->xbd_disk == NULL) { device_printf(dev, "%juMB <%s> at %s", (uintmax_t) sectors / (1048576 / sector_size), device_get_desc(dev), xenbus_get_node(dev)); bus_print_child_footer(device_get_parent(dev), dev); xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo, sector_size); } (void)xenbus_set_state(dev, XenbusStateConnected); /* Kick pending requests. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_CONNECTED; xbd_startio(sc); sc->xbd_flags |= XBDF_READY; mtx_unlock(&sc->xbd_io_lock); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once this is done, we can switch to Closed in * acknowledgement. */ static void xbd_closing(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); xenbus_set_state(dev, XenbusStateClosing); DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev)); if (sc->xbd_disk != NULL) { disk_destroy(sc->xbd_disk); sc->xbd_disk = NULL; } xenbus_set_state(dev, XenbusStateClosed); } /*---------------------------- NewBus Entrypoints ----------------------------*/ static int xbd_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vbd")) { device_set_desc(dev, "Virtual Block Device"); device_quiet(dev); return (0); } return (ENXIO); } /* * Setup supplies the backend dir, virtual device. We place an event * channel and shared frame entries. We watch backend to wait if it's * ok. */ static int xbd_attach(device_t dev) { struct xbd_softc *sc; const char *name; uint32_t vdevice; int error; int i; int unit; /* FIXME: Use dynamic device id if this is not set. */ error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device", NULL, "%" PRIu32, &vdevice); if (error) { xenbus_dev_fatal(dev, error, "reading virtual-device"); device_printf(dev, "Couldn't determine virtual device.\n"); return (error); } xbd_vdevice_to_unit(vdevice, &unit, &name); if (!strcmp(name, "xbd")) device_set_unit(dev, unit); sc = device_get_softc(dev); mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF); xbd_initqs(sc); for (i = 0; i < XBD_MAX_RING_PAGES; i++) sc->xbd_ring_ref[i] = GRANT_REF_INVALID; sc->xbd_dev = dev; sc->xbd_vdevice = vdevice; sc->xbd_state = XBD_STATE_DISCONNECTED; xbd_setup_sysctl(sc); /* Wait for backend device to publish its protocol capabilities. 
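xbd_connect() above reads sectors and sector-size from the backend, sizes the disk as sectors * sector_size, and reports the capacity in MB as sectors / (1048576 / sector_size). A short standalone check of that arithmetic with assumed values:

// Standalone check of the capacity arithmetic used when xbd_connect()
// prints the disk size; the sector count and size are assumed values.
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        unsigned long sectors = 20971520;       // e.g. a 10 GiB backend
        unsigned long sector_size = 512;
        uint64_t mediasize;
        uintmax_t mb;

        mediasize = (uint64_t)sectors * sector_size;
        // Same expression the driver prints: 1048576 / sector_size is the
        // number of sectors per MiB, so the division yields whole MiB.
        mb = sectors / (1048576 / sector_size);
        printf("%lu sectors x %lu bytes = %llu bytes (%juMB)\n",
            sectors, sector_size, (unsigned long long)mediasize, mb);
        return (0);
}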
*/ xenbus_set_state(dev, XenbusStateInitialising); return (0); } static int xbd_detach(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev)); xbd_free(sc); mtx_destroy(&sc->xbd_io_lock); return 0; } static int xbd_suspend(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); int retval; int saved_state; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); saved_state = sc->xbd_state; sc->xbd_state = XBD_STATE_SUSPENDED; /* Wait for outstanding I/O to drain. */ retval = 0; while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock, PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) { retval = EBUSY; break; } } mtx_unlock(&sc->xbd_io_lock); if (retval != 0) sc->xbd_state = saved_state; return (retval); } static int xbd_resume(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev)); xbd_free(sc); xbd_initialize(sc); return (0); } /** * Callback received when the backend's state changes. */ static void xbd_backend_changed(device_t dev, XenbusState backend_state) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("backend_state=%d\n", backend_state); switch (backend_state) { case XenbusStateUnknown: case XenbusStateInitialising: case XenbusStateReconfigured: case XenbusStateReconfiguring: case XenbusStateClosed: break; case XenbusStateInitWait: case XenbusStateInitialised: xbd_initialize(sc); break; case XenbusStateConnected: xbd_initialize(sc); xbd_connect(sc); break; case XenbusStateClosing: if (sc->xbd_users > 0) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else xbd_closing(dev); break; } } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xbd_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xbd_probe), DEVMETHOD(device_attach, xbd_attach), DEVMETHOD(device_detach, xbd_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xbd_suspend), DEVMETHOD(device_resume, xbd_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed), { 0, 0 } }; static driver_t xbd_driver = { "xbd", xbd_methods, sizeof(struct xbd_softc), }; devclass_t xbd_devclass; DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0); diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h index 0f7d6cb124d8..9c803bc2eacf 100644 --- a/sys/dev/xen/blkfront/block.h +++ b/sys/dev/xen/blkfront/block.h @@ -1,349 +1,349 @@ /* * XenBSD block device driver * * Copyright (c) 2010-2013 Spectra Logic Corporation * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef __XEN_BLKFRONT_BLOCK_H__ #define __XEN_BLKFRONT_BLOCK_H__ #include /** * Given a number of blkif segments, compute the maximum I/O size supported. * * \note This calculation assumes that all but the first and last segments * of the I/O are fully utilized. * * \note We reserve a segement from the maximum supported by the transport to * guarantee we can handle an unaligned transfer without the need to * use a bounce buffer. */ #define XBD_SEGS_TO_SIZE(segs) \ (((segs) - 1) * PAGE_SIZE) /** * Compute the maximum number of blkif segments requried to represent * an I/O of the given size. * * \note This calculation assumes that all but the first and last segments * of the I/O are fully utilized. * * \note We reserve a segement to guarantee we can handle an unaligned * transfer without the need to use a bounce buffer. */ #define XBD_SIZE_TO_SEGS(size) \ ((size / PAGE_SIZE) + 1) /** * The maximum number of outstanding requests blocks (request headers plus * additional segment blocks) we will allow in a negotiated block-front/back * communication channel. */ #define XBD_MAX_REQUESTS 256 /** * The maximum mapped region size per request we will allow in a negotiated * block-front/back communication channel. */ #define XBD_MAX_REQUEST_SIZE \ MIN(MAXPHYS, XBD_SEGS_TO_SIZE(BLKIF_MAX_SEGMENTS_PER_REQUEST)) /** * The maximum number of segments (within a request header and accompanying * segment blocks) per request we will allow in a negotiated block-front/back * communication channel. */ #define XBD_MAX_SEGMENTS_PER_REQUEST \ (MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ XBD_SIZE_TO_SEGS(XBD_MAX_REQUEST_SIZE))) /** * The maximum number of shared memory ring pages we will allow in a * negotiated block-front/back communication channel. Allow enough * ring space for all requests to be XBD_MAX_REQUEST_SIZE'd. */ #define XBD_MAX_RING_PAGES \ BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBD_MAX_SEGMENTS_PER_REQUEST) \ * XBD_MAX_REQUESTS) typedef enum { XBDCF_Q_MASK = 0xFF, /* This command has contributed to xbd_qfrozen_cnt. */ XBDCF_FROZEN = 1<<8, /* Freeze the command queue on dispatch (i.e. single step command). */ XBDCF_Q_FREEZE = 1<<9, /* Bus DMA returned EINPROGRESS for this command. 
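The sizing macros above hold one segment in reserve so an unaligned transfer never needs a bounce buffer: N segments promise only (N - 1) pages of payload, and a transfer of S bytes is budgeted S / PAGE_SIZE + 1 segments. The standalone check below runs the numbers with assumed values (4 KiB pages, 128 KiB MAXPHYS, and the classic blkif limit of 11 segments per request).

// Standalone check of the XBD_SEGS_TO_SIZE()/XBD_SIZE_TO_SEGS() arithmetic.
// PAGE_SIZE, MAXPHYS and the per-request segment limit are assumed values.
#include <stdio.h>

#define EX_PAGE_SIZE    4096UL
#define EX_MAXPHYS      (128 * 1024UL)
#define EX_MAX_SEGS     11UL

// One segment is held back so an unaligned transfer never needs a bounce
// buffer: only (segs - 1) full pages of payload are promised.
#define SEGS_TO_SIZE(segs)      (((segs) - 1) * EX_PAGE_SIZE)
#define SIZE_TO_SEGS(size)      (((size) / EX_PAGE_SIZE) + 1)
#define MIN_EX(a, b)            ((a) < (b) ? (a) : (b))

int
main(void)
{
        unsigned long max_size, max_segs;

        max_size = MIN_EX(EX_MAXPHYS, SEGS_TO_SIZE(EX_MAX_SEGS));
        max_segs = MIN_EX(EX_MAX_SEGS, SIZE_TO_SEGS(max_size));

        printf("SEGS_TO_SIZE(%lu) = %lu bytes\n", EX_MAX_SEGS,
            SEGS_TO_SIZE(EX_MAX_SEGS));                 // 40960
        printf("max request size  = %lu bytes\n", max_size);
        printf("max segments      = %lu\n", max_segs);  // round-trips to 11
        return (0);
}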
*/ XBDCF_ASYNC_MAPPING = 1<<10, XBDCF_INITIALIZER = XBDCF_Q_MASK } xbdc_flag_t; struct xbd_command; typedef void xbd_cbcf_t(struct xbd_command *); struct xbd_command { TAILQ_ENTRY(xbd_command) cm_link; struct xbd_softc *cm_sc; xbdc_flag_t cm_flags; bus_dmamap_t cm_map; uint64_t cm_id; grant_ref_t *cm_sg_refs; struct bio *cm_bp; grant_ref_t cm_gref_head; void *cm_data; size_t cm_datalen; u_int cm_nseg; int cm_operation; blkif_sector_t cm_sector_number; int cm_status; xbd_cbcf_t *cm_complete; }; typedef enum { XBD_Q_FREE, XBD_Q_READY, XBD_Q_BUSY, XBD_Q_COMPLETE, XBD_Q_BIO, XBD_Q_COUNT, XBD_Q_NONE = XBDCF_Q_MASK } xbd_q_index_t; typedef struct xbd_cm_q { TAILQ_HEAD(, xbd_command) q_tailq; uint32_t q_length; uint32_t q_max; } xbd_cm_q_t; typedef enum { XBD_STATE_DISCONNECTED, XBD_STATE_CONNECTED, XBD_STATE_SUSPENDED } xbd_state_t; typedef enum { XBDF_NONE = 0, XBDF_OPEN = 1 << 0, /* drive is open (can't shut down) */ XBDF_BARRIER = 1 << 1, /* backend supports barriers */ XBDF_FLUSH = 1 << 2, /* backend supports flush */ XBDF_READY = 1 << 3, /* Is ready */ XBDF_CM_SHORTAGE = 1 << 4, /* Free cm resource shortage active. */ XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */ XBDF_WAIT_IDLE = 1 << 6 /* * No new work until oustanding work * completes. */ } xbd_flag_t; /* * We have one of these per vbd, whether ide, scsi or 'other'. */ struct xbd_softc { device_t xbd_dev; struct disk *xbd_disk; /* disk params */ struct bio_queue_head xbd_bioq; /* sort queue */ int xbd_unit; xbd_flag_t xbd_flags; int xbd_qfrozen_cnt; int xbd_vdevice; xbd_state_t xbd_state; u_int xbd_ring_pages; uint32_t xbd_max_requests; uint32_t xbd_max_request_segments; uint32_t xbd_max_request_blocks; uint32_t xbd_max_request_size; grant_ref_t xbd_ring_ref[XBD_MAX_RING_PAGES]; blkif_front_ring_t xbd_ring; - unsigned int xbd_irq; + xen_intr_handle_t xen_intr_handle; struct gnttab_free_callback xbd_callback; xbd_cm_q_t xbd_cm_q[XBD_Q_COUNT]; bus_dma_tag_t xbd_io_dmat; /** * The number of people holding this device open. We won't allow a * hot-unplug unless this is 0. 
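In the flag layout above, the low byte of cm_flags (XBDCF_Q_MASK) records which queue a command is currently on, while the higher bits carry independent state such as XBDCF_FROZEN, and XBDCF_INITIALIZER parks the queue field at "none". A small standalone demo of keeping an index in the low byte of a flags word:

// Standalone demo of storing a queue index in the low byte of a flags word,
// as the xbdc_flag_t/xbd_q_index_t encoding above does.
#include <assert.h>
#include <stdio.h>

#define Q_MASK          0xFF            // low byte: current queue index
#define F_FROZEN        (1 << 8)        // unrelated state bit
#define Q_FREE          0
#define Q_BUSY          2
#define Q_NONE          Q_MASK          // "on no queue" sentinel

static unsigned
set_queue(unsigned flags, unsigned idx)
{
        return ((flags & ~Q_MASK) | idx);
}

int
main(void)
{
        unsigned flags = Q_NONE;        // freshly initialized command

        flags = set_queue(flags, Q_BUSY) | F_FROZEN;
        assert((flags & Q_MASK) == Q_BUSY);     // queue index survives...
        assert((flags & F_FROZEN) != 0);        // ...alongside the state bit
        flags = set_queue(flags, Q_NONE);
        printf("queue field now 0x%02x, frozen bit %s\n", flags & Q_MASK,
            (flags & F_FROZEN) ? "still set" : "clear");
        return (0);
}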
*/ int xbd_users; struct mtx xbd_io_lock; struct xbd_command *xbd_shadow; }; int xbd_instance_create(struct xbd_softc *, blkif_sector_t sectors, int device, uint16_t vdisk_info, unsigned long sector_size); static inline void xbd_added_qentry(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_cm_q *cmq; cmq = &sc->xbd_cm_q[index]; cmq->q_length++; if (cmq->q_length > cmq->q_max) cmq->q_max = cmq->q_length; } static inline void xbd_removed_qentry(struct xbd_softc *sc, xbd_q_index_t index) { sc->xbd_cm_q[index].q_length--; } static inline uint32_t xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index) { return (sc->xbd_cm_q[index].q_length); } static inline void xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_cm_q *cmq; cmq = &sc->xbd_cm_q[index]; TAILQ_INIT(&cmq->q_tailq); cmq->q_length = 0; cmq->q_max = 0; } static inline void xbd_enqueue_cm(struct xbd_command *cm, xbd_q_index_t index) { KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE) panic("%s: command %p is already on queue %d.", __func__, cm, cm->cm_flags & XBDCF_Q_MASK); TAILQ_INSERT_TAIL(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= index; xbd_added_qentry(cm->cm_sc, index); } static inline void xbd_requeue_cm(struct xbd_command *cm, xbd_q_index_t index) { KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE) panic("%s: command %p is already on queue %d.", __func__, cm, cm->cm_flags & XBDCF_Q_MASK); TAILQ_INSERT_HEAD(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= index; xbd_added_qentry(cm->cm_sc, index); } static inline struct xbd_command * xbd_dequeue_cm(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_command *cm; KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm = TAILQ_FIRST(&sc->xbd_cm_q[index].q_tailq)) != NULL) { if ((cm->cm_flags & XBDCF_Q_MASK) != index) { panic("%s: command %p is on queue %d, " "not specified queue %d", __func__, cm, cm->cm_flags & XBDCF_Q_MASK, index); } TAILQ_REMOVE(&sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= XBD_Q_NONE; xbd_removed_qentry(cm->cm_sc, index); } return (cm); } static inline void xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index) { xbd_q_index_t index; index = cm->cm_flags & XBDCF_Q_MASK; KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if (index != expected_index) { panic("%s: command %p is on queue %d, not specified queue %d", __func__, cm, index, expected_index); } TAILQ_REMOVE(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= XBD_Q_NONE; xbd_removed_qentry(cm->cm_sc, index); } static inline void xbd_initq_bio(struct xbd_softc *sc) { bioq_init(&sc->xbd_bioq); } static inline void xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp) { bioq_insert_tail(&sc->xbd_bioq, bp); xbd_added_qentry(sc, XBD_Q_BIO); } static inline void xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp) { bioq_insert_head(&sc->xbd_bioq, bp); xbd_added_qentry(sc, XBD_Q_BIO); } static inline struct bio * xbd_dequeue_bio(struct xbd_softc *sc) { struct bio *bp; if ((bp = bioq_first(&sc->xbd_bioq)) != NULL) { bioq_remove(&sc->xbd_bioq, bp); xbd_removed_qentry(sc, XBD_Q_BIO); } return (bp); } static inline void xbd_initqs(struct 
xbd_softc *sc) { u_int index; for (index = 0; index < XBD_Q_COUNT; index++) xbd_initq_cm(sc, index); xbd_initq_bio(sc); } #endif /* __XEN_BLKFRONT_BLOCK_H__ */ diff --git a/sys/dev/xen/console/console.c b/sys/dev/xen/console/console.c index 6281bf2165e0..65a0e7dad961 100644 --- a/sys/dev/xen/console/console.c +++ b/sys/dev/xen/console/console.c @@ -1,444 +1,442 @@ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include "opt_ddb.h" #ifdef DDB #include #endif static char driver_name[] = "xc"; devclass_t xc_devclass; /* do not make static */ static void xcoutwakeup(struct tty *); static void xc_timeout(void *); static void __xencons_tx_flush(void); static boolean_t xcons_putc(int c); /* switch console so that shutdown can occur gracefully */ static void xc_shutdown(void *arg, int howto); static int xc_mute; static void xcons_force_flush(void); static void xencons_priv_interrupt(void *); static cn_probe_t xc_cnprobe; static cn_init_t xc_cninit; static cn_term_t xc_cnterm; static cn_getc_t xc_cngetc; static cn_putc_t xc_cnputc; static cn_grab_t xc_cngrab; static cn_ungrab_t xc_cnungrab; #define XC_POLLTIME (hz/10) CONSOLE_DRIVER(xc); static int xen_console_up; static boolean_t xc_start_needed; static struct callout xc_callout; struct mtx cn_mtx; #define RBUF_SIZE 1024 #define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) #define WBUF_SIZE 4096 #define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) static char wbuf[WBUF_SIZE]; static char rbuf[RBUF_SIZE]; static int rc, rp; static unsigned int cnsl_evt_reg; static unsigned int wc, wp; /* write_cons, write_prod */ +xen_intr_handle_t xen_intr_handle; +device_t xencons_dev; #ifdef KDB static int xc_altbrk; #endif #define CDEV_MAJOR 12 #define XCUNIT(x) (dev2unit(x)) #define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN)) #define CN_LOCK_INIT(x, _name) \ mtx_init(&x, _name, NULL, MTX_SPIN|MTX_RECURSE) #define CN_LOCK(l) \ do { \ if (panicstr == NULL) \ mtx_lock_spin(&(l)); \ } while (0) #define CN_UNLOCK(l) \ do { \ if (panicstr == NULL) \ mtx_unlock_spin(&(l)); \ } while (0) #define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED) #define CN_LOCK_DESTROY(x) mtx_destroy(&x) static struct tty *xccons; static tsw_open_t xcopen; static tsw_close_t xcclose; static struct ttydevsw xc_ttydevsw = { .tsw_flags = TF_NOPREFIX, .tsw_open = xcopen, .tsw_close = xcclose, .tsw_outwakeup = xcoutwakeup, }; static void xc_cnprobe(struct consdev *cp) { cp->cn_pri = CN_REMOTE; sprintf(cp->cn_name, "%s0", driver_name); } static void xc_cninit(struct consdev *cp) { CN_LOCK_INIT(cn_mtx,"XCONS LOCK"); } static void xc_cnterm(struct consdev *cp) { } static void xc_cngrab(struct consdev *cp) { } static void xc_cnungrab(struct consdev *cp) { } static int xc_cngetc(struct consdev *dev) { int ret; if (xencons_has_input()) xencons_handle_input(NULL); CN_LOCK(cn_mtx); if ((rp - rc) && !xc_mute) { /* we need to return only one char */ ret = (int)rbuf[RBUF_MASK(rc)]; rc++; } else ret = -1; CN_UNLOCK(cn_mtx); return(ret); } static void xc_cnputc_domu(struct consdev *dev, int c) { xcons_putc(c); } static void xc_cnputc_dom0(struct consdev *dev, int c) { HYPERVISOR_console_io(CONSOLEIO_write, 1, (char *)&c); } static void xc_cnputc(struct consdev *dev, int c) { if (xen_start_info->flags & SIF_INITDOMAIN) xc_cnputc_dom0(dev, c); else xc_cnputc_domu(dev, c); } extern int db_active; static boolean_t xcons_putc(int c) { 
int force_flush = xc_mute || #ifdef DDB db_active || #endif panicstr; /* we're not gonna recover, so force * flush */ if ((wp-wc) < (WBUF_SIZE-1)) { if ((wbuf[WBUF_MASK(wp++)] = c) == '\n') { wbuf[WBUF_MASK(wp++)] = '\r'; #ifdef notyet if (force_flush) xcons_force_flush(); #endif } } else if (force_flush) { #ifdef notyet xcons_force_flush(); #endif } if (cnsl_evt_reg) __xencons_tx_flush(); /* inform start path that we're pretty full */ return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE; } static void xc_identify(driver_t *driver, device_t parent) { device_t child; child = BUS_ADD_CHILD(parent, 0, driver_name, 0); device_set_driver(child, driver); device_set_desc(child, "Xen Console"); } static int xc_probe(device_t dev) { return (0); } static int xc_attach(device_t dev) { int error; + xencons_dev = dev; xccons = tty_alloc(&xc_ttydevsw, NULL); tty_makedev(xccons, NULL, "xc%r", 0); callout_init(&xc_callout, 0); xencons_ring_init(); cnsl_evt_reg = 1; callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons); if (xen_start_info->flags & SIF_INITDOMAIN) { - error = bind_virq_to_irqhandler( - VIRQ_CONSOLE, - 0, - "console", - NULL, - xencons_priv_interrupt, NULL, - INTR_TYPE_TTY, NULL); - - KASSERT(error >= 0, ("can't register console interrupt")); + error = xen_intr_bind_virq(dev, VIRQ_CONSOLE, 0, NULL, + xencons_priv_interrupt, NULL, + INTR_TYPE_TTY, &xen_intr_handle); + KASSERT(error >= 0, ("can't register console interrupt")); } /* register handler to flush console on shutdown */ if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown, NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) printf("xencons: shutdown event registration failed!\n"); return (0); } /* * return 0 for all console input, force flush all output. */ static void xc_shutdown(void *arg, int howto) { xc_mute = 1; xcons_force_flush(); } void xencons_rx(char *buf, unsigned len) { int i; struct tty *tp = xccons; if (xen_console_up #ifdef DDB && !kdb_active #endif ) { tty_lock(tp); for (i = 0; i < len; i++) { #ifdef KDB kdb_alt_break(buf[i], &xc_altbrk); #endif ttydisc_rint(tp, buf[i], 0); } ttydisc_rint_done(tp); tty_unlock(tp); } else { CN_LOCK(cn_mtx); for (i = 0; i < len; i++) rbuf[RBUF_MASK(rp++)] = buf[i]; CN_UNLOCK(cn_mtx); } } static void __xencons_tx_flush(void) { int sz; CN_LOCK(cn_mtx); while (wc != wp) { int sent; sz = wp - wc; if (sz > (WBUF_SIZE - WBUF_MASK(wc))) sz = WBUF_SIZE - WBUF_MASK(wc); if (xen_start_info->flags & SIF_INITDOMAIN) { HYPERVISOR_console_io(CONSOLEIO_write, sz, &wbuf[WBUF_MASK(wc)]); wc += sz; } else { sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); if (sent == 0) break; wc += sent; } } CN_UNLOCK(cn_mtx); } void xencons_tx(void) { __xencons_tx_flush(); } static void xencons_priv_interrupt(void *arg) { static char rbuf[16]; int l; while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0) xencons_rx(rbuf, l); xencons_tx(); } static int xcopen(struct tty *tp) { xen_console_up = 1; return (0); } static void xcclose(struct tty *tp) { xen_console_up = 0; } static inline int __xencons_put_char(int ch) { char _ch = (char)ch; if ((wp - wc) == WBUF_SIZE) return 0; wbuf[WBUF_MASK(wp++)] = _ch; return 1; } static void xcoutwakeup(struct tty *tp) { boolean_t cons_full = FALSE; char c; while (ttydisc_getc(tp, &c, 1) == 1 && !cons_full) cons_full = xcons_putc(c); if (cons_full) { /* let the timeout kick us in a bit */ xc_start_needed = TRUE; } } static void xc_timeout(void *v) { struct tty *tp; int c; tp = (struct tty *)v; tty_lock(tp); while ((c = xc_cngetc(NULL)) != -1) ttydisc_rint(tp, c, 0); if 
(xc_start_needed) { xc_start_needed = FALSE; xcoutwakeup(tp); } tty_unlock(tp); callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp); } static device_method_t xc_methods[] = { DEVMETHOD(device_identify, xc_identify), DEVMETHOD(device_probe, xc_probe), DEVMETHOD(device_attach, xc_attach), DEVMETHOD_END }; static driver_t xc_driver = { driver_name, xc_methods, 0, }; /*** Forcibly flush console data before dying. ***/ void xcons_force_flush(void) { int sz; if (xen_start_info->flags & SIF_INITDOMAIN) return; /* Spin until console data is flushed through to the domain controller. */ while (wc != wp) { int sent = 0; if ((sz = wp - wc) == 0) continue; sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); if (sent > 0) wc += sent; } } DRIVER_MODULE(xc, nexus, xc_driver, xc_devclass, 0, 0); diff --git a/sys/dev/xen/console/xencons_ring.c b/sys/dev/xen/console/xencons_ring.c index 077d286f3fe0..3701551ea101 100644 --- a/sys/dev/xen/console/xencons_ring.c +++ b/sys/dev/xen/console/xencons_ring.c @@ -1,167 +1,169 @@ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include + +#include #include #include #include #include #include #include #include #include #include #define console_evtchn console.domU.evtchn -static unsigned int console_irq; +xen_intr_handle_t console_handle; extern char *console_page; extern struct mtx cn_mtx; +extern device_t xencons_dev; static inline struct xencons_interface * xencons_interface(void) { return (struct xencons_interface *)console_page; } int xencons_has_input(void) { struct xencons_interface *intf; intf = xencons_interface(); return (intf->in_cons != intf->in_prod); } int xencons_ring_send(const char *data, unsigned len) { struct xencons_interface *intf; XENCONS_RING_IDX cons, prod; int sent; intf = xencons_interface(); cons = intf->out_cons; prod = intf->out_prod; sent = 0; mb(); KASSERT((prod - cons) <= sizeof(intf->out), ("console send ring inconsistent")); while ((sent < len) && ((prod - cons) < sizeof(intf->out))) intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; wmb(); intf->out_prod = prod; - notify_remote_via_evtchn(xen_start_info->console_evtchn); + xen_intr_signal(console_handle); return sent; } static xencons_receiver_func *xencons_receiver; void xencons_handle_input(void *unused) { struct xencons_interface *intf; XENCONS_RING_IDX cons, prod; CN_LOCK(cn_mtx); intf = xencons_interface(); cons = intf->in_cons; prod = intf->in_prod; CN_UNLOCK(cn_mtx); /* XXX needs locking */ while (cons != prod) { xencons_rx(intf->in + MASK_XENCONS_IDX(cons, intf->in), 1); cons++; } mb(); intf->in_cons = cons; CN_LOCK(cn_mtx); - notify_remote_via_evtchn(xen_start_info->console_evtchn); + xen_intr_signal(console_handle); xencons_tx(); CN_UNLOCK(cn_mtx); } void xencons_ring_register_receiver(xencons_receiver_func *f) { xencons_receiver = f; } int xencons_ring_init(void) { int err; if (!xen_start_info->console_evtchn) return 0; - err = bind_caller_port_to_irqhandler(xen_start_info->console_evtchn, - "xencons", xencons_handle_input, NULL, - INTR_TYPE_MISC | INTR_MPSAFE, &console_irq); + err = xen_intr_bind_local_port(xencons_dev, + xen_start_info->console_evtchn, NULL, xencons_handle_input, NULL, + INTR_TYPE_MISC | INTR_MPSAFE, &console_handle); if (err) { return err; } return 0; } extern void xencons_suspend(void); extern void xencons_resume(void); void xencons_suspend(void) { if (!xen_start_info->console_evtchn) return; - 
unbind_from_irqhandler(console_irq); + xen_intr_unbind(&console_handle); } void xencons_resume(void) { (void)xencons_ring_init(); } /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 8 * tab-width: 4 * indent-tabs-mode: t * End: */ diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index 18f42bbe653b..649f28160407 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -1,518 +1,521 @@ /*- * Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. */ /*- * PV suspend/resume support: * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christian Limpach. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * HVM suspend/resume support: * * Copyright (c) 2008 Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /** * \file control.c * * \brief Device driver to repond to control domain events that impact * this VM. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef XENHVM #include #include #endif #include #include -#include +#include #include #include #include +#include #include #include #include #include #include #include #include +#include +#include + /*--------------------------- Forward Declarations --------------------------*/ /** Function signature for shutdown event handlers. */ typedef void (xctrl_shutdown_handler_t)(void); static xctrl_shutdown_handler_t xctrl_poweroff; static xctrl_shutdown_handler_t xctrl_reboot; static xctrl_shutdown_handler_t xctrl_suspend; static xctrl_shutdown_handler_t xctrl_crash; static xctrl_shutdown_handler_t xctrl_halt; /*-------------------------- Private Data Structures -------------------------*/ /** Element type for lookup table of event name to handler. */ struct xctrl_shutdown_reason { const char *name; xctrl_shutdown_handler_t *handler; }; /** Lookup table for shutdown event name to handler. 
*/ static const struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = { { "poweroff", xctrl_poweroff }, { "reboot", xctrl_reboot }, { "suspend", xctrl_suspend }, { "crash", xctrl_crash }, { "halt", xctrl_halt }, }; struct xctrl_softc { struct xs_watch xctrl_watch; }; /*------------------------------ Event Handlers ------------------------------*/ static void xctrl_poweroff() { shutdown_nice(RB_POWEROFF|RB_HALT); } static void xctrl_reboot() { shutdown_nice(0); } #ifndef XENHVM extern void xencons_suspend(void); extern void xencons_resume(void); /* Full PV mode suspension. */ static void xctrl_suspend() { int i, j, k, fpp; unsigned long max_pfn, start_info_mfn; EVENTHANDLER_INVOKE(power_suspend); #ifdef SMP struct thread *td; cpuset_t map; u_int cpuid; /* * Bind us to CPU 0 and stop any other VCPUs. */ td = curthread; thread_lock(td); sched_bind(td, 0); thread_unlock(td); cpuid = PCPU_GET(cpuid); KASSERT(cpuid == 0, ("xen_suspend: not running on cpu 0")); map = all_cpus; CPU_CLR(cpuid, &map); CPU_NAND(&map, &stopped_cpus); if (!CPU_EMPTY(&map)) stop_cpus(map); #endif /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE * drivers need this. */ mtx_lock(&Giant); if (DEVICE_SUSPEND(root_bus) != 0) { mtx_unlock(&Giant); printf("%s: device_suspend failed\n", __func__); #ifdef SMP if (!CPU_EMPTY(&map)) restart_cpus(map); #endif return; } mtx_unlock(&Giant); local_irq_disable(); xencons_suspend(); gnttab_suspend(); + intr_suspend(); max_pfn = HYPERVISOR_shared_info->arch.max_pfn; void *shared_info = HYPERVISOR_shared_info; HYPERVISOR_shared_info = NULL; pmap_kremove((vm_offset_t) shared_info); PT_UPDATES_FLUSH(); xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn); xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn); /* * We'll stop somewhere inside this hypercall. When it returns, * we'll start resuming after the restore. */ start_info_mfn = VTOMFN(xen_start_info); pmap_suspend(); HYPERVISOR_suspend(start_info_mfn); pmap_resume(); pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info); HYPERVISOR_shared_info = shared_info; HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = VTOMFN(xen_pfn_to_mfn_frame_list_list); fpp = PAGE_SIZE/sizeof(unsigned long); for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) { if ((j % fpp) == 0) { k++; xen_pfn_to_mfn_frame_list_list[k] = VTOMFN(xen_pfn_to_mfn_frame_list[k]); j = 0; } xen_pfn_to_mfn_frame_list[k][j] = VTOMFN(&xen_phys_machine[i]); } HYPERVISOR_shared_info->arch.max_pfn = max_pfn; gnttab_resume(); - irq_resume(); + intr_resume(); local_irq_enable(); xencons_resume(); #ifdef CONFIG_SMP for_each_cpu(i) vcpu_prepare(i); #endif /* * Only resume xenbus /after/ we've prepared our VCPUs; otherwise * the VCPU hotplug callback can race with our vcpu_prepare */ mtx_lock(&Giant); DEVICE_RESUME(root_bus); mtx_unlock(&Giant); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); if (!CPU_EMPTY(&map)) restart_cpus(map); #endif EVENTHANDLER_INVOKE(power_resume); } static void xen_pv_shutdown_final(void *arg, int howto) { /* * Inform the hypervisor that shutdown is complete. * This is not necessary in HVM domains since Xen * emulates ACPI in that mode and FreeBSD's ACPI * support will request this transition. */ if (howto & (RB_HALT | RB_POWEROFF)) HYPERVISOR_shutdown(SHUTDOWN_poweroff); else HYPERVISOR_shutdown(SHUTDOWN_reboot); } #else extern void xenpci_resume(void); /* HVM mode suspension. 
*/ static void xctrl_suspend() { int suspend_cancelled; EVENTHANDLER_INVOKE(power_suspend); /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE * drivers need this. */ mtx_lock(&Giant); if (DEVICE_SUSPEND(root_bus) != 0) { mtx_unlock(&Giant); printf("%s: device_suspend failed\n", __func__); return; } mtx_unlock(&Giant); /* * Prevent any races with evtchn_interrupt() handler. */ disable_intr(); - irq_suspend(); + intr_suspend(); suspend_cancelled = HYPERVISOR_suspend(0); - if (suspend_cancelled) - irq_resume(); - else - xenpci_resume(); + + intr_resume(); /* * Re-enable interrupts and put the scheduler back to normal. */ enable_intr(); /* * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or * similar. */ mtx_lock(&Giant); if (!suspend_cancelled) DEVICE_RESUME(root_bus); mtx_unlock(&Giant); EVENTHANDLER_INVOKE(power_resume); } #endif static void xctrl_crash() { panic("Xen directed crash"); } static void xctrl_halt() { shutdown_nice(RB_HALT); } /*------------------------------ Event Reception -----------------------------*/ static void xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len) { const struct xctrl_shutdown_reason *reason; const struct xctrl_shutdown_reason *last_reason; char *result; int error; int result_len; error = xs_read(XST_NIL, "control", "shutdown", &result_len, (void **)&result); if (error != 0) return; reason = xctrl_shutdown_reasons; last_reason = reason + nitems(xctrl_shutdown_reasons); while (reason < last_reason) { if (!strcmp(result, reason->name)) { reason->handler(); break; } reason++; } free(result, M_XENSTORE); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void xctrl_identify(driver_t *driver __unused, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existance of the Xen Control device * * \param dev NewBus device_t for this Xen control instance. * * \return Always returns 0 indicating success. */ static int xctrl_probe(device_t dev) { device_set_desc(dev, "Xen Control Device"); return (0); } /** * \brief Attach the Xen control device. * * \param dev NewBus device_t for this Xen control instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xctrl_attach(device_t dev) { struct xctrl_softc *xctrl; xctrl = device_get_softc(dev); /* Activate watch */ xctrl->xctrl_watch.node = "control/shutdown"; xctrl->xctrl_watch.callback = xctrl_on_watch_event; xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl; xs_register_watch(&xctrl->xctrl_watch); #ifndef XENHVM EVENTHANDLER_REGISTER(shutdown_final, xen_pv_shutdown_final, NULL, SHUTDOWN_PRI_LAST); #endif return (0); } /** * \brief Detach the Xen control device. * * \param dev NewBus device_t for this Xen control device instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. 
*/ static int xctrl_detach(device_t dev) { struct xctrl_softc *xctrl; xctrl = device_get_softc(dev); /* Release watch */ xs_unregister_watch(&xctrl->xctrl_watch); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xctrl_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xctrl_identify), DEVMETHOD(device_probe, xctrl_probe), DEVMETHOD(device_attach, xctrl_attach), DEVMETHOD(device_detach, xctrl_detach), DEVMETHOD_END }; DEFINE_CLASS_0(xctrl, xctrl_driver, xctrl_methods, sizeof(struct xctrl_softc)); devclass_t xctrl_devclass; DRIVER_MODULE(xctrl, xenstore, xctrl_driver, xctrl_devclass, NULL, NULL); diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c index 9463bad7a8b1..8f0286c31f06 100644 --- a/sys/dev/xen/netback/netback.c +++ b/sys/dev/xen/netback/netback.c @@ -1,2537 +1,2535 @@ /*- * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * Alan Somers (Spectra Logic Corporation) * John Suykerbuyk (Spectra Logic Corporation) */ #include __FBSDID("$FreeBSD$"); /** * \file netback.c * * \brief Device driver supporting the vending of network access * from this FreeBSD domain to other domains. */ #include "opt_inet.h" #include "opt_global.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 700000 #include #endif #include #include #include #include #include #include #include #include -#include -#include -#include +#include +#include #include #include #include +#include + /*--------------------------- Compile-time Tunables --------------------------*/ /*---------------------------------- Macros ----------------------------------*/ /** * Custom malloc type for all driver allocations. 
*/ static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data"); #define XNB_SG 1 /* netback driver supports feature-sg */ #define XNB_GSO_TCPV4 1 /* netback driver supports feature-gso-tcpv4 */ #define XNB_RX_COPY 1 /* netback driver supports feature-rx-copy */ #define XNB_RX_FLIP 0 /* netback driver does not support feature-rx-flip */ #undef XNB_DEBUG #define XNB_DEBUG /* hardcode on during development */ #ifdef XNB_DEBUG #define DPRINTF(fmt, args...) \ printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while (0) #endif /* Default length for stack-allocated grant tables */ #define GNTTAB_LEN (64) /* Features supported by all backends. TSO and LRO can be negotiated */ #define XNB_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) /** * Two argument version of the standard macro. Second argument is a tentative * value of req_cons */ #define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({ \ unsigned int req = (_r)->sring->req_prod - cons; \ unsigned int rsp = RING_SIZE(_r) - \ (cons - (_r)->rsp_prod_pvt); \ req < rsp ? req : rsp; \ }) #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) #define virt_to_offset(x) ((x) & (PAGE_SIZE - 1)) /** * Predefined array type of grant table copy descriptors. Used to pass around * statically allocated memory structures. */ typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN]; /*--------------------------- Forward Declarations ---------------------------*/ struct xnb_softc; struct xnb_pkt; static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) __printflike(3,4); static int xnb_shutdown(struct xnb_softc *xnb); static int create_netdev(device_t dev); static int xnb_detach(device_t dev); static int xen_net_read_mac(device_t dev, uint8_t mac[]); static int xnb_ifmedia_upd(struct ifnet *ifp); static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static void xnb_intr(void *arg); static int xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab); static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, struct ifnet *ifnet, gnttab_copy_table gnttab); static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start); static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error); static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp); static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id); static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries); static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space); static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id); static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring); static void xnb_add_mbuf_cksum(struct mbuf *mbufc); static void xnb_stop(struct xnb_softc*); static int xnb_ioctl(struct ifnet*, u_long, caddr_t); static void xnb_start_locked(struct ifnet*); static void xnb_start(struct ifnet*); static void xnb_ifinit_locked(struct xnb_softc*); static void xnb_ifinit(void*); 
#ifdef XNB_DEBUG static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS); static int xnb_dump_rings(SYSCTL_HANDLER_ARGS); #endif /*------------------------------ Data Structures -----------------------------*/ /** * Representation of a xennet packet. Simplified version of a packet as * stored in the Xen tx ring. Applicable to both RX and TX packets */ struct xnb_pkt{ /** * Array index of the first data-bearing (eg, not extra info) entry * for this packet */ RING_IDX car; /** * Array index of the second data-bearing entry for this packet. * Invalid if the packet has only one data-bearing entry. If the * packet has more than two data-bearing entries, then the second * through the last will be sequential modulo the ring size */ RING_IDX cdr; /** * Optional extra info. Only valid if flags contains * NETTXF_extra_info. Note that extra.type will always be * XEN_NETIF_EXTRA_TYPE_GSO. Currently, no known netfront or netback * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_* */ netif_extra_info_t extra; /** Size of entire packet in bytes. */ uint16_t size; /** The size of the first entry's data in bytes */ uint16_t car_size; /** * Either NETTXF_ or NETRXF_ flags. Note that the flag values are * not the same for TX and RX packets */ uint16_t flags; /** * The number of valid data-bearing entries (either netif_tx_request's * or netif_rx_response's) in the packet. If this is 0, it means the * entire packet is invalid. */ uint16_t list_len; /** There was an error processing the packet */ uint8_t error; }; /** xnb_pkt method: initialize it */ static inline void xnb_pkt_initialize(struct xnb_pkt *pxnb) { bzero(pxnb, sizeof(*pxnb)); } /** xnb_pkt method: mark the packet as valid */ static inline void xnb_pkt_validate(struct xnb_pkt *pxnb) { pxnb->error = 0; }; /** xnb_pkt method: mark the packet as invalid */ static inline void xnb_pkt_invalidate(struct xnb_pkt *pxnb) { pxnb->error = 1; }; /** xnb_pkt method: Check whether the packet is valid */ static inline int xnb_pkt_is_valid(const struct xnb_pkt *pxnb) { return (! pxnb->error); } #ifdef XNB_DEBUG /** xnb_pkt method: print the packet's contents in human-readable format*/ static void __unused xnb_dump_pkt(const struct xnb_pkt *pkt) { if (pkt == NULL) { DPRINTF("Was passed a null pointer.\n"); return; } DPRINTF("pkt address= %p\n", pkt); DPRINTF("pkt->size=%d\n", pkt->size); DPRINTF("pkt->car_size=%d\n", pkt->car_size); DPRINTF("pkt->flags=0x%04x\n", pkt->flags); DPRINTF("pkt->list_len=%d\n", pkt->list_len); /* DPRINTF("pkt->extra"); TODO */ DPRINTF("pkt->car=%d\n", pkt->car); DPRINTF("pkt->cdr=%d\n", pkt->cdr); DPRINTF("pkt->error=%d\n", pkt->error); } #endif /* XNB_DEBUG */ static void xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq) { if (txreq != NULL) { DPRINTF("netif_tx_request index =%u\n", idx); DPRINTF("netif_tx_request.gref =%u\n", txreq->gref); DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset); DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags); DPRINTF("netif_tx_request.id =%hu\n", txreq->id); DPRINTF("netif_tx_request.size =%hu\n", txreq->size); } } /** * \brief Configuration data for a shared memory request ring * used to communicate with the front-end client of this * this driver. */ struct xnb_ring_config { /** * Runtime structures for ring access. Unfortunately, TX and RX rings * use different data structures, and that cannot be changed since it * is part of the interdomain protocol. 
*/ union{ netif_rx_back_ring_t rx_ring; netif_tx_back_ring_t tx_ring; } back_ring; /** * The device bus address returned by the hypervisor when * mapping the ring and required to unmap it when a connection * is torn down. */ uint64_t bus_addr; /** The pseudo-physical address where ring memory is mapped.*/ uint64_t gnt_addr; /** KVA address where ring memory is mapped. */ vm_offset_t va; /** * Grant table handles, one per-ring page, returned by the * hyperpervisor upon mapping of the ring and required to * unmap it when a connection is torn down. */ grant_handle_t handle; /** The number of ring pages mapped for the current connection. */ unsigned ring_pages; /** * The grant references, one per-ring page, supplied by the * front-end, allowing us to reference the ring pages in the * front-end's domain and to map these pages into our own domain. */ grant_ref_t ring_ref; }; /** * Per-instance connection state flags. */ typedef enum { /** Communication with the front-end has been established. */ XNBF_RING_CONNECTED = 0x01, /** * Front-end requests exist in the ring and are waiting for * xnb_xen_req objects to free up. */ XNBF_RESOURCE_SHORTAGE = 0x02, /** Connection teardown has started. */ XNBF_SHUTDOWN = 0x04, /** A thread is already performing shutdown processing. */ XNBF_IN_SHUTDOWN = 0x08 } xnb_flag_t; /** * Types of rings. Used for array indices and to identify a ring's control * data structure type */ typedef enum{ XNB_RING_TYPE_TX = 0, /* ID of TX rings, used for array indices */ XNB_RING_TYPE_RX = 1, /* ID of RX rings, used for array indices */ XNB_NUM_RING_TYPES } xnb_ring_type_t; /** * Per-instance configuration data. */ struct xnb_softc { /** NewBus device corresponding to this instance. */ device_t dev; /* Media related fields */ /** Generic network media state */ struct ifmedia sc_media; /** Media carrier info */ struct ifnet *xnb_ifp; /** Our own private carrier state */ unsigned carrier; /** Device MAC Address */ uint8_t mac[ETHER_ADDR_LEN]; /* Xen related fields */ /** * \brief The netif protocol abi in effect. * * There are situations where the back and front ends can * have a different, native abi (e.g. intel x86_64 and * 32bit x86 domains on the same machine). The back-end * always accomodates the front-end's native abi. That * value is pulled from the XenStore and recorded here. */ int abi; /** * Name of the bridge to which this VIF is connected, if any * This field is dynamically allocated by xenbus and must be free()ed * when no longer needed */ char *bridge; /** The interrupt driven even channel used to signal ring events. */ evtchn_port_t evtchn; /** Xen device handle.*/ long handle; - /** IRQ mapping for the communication ring event channel. */ - int irq; + /** Handle to the communication ring event channel. */ + xen_intr_handle_t xen_intr_handle; /** * \brief Cached value of the front-end's domain id. * * This value is used at once for each mapped page in * a transaction. We cache it to avoid incuring the * cost of an ivar access every time this is needed. */ domid_t otherend_id; /** * Undocumented frontend feature. Has something to do with * scatter/gather IO */ uint8_t can_sg; /** Undocumented frontend feature */ uint8_t gso; /** Undocumented frontend feature */ uint8_t gso_prefix; /** Can checksum TCP/UDP over IPv4 */ uint8_t ip_csum; /* Implementation related fields */ /** * Preallocated grant table copy descriptor for RX operations. 
* Access must be protected by rx_lock */ gnttab_copy_table rx_gnttab; /** * Preallocated grant table copy descriptor for TX operations. * Access must be protected by tx_lock */ gnttab_copy_table tx_gnttab; #ifdef XENHVM /** * Resource representing allocated physical address space * associated with our per-instance kva region. */ struct resource *pseudo_phys_res; /** Resource id for allocated physical address space. */ int pseudo_phys_res_id; #endif /** Ring mapping and interrupt configuration data. */ struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES]; /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. */ vm_offset_t kva; /** Psuedo-physical address corresponding to kva. */ uint64_t gnt_base_addr; /** Various configuration and state bit flags. */ xnb_flag_t flags; /** Mutex protecting per-instance data in the receive path. */ struct mtx rx_lock; /** Mutex protecting per-instance data in the softc structure. */ struct mtx sc_lock; /** Mutex protecting per-instance data in the transmit path. */ struct mtx tx_lock; /** The size of the global kva pool. */ int kva_size; }; /*---------------------------- Debugging functions ---------------------------*/ #ifdef XNB_DEBUG static void __unused xnb_dump_gnttab_copy(const struct gnttab_copy *entry) { if (entry == NULL) { printf("NULL grant table pointer\n"); return; } if (entry->flags & GNTCOPY_dest_gref) printf("gnttab dest ref=\t%u\n", entry->dest.u.ref); else printf("gnttab dest gmfn=\t%lu\n", entry->dest.u.gmfn); printf("gnttab dest offset=\t%hu\n", entry->dest.offset); printf("gnttab dest domid=\t%hu\n", entry->dest.domid); if (entry->flags & GNTCOPY_source_gref) printf("gnttab source ref=\t%u\n", entry->source.u.ref); else printf("gnttab source gmfn=\t%lu\n", entry->source.u.gmfn); printf("gnttab source offset=\t%hu\n", entry->source.offset); printf("gnttab source domid=\t%hu\n", entry->source.domid); printf("gnttab len=\t%hu\n", entry->len); printf("gnttab flags=\t%hu\n", entry->flags); printf("gnttab status=\t%hd\n", entry->status); } static int xnb_dump_rings(SYSCTL_HANDLER_ARGS) { static char results[720]; struct xnb_softc const* xnb = (struct xnb_softc*)arg1; netif_rx_back_ring_t const* rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; netif_tx_back_ring_t const* txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; /* empty the result strings */ results[0] = 0; if ( !txb || !txb->sring || !rxb || !rxb->sring ) return (SYSCTL_OUT(req, results, strnlen(results, 720))); snprintf(results, 720, "\n\t%35s %18s\n" /* TX, RX */ "\t%16s %18d %18d\n" /* req_cons */ "\t%16s %18d %18d\n" /* nr_ents */ "\t%16s %18d %18d\n" /* rsp_prod_pvt */ "\t%16s %18p %18p\n" /* sring */ "\t%16s %18d %18d\n" /* req_prod */ "\t%16s %18d %18d\n" /* req_event */ "\t%16s %18d %18d\n" /* rsp_prod */ "\t%16s %18d %18d\n", /* rsp_event */ "TX", "RX", "req_cons", txb->req_cons, rxb->req_cons, "nr_ents", txb->nr_ents, rxb->nr_ents, "rsp_prod_pvt", txb->rsp_prod_pvt, rxb->rsp_prod_pvt, "sring", txb->sring, rxb->sring, "sring->req_prod", txb->sring->req_prod, rxb->sring->req_prod, "sring->req_event", txb->sring->req_event, rxb->sring->req_event, "sring->rsp_prod", txb->sring->rsp_prod, rxb->sring->rsp_prod, "sring->rsp_event", txb->sring->rsp_event, rxb->sring->rsp_event); return (SYSCTL_OUT(req, results, strnlen(results, 720))); } static void __unused xnb_dump_mbuf(const struct mbuf *m) { int len; uint8_t *d; if (m == NULL) return; printf("xnb_dump_mbuf:\n"); if (m->m_flags & M_PKTHDR) { printf(" flowid=%10d, 
csum_flags=%#8x, csum_data=%#8x, " "tso_segsz=%5hd\n", m->m_pkthdr.flowid, (int)m->m_pkthdr.csum_flags, m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz); printf(" rcvif=%16p, len=%19d\n", m->m_pkthdr.rcvif, m->m_pkthdr.len); } printf(" m_next=%16p, m_nextpk=%16p, m_data=%16p\n", m->m_next, m->m_nextpkt, m->m_data); printf(" m_len=%17d, m_flags=%#15x, m_type=%18u\n", m->m_len, m->m_flags, m->m_type); len = m->m_len; d = mtod(m, uint8_t*); while (len > 0) { int i; printf(" "); for (i = 0; (i < 16) && (len > 0); i++, len--) { printf("%02hhx ", *(d++)); } printf("\n"); } } #endif /* XNB_DEBUG */ /*------------------------ Inter-Domain Communication ------------------------*/ /** * Free dynamically allocated KVA or pseudo-physical address allocations. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_free_communication_mem(struct xnb_softc *xnb) { if (xnb->kva != 0) { #ifndef XENHVM kva_free(xnb->kva, xnb->kva_size); #else if (xnb->pseudo_phys_res != NULL) { bus_release_resource(xnb->dev, SYS_RES_MEMORY, xnb->pseudo_phys_res_id, xnb->pseudo_phys_res); xnb->pseudo_phys_res = NULL; } #endif /* XENHVM */ } xnb->kva = 0; xnb->gnt_base_addr = 0; } /** * Cleanup all inter-domain communication mechanisms. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_disconnect(struct xnb_softc *xnb) { struct gnttab_unmap_grant_ref gnts[XNB_NUM_RING_TYPES]; int error; int i; - if (xnb->irq != 0) { - unbind_from_irqhandler(xnb->irq); - xnb->irq = 0; - } + xen_intr_unbind(xnb->xen_intr_handle); /* * We may still have another thread currently processing requests. We * must acquire the rx and tx locks to make sure those threads are done, * but we can release those locks as soon as we acquire them, because no * more interrupts will be arriving. */ mtx_lock(&xnb->tx_lock); mtx_unlock(&xnb->tx_lock); mtx_lock(&xnb->rx_lock); mtx_unlock(&xnb->rx_lock); /* Free malloc'd softc member variables */ if (xnb->bridge != NULL) free(xnb->bridge, M_XENSTORE); /* All request processing has stopped, so unmap the rings */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { gnts[i].host_addr = xnb->ring_configs[i].gnt_addr; gnts[i].dev_bus_addr = xnb->ring_configs[i].bus_addr; gnts[i].handle = xnb->ring_configs[i].handle; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, gnts, XNB_NUM_RING_TYPES); KASSERT(error == 0, ("Grant table unmap op failed (%d)", error)); xnb_free_communication_mem(xnb); /* * Zero the ring config structs because the pointers, handles, and * grant refs contained therein are no longer valid. 
*/ bzero(&xnb->ring_configs[XNB_RING_TYPE_TX], sizeof(struct xnb_ring_config)); bzero(&xnb->ring_configs[XNB_RING_TYPE_RX], sizeof(struct xnb_ring_config)); xnb->flags &= ~XNBF_RING_CONNECTED; return (0); } /** * Map a single shared memory ring into domain local address space and * initialize its control structure * * \param xnb Per-instance xnb configuration structure * \param ring_type Array index of this ring in the xnb's array of rings * \return An errno */ static int xnb_connect_ring(struct xnb_softc *xnb, xnb_ring_type_t ring_type) { struct gnttab_map_grant_ref gnt; struct xnb_ring_config *ring = &xnb->ring_configs[ring_type]; int error; /* TX ring type = 0, RX =1 */ ring->va = xnb->kva + ring_type * PAGE_SIZE; ring->gnt_addr = xnb->gnt_base_addr + ring_type * PAGE_SIZE; gnt.host_addr = ring->gnt_addr; gnt.flags = GNTMAP_host_map; gnt.ref = ring->ring_ref; gnt.dom = xnb->otherend_id; error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &gnt, 1); if (error != 0) panic("netback: Ring page grant table op failed (%d)", error); if (gnt.status != 0) { ring->va = 0; error = EACCES; xenbus_dev_fatal(xnb->dev, error, "Ring shared page mapping failed. " "Status %d.", gnt.status); } else { ring->handle = gnt.handle; ring->bus_addr = gnt.dev_bus_addr; if (ring_type == XNB_RING_TYPE_TX) { BACK_RING_INIT(&ring->back_ring.tx_ring, (netif_tx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else if (ring_type == XNB_RING_TYPE_RX) { BACK_RING_INIT(&ring->back_ring.rx_ring, (netif_rx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else { xenbus_dev_fatal(xnb->dev, error, "Unknown ring type %d", ring_type); } } return error; } /** * Setup the shared memory rings and bind an interrupt to the event channel * used to notify us of ring changes. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_connect_comms(struct xnb_softc *xnb) { int error; xnb_ring_type_t i; if ((xnb->flags & XNBF_RING_CONNECTED) != 0) return (0); /* * Kva for our rings are at the tail of the region of kva allocated * by xnb_alloc_communication_mem(). */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { error = xnb_connect_ring(xnb, i); if (error != 0) return error; } xnb->flags |= XNBF_RING_CONNECTED; - error = - bind_interdomain_evtchn_to_irqhandler(xnb->otherend_id, - xnb->evtchn, - device_get_nameunit(xnb->dev), - xnb_intr, /*arg*/xnb, - INTR_TYPE_BIO | INTR_MPSAFE, - &xnb->irq); + error = xen_intr_bind_remote_port(xnb->dev, + xnb->otherend_id, + xnb->evtchn, + /*filter*/NULL, + xnb_intr, /*arg*/xnb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xnb->xen_intr_handle); if (error != 0) { (void)xnb_disconnect(xnb); xenbus_dev_fatal(xnb->dev, error, "binding event channel"); return (error); } DPRINTF("rings connected!\n"); return (0); } /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our * communication ring. * * \param xnb Per-instance xnb configuration structure. * * These address spaces are used to dynamically map pages in the * front-end's domain into our own. */ static int xnb_alloc_communication_mem(struct xnb_softc *xnb) { xnb_ring_type_t i; xnb->kva_size = 0; for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE; } #ifndef XENHVM xnb->kva = kva_alloc(xnb->kva_size); if (xnb->kva == 0) return (ENOMEM); xnb->gnt_base_addr = xnb->kva; #else /* defined XENHVM */ /* * Reserve a range of pseudo physical memory that we can map * into kva. 
These pages will only be backed by machine * pages ("real memory") during the lifetime of front-end requests * via grant table operations. We will map the netif tx and rx rings * into this space. */ xnb->pseudo_phys_res_id = 0; xnb->pseudo_phys_res = bus_alloc_resource(xnb->dev, SYS_RES_MEMORY, &xnb->pseudo_phys_res_id, 0, ~0, xnb->kva_size, RF_ACTIVE); if (xnb->pseudo_phys_res == NULL) { xnb->kva = 0; return (ENOMEM); } xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res); xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res); #endif /* !defined XENHVM */ return (0); } /** * Collect information from the XenStore related to our device and its frontend * * \param xnb Per-instance xnb configuration structure. */ static int xnb_collect_xenstore_info(struct xnb_softc *xnb) { /** * \todo Linux collects the following info. We should collect most * of this, too: * "feature-rx-notify" */ const char *otherend_path; const char *our_path; int err; unsigned int rx_copy, bridge_len; uint8_t no_csum_offload; otherend_path = xenbus_get_otherend_path(xnb->dev); our_path = xenbus_get_node(xnb->dev); /* Collect the critical communication parameters */ err = xs_gather(XST_NIL, otherend_path, "tx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_TX].ring_ref, "rx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_RX].ring_ref, "event-channel", "%" PRIu32, &xnb->evtchn, NULL); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Unable to retrieve ring information from " "frontend %s. Unable to connect.", otherend_path); return (err); } /* Collect the handle from xenstore */ err = xs_scanf(XST_NIL, our_path, "handle", NULL, "%li", &xnb->handle); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Error reading handle from frontend %s. " "Unable to connect.", otherend_path); } /* * Collect the bridgename, if any. We do not need bridge_len; we just * throw it away */ err = xs_read(XST_NIL, our_path, "bridge", &bridge_len, (void**)&xnb->bridge); if (err != 0) xnb->bridge = NULL; /* * Does the frontend request that we use rx copy? If not, return an * error because this driver only supports rx copy. */ err = xs_scanf(XST_NIL, otherend_path, "request-rx-copy", NULL, "%" PRIu32, &rx_copy); if (err == ENOENT) { err = 0; rx_copy = 0; } if (err < 0) { xenbus_dev_fatal(xnb->dev, err, "reading %s/request-rx-copy", otherend_path); return err; } /** * \todo: figure out the exact meaning of this feature, and when * the frontend will set it to true. It should be set to true * at some point */ /* if (!rx_copy)*/ /* return EOPNOTSUPP;*/ /** \todo Collect the rx notify feature */ /* Collect the feature-sg. */ if (xs_scanf(XST_NIL, otherend_path, "feature-sg", NULL, "%hhu", &xnb->can_sg) < 0) xnb->can_sg = 0; /* Collect remaining frontend features */ if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4", NULL, "%hhu", &xnb->gso) < 0) xnb->gso = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4-prefix", NULL, "%hhu", &xnb->gso_prefix) < 0) xnb->gso_prefix = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-no-csum-offload", NULL, "%hhu", &no_csum_offload) < 0) no_csum_offload = 0; xnb->ip_csum = (no_csum_offload == 0); return (0); } /** * Supply information about the physical device to the frontend * via XenBus. * * \param xnb Per-instance xnb configuration structure. 
*/ static int xnb_publish_backend_info(struct xnb_softc *xnb) { struct xs_transaction xst; const char *our_path; int error; our_path = xenbus_get_node(xnb->dev); do { error = xs_transaction_start(&xst); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Error publishing backend info " "(start transaction)"); break; } error = xs_printf(xst, our_path, "feature-sg", "%d", XNB_SG); if (error != 0) break; error = xs_printf(xst, our_path, "feature-gso-tcpv4", "%d", XNB_GSO_TCPV4); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-copy", "%d", XNB_RX_COPY); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-flip", "%d", XNB_RX_FLIP); if (error != 0) break; error = xs_transaction_end(xst, 0); if (error != 0 && error != EAGAIN) { xenbus_dev_fatal(xnb->dev, error, "ending transaction"); break; } } while (error == EAGAIN); return (error); } /** * Connect to our netfront peer now that it has completed publishing * its configuration into the XenStore. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_connect(struct xnb_softc *xnb) { int error; if (xenbus_get_state(xnb->dev) == XenbusStateConnected) return; if (xnb_collect_xenstore_info(xnb) != 0) return; xnb->flags &= ~XNBF_SHUTDOWN; /* Read front end configuration. */ /* Allocate resources whose size depends on front-end configuration. */ error = xnb_alloc_communication_mem(xnb); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Unable to allocate communication memory"); return; } /* * Connect communication channel. */ error = xnb_connect_comms(xnb); if (error != 0) { /* Specific errors are reported by xnb_connect_comms(). */ return; } xnb->carrier = 1; /* Ready for I/O. */ xenbus_set_state(xnb->dev, XenbusStateConnected); } /*-------------------------- Device Teardown Support -------------------------*/ /** * Perform device shutdown functions. * * \param xnb Per-instance xnb configuration structure. * * Mark this instance as shutting down, wait for any active requests * to drain, disconnect from the front-end, and notify any waiters (e.g. * a thread invoking our detach method) that detach can now proceed. */ static int xnb_shutdown(struct xnb_softc *xnb) { /* * Due to the need to drop our mutex during some * xenbus operations, it is possible for two threads * to attempt to close out shutdown processing at * the same time. Tell the caller that hits this * race to try back later. */ if ((xnb->flags & XNBF_IN_SHUTDOWN) != 0) return (EAGAIN); xnb->flags |= XNBF_SHUTDOWN; xnb->flags |= XNBF_IN_SHUTDOWN; mtx_unlock(&xnb->sc_lock); /* Free the network interface */ xnb->carrier = 0; if (xnb->xnb_ifp != NULL) { ether_ifdetach(xnb->xnb_ifp); if_free(xnb->xnb_ifp); xnb->xnb_ifp = NULL; } mtx_lock(&xnb->sc_lock); xnb_disconnect(xnb); mtx_unlock(&xnb->sc_lock); if (xenbus_get_state(xnb->dev) < XenbusStateClosing) xenbus_set_state(xnb->dev, XenbusStateClosing); mtx_lock(&xnb->sc_lock); xnb->flags &= ~XNBF_IN_SHUTDOWN; /* Indicate to xnb_detach() that is it safe to proceed. */ wakeup(xnb); return (0); } /** * Report an attach time error to the console and Xen, and cleanup * this instance by forcing immediate detach processing. * * \param xnb Per-instance xnb configuration structure. * \param err Errno describing the error. * \param fmt Printf style format and arguments */ static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) 
{ va_list ap; va_list ap_hotplug; va_start(ap, fmt); va_copy(ap_hotplug, ap); xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-error", fmt, ap_hotplug); va_end(ap_hotplug); xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "error"); xenbus_dev_vfatal(xnb->dev, err, fmt, ap); va_end(ap); xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "online", "0"); xnb_detach(xnb->dev); } /*---------------------------- NewBus Entrypoints ----------------------------*/ /** * Inspect a XenBus device and claim it if is of the appropriate type. * * \param dev NewBus device object representing a candidate XenBus device. * * \return 0 for success, errno codes for failure. */ static int xnb_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { DPRINTF("Claiming device %d, %s\n", device_get_unit(dev), devclass_get_name(device_get_devclass(dev))); device_set_desc(dev, "Backend Virtual Network Device"); device_quiet(dev); return (0); } return (ENXIO); } /** * Setup sysctl variables to control various Network Back parameters. * * \param xnb Xen Net Back softc. * */ static void xnb_setup_sysctl(struct xnb_softc *xnb) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; sysctl_ctx = device_get_sysctl_ctx(xnb->dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xnb->dev); if (sysctl_tree == NULL) return; #ifdef XNB_DEBUG SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "unit_test_results", CTLTYPE_STRING | CTLFLAG_RD, xnb, 0, xnb_unit_test_main, "A", "Results of builtin unit tests"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "dump_rings", CTLTYPE_STRING | CTLFLAG_RD, xnb, 0, xnb_dump_rings, "A", "Xennet Back Rings"); #endif /* XNB_DEBUG */ } /** * Create a network device. * @param handle device handle */ int create_netdev(device_t dev) { struct ifnet *ifp; struct xnb_softc *xnb; int err = 0; xnb = device_get_softc(dev); mtx_init(&xnb->sc_lock, "xnb_softc", "xen netback softc lock", MTX_DEF); mtx_init(&xnb->tx_lock, "xnb_tx", "xen netback tx lock", MTX_DEF); mtx_init(&xnb->rx_lock, "xnb_rx", "xen netback rx lock", MTX_DEF); xnb->dev = dev; ifmedia_init(&xnb->sc_media, 0, xnb_ifmedia_upd, xnb_ifmedia_sts); ifmedia_add(&xnb->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&xnb->sc_media, IFM_ETHER|IFM_MANUAL); err = xen_net_read_mac(dev, xnb->mac); if (err == 0) { /* Set up ifnet structure */ ifp = xnb->xnb_ifp = if_alloc(IFT_ETHER); ifp->if_softc = xnb; if_initname(ifp, "xnb", device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xnb_ioctl; ifp->if_output = ether_output; ifp->if_start = xnb_start; #ifdef notyet ifp->if_watchdog = xnb_watchdog; #endif ifp->if_init = xnb_ifinit; ifp->if_mtu = ETHERMTU; ifp->if_snd.ifq_maxlen = NET_RX_RING_SIZE - 1; ifp->if_hwassist = XNB_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_capenable = IFCAP_HWCSUM; ether_ifattach(ifp, xnb->mac); xnb->carrier = 0; } return err; } /** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_attach(device_t dev) { struct xnb_softc *xnb; int error; xnb_ring_type_t i; error = create_netdev(dev); if (error != 0) { xenbus_dev_fatal(dev, error, "creating netdev"); return (error); } DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); /* * Basic initialization. 
* After this block it is safe to call xnb_detach() * to clean up any allocated data for this instance. */ xnb = device_get_softc(dev); xnb->otherend_id = xenbus_get_otherend_id(dev); for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->ring_configs[i].ring_pages = 1; } /* * Setup sysctl variables. */ xnb_setup_sysctl(xnb); /* Update hot-plug status to satisfy xend. */ error = xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "connected"); if (error != 0) { xnb_attach_failed(xnb, error, "writing %s/hotplug-status", xenbus_get_node(xnb->dev)); return (error); } if ((error = xnb_publish_backend_info(xnb)) != 0) { /* * If we can't publish our data, we cannot participate * in this connection, and waiting for a front-end state * change will not help the situation. */ xnb_attach_failed(xnb, error, "Publishing backend status for %s", xenbus_get_node(xnb->dev)); return error; } /* Tell the front end that we are ready to connect. */ xenbus_set_state(dev, XenbusStateInitWait); return (0); } /** * Detach from a net back device instance. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. * * \note A net back device may be detached at any time in its life-cycle, * including part way through the attach process. For this reason, * initialization order and the intialization state checks in this * routine must be carefully coupled so that attach time failures * are gracefully handled. */ static int xnb_detach(device_t dev) { struct xnb_softc *xnb; DPRINTF("\n"); xnb = device_get_softc(dev); mtx_lock(&xnb->sc_lock); while (xnb_shutdown(xnb) == EAGAIN) { msleep(xnb, &xnb->sc_lock, /*wakeup prio unchanged*/0, "xnb_shutdown", 0); } mtx_unlock(&xnb->sc_lock); DPRINTF("\n"); mtx_destroy(&xnb->tx_lock); mtx_destroy(&xnb->rx_lock); mtx_destroy(&xnb->sc_lock); return (0); } /** * Prepare this net back device for suspension of this VM. * * \param dev NewBus device object representing this Xen net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_suspend(device_t dev) { return (0); } /** * Perform any processing required to recover from a suspended state. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_resume(device_t dev) { return (0); } /** * Handle state changes expressed via the XenStore by our front-end peer. * * \param dev NewBus device object representing this Xen * Net Back instance. * \param frontend_state The new state of the front-end. * * \return 0 for success, errno codes for failure. */ static void xnb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xnb_softc *xnb; xnb = device_get_softc(dev); DPRINTF("frontend_state=%s, xnb_state=%s\n", xenbus_strstate(frontend_state), xenbus_strstate(xenbus_get_state(xnb->dev))); switch (frontend_state) { case XenbusStateInitialising: break; case XenbusStateInitialised: case XenbusStateConnected: xnb_connect(xnb); break; case XenbusStateClosing: case XenbusStateClosed: mtx_lock(&xnb->sc_lock); xnb_shutdown(xnb); mtx_unlock(&xnb->sc_lock); if (frontend_state == XenbusStateClosed) xenbus_set_state(xnb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xnb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /*---------------------------- Request Processing ----------------------------*/ /** * Interrupt handler bound to the shared ring's event channel. 
* Entry point for the xennet transmit path in netback * Transfers packets from the Xen ring to the host's generic networking stack * * \param arg Callback argument registerd during event channel * binding - the xnb_softc for this instance. */ static void xnb_intr(void *arg) { struct xnb_softc *xnb; struct ifnet *ifp; netif_tx_back_ring_t *txb; RING_IDX req_prod_local; xnb = (struct xnb_softc *)arg; ifp = xnb->xnb_ifp; txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; mtx_lock(&xnb->tx_lock); do { int notify; req_prod_local = txb->sring->req_prod; xen_rmb(); for (;;) { struct mbuf *mbufc; int err; err = xnb_recv(txb, xnb->otherend_id, &mbufc, ifp, xnb->tx_gnttab); if (err || (mbufc == NULL)) break; /* Send the packet to the generic network stack */ (*xnb->xnb_ifp->if_input)(xnb->xnb_ifp, mbufc); } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify); if (notify != 0) - notify_remote_via_irq(xnb->irq); + xen_intr_signal(xnb->xen_intr_handle); txb->sring->req_event = txb->req_cons + 1; xen_mb(); } while (txb->sring->req_prod != req_prod_local) ; mtx_unlock(&xnb->tx_lock); xnb_start(ifp); } /** * Build a struct xnb_pkt based on netif_tx_request's from a netif tx ring. * Will read exactly 0 or 1 packets from the ring; never a partial packet. * \param[out] pkt The returned packet. If there is an error building * the packet, pkt.list_len will be set to 0. * \param[in] tx_ring Pointer to the Ring that is the input to this function * \param[in] start The ring index of the first potential request * \return The number of requests consumed to build this packet */ static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start) { /* * Outline: * 1) Initialize pkt * 2) Read the first request of the packet * 3) Read the extras * 4) Set cdr * 5) Loop on the remainder of the packet * 6) Finalize pkt (stuff like car_size and list_len) */ int idx = start; int discard = 0; /* whether to discard the packet */ int more_data = 0; /* there are more request past the last one */ uint16_t cdr_size = 0; /* accumulated size of requests 2 through n */ xnb_pkt_initialize(pkt); /* Read the first request */ if (RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->size = tx->size; pkt->flags = tx->flags & ~NETTXF_more_data; more_data = tx->flags & NETTXF_more_data; pkt->list_len++; pkt->car = idx; idx++; } /* Read the extra info */ if ((pkt->flags & NETTXF_extra_info) && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_extra_info_t *ext = (netif_extra_info_t*) RING_GET_REQUEST(tx_ring, idx); pkt->extra.type = ext->type; switch (pkt->extra.type) { case XEN_NETIF_EXTRA_TYPE_GSO: pkt->extra.u.gso = ext->u.gso; break; default: /* * The reference Linux netfront driver will * never set any other extra.type. So we don't * know what to do with it. Let's print an * error, then consume and discard the packet */ printf("xnb(%s:%d): Unknown extra info type %d." " Discarding packet\n", __func__, __LINE__, pkt->extra.type); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; break; } pkt->extra.flags = ext->flags; if (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE) { /* * The reference linux netfront driver never sets this * flag (nor does any other known netfront). So we * will discard the packet. 
*/ printf("xnb(%s:%d): Request sets " "XEN_NETIF_EXTRA_FLAG_MORE, but we can't handle " "that\n", __func__, __LINE__); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; } idx++; } /* Set cdr. If there is not more data, cdr is invalid */ pkt->cdr = idx; /* Loop on remainder of packet */ while (more_data && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->list_len++; cdr_size += tx->size; if (tx->flags & ~NETTXF_more_data) { /* There should be no other flags set at this point */ printf("xnb(%s:%d): Request sets unknown flags %d " "after the 1st request in the packet.\n", __func__, __LINE__, tx->flags); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); } more_data = tx->flags & NETTXF_more_data; idx++; } /* Finalize packet */ if (more_data != 0) { /* The ring ran out of requests before finishing the packet */ xnb_pkt_invalidate(pkt); idx = start; /* tell caller that we consumed no requests */ } else { /* Calculate car_size */ pkt->car_size = pkt->size - cdr_size; } if (discard != 0) { xnb_pkt_invalidate(pkt); } return idx - start; } /** * Respond to all the requests that constituted pkt. Builds the responses and * writes them to the ring, but doesn't push them to the shared ring. * \param[in] pkt the packet that needs a response * \param[in] error true if there was an error handling the packet, such * as in the hypervisor copy op or mbuf allocation * \param[out] ring Responses go here */ static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error) { /* * Outline: * 1) Respond to the first request * 2) Respond to the extra info reques * Loop through every remaining request in the packet, generating * responses that copy those requests' ids and sets the status * appropriately. */ netif_tx_request_t *tx; netif_tx_response_t *rsp; int i; uint16_t status; status = (xnb_pkt_is_valid(pkt) == 0) || error ? NETIF_RSP_ERROR : NETIF_RSP_OKAY; KASSERT((pkt->list_len == 0) || (ring->rsp_prod_pvt == pkt->car), ("Cannot respond to ring requests out of order")); if (pkt->list_len >= 1) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; if (pkt->flags & NETRXF_extra_info) { rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->status = NETIF_RSP_NULL; ring->rsp_prod_pvt++; } } for (i=0; i < pkt->list_len - 1; i++) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; } } /** * Create an mbuf chain to represent a packet. Initializes all of the headers * in the mbuf chain, but does not copy the data. The returned chain must be * free()'d when no longer needed * \param[in] pkt A packet to model the mbuf chain after * \return A newly allocated mbuf chain, possibly with clusters attached. 
* NULL on failure */ static struct mbuf* xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp) { /** * \todo consider using a memory pool for mbufs instead of * reallocating them for every packet */ /** \todo handle extra data */ struct mbuf *m; m = m_getm(NULL, pkt->size, M_NOWAIT, MT_DATA); if (m != NULL) { m->m_pkthdr.rcvif = ifp; if (pkt->flags & NETTXF_data_validated) { /* * We lie to the host OS and always tell it that the * checksums are ok, because the packet is unlikely to * get corrupted going across domains. */ m->m_pkthdr.csum_flags = ( CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR ); m->m_pkthdr.csum_data = 0xffff; } } return m; } /** * Build a gnttab_copy table that can be used to copy data from a pkt * to an mbufc. Does not actually perform the copy. Always uses gref's on * the packet side. * \param[in] pkt pkt's associated requests form the src for * the copy operation * \param[in] mbufc mbufc's storage forms the dest for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] txb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id) { const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into tx ring buffer */ int r_ofs = 0; /* offset of next data within tx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining = pkt->size; while (size_remaining > 0) { const netif_tx_request_t *txq = RING_GET_REQUEST(txb, r_idx); const size_t mbuf_space = M_TRAILINGSPACE(mbuf) - m_ofs; const size_t req_size = r_idx == pkt->car ? pkt->car_size : txq->size; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicking */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].source.u.ref = txq->gref; gnttab[gnt_idx].source.domid = otherend_id; gnttab[gnt_idx].source.offset = txq->offset + r_ofs; gnttab[gnt_idx].dest.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_source_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next tx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (M_TRAILINGSPACE(mbuf) - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Check the status of the grant copy operations, and update mbufs various * non-data fields to reflect the data present. * \param[in,out] mbufc mbuf chain to update. 
The chain must be valid and of * the correct length, and data should already be present * \param[in] gnttab A grant table for a just completed copy op * \param[in] n_entries The number of valid entries in the grant table */ static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries) { struct mbuf *mbuf = mbufc; int i; size_t total_size = 0; for (i = 0; i < n_entries; i++) { KASSERT(gnttab[i].status == GNTST_okay, ("Some gnttab_copy entry had error status %hd\n", gnttab[i].status)); mbuf->m_len += gnttab[i].len; total_size += gnttab[i].len; if (M_TRAILINGSPACE(mbuf) <= 0) { mbuf = mbuf->m_next; } } mbufc->m_pkthdr.len = total_size; xnb_add_mbuf_cksum(mbufc); } /** * Dequeue at most one packet from the shared ring * \param[in,out] txb Netif tx ring. A packet will be removed from it, and * its private indices will be updated. But the indices * will not be pushed to the shared ring. * \param[in] ifnet Interface to which the packet will be sent * \param[in] otherend Domain ID of the other end of the ring * \param[out] mbufc The assembled mbuf chain, ready to send to the generic * networking stack * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. * \return An error code */ static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, struct ifnet *ifnet, gnttab_copy_table gnttab) { struct xnb_pkt pkt; /* number of tx requests consumed to build the last packet */ int num_consumed; int nr_ents; *mbufc = NULL; num_consumed = xnb_ring2pkt(&pkt, txb, txb->req_cons); if (num_consumed == 0) return 0; /* Nothing to receive */ /* update statistics independent of errors */ ifnet->if_ipackets++; /* * if we got here, then 1 or more requests was consumed, but the packet * is not necessarily valid. */ if (xnb_pkt_is_valid(&pkt) == 0) { /* got a garbage packet, respond and drop it */ xnb_txpkt2rsp(&pkt, txb, 1); txb->req_cons += num_consumed; DPRINTF("xnb_intr: garbage packet, num_consumed=%d\n", num_consumed); ifnet->if_ierrors++; return EINVAL; } *mbufc = xnb_pkt2mbufc(&pkt, ifnet); if (*mbufc == NULL) { /* * Couldn't allocate mbufs. Respond and drop the packet. Do * not consume the requests */ xnb_txpkt2rsp(&pkt, txb, 1); DPRINTF("xnb_intr: Couldn't allocate mbufs, num_consumed=%d\n", num_consumed); ifnet->if_iqdrops++; return ENOMEM; } nr_ents = xnb_txpkt2gnttab(&pkt, *mbufc, gnttab, txb, otherend); if (nr_ents > 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, nr_ents); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); xnb_update_mbufc(*mbufc, gnttab, nr_ents); } xnb_txpkt2rsp(&pkt, txb, 0); txb->req_cons += num_consumed; return 0; } /** * Create an xnb_pkt based on the contents of an mbuf chain. 
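 * The packet is sized in RX ring slots: one response per PAGE_SIZE of data,
 * plus one extra-info slot when the chain is marked CSUM_TSO; the conversion
 * fails with EAGAIN if the ring cannot currently hold that many slots.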
* \param[in] mbufc mbuf chain to transform into a packet * \param[out] pkt Storage for the newly generated xnb_pkt * \param[in] start The ring index of the first available slot in the rx * ring * \param[in] space The number of free slots in the rx ring * \retval 0 Success * \retval EINVAL mbufc was corrupt or not convertible into a pkt * \retval EAGAIN There was not enough space in the ring to queue the * packet */ static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space) { int retval = 0; if ((mbufc == NULL) || ( (mbufc->m_flags & M_PKTHDR) == 0) || (mbufc->m_pkthdr.len == 0)) { xnb_pkt_invalidate(pkt); retval = EINVAL; } else { int slots_required; xnb_pkt_validate(pkt); pkt->flags = 0; pkt->size = mbufc->m_pkthdr.len; pkt->car = start; pkt->car_size = mbufc->m_len; if (mbufc->m_pkthdr.csum_flags & CSUM_TSO) { pkt->flags |= NETRXF_extra_info; pkt->extra.u.gso.size = mbufc->m_pkthdr.tso_segsz; pkt->extra.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; pkt->extra.u.gso.pad = 0; pkt->extra.u.gso.features = 0; pkt->extra.type = XEN_NETIF_EXTRA_TYPE_GSO; pkt->extra.flags = 0; pkt->cdr = start + 2; } else { pkt->cdr = start + 1; } if (mbufc->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA)) { pkt->flags |= (NETRXF_csum_blank | NETRXF_data_validated); } /* * Each ring response can have up to PAGE_SIZE of data. * Assume that we can defragment the mbuf chain efficiently * into responses so that each response but the last uses all * PAGE_SIZE bytes. */ pkt->list_len = (pkt->size + PAGE_SIZE - 1) / PAGE_SIZE; if (pkt->list_len > 1) { pkt->flags |= NETRXF_more_data; } slots_required = pkt->list_len + (pkt->flags & NETRXF_extra_info ? 1 : 0); if (slots_required > space) { xnb_pkt_invalidate(pkt); retval = EAGAIN; } } return retval; } /** * Build a gnttab_copy table that can be used to copy data from an mbuf chain * to the frontend's shared buffers. Does not actually perform the copy. * Always uses gref's on the other end's side. * \param[in] pkt pkt's associated responses form the dest for the copy * operatoin * \param[in] mbufc The source for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] rxb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id) { const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into rx ring buffer */ int r_ofs = 0; /* offset of next data within rx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining; size_remaining = (xnb_pkt_is_valid(pkt) != 0) ? 
pkt->size : 0; while (size_remaining > 0) { const netif_rx_request_t *rxq = RING_GET_REQUEST(rxb, r_idx); const size_t mbuf_space = mbuf->m_len - m_ofs; /* Xen shared pages have an implied size of PAGE_SIZE */ const size_t req_size = PAGE_SIZE; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicing */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].dest.u.ref = rxq->gref; gnttab[gnt_idx].dest.domid = otherend_id; gnttab[gnt_idx].dest.offset = r_ofs; gnttab[gnt_idx].source.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_dest_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next rx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (mbuf->m_len - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Generates responses for all the requests that constituted pkt. Builds * responses and writes them to the ring, but doesn't push the shared ring * indices. * \param[in] pkt the packet that needs a response * \param[in] gnttab The grant copy table corresponding to this packet. * Used to determine how many rsp->netif_rx_response_t's to * generate. * \param[in] n_entries Number of relevant entries in the grant table * \param[out] ring Responses go here * \return The number of RX requests that were consumed to generate * the responses */ static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring) { /* * This code makes the following assumptions: * * All entries in gnttab set GNTCOPY_dest_gref * * The entries in gnttab are grouped by their grefs: any two * entries with the same gref must be adjacent */ int error = 0; int gnt_idx, i; int n_responses = 0; grant_ref_t last_gref = GRANT_REF_INVALID; RING_IDX r_idx; KASSERT(gnttab != NULL, ("Received a null granttable copy")); /* * In the event of an error, we only need to send one response to the * netfront. In that case, we musn't write any data to the responses * after the one we send. 
So we must loop all the way through gnttab * looking for errors before we generate any responses * * Since we're looping through the grant table anyway, we'll count the * number of different gref's in it, which will tell us how many * responses to generate */ for (gnt_idx = 0; gnt_idx < n_entries; gnt_idx++) { int16_t status = gnttab[gnt_idx].status; if (status != GNTST_okay) { DPRINTF( "Got error %d for hypervisor gnttab_copy status\n", status); error = 1; break; } if (gnttab[gnt_idx].dest.u.ref != last_gref) { n_responses++; last_gref = gnttab[gnt_idx].dest.u.ref; } } if (error != 0) { uint16_t id; netif_rx_response_t *rsp; id = RING_GET_REQUEST(ring, ring->rsp_prod_pvt)->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = NETIF_RSP_ERROR; n_responses = 1; } else { gnt_idx = 0; const int has_extra = pkt->flags & NETRXF_extra_info; if (has_extra != 0) n_responses++; for (i = 0; i < n_responses; i++) { netif_rx_request_t rxq; netif_rx_response_t *rsp; r_idx = ring->rsp_prod_pvt + i; /* * We copy the structure of rxq instead of making a * pointer because it shares the same memory as rsp. */ rxq = *(RING_GET_REQUEST(ring, r_idx)); rsp = RING_GET_RESPONSE(ring, r_idx); if (has_extra && (i == 1)) { netif_extra_info_t *ext = (netif_extra_info_t*)rsp; ext->type = XEN_NETIF_EXTRA_TYPE_GSO; ext->flags = 0; ext->u.gso.size = pkt->extra.u.gso.size; ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; ext->u.gso.pad = 0; ext->u.gso.features = 0; } else { rsp->id = rxq.id; rsp->status = GNTST_okay; rsp->offset = 0; rsp->flags = 0; if (i < pkt->list_len - 1) rsp->flags |= NETRXF_more_data; if ((i == 0) && has_extra) rsp->flags |= NETRXF_extra_info; if ((i == 0) && (pkt->flags & NETRXF_data_validated)) { rsp->flags |= NETRXF_data_validated; rsp->flags |= NETRXF_csum_blank; } rsp->status = 0; for (; gnttab[gnt_idx].dest.u.ref == rxq.gref; gnt_idx++) { rsp->status += gnttab[gnt_idx].len; } } } } ring->req_cons += n_responses; ring->rsp_prod_pvt += n_responses; return n_responses; } /** * Add IP, TCP, and/or UDP checksums to every mbuf in a chain. The first mbuf * in the chain must start with a struct ether_header. * * XXX This function will perform incorrectly on UDP packets that are split up * into multiple ethernet frames. 
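 * This is done because packets from the guest may arrive with their checksum
 * fields left uncomputed, so the IP/TCP/UDP checksums are filled in here in
 * software before the mbuf is handed to the host stack.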
*/ static void xnb_add_mbuf_cksum(struct mbuf *mbufc) { struct ether_header *eh; struct ip *iph; uint16_t ether_type; eh = mtod(mbufc, struct ether_header*); ether_type = ntohs(eh->ether_type); if (ether_type != ETHERTYPE_IP) { /* Nothing to calculate */ return; } iph = (struct ip*)(eh + 1); if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { iph->ip_sum = 0; iph->ip_sum = in_cksum_hdr(iph); } switch (iph->ip_p) { case IPPROTO_TCP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t tcplen = ntohs(iph->ip_len) - sizeof(struct ip); struct tcphdr *th = (struct tcphdr*)(iph + 1); th->th_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_TCP + tcplen)); th->th_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; case IPPROTO_UDP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t udplen = ntohs(iph->ip_len) - sizeof(struct ip); struct udphdr *uh = (struct udphdr*)(iph + 1); uh->uh_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_UDP + udplen)); uh->uh_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; default: break; } } static void xnb_stop(struct xnb_softc *xnb) { struct ifnet *ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); ifp = xnb->xnb_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } static int xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct xnb_softc *xnb = ifp->if_softc; #ifdef INET struct ifreq *ifr = (struct ifreq*) data; struct ifaddr *ifa = (struct ifaddr*)data; #endif int error = 0; switch (cmd) { case SIOCSIFFLAGS: mtx_lock(&xnb->sc_lock); if (ifp->if_flags & IFF_UP) { xnb_ifinit_locked(xnb); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xnb_stop(xnb); } } /* * Note: netfront sets a variable named xn_if_flags * here, but that variable is never read */ mtx_unlock(&xnb->sc_lock); break; case SIOCSIFADDR: case SIOCGIFADDR: #ifdef INET mtx_lock(&xnb->sc_lock); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } arp_ifinit(ifp, ifa); mtx_unlock(&xnb->sc_lock); } else { mtx_unlock(&xnb->sc_lock); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFCAP: mtx_lock(&xnb->sc_lock); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= XNB_CSUM_FEATURES; } else { ifp->if_capenable &= ~(IFCAP_TXCSUM); ifp->if_hwassist &= ~(XNB_CSUM_FEATURES); } if ((ifr->ifr_reqcap & IFCAP_RXCSUM)) { ifp->if_capenable |= IFCAP_RXCSUM; } else { ifp->if_capenable &= ~(IFCAP_RXCSUM); } /* * TODO enable TSO4 and LRO once we no longer need * to calculate checksums in software */ #if 0 if (ifr->if_reqcap |= IFCAP_TSO4) { if (IFCAP_TXCSUM & ifp->if_capenable) { printf("xnb: Xen netif requires that " "TXCSUM be enabled in order " "to use TSO4\n"); error = EINVAL; } else { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } } else { ifp->if_capenable &= ~(IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TSO); } if (ifr->ifreqcap |= IFCAP_LRO) { ifp->if_capenable |= IFCAP_LRO; } else { ifp->if_capenable &= ~(IFCAP_LRO); } #endif mtx_unlock(&xnb->sc_lock); break; case 
SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xnb_ifinit(xnb); break; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &xnb->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void xnb_start_locked(struct ifnet *ifp) { netif_rx_back_ring_t *rxb; struct xnb_softc *xnb; struct mbuf *mbufc; RING_IDX req_prod_local; xnb = ifp->if_softc; rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; if (!xnb->carrier) return; do { int out_of_space = 0; int notify; req_prod_local = rxb->sring->req_prod; xen_rmb(); for (;;) { int error; IF_DEQUEUE(&ifp->if_snd, mbufc); if (mbufc == NULL) break; error = xnb_send(rxb, xnb->otherend_id, mbufc, xnb->rx_gnttab); switch (error) { case EAGAIN: /* * Insufficient space in the ring. * Requeue pkt and send when space is * available. */ IF_PREPEND(&ifp->if_snd, mbufc); /* * Perhaps the frontend missed an IRQ * and went to sleep. Notify it to wake * it up. */ out_of_space = 1; break; case EINVAL: /* OS gave a corrupt packet. Drop it.*/ ifp->if_oerrors++; /* FALLTHROUGH */ default: /* Send succeeded, or packet had error. * Free the packet */ ifp->if_opackets++; if (mbufc) m_freem(mbufc); break; } if (out_of_space != 0) break; } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify); if ((notify != 0) || (out_of_space != 0)) - notify_remote_via_irq(xnb->irq); + xen_intr_signal(xnb->xen_intr_handle); rxb->sring->req_event = req_prod_local + 1; xen_mb(); } while (rxb->sring->req_prod != req_prod_local) ; } /** * Sends one packet to the ring. Blocks until the packet is on the ring * \param[in] mbufc Contains one packet to send. Caller must free * \param[in,out] rxb The packet will be pushed onto this ring, but the * otherend will not be notified. * \param[in] otherend The domain ID of the other end of the connection * \retval EAGAIN The ring did not have enough space for the packet. * The ring has not been modified * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. 
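 * \retval 0 Success; responses for the packet have been queued on the ring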
* \retval EINVAL mbufc was corrupt or not convertible into a pkt */ static int xnb_send(netif_rx_back_ring_t *ring, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab) { struct xnb_pkt pkt; int error, n_entries, n_reqs; RING_IDX space; space = ring->sring->req_prod - ring->req_cons; error = xnb_mbufc2pkt(mbufc, &pkt, ring->rsp_prod_pvt, space); if (error != 0) return error; n_entries = xnb_rxpkt2gnttab(&pkt, mbufc, gnttab, ring, otherend); if (n_entries != 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, n_entries); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); } n_reqs = xnb_rxpkt2rsp(&pkt, gnttab, n_entries, ring); return 0; } static void xnb_start(struct ifnet *ifp) { struct xnb_softc *xnb; xnb = ifp->if_softc; mtx_lock(&xnb->rx_lock); xnb_start_locked(ifp); mtx_unlock(&xnb->rx_lock); } /* equivalent of network_open() in Linux */ static void xnb_ifinit_locked(struct xnb_softc *xnb) { struct ifnet *ifp; ifp = xnb->xnb_ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; xnb_stop(xnb); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } static void xnb_ifinit(void *xsc) { struct xnb_softc *xnb = xsc; mtx_lock(&xnb->sc_lock); xnb_ifinit_locked(xnb); mtx_unlock(&xnb->sc_lock); } /** * Read the 'mac' node at the given device's node in the store, and parse that * as colon-separated octets, placing result the given mac array. mac must be * a preallocated array of length ETHER_ADDR_LEN ETH_ALEN (as declared in * net/ethernet.h). * Return 0 on success, or errno on error. */ static int xen_net_read_mac(device_t dev, uint8_t mac[]) { char *s, *e, *macstr; const char *path; int error = 0; int i; path = xenbus_get_node(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); if (error != 0) { xenbus_dev_fatal(dev, error, "parsing %s/mac", path); } else { s = macstr; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac[i] = strtoul(s, &e, 16); if (s == e || (e[0] != ':' && e[0] != 0)) { error = ENOENT; break; } s = &e[1]; } free(macstr, M_XENBUS); } return error; } /** * Callback used by the generic networking code to tell us when our carrier * state has changed. Since we don't have a physical carrier, we don't care */ static int xnb_ifmedia_upd(struct ifnet *ifp) { return (0); } /** * Callback used by the generic networking code to ask us what our carrier * state is. 
Since we don't have a physical carrier, this is very simple */ static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xnb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xnb_probe), DEVMETHOD(device_attach, xnb_attach), DEVMETHOD(device_detach, xnb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xnb_suspend), DEVMETHOD(device_resume, xnb_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xnb_frontend_changed), { 0, 0 } }; static driver_t xnb_driver = { "xnb", xnb_methods, sizeof(struct xnb_softc), }; devclass_t xnb_devclass; DRIVER_MODULE(xnb, xenbusb_back, xnb_driver, xnb_devclass, 0, 0); /*-------------------------- Unit Tests -------------------------------------*/ #ifdef XNB_DEBUG #include "netback_unit_tests.c" #endif diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index 68b3bf9be826..f9c72e6fc49a 100644 --- a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -1,2257 +1,2254 @@ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 700000 #include #include #endif #include #include #include /* for DELAY */ #include #include #include #include #include #include #include -#include -#include -#include +#include #include #include -#include #include #include #include #include +#include + #include #include "xenbus_if.h" /* Features supported by all backends. 
TSO and LRO can be negotiated */ #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) #if __FreeBSD_version >= 700000 /* * Should the driver do LRO on the RX end * this can be toggled on the fly, but the * interface must be reset (down/up) for it * to take effect. */ static int xn_enable_lro = 1; TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); #else #define IFCAP_TSO4 0 #define CSUM_TSO 0 #endif #ifdef CONFIG_XEN static int MODPARM_rx_copy = 0; module_param_named(rx_copy, MODPARM_rx_copy, bool, 0); MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)"); static int MODPARM_rx_flip = 0; module_param_named(rx_flip, MODPARM_rx_flip, bool, 0); MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)"); #else static const int MODPARM_rx_copy = 1; static const int MODPARM_rx_flip = 0; #endif /** * \brief The maximum allowed data fragments in a single transmit * request. * * This limit is imposed by the backend driver. We assume here that * we are dealing with a Linux driver domain and have set our limit * to mirror the Linux MAX_SKB_FRAGS constant. */ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) #define NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES) #define RX_COPY_THRESHOLD 256 #define net_ratelimit() 0 struct netfront_info; struct netfront_rx_info; static void xn_txeof(struct netfront_info *); static void xn_rxeof(struct netfront_info *); static void network_alloc_rx_buffers(struct netfront_info *); static void xn_tick_locked(struct netfront_info *); static void xn_tick(void *); static void xn_intr(void *); static inline int xn_count_frags(struct mbuf *m); static int xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head); static void xn_start_locked(struct ifnet *); static void xn_start(struct ifnet *); static int xn_ioctl(struct ifnet *, u_long, caddr_t); static void xn_ifinit_locked(struct netfront_info *); static void xn_ifinit(void *); static void xn_stop(struct netfront_info *); static void xn_query_features(struct netfront_info *np); static int xn_configure_features(struct netfront_info *np); #ifdef notyet static void xn_watchdog(struct ifnet *); #endif static void show_device(struct netfront_info *sc); #ifdef notyet static void netfront_closing(device_t dev); #endif static void netif_free(struct netfront_info *info); static int netfront_detach(device_t dev); static int talk_to_backend(device_t dev, struct netfront_info *info); static int create_netdev(device_t dev); static void netif_disconnect_backend(struct netfront_info *info); static int setup_device(device_t dev, struct netfront_info *info); static void free_ring(int *ref, void *ring_ptr_ref); static int xn_ifmedia_upd(struct ifnet *ifp); static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); /* Xenolinux helper functions */ int network_connect(struct netfront_info *); static void xn_free_rx_ring(struct netfront_info *); static void xn_free_tx_ring(struct netfront_info *); static int xennet_get_responses(struct netfront_info *np, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list, int *pages_flipped_p); #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) #define INVALID_P2M_ENTRY (~0UL) /* * Mbuf pointers. We need these to keep track of the virtual addresses * of our mbuf chains since we can only convert from virtual to physical, * not the other way around. 
The size must track the free index arrays. */ struct xn_chain_data { struct mbuf *xn_tx_chain[NET_TX_RING_SIZE+1]; int xn_tx_chain_cnt; struct mbuf *xn_rx_chain[NET_RX_RING_SIZE+1]; }; struct net_device_stats { u_long rx_packets; /* total packets received */ u_long tx_packets; /* total packets transmitted */ u_long rx_bytes; /* total bytes received */ u_long tx_bytes; /* total bytes transmitted */ u_long rx_errors; /* bad packets received */ u_long tx_errors; /* packet transmit problems */ u_long rx_dropped; /* no space in linux buffers */ u_long tx_dropped; /* no space available in linux */ u_long multicast; /* multicast packets received */ u_long collisions; /* detailed rx_errors: */ u_long rx_length_errors; u_long rx_over_errors; /* receiver ring buff overflow */ u_long rx_crc_errors; /* recved pkt with crc error */ u_long rx_frame_errors; /* recv'd frame alignment error */ u_long rx_fifo_errors; /* recv'r fifo overrun */ u_long rx_missed_errors; /* receiver missed packet */ /* detailed tx_errors */ u_long tx_aborted_errors; u_long tx_carrier_errors; u_long tx_fifo_errors; u_long tx_heartbeat_errors; u_long tx_window_errors; /* for cslip etc */ u_long rx_compressed; u_long tx_compressed; }; struct netfront_info { struct ifnet *xn_ifp; #if __FreeBSD_version >= 700000 struct lro_ctrl xn_lro; #endif struct net_device_stats stats; u_int tx_full; netif_tx_front_ring_t tx; netif_rx_front_ring_t rx; struct mtx tx_lock; struct mtx rx_lock; struct mtx sc_lock; - u_int handle; - u_int irq; + xen_intr_handle_t xen_intr_handle; u_int copying_receiver; u_int carrier; u_int maxfrags; /* Receive-ring batched refills. */ #define RX_MIN_TARGET 32 #define RX_MAX_TARGET NET_RX_RING_SIZE int rx_min_target; int rx_max_target; int rx_target; grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; device_t xbdev; int tx_ring_ref; int rx_ring_ref; uint8_t mac[ETHER_ADDR_LEN]; struct xn_chain_data xn_cdata; /* mbufs */ struct mbuf_head xn_rx_batch; /* head of the batch queue */ int xn_if_flags; struct callout xn_stat_ch; u_long rx_pfn_array[NET_RX_RING_SIZE]; multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; mmu_update_t rx_mmu[NET_RX_RING_SIZE]; struct ifmedia sc_media; }; #define rx_mbufs xn_cdata.xn_rx_chain #define tx_mbufs xn_cdata.xn_tx_chain #define XN_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \ mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF); \ mtx_init(&(_sc)->sc_lock, #_name"_sc", "netfront softc lock", MTX_DEF) #define XN_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_lock) #define XN_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_lock) #define XN_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_lock) #define XN_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_lock) #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); #define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED); #define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED); #define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_lock); \ mtx_destroy(&(_sc)->tx_lock); \ mtx_destroy(&(_sc)->sc_lock); struct netfront_rx_info { struct netif_rx_response rx; struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; #define netfront_carrier_on(netif) ((netif)->carrier = 1) #define netfront_carrier_off(netif) ((netif)->carrier = 0) #define netfront_carrier_ok(netif) ((netif)->carrier) /* Access 
macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ static inline void add_id_to_freelist(struct mbuf **list, uintptr_t id) { KASSERT(id != 0, ("%s: the head item (0) must always be free.", __func__)); list[id] = list[0]; list[0] = (struct mbuf *)id; } static inline unsigned short get_id_from_freelist(struct mbuf **list) { uintptr_t id; id = (uintptr_t)list[0]; KASSERT(id != 0, ("%s: the head item (0) must always remain free.", __func__)); list[0] = list[id]; return (id); } static inline int xennet_rxidx(RING_IDX idx) { return idx & (NET_RX_RING_SIZE - 1); } static inline struct mbuf * xennet_get_rx_mbuf(struct netfront_info *np, RING_IDX ri) { int i = xennet_rxidx(ri); struct mbuf *m; m = np->rx_mbufs[i]; np->rx_mbufs[i] = NULL; return (m); } static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri) { int i = xennet_rxidx(ri); grant_ref_t ref = np->grant_rx_ref[i]; KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); np->grant_rx_ref[i] = GRANT_REF_INVALID; return ref; } #define IPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #ifdef INVARIANTS #define WPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #else #define WPRINTK(fmt, args...) #endif #ifdef DEBUG #define DPRINTK(fmt, args...) \ printf("[XEN] %s: " fmt, __func__, ##args) #else #define DPRINTK(fmt, args...) #endif /** * Read the 'mac' node at the given device's node in the store, and parse that * as colon-separated octets, placing result the given mac array. mac must be * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). * Return 0 on success, or errno on error. */ static int xen_net_read_mac(device_t dev, uint8_t mac[]) { int error, i; char *s, *e, *macstr; const char *path; path = xenbus_get_node(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); if (error == ENOENT) { /* * Deal with missing mac XenStore nodes on devices with * HVM emulation (the 'ioemu' configuration attribute) * enabled. * * The HVM emulator may execute in a stub device model * domain which lacks the permission, only given to Dom0, * to update the guest's XenStore tree. For this reason, * the HVM emulator doesn't even attempt to write the * front-side mac node, even when operating in Dom0. * However, there should always be a mac listed in the * backend tree. Fallback to this version if our query * of the front side XenStore location doesn't find * anything. */ path = xenbus_get_otherend_path(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); } if (error != 0) { xenbus_dev_fatal(dev, error, "parsing %s/mac", path); return (error); } s = macstr; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac[i] = strtoul(s, &e, 16); if (s == e || (e[0] != ':' && e[0] != 0)) { free(macstr, M_XENBUS); return (ENOENT); } s = &e[1]; } free(macstr, M_XENBUS); return (0); } /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and * inform the backend of the appropriate details for those. Switch to * Connected state. 
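 * (The ring allocation and event-channel binding described here are performed
 * by create_netdev() and talk_to_backend()/setup_device() below.)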
*/ static int netfront_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { device_set_desc(dev, "Virtual Network Interface"); return (0); } return (ENXIO); } static int netfront_attach(device_t dev) { int err; err = create_netdev(dev); if (err) { xenbus_dev_fatal(dev, err, "creating netdev"); return (err); } #if __FreeBSD_version >= 700000 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW, &xn_enable_lro, 0, "Large Receive Offload"); #endif return (0); } static int netfront_suspend(device_t dev) { struct netfront_info *info = device_get_softc(dev); XN_RX_LOCK(info); XN_TX_LOCK(info); netfront_carrier_off(info); XN_TX_UNLOCK(info); XN_RX_UNLOCK(info); return (0); } /** * We are reconnecting to the backend, due to a suspend/resume, or a backend * driver restart. We tear down our netif structure and recreate it, but * leave the device-layer structures intact so that this is transparent to the * rest of the kernel. */ static int netfront_resume(device_t dev) { struct netfront_info *info = device_get_softc(dev); netif_disconnect_backend(info); return (0); } /* Common code used when first setting up, and when resuming. */ static int talk_to_backend(device_t dev, struct netfront_info *info) { const char *message; struct xs_transaction xst; const char *node = xenbus_get_node(dev); int err; err = xen_net_read_mac(dev, info->mac); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", node); goto out; } /* Create shared ring, alloc event channel. */ err = setup_device(dev, info); if (err) goto out; again: err = xs_transaction_start(&xst); if (err) { xenbus_dev_fatal(dev, err, "starting transaction"); goto destroy_ring; } err = xs_printf(xst, node, "tx-ring-ref","%u", info->tx_ring_ref); if (err) { message = "writing tx ring-ref"; goto abort_transaction; } err = xs_printf(xst, node, "rx-ring-ref","%u", info->rx_ring_ref); if (err) { message = "writing rx ring-ref"; goto abort_transaction; } err = xs_printf(xst, node, - "event-channel", "%u", irq_to_evtchn_port(info->irq)); + "event-channel", "%u", + xen_intr_port(info->xen_intr_handle)); if (err) { message = "writing event-channel"; goto abort_transaction; } err = xs_printf(xst, node, "request-rx-copy", "%u", info->copying_receiver); if (err) { message = "writing request-rx-copy"; goto abort_transaction; } err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); if (err) { message = "writing feature-rx-notify"; goto abort_transaction; } err = xs_printf(xst, node, "feature-sg", "%d", 1); if (err) { message = "writing feature-sg"; goto abort_transaction; } #if __FreeBSD_version >= 700000 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); if (err) { message = "writing feature-gso-tcpv4"; goto abort_transaction; } #endif err = xs_transaction_end(xst, 0); if (err) { if (err == EAGAIN) goto again; xenbus_dev_fatal(dev, err, "completing transaction"); goto destroy_ring; } return 0; abort_transaction: xs_transaction_end(xst, 1); xenbus_dev_fatal(dev, err, "%s", message); destroy_ring: netif_free(info); out: return err; } static int setup_device(device_t dev, struct netfront_info *info) { netif_tx_sring_t *txs; netif_rx_sring_t *rxs; int error; struct ifnet *ifp; ifp = info->xn_ifp; info->tx_ring_ref = GRANT_REF_INVALID; info->rx_ring_ref = GRANT_REF_INVALID; info->rx.sring = NULL; info->tx.sring = NULL; - info->irq = 0; txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!txs) { error = ENOMEM; xenbus_dev_fatal(dev, error, 
"allocating tx ring page"); goto fail; } SHARED_RING_INIT(txs); FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref); if (error) goto fail; rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!rxs) { error = ENOMEM; xenbus_dev_fatal(dev, error, "allocating rx ring page"); goto fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref); if (error) goto fail; - error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), - "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq); + error = xen_intr_alloc_and_bind_local_port(dev, + xenbus_get_otherend_id(dev), /*filter*/NULL, xn_intr, info, + INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, &info->xen_intr_handle); if (error) { xenbus_dev_fatal(dev, error, - "bind_evtchn_to_irqhandler failed"); + "xen_intr_alloc_and_bind_local_port failed"); goto fail; } show_device(info); return (0); fail: netif_free(info); return (error); } #ifdef INET /** * If this interface has an ipv4 address, send an arp for it. This * helps to get the network going again after migrating hosts. */ static void netfront_send_fake_arp(device_t dev, struct netfront_info *info) { struct ifnet *ifp; struct ifaddr *ifa; ifp = info->xn_ifp; TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) { arp_ifinit(ifp, ifa); } } } #endif /** * Callback received when the backend's state changes. */ static void netfront_backend_changed(device_t dev, XenbusState newstate) { struct netfront_info *sc = device_get_softc(dev); DPRINTK("newstate=%d\n", newstate); switch (newstate) { case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateConnected: case XenbusStateUnknown: case XenbusStateClosed: case XenbusStateReconfigured: case XenbusStateReconfiguring: break; case XenbusStateInitWait: if (xenbus_get_state(dev) != XenbusStateInitialising) break; if (network_connect(sc) != 0) break; xenbus_set_state(dev, XenbusStateConnected); #ifdef INET netfront_send_fake_arp(dev, sc); #endif break; case XenbusStateClosing: xenbus_set_state(dev, XenbusStateClosed); break; } } static void xn_free_rx_ring(struct netfront_info *sc) { #if 0 int i; for (i = 0; i < NET_RX_RING_SIZE; i++) { if (sc->xn_cdata.rx_mbufs[i] != NULL) { m_freem(sc->rx_mbufs[i]); sc->rx_mbufs[i] = NULL; } } sc->rx.rsp_cons = 0; sc->xn_rx_if->req_prod = 0; sc->xn_rx_if->event = sc->rx.rsp_cons ; #endif } static void xn_free_tx_ring(struct netfront_info *sc) { #if 0 int i; for (i = 0; i < NET_TX_RING_SIZE; i++) { if (sc->tx_mbufs[i] != NULL) { m_freem(sc->tx_mbufs[i]); sc->xn_cdata.xn_tx_chain[i] = NULL; } } return; #endif } /** * \brief Verify that there is sufficient space in the Tx ring * buffer for a maximally sized request to be enqueued. * * A transmit request requires a transmit descriptor for each packet * fragment, plus up to 2 entries for "options" (e.g. TSO). */ static inline int xn_tx_slot_available(struct netfront_info *np) { return (RING_FREE_REQUESTS(&np->tx) > (MAX_TX_REQ_FRAGS + 2)); } static void netif_release_tx_bufs(struct netfront_info *np) { int i; for (i = 1; i <= NET_TX_RING_SIZE; i++) { struct mbuf *m; m = np->tx_mbufs[i]; /* * We assume that no kernel addresses are * less than NET_TX_RING_SIZE. Any entry * in the table that is below this number * must be an index from free-list tracking. 
*/ if (((uintptr_t)m) <= NET_TX_RING_SIZE) continue; gnttab_end_foreign_access_ref(np->grant_tx_ref[i]); gnttab_release_grant_reference(&np->gref_tx_head, np->grant_tx_ref[i]); np->grant_tx_ref[i] = GRANT_REF_INVALID; add_id_to_freelist(np->tx_mbufs, i); np->xn_cdata.xn_tx_chain_cnt--; if (np->xn_cdata.xn_tx_chain_cnt < 0) { panic("%s: tx_chain_cnt must be >= 0", __func__); } m_free(m); } } static void network_alloc_rx_buffers(struct netfront_info *sc) { int otherend_id = xenbus_get_otherend_id(sc->xbdev); unsigned short id; struct mbuf *m_new; int i, batch_target, notify; RING_IDX req_prod; struct xen_memory_reservation reservation; grant_ref_t ref; int nr_flips; netif_rx_request_t *req; vm_offset_t vaddr; u_long pfn; req_prod = sc->rx.req_prod_pvt; - if (unlikely(sc->carrier == 0)) + if (__predict_false(sc->carrier == 0)) return; /* * Allocate mbufs greedily, even though we batch updates to the * receive ring. This creates a less bursty demand on the memory * allocator, and so should reduce the chance of failed allocation * requests both for ourself and for other kernel subsystems. * * Here we attempt to maintain rx_target buffers in flight, counting * buffers that we have yet to process in the receive ring. */ batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons); for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) { MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { printf("%s: MGETHDR failed\n", __func__); goto no_mbuf; } m_cljget(m_new, M_NOWAIT, MJUMPAGESIZE); if ((m_new->m_flags & M_EXT) == 0) { printf("%s: m_cljget failed\n", __func__); m_freem(m_new); no_mbuf: if (i != 0) goto refill; /* * XXX set timer */ break; } m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; /* queue the mbufs allocated */ mbufq_tail(&sc->xn_rx_batch, m_new); } /* * If we've allocated at least half of our target number of entries, * submit them to the backend - we have enough to make the overhead * of submission worthwhile. Otherwise wait for more mbufs and * request entries to become available. */ if (i < (sc->rx_target/2)) { if (req_prod >sc->rx.sring->req_prod) goto push; return; } /* * Double floating fill target if we risked having the backend * run out of empty buffers for receive traffic. We define "running * low" as having less than a fourth of our target buffers free * at the time we refilled the queue. */ if ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) { sc->rx_target *= 2; if (sc->rx_target > sc->rx_max_target) sc->rx_target = sc->rx_max_target; } refill: for (nr_flips = i = 0; ; i++) { if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL) break; m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)( vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); id = xennet_rxidx(req_prod + i); KASSERT(sc->rx_mbufs[id] == NULL, ("non-NULL xm_rx_chain")); sc->rx_mbufs[id] = m_new; ref = gnttab_claim_grant_reference(&sc->gref_rx_head); KASSERT(ref != GNTTAB_LIST_END, ("reserved grant references exhuasted")); sc->grant_rx_ref[id] = ref; vaddr = mtod(m_new, vm_offset_t); pfn = vtophys(vaddr) >> PAGE_SHIFT; req = RING_GET_REQUEST(&sc->rx, req_prod + i); if (sc->copying_receiver == 0) { gnttab_grant_foreign_transfer_ref(ref, otherend_id, pfn); sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn); if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* Remove this page before passing * back to Xen. 
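 * In flip (transfer) mode the underlying machine page will be given to the
 * backend, so its physical-to-machine mapping is invalidated and the virtual
 * mapping is torn down before the page leaves this domain.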
*/ set_phys_to_machine(pfn, INVALID_P2M_ENTRY); MULTI_update_va_mapping(&sc->rx_mcl[i], vaddr, 0, 0); } nr_flips++; } else { gnttab_grant_foreign_access_ref(ref, otherend_id, PFNTOMFN(pfn), 0); } req->id = id; req->gref = ref; sc->rx_pfn_array[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; } KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed")); /* * We may have allocated buffers which have entries outstanding * in the page * update queue -- make sure we flush those first! */ PT_UPDATES_FLUSH(); if (nr_flips != 0) { #ifdef notyet /* Tell the ballon driver what is going on. */ balloon_update_driver_allowance(i); #endif set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array); reservation.nr_extents = i; reservation.extent_order = 0; reservation.address_bits = 0; reservation.domid = DOMID_SELF; if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* After all PTEs have been zapped, flush the TLB. */ sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; /* Give away a batch of pages. */ sc->rx_mcl[i].op = __HYPERVISOR_memory_op; sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation; sc->rx_mcl[i].args[1] = (u_long)&reservation; /* Zap PTEs and give away pages in one big multicall. */ (void)HYPERVISOR_multicall(sc->rx_mcl, i+1); - if (unlikely(sc->rx_mcl[i].result != i || + if (__predict_false(sc->rx_mcl[i].result != i || HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != i)) panic("%s: unable to reduce memory " "reservation\n", __func__); } } else { wmb(); } /* Above is a suitable barrier to ensure backend will see requests. */ sc->rx.req_prod_pvt = req_prod + i; push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); } static void xn_rxeof(struct netfront_info *np) { struct ifnet *ifp; #if __FreeBSD_version >= 700000 struct lro_ctrl *lro = &np->xn_lro; struct lro_entry *queued; #endif struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; multicall_entry_t *mcl; struct mbuf *m; struct mbuf_head rxq, errq; int err, pages_flipped = 0, work_to_do; do { XN_RX_LOCK_ASSERT(np); if (!netfront_carrier_ok(np)) return; mbufq_init(&errq); mbufq_init(&rxq); ifp = np->xn_ifp; rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ i = np->rx.rsp_cons; while ((i != rp)) { memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); memset(extras, 0, sizeof(rinfo.extras)); m = NULL; err = xennet_get_responses(np, &rinfo, rp, &i, &m, &pages_flipped); - if (unlikely(err)) { + if (__predict_false(err)) { if (m) mbufq_tail(&errq, m); np->stats.rx_errors++; continue; } m->m_pkthdr.rcvif = ifp; if ( rx->flags & NETRXF_data_validated ) { /* Tell the stack the checksums are okay */ /* * XXX this isn't necessarily the case - need to add * check */ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } np->stats.rx_packets++; np->stats.rx_bytes += m->m_pkthdr.len; mbufq_tail(&rxq, m); np->rx.rsp_cons = i; } if (pages_flipped) { /* Some pages are no longer absent... */ #ifdef notyet balloon_update_driver_allowance(-pages_flipped); #endif /* Do all the remapping work, and M->P updates, in one big * hypercall. 
*/ if (!!xen_feature(XENFEAT_auto_translated_physmap)) { mcl = np->rx_mcl + pages_flipped; mcl->op = __HYPERVISOR_mmu_update; mcl->args[0] = (u_long)np->rx_mmu; mcl->args[1] = pages_flipped; mcl->args[2] = 0; mcl->args[3] = DOMID_SELF; (void)HYPERVISOR_multicall(np->rx_mcl, pages_flipped + 1); } } while ((m = mbufq_dequeue(&errq))) m_freem(m); /* * Process all the mbufs after the remapping is complete. * Break the mbuf chain first though. */ while ((m = mbufq_dequeue(&rxq)) != NULL) { ifp->if_ipackets++; /* * Do we really need to drop the rx lock? */ XN_RX_UNLOCK(np); #if __FreeBSD_version >= 700000 /* Use LRO if possible */ if ((ifp->if_capenable & IFCAP_LRO) == 0 || lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { /* * If LRO fails, pass up to the stack * directly. */ (*ifp->if_input)(ifp, m); } #else (*ifp->if_input)(ifp, m); #endif XN_RX_LOCK(np); } np->rx.rsp_cons = i; #if __FreeBSD_version >= 700000 /* * Flush any outstanding LRO work */ while (!SLIST_EMPTY(&lro->lro_active)) { queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif #if 0 /* If we get a callback with very few responses, reduce fill target. */ /* NB. Note exponential increase, linear decrease. */ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) np->rx_target = np->rx_min_target; #endif network_alloc_rx_buffers(np); RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do); } while (work_to_do); } static void xn_txeof(struct netfront_info *np) { RING_IDX i, prod; unsigned short id; struct ifnet *ifp; netif_tx_response_t *txr; struct mbuf *m; XN_TX_LOCK_ASSERT(np); if (!netfront_carrier_ok(np)) return; ifp = np->xn_ifp; do { prod = np->tx.sring->rsp_prod; rmb(); /* Ensure we see responses up to 'rp'. */ for (i = np->tx.rsp_cons; i != prod; i++) { txr = RING_GET_RESPONSE(&np->tx, i); if (txr->status == NETIF_RSP_NULL) continue; if (txr->status != NETIF_RSP_OKAY) { printf("%s: WARNING: response is %d!\n", __func__, txr->status); } id = txr->id; m = np->tx_mbufs[id]; KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); KASSERT((uintptr_t)m > NET_TX_RING_SIZE, ("mbuf already on the free list, but we're " "trying to free it again!")); M_ASSERTVALID(m); /* * Increment packet count if this is the last * mbuf of the chain. */ if (!m->m_next) ifp->if_opackets++; - if (unlikely(gnttab_query_foreign_access( + if (__predict_false(gnttab_query_foreign_access( np->grant_tx_ref[id]) != 0)) { panic("%s: grant id %u still in use by the " "backend", __func__, id); } gnttab_end_foreign_access_ref( np->grant_tx_ref[id]); gnttab_release_grant_reference( &np->gref_tx_head, np->grant_tx_ref[id]); np->grant_tx_ref[id] = GRANT_REF_INVALID; np->tx_mbufs[id] = NULL; add_id_to_freelist(np->tx_mbufs, id); np->xn_cdata.xn_tx_chain_cnt--; m_free(m); /* Only mark the queue active if we've freed up at least one slot to try */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } np->tx.rsp_cons = prod; /* * Set a new event, then check for race with update of * tx_cons. Note that it is essential to schedule a * callback, no matter how few buffers are pending. Even if * there is space in the transmit ring, higher layers may * be blocked because too much data is outstanding: in such * cases notification from Xen is likely to be the only kick * that we'll get. 
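 * The threshold below is placed roughly halfway between the current response
 * producer and the request producer, so we are interrupted again once about
 * half of the still-outstanding requests have been completed.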
*/ np->tx.sring->rsp_event = prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; mb(); } while (prod != np->tx.sring->rsp_prod); if (np->tx_full && ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { np->tx_full = 0; #if 0 if (np->user_state == UST_OPEN) netif_wake_queue(dev); #endif } } static void xn_intr(void *xsc) { struct netfront_info *np = xsc; struct ifnet *ifp = np->xn_ifp; #if 0 if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod && likely(netfront_carrier_ok(np)) && ifp->if_drv_flags & IFF_DRV_RUNNING)) return; #endif if (RING_HAS_UNCONSUMED_RESPONSES(&np->tx)) { XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); } XN_RX_LOCK(np); xn_rxeof(np); XN_RX_UNLOCK(np); if (ifp->if_drv_flags & IFF_DRV_RUNNING && !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) xn_start(ifp); } static void xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m, grant_ref_t ref) { int new = xennet_rxidx(np->rx.req_prod_pvt); KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL")); np->rx_mbufs[new] = m; np->grant_rx_ref[new] = ref; RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; np->rx.req_prod_pvt++; } static int xennet_get_extras(struct netfront_info *np, struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) { struct netif_extra_info *extra; int err = 0; do { struct mbuf *m; grant_ref_t ref; - if (unlikely(*cons + 1 == rp)) { + if (__predict_false(*cons + 1 == rp)) { #if 0 if (net_ratelimit()) WPRINTK("Missing extra info\n"); #endif err = EINVAL; break; } extra = (struct netif_extra_info *) RING_GET_RESPONSE(&np->rx, ++(*cons)); - if (unlikely(!extra->type || + if (__predict_false(!extra->type || extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { #if 0 if (net_ratelimit()) WPRINTK("Invalid extra type: %d\n", extra->type); #endif err = EINVAL; } else { memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); } m = xennet_get_rx_mbuf(np, *cons); ref = xennet_get_rx_ref(np, *cons); xennet_move_rx_slot(np, m, ref); } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); return err; } static int xennet_get_responses(struct netfront_info *np, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list, int *pages_flipped_p) { int pages_flipped = *pages_flipped_p; struct mmu_update *mmu; struct multicall_entry *mcl; struct netif_rx_response *rx = &rinfo->rx; struct netif_extra_info *extras = rinfo->extras; struct mbuf *m, *m0, *m_prev; grant_ref_t ref = xennet_get_rx_ref(np, *cons); RING_IDX ref_cons = *cons; int frags = 1; int err = 0; u_long ret; m0 = m = m_prev = xennet_get_rx_mbuf(np, *cons); if (rx->flags & NETRXF_extra_info) { err = xennet_get_extras(np, extras, rp, cons); } if (m0 != NULL) { m0->m_pkthdr.len = 0; m0->m_next = NULL; } for (;;) { u_long mfn; #if 0 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", rx->status, rx->offset, frags); #endif - if (unlikely(rx->status < 0 || + if (__predict_false(rx->status < 0 || rx->offset + rx->status > PAGE_SIZE)) { #if 0 if (net_ratelimit()) WPRINTK("rx->offset: %x, size: %u\n", rx->offset, rx->status); #endif xennet_move_rx_slot(np, m, ref); if (m0 == m) m0 = NULL; m = NULL; err = EINVAL; goto next_skip_queue; } /* * This definitely indicates a bug, either in this driver or in * the backend driver. In future this should flag the bad * situation to the system controller to reboot the backed. 
*/ if (ref == GRANT_REF_INVALID) { #if 0 if (net_ratelimit()) WPRINTK("Bad rx response id %d.\n", rx->id); #endif printf("%s: Bad rx response id %d.\n", __func__,rx->id); err = EINVAL; goto next; } if (!np->copying_receiver) { /* Memory pressure, insufficient buffer * headroom, ... */ if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n", rx->id, rx->status); xennet_move_rx_slot(np, m, ref); err = ENOMEM; goto next; } if (!xen_feature( XENFEAT_auto_translated_physmap)) { /* Remap the page. */ void *vaddr = mtod(m, void *); uint32_t pfn; mcl = np->rx_mcl + pages_flipped; mmu = np->rx_mmu + pages_flipped; MULTI_update_va_mapping(mcl, (u_long)vaddr, (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW | PG_V | PG_M | PG_A, 0); pfn = (uintptr_t)m->m_ext.ext_arg1; mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu->val = pfn; set_phys_to_machine(pfn, mfn); } pages_flipped++; } else { ret = gnttab_end_foreign_access_ref(ref); KASSERT(ret, ("ret != 0")); } gnttab_release_grant_reference(&np->gref_rx_head, ref); next: if (m == NULL) break; m->m_len = rx->status; m->m_data += rx->offset; m0->m_pkthdr.len += rx->status; next_skip_queue: if (!(rx->flags & NETRXF_more_data)) break; if (*cons + frags == rp) { if (net_ratelimit()) WPRINTK("Need more frags\n"); err = ENOENT; printf("%s: cons %u frags %u rp %u, not enough frags\n", __func__, *cons, frags, rp); break; } /* * Note that m can be NULL, if rx->status < 0 or if * rx->offset + rx->status > PAGE_SIZE above. */ m_prev = m; rx = RING_GET_RESPONSE(&np->rx, *cons + frags); m = xennet_get_rx_mbuf(np, *cons + frags); /* * m_prev == NULL can happen if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. */ if (m_prev != NULL) m_prev->m_next = m; /* * m0 can be NULL if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. */ if (m0 == NULL) m0 = m; m->m_next = NULL; ref = xennet_get_rx_ref(np, *cons + frags); ref_cons = *cons + frags; frags++; } *list = m0; *cons += frags; *pages_flipped_p = pages_flipped; return (err); } static void xn_tick_locked(struct netfront_info *sc) { XN_RX_LOCK_ASSERT(sc); callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); /* XXX placeholder for printing debug information */ } static void xn_tick(void *xsc) { struct netfront_info *sc; sc = xsc; XN_RX_LOCK(sc); xn_tick_locked(sc); XN_RX_UNLOCK(sc); } /** * \brief Count the number of fragments in an mbuf chain. * * Surprisingly, there isn't an M* macro for this. */ static inline int xn_count_frags(struct mbuf *m) { int nfrags; for (nfrags = 0; m != NULL; m = m->m_next) nfrags++; return (nfrags); } /** * Given an mbuf chain, make sure we have enough room and then push * it onto the transmit ring. */ static int xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) { struct ifnet *ifp; struct mbuf *m; u_int nfrags; netif_extra_info_t *extra; int otherend_id; ifp = sc->xn_ifp; /** * Defragment the mbuf if necessary. */ nfrags = xn_count_frags(m_head); /* * Check to see whether this request is longer than netback * can handle, and try to defrag it. */ /** * It is a bit lame, but the netback driver in Linux can't * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of * the Linux network stack. */ if (nfrags > sc->maxfrags) { m = m_defrag(m_head, M_NOWAIT); if (!m) { /* * Defrag failed, so free the mbuf and * therefore drop the packet. 
*/ m_freem(m_head); return (EMSGSIZE); } m_head = m; } /* Determine how many fragments now exist */ nfrags = xn_count_frags(m_head); /* * Check to see whether the defragmented packet has too many * segments for the Linux netback driver. */ /** * The FreeBSD TCP stack, with TSO enabled, can produce a chain * of mbufs longer than Linux can handle. Make sure we don't * pass a too-long chain over to the other side by dropping the * packet. It doesn't look like there is currently a way to * tell the TCP stack to generate a shorter chain of packets. */ if (nfrags > MAX_TX_REQ_FRAGS) { #ifdef DEBUG printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " "won't be able to handle it, dropping\n", __func__, nfrags, MAX_TX_REQ_FRAGS); #endif m_freem(m_head); return (EMSGSIZE); } /* * This check should be redundant. We've already verified that we * have enough slots in the ring to handle a packet of maximum * size, and that our packet is less than the maximum size. Keep * it in here as an assert for now just to make certain that * xn_tx_chain_cnt is accurate. */ KASSERT((sc->xn_cdata.xn_tx_chain_cnt + nfrags) <= NET_TX_RING_SIZE, ("%s: xn_tx_chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " "(%d)!", __func__, (int) sc->xn_cdata.xn_tx_chain_cnt, (int) nfrags, (int) NET_TX_RING_SIZE)); /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; extra = NULL; otherend_id = xenbus_get_otherend_id(sc->xbdev); for (m = m_head; m; m = m->m_next) { netif_tx_request_t *tx; uintptr_t id; grant_ref_t ref; u_long mfn; /* XXX Wrong type? */ tx = RING_GET_REQUEST(&sc->tx, sc->tx.req_prod_pvt); id = get_id_from_freelist(sc->tx_mbufs); if (id == 0) panic("%s: was allocated the freelist head!\n", __func__); sc->xn_cdata.xn_tx_chain_cnt++; if (sc->xn_cdata.xn_tx_chain_cnt > NET_TX_RING_SIZE) panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", __func__); sc->tx_mbufs[id] = m; tx->id = id; ref = gnttab_claim_grant_reference(&sc->gref_tx_head); KASSERT((short)ref >= 0, ("Negative ref")); mfn = virt_to_mfn(mtod(m, vm_offset_t)); gnttab_grant_foreign_access_ref(ref, otherend_id, mfn, GNTMAP_readonly); tx->gref = sc->grant_tx_ref[id] = ref; tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); tx->flags = 0; if (m == m_head) { /* * The first fragment has the entire packet * size, subsequent fragments have just the * fragment size. The backend works out the * true size of the first fragment by * subtracting the sizes of the other * fragments. */ tx->size = m->m_pkthdr.len; /* * The first fragment contains the checksum flags * and is optionally followed by extra data for * TSO etc. */ /** * CSUM_TSO requires checksum offloading. * Some versions of FreeBSD fail to * set CSUM_TCP in the CSUM_TSO case, * so we have to test for CSUM_TSO * explicitly. 
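A small worked example of the fragment-size convention described in the packing loop above, using hypothetical lengths and plain C instead of an mbuf chain: the first request advertises the whole packet length, later requests carry only their own length, and the backend recovers the first fragment's true size by subtraction.

#include <stdio.h>

int
main(void)
{
	/* Hypothetical 3-fragment packet: 600 + 500 + 400 = 1500 bytes. */
	unsigned int frag_len[3] = { 600, 500, 400 };
	unsigned int pkt_len = 600 + 500 + 400;

	/*
	 * What the frontend places in tx->size: the first request carries
	 * the whole packet length, the rest carry their own length.
	 */
	unsigned int tx_size[3] = { pkt_len, frag_len[1], frag_len[2] };

	/*
	 * The backend works out the first fragment's real length by
	 * subtracting the later fragments from the first request's size.
	 */
	unsigned int first = tx_size[0] - tx_size[1] - tx_size[2];

	printf("first fragment really holds %u bytes (expect %u)\n",
	    first, frag_len[0]);
	return (0);
}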
*/ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_TSO)) { tx->flags |= (NETTXF_csum_blank | NETTXF_data_validated); } #if __FreeBSD_version >= 700000 if (m->m_pkthdr.csum_flags & CSUM_TSO) { struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_REQUEST(&sc->tx, ++sc->tx.req_prod_pvt); tx->flags |= NETTXF_extra_info; gso->u.gso.size = m->m_pkthdr.tso_segsz; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; } #endif } else { tx->size = m->m_len; } if (m->m_next) tx->flags |= NETTXF_more_data; sc->tx.req_prod_pvt++; } BPF_MTAP(ifp, m_head); sc->stats.tx_bytes += m_head->m_pkthdr.len; sc->stats.tx_packets++; return (0); } static void xn_start_locked(struct ifnet *ifp) { struct netfront_info *sc; struct mbuf *m_head; int notify; sc = ifp->if_softc; if (!netfront_carrier_ok(sc)) return; /* * While we have enough transmit slots available for at least one * maximum-sized packet, pull mbufs off the queue and put them on * the transmit ring. */ while (xn_tx_slot_available(sc)) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (xn_assemble_tx_request(sc, m_head) != 0) break; } RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); if (RING_FULL(&sc->tx)) { sc->tx_full = 1; #if 0 netif_stop_queue(dev); #endif } } static void xn_start(struct ifnet *ifp) { struct netfront_info *sc; sc = ifp->if_softc; XN_TX_LOCK(sc); xn_start_locked(ifp); XN_TX_UNLOCK(sc); } /* equivalent of network_open() in Linux */ static void xn_ifinit_locked(struct netfront_info *sc) { struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; xn_stop(sc); network_alloc_rx_buffers(sc); sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); } static void xn_ifinit(void *xsc) { struct netfront_info *sc = xsc; XN_LOCK(sc); xn_ifinit_locked(sc); XN_UNLOCK(sc); } static int xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct netfront_info *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif int mask, error = 0; switch(cmd) { case SIOCSIFADDR: case SIOCGIFADDR: #ifdef INET XN_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) xn_ifinit_locked(sc); arp_ifinit(ifp, ifa); XN_UNLOCK(sc); } else { XN_UNLOCK(sc); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFMTU: /* XXX can we alter the MTU on a VN ?*/ #ifdef notyet if (ifr->ifr_mtu > XN_JUMBO_MTU) error = EINVAL; else #endif { ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xn_ifinit(sc); } break; case SIOCSIFFLAGS: XN_LOCK(sc); if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. 
*/ #ifdef notyet /* No promiscuous mode with Xen */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->xn_if_flags & IFF_PROMISC)) { XN_SETBIT(sc, XN_RX_MODE, XN_RXMODE_RX_PROMISC); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->xn_if_flags & IFF_PROMISC) { XN_CLRBIT(sc, XN_RX_MODE, XN_RXMODE_RX_PROMISC); } else #endif xn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xn_stop(sc); } } sc->xn_if_flags = ifp->if_flags; XN_UNLOCK(sc); error = 0; break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); } } if (mask & IFCAP_RXCSUM) { ifp->if_capenable ^= IFCAP_RXCSUM; } #if __FreeBSD_version >= 700000 if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } else { IPRINTK("Xen requires tx checksum offload" " be enabled to use TSO\n"); error = EINVAL; } } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; } #endif error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet if (ifp->if_drv_flags & IFF_DRV_RUNNING) { XN_LOCK(sc); xn_setmulti(sc); XN_UNLOCK(sc); error = 0; } #endif /* FALLTHROUGH */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); } return (error); } static void xn_stop(struct netfront_info *sc) { struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; callout_stop(&sc->xn_stat_ch); xn_free_rx_ring(sc); xn_free_tx_ring(sc); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } /* START of Xenolinux helper functions adapted to FreeBSD */ int network_connect(struct netfront_info *np) { int i, requeue_idx, error; grant_ref_t ref; netif_rx_request_t *req; u_int feature_rx_copy, feature_rx_flip; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-copy", NULL, "%u", &feature_rx_copy); if (error) feature_rx_copy = 0; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-flip", NULL, "%u", &feature_rx_flip); if (error) feature_rx_flip = 1; /* * Copy packets on receive path if: * (a) This was requested by user, and the backend supports it; or * (b) Flipping was requested, but this is unsupported by the backend. */ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) || (MODPARM_rx_flip && !feature_rx_flip)); /* Recovery procedure: */ error = talk_to_backend(np->xbdev, np); if (error) return (error); /* Step 1: Reinitialise variables. */ xn_query_features(np); xn_configure_features(np); netif_release_tx_bufs(np); /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. 
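The receive-path decision made earlier in network_connect() amounts to a small truth table over the backend's feature-rx-copy/feature-rx-flip announcements. The sketch below evaluates the same boolean for every combination; the module-parameter values are placeholders chosen for illustration, not the driver's defaults.

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the MODPARM_rx_copy/MODPARM_rx_flip knobs. */
static const bool modparm_rx_copy = true;
static const bool modparm_rx_flip = true;

/*
 * Same expression as network_connect(): copy if copying was requested and
 * the backend supports it, or if flipping was requested but the backend
 * cannot flip.
 */
static bool
use_copying_receiver(bool feature_rx_copy, bool feature_rx_flip)
{
	return ((modparm_rx_copy && feature_rx_copy) ||
	    (modparm_rx_flip && !feature_rx_flip));
}

int
main(void)
{
	int copy, flip;

	for (copy = 0; copy <= 1; copy++)
		for (flip = 0; flip <= 1; flip++)
			printf("feature-rx-copy=%d feature-rx-flip=%d -> %s\n",
			    copy, flip,
			    use_copying_receiver(copy, flip) ?
			    "copy" : "flip/transfer");
	return (0);
}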
*/ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { struct mbuf *m; u_long pfn; if (np->rx_mbufs[i] == NULL) continue; m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i); ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); req = RING_GET_REQUEST(&np->rx, requeue_idx); pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; if (!np->copying_receiver) { gnttab_grant_foreign_transfer_ref(ref, xenbus_get_otherend_id(np->xbdev), pfn); } else { gnttab_grant_foreign_access_ref(ref, xenbus_get_otherend_id(np->xbdev), PFNTOMFN(pfn), 0); } req->gref = ref; req->id = requeue_idx; requeue_idx++; } np->rx.req_prod_pvt = requeue_idx; /* Step 3: All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. */ netfront_carrier_on(np); - notify_remote_via_irq(np->irq); + xen_intr_signal(np->xen_intr_handle); XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); network_alloc_rx_buffers(np); return (0); } static void show_device(struct netfront_info *sc) { #ifdef DEBUG if (sc) { IPRINTK("\n", sc->xn_ifno, be_state_name[sc->xn_backend_state], sc->xn_user_state ? "open" : "closed", sc->xn_evtchn, sc->xn_irq, sc->xn_tx_if, sc->xn_rx_if); } else { IPRINTK("\n"); } #endif } static void xn_query_features(struct netfront_info *np) { int val; device_printf(np->xbdev, "backend features:"); if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-sg", NULL, "%d", &val) < 0) val = 0; np->maxfrags = 1; if (val) { np->maxfrags = MAX_TX_REQ_FRAGS; printf(" feature-sg"); } if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-gso-tcpv4", NULL, "%d", &val) < 0) val = 0; np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); if (val) { np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; printf(" feature-gso-tcp4"); } printf("\n"); } static int xn_configure_features(struct netfront_info *np) { int err; err = 0; #if __FreeBSD_version >= 700000 if ((np->xn_ifp->if_capenable & IFCAP_LRO) != 0) tcp_lro_free(&np->xn_lro); #endif np->xn_ifp->if_capenable = np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4); np->xn_ifp->if_hwassist &= ~CSUM_TSO; #if __FreeBSD_version >= 700000 if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) != 0) { err = tcp_lro_init(&np->xn_lro); if (err) { device_printf(np->xbdev, "LRO initialization failed\n"); } else { np->xn_lro.ifp = np->xn_ifp; np->xn_ifp->if_capenable |= IFCAP_LRO; } } if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) != 0) { np->xn_ifp->if_capenable |= IFCAP_TSO4; np->xn_ifp->if_hwassist |= CSUM_TSO; } #endif return (err); } -/** Create a network device. - * @param handle device handle +/** + * Create a network device. + * @param dev Newbus device representing this virtual NIC. */ int create_netdev(device_t dev) { int i; struct netfront_info *np; int err; struct ifnet *ifp; np = device_get_softc(dev); np->xbdev = dev; XN_LOCK_INIT(np, xennetif); ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); np->rx_target = RX_MIN_TARGET; np->rx_min_target = RX_MIN_TARGET; np->rx_max_target = RX_MAX_TARGET; /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
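A self-contained illustration of the id freelist that the initialisation loop below sets up and that get_id_from_freelist()/add_id_to_freelist() consume elsewhere in the driver: each free slot stores the index of the next free slot (the driver keeps it cast to a pointer inside tx_mbufs[]), entry 0 is the head of the list, and id 0 terminates the chain, which is why a returned id of 0 means exhaustion. The helpers here are standalone stand-ins, not the driver's code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8

/* Free slots hold the index of the next free slot; plain integers here. */
static uintptr_t id_chain[RING_SIZE + 1];

static void
freelist_init(void)
{
	unsigned int i;

	for (i = 0; i <= RING_SIZE; i++)
		id_chain[i] = i + 1;
	id_chain[RING_SIZE] = 0;	/* terminate the chain */
}

static uintptr_t
get_id(void)
{
	uintptr_t id = id_chain[0];

	assert(id != 0);		/* list exhausted otherwise */
	id_chain[0] = id_chain[id];
	return (id);
}

static void
put_id(uintptr_t id)
{
	id_chain[id] = id_chain[0];
	id_chain[0] = id;
}

int
main(void)
{
	freelist_init();
	printf("got %ju\n", (uintmax_t)get_id());	/* 1 */
	printf("got %ju\n", (uintmax_t)get_id());	/* 2 */
	put_id(1);
	printf("got %ju\n", (uintmax_t)get_id());	/* 1 again */
	return (0);
}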
*/ for (i = 0; i <= NET_TX_RING_SIZE; i++) { np->tx_mbufs[i] = (void *) ((u_long) i+1); np->grant_tx_ref[i] = GRANT_REF_INVALID; } np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0; for (i = 0; i <= NET_RX_RING_SIZE; i++) { np->rx_mbufs[i] = NULL; np->grant_rx_ref[i] = GRANT_REF_INVALID; } /* A grant for every tx ring slot */ if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, &np->gref_tx_head) != 0) { IPRINTK("#### netfront can't alloc tx grant refs\n"); err = ENOMEM; goto exit; } /* A grant for every rx ring slot */ if (gnttab_alloc_grant_references(RX_MAX_TARGET, &np->gref_rx_head) != 0) { WPRINTK("#### netfront can't alloc rx grant refs\n"); gnttab_free_grant_references(np->gref_tx_head); err = ENOMEM; goto exit; } err = xen_net_read_mac(dev, np->mac); if (err) goto out; /* Set up ifnet structure */ ifp = np->xn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = np; if_initname(ifp, "xn", device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xn_ioctl; ifp->if_output = ether_output; ifp->if_start = xn_start; #ifdef notyet ifp->if_watchdog = xn_watchdog; #endif ifp->if_init = xn_ifinit; ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; ifp->if_hwassist = XN_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_hw_tsomax = NF_TSO_MAXBURST; ether_ifattach(ifp, np->mac); callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); netfront_carrier_off(np); return (0); exit: gnttab_free_grant_references(np->gref_tx_head); out: return (err); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once is this done, we can switch to Closed in * acknowledgement. */ #if 0 static void netfront_closing(device_t dev) { #if 0 struct netfront_info *info = dev->dev_driver_data; DPRINTK("netfront_closing: %s removed\n", dev->nodename); close_netdev(info); #endif xenbus_switch_state(dev, XenbusStateClosed); } #endif static int netfront_detach(device_t dev) { struct netfront_info *info = device_get_softc(dev); DPRINTK("%s\n", xenbus_get_node(dev)); netif_free(info); return 0; } static void netif_free(struct netfront_info *info) { XN_LOCK(info); xn_stop(info); XN_UNLOCK(info); callout_drain(&info->xn_stat_ch); netif_disconnect_backend(info); if (info->xn_ifp != NULL) { ether_ifdetach(info->xn_ifp); if_free(info->xn_ifp); info->xn_ifp = NULL; } ifmedia_removeall(&info->sc_media); } static void netif_disconnect_backend(struct netfront_info *info) { XN_RX_LOCK(info); XN_TX_LOCK(info); netfront_carrier_off(info); XN_TX_UNLOCK(info); XN_RX_UNLOCK(info); free_ring(&info->tx_ring_ref, &info->tx.sring); free_ring(&info->rx_ring_ref, &info->rx.sring); - if (info->irq) - unbind_from_irqhandler(info->irq); - - info->irq = 0; + xen_intr_unbind(&info->xen_intr_handle); } static void free_ring(int *ref, void *ring_ptr_ref) { void **ring_ptr_ptr = ring_ptr_ref; if (*ref != GRANT_REF_INVALID) { /* This API frees the associated storage. 
*/ gnttab_end_foreign_access(*ref, *ring_ptr_ptr); *ref = GRANT_REF_INVALID; } *ring_ptr_ptr = NULL; } static int xn_ifmedia_upd(struct ifnet *ifp) { return (0); } static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /* ** Driver registration ** */ static device_method_t netfront_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netfront_probe), DEVMETHOD(device_attach, netfront_attach), DEVMETHOD(device_detach, netfront_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, netfront_suspend), DEVMETHOD(device_resume, netfront_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), DEVMETHOD_END }; static driver_t netfront_driver = { "xn", netfront_methods, sizeof(struct netfront_info), }; devclass_t netfront_devclass; DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, NULL); diff --git a/sys/dev/xen/xenpci/evtchn.c b/sys/dev/xen/xenpci/evtchn.c deleted file mode 100644 index 2d9dd6d664d1..000000000000 --- a/sys/dev/xen/xenpci/evtchn.c +++ /dev/null @@ -1,467 +0,0 @@ -/****************************************************************************** - * evtchn.c - * - * A simplified event channel for para-drivers in unmodified linux - * - * Copyright (c) 2002-2005, K A Fraser - * Copyright (c) 2005, Intel Corporation - * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#if defined(__i386__) -#define __ffs(word) (ffs(word) - 1) -#elif defined(__amd64__) -static inline unsigned long __ffs(unsigned long word) -{ - __asm__("bsfq %1,%0" - :"=r" (word) - :"rm" (word)); /* XXXRW: why no "cc"? */ - return word; -} -#else -#error "evtchn: unsupported architecture" -#endif - -#define is_valid_evtchn(x) ((x) != 0) -#define evtchn_from_irq(x) (irq_evtchn[irq].evtchn) - -static struct { - struct mtx lock; - driver_intr_t *handler; - void *arg; - int evtchn; - int close:1; /* close on unbind_from_irqhandler()? */ - int inuse:1; - int in_handler:1; - int mpsafe:1; -} irq_evtchn[256]; -static int evtchn_to_irq[NR_EVENT_CHANNELS] = { - [0 ... 
NR_EVENT_CHANNELS-1] = -1 }; - -static struct mtx irq_alloc_lock; -static device_t xenpci_device; - -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - -static unsigned int -alloc_xen_irq(void) -{ - static int warned; - unsigned int irq; - - mtx_lock(&irq_alloc_lock); - - for (irq = 1; irq < ARRAY_SIZE(irq_evtchn); irq++) { - if (irq_evtchn[irq].inuse) - continue; - irq_evtchn[irq].inuse = 1; - mtx_unlock(&irq_alloc_lock); - return irq; - } - - if (!warned) { - warned = 1; - printf("alloc_xen_irq: No available IRQ to bind to: " - "increase irq_evtchn[] size in evtchn.c.\n"); - } - - mtx_unlock(&irq_alloc_lock); - - return -ENOSPC; -} - -static void -free_xen_irq(int irq) -{ - - mtx_lock(&irq_alloc_lock); - irq_evtchn[irq].inuse = 0; - mtx_unlock(&irq_alloc_lock); -} - -int -irq_to_evtchn_port(int irq) -{ - - return irq_evtchn[irq].evtchn; -} - -void -mask_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - - synch_set_bit(port, &s->evtchn_mask[0]); -} - -void -unmask_evtchn(int port) -{ - evtchn_unmask_t op = { .port = port }; - - HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op); -} - -int -bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) -{ - struct evtchn_alloc_unbound alloc_unbound; - unsigned int irq; - int error; - - irq = alloc_xen_irq(); - if (irq < 0) - return irq; - - mtx_lock(&irq_evtchn[irq].lock); - - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = remote_domain; - error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (error) { - mtx_unlock(&irq_evtchn[irq].lock); - free_xen_irq(irq); - return (-error); - } - - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = alloc_unbound.port; - irq_evtchn[irq].close = 1; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; - - evtchn_to_irq[alloc_unbound.port] = irq; - - unmask_evtchn(alloc_unbound.port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; - return (0); -} - -int -bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp) -{ - struct evtchn_bind_interdomain bind_interdomain; - unsigned int irq; - int error; - - irq = alloc_xen_irq(); - if (irq < 0) - return irq; - - mtx_lock(&irq_evtchn[irq].lock); - - bind_interdomain.remote_dom = remote_domain; - bind_interdomain.remote_port = remote_port; - error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (error) { - mtx_unlock(&irq_evtchn[irq].lock); - free_xen_irq(irq); - return (-error); - } - - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = bind_interdomain.local_port; - irq_evtchn[irq].close = 1; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; - - evtchn_to_irq[bind_interdomain.local_port] = irq; - - unmask_evtchn(bind_interdomain.local_port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; - return (0); -} - - -int -bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - - irq = alloc_xen_irq(); - if (irq < 0) - return irq; - - mtx_lock(&irq_evtchn[irq].lock); - - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = caller_port; - 
irq_evtchn[irq].close = 0; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; - - evtchn_to_irq[caller_port] = irq; - - unmask_evtchn(caller_port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; - return (0); -} - -void -unbind_from_irqhandler(unsigned int irq) -{ - int evtchn; - - mtx_lock(&irq_evtchn[irq].lock); - - evtchn = evtchn_from_irq(irq); - - if (is_valid_evtchn(evtchn)) { - evtchn_to_irq[evtchn] = -1; - mask_evtchn(evtchn); - if (irq_evtchn[irq].close) { - struct evtchn_close close = { .port = evtchn }; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) - panic("EVTCHNOP_close failed"); - } - } - - irq_evtchn[irq].handler = NULL; - irq_evtchn[irq].evtchn = 0; - - mtx_unlock(&irq_evtchn[irq].lock); - - while (irq_evtchn[irq].in_handler) - cpu_relax(); - - free_xen_irq(irq); -} - -void notify_remote_via_irq(int irq) -{ - int evtchn; - - evtchn = evtchn_from_irq(irq); - if (is_valid_evtchn(evtchn)) - notify_remote_via_evtchn(evtchn); -} - -static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh, - unsigned int idx) -{ - return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]); -} - -static void -evtchn_interrupt(void *arg) -{ - unsigned int l1i, l2i, port; - unsigned long masked_l1, masked_l2; - /* XXX: All events are bound to vcpu0 but irq may be redirected. */ - int cpu = 0; /*smp_processor_id();*/ - driver_intr_t *handler; - void *handler_arg; - int irq, handler_mpsafe; - shared_info_t *s = HYPERVISOR_shared_info; - vcpu_info_t *v = &s->vcpu_info[cpu]; - struct pcpu *pc = pcpu_find(cpu); - unsigned long l1, l2; - - v->evtchn_upcall_pending = 0; - -#if 0 -#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ - /* Clear master flag /before/ clearing selector flag. 
*/ - wmb(); -#endif -#endif - - l1 = atomic_readandclear_long(&v->evtchn_pending_sel); - - l1i = pc->pc_last_processed_l1i; - l2i = pc->pc_last_processed_l2i; - - while (l1 != 0) { - - l1i = (l1i + 1) % LONG_BIT; - masked_l1 = l1 & ((~0UL) << l1i); - - if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */ - l1i = LONG_BIT - 1; - l2i = LONG_BIT - 1; - continue; - } - l1i = __ffs(masked_l1); - - do { - l2 = active_evtchns(cpu, s, l1i); - - l2i = (l2i + 1) % LONG_BIT; - masked_l2 = l2 & ((~0UL) << l2i); - - if (masked_l2 == 0) { /* if we masked out all events, move on */ - l2i = LONG_BIT - 1; - break; - } - l2i = __ffs(masked_l2); - - /* process port */ - port = (l1i * LONG_BIT) + l2i; - synch_clear_bit(port, &s->evtchn_pending[0]); - - irq = evtchn_to_irq[port]; - if (irq < 0) - continue; - - mtx_lock(&irq_evtchn[irq].lock); - handler = irq_evtchn[irq].handler; - handler_arg = irq_evtchn[irq].arg; - handler_mpsafe = irq_evtchn[irq].mpsafe; - if (unlikely(handler == NULL)) { - printf("Xen IRQ%d (port %d) has no handler!\n", - irq, port); - mtx_unlock(&irq_evtchn[irq].lock); - continue; - } - irq_evtchn[irq].in_handler = 1; - mtx_unlock(&irq_evtchn[irq].lock); - - //local_irq_enable(); - if (!handler_mpsafe) - mtx_lock(&Giant); - handler(handler_arg); - if (!handler_mpsafe) - mtx_unlock(&Giant); - //local_irq_disable(); - - mtx_lock(&irq_evtchn[irq].lock); - irq_evtchn[irq].in_handler = 0; - mtx_unlock(&irq_evtchn[irq].lock); - - /* if this is the final port processed, we'll pick up here+1 next time */ - pc->pc_last_processed_l1i = l1i; - pc->pc_last_processed_l2i = l2i; - - } while (l2i != LONG_BIT - 1); - - l2 = active_evtchns(cpu, s, l1i); - if (l2 == 0) /* we handled all ports, so we can clear the selector bit */ - l1 &= ~(1UL << l1i); - } -} - -void -irq_suspend(void) -{ - struct xenpci_softc *scp = device_get_softc(xenpci_device); - - /* - * Take our interrupt handler out of the list of handlers - * that can handle this irq. - */ - if (scp->intr_cookie != NULL) { - if (BUS_TEARDOWN_INTR(device_get_parent(xenpci_device), - xenpci_device, scp->res_irq, scp->intr_cookie) != 0) - printf("intr teardown failed.. 
continuing\n"); - scp->intr_cookie = NULL; - } -} - -void -irq_resume(void) -{ - struct xenpci_softc *scp = device_get_softc(xenpci_device); - int evtchn, irq; - - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) { - mask_evtchn(evtchn); - evtchn_to_irq[evtchn] = -1; - } - - for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) - irq_evtchn[irq].evtchn = 0; - - BUS_SETUP_INTR(device_get_parent(xenpci_device), - xenpci_device, scp->res_irq, INTR_TYPE_MISC, - NULL, evtchn_interrupt, NULL, &scp->intr_cookie); -} - -int -xenpci_irq_init(device_t device, struct xenpci_softc *scp) -{ - int irq, cpu; - int error; - - mtx_init(&irq_alloc_lock, "xen-irq-lock", NULL, MTX_DEF); - - for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) - mtx_init(&irq_evtchn[irq].lock, "irq-evtchn", NULL, MTX_DEF); - - for (cpu = 0; cpu < mp_ncpus; cpu++) { - pcpu_find(cpu)->pc_last_processed_l1i = LONG_BIT - 1; - pcpu_find(cpu)->pc_last_processed_l2i = LONG_BIT - 1; - } - - error = BUS_SETUP_INTR(device_get_parent(device), device, - scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, NULL, evtchn_interrupt, - NULL, &scp->intr_cookie); - if (error) - return (error); - - xenpci_device = device; - - return (0); -} diff --git a/sys/dev/xen/xenpci/xenpci.c b/sys/dev/xen/xenpci/xenpci.c index 2d7467691577..0b1762df03b4 100644 --- a/sys/dev/xen/xenpci/xenpci.c +++ b/sys/dev/xen/xenpci/xenpci.c @@ -1,458 +1,362 @@ /* * Copyright (c) 2008 Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include -#include -#include -#include #include #include #include #include -#include + +#include #include #include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include - #include -/* - * These variables are used by the rest of the kernel to access the - * hypervisor. - */ -char *hypercall_stubs; -shared_info_t *HYPERVISOR_shared_info; -static vm_paddr_t shared_info_pa; +extern void xen_intr_handle_upcall(struct trapframe *trap_frame); + static device_t nexus; /* * This is used to find our platform device instance. */ static devclass_t xenpci_devclass; -/* - * Return the CPUID base address for Xen functions. 
- */ -static uint32_t -xenpci_cpuid_base(void) +static int +xenpci_intr_filter(void *trap_frame) { - uint32_t base, regs[4]; - - for (base = 0x40000000; base < 0x40010000; base += 0x100) { - do_cpuid(base, regs); - if (!memcmp("XenVMMXenVMM", ®s[1], 12) - && (regs[0] - base) >= 2) - return (base); - } - return (0); + xen_intr_handle_upcall(trap_frame); + return (FILTER_HANDLED); } -/* - * Allocate and fill in the hypcall page. - */ static int -xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp) +xenpci_irq_init(device_t device, struct xenpci_softc *scp) { - uint32_t base, regs[4]; - int i; - - base = xenpci_cpuid_base(); - if (!base) { - device_printf(dev, "Xen platform device but not Xen VMM\n"); - return (EINVAL); - } + int error; - if (bootverbose) { - do_cpuid(base + 1, regs); - device_printf(dev, "Xen version %d.%d.\n", - regs[0] >> 16, regs[0] & 0xffff); - } + error = BUS_SETUP_INTR(device_get_parent(device), device, + scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, + xenpci_intr_filter, NULL, /*trap_frame*/NULL, + &scp->intr_cookie); + if (error) + return error; /* - * Find the hypercall pages. + * When using the PCI event delivery callback we cannot assign + * events to specific vCPUs, so all events are delivered to vCPU#0 by + * Xen. Since the PCI interrupt can fire on any CPU by default, we + * need to bind it to vCPU#0 in order to ensure that + * xen_intr_handle_upcall always gets called on vCPU#0. */ - do_cpuid(base + 2, regs); - - hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK); - - for (i = 0; i < regs[0]; i++) { - wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); - } + error = BUS_BIND_INTR(device_get_parent(device), device, + scp->res_irq, 0); + if (error) + return error; + xen_hvm_set_callback(device); return (0); } -/* - * After a resume, re-initialise the hypercall page. - */ -static void -xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp) -{ - uint32_t base, regs[4]; - int i; - - base = xenpci_cpuid_base(); - - do_cpuid(base + 2, regs); - for (i = 0; i < regs[0]; i++) { - wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); - } -} - -/* - * Tell the hypervisor how to contact us for event channel callbacks. - */ -static void -xenpci_set_callback(device_t dev) -{ - int irq; - uint64_t callback; - struct xen_hvm_param xhp; - - irq = pci_get_irq(dev); - if (irq < 16) { - callback = irq; - } else { - callback = (pci_get_intpin(dev) - 1) & 3; - callback |= pci_get_slot(dev) << 11; - callback |= 1ull << 56; - } - - xhp.domid = DOMID_SELF; - xhp.index = HVM_PARAM_CALLBACK_IRQ; - xhp.value = callback; - if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp)) - panic("Can't set evtchn callback"); -} - - /* * Deallocate anything allocated by xenpci_allocate_resources. */ static int xenpci_deallocate_resources(device_t dev) { struct xenpci_softc *scp = device_get_softc(dev); if (scp->res_irq != 0) { bus_deactivate_resource(dev, SYS_RES_IRQ, scp->rid_irq, scp->res_irq); bus_release_resource(dev, SYS_RES_IRQ, scp->rid_irq, scp->res_irq); scp->res_irq = 0; } if (scp->res_memory != 0) { bus_deactivate_resource(dev, SYS_RES_MEMORY, scp->rid_memory, scp->res_memory); bus_release_resource(dev, SYS_RES_MEMORY, scp->rid_memory, scp->res_memory); scp->res_memory = 0; } return (0); } /* * Allocate irq and memory resources. 
*/ static int xenpci_allocate_resources(device_t dev) { struct xenpci_softc *scp = device_get_softc(dev); scp->res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &scp->rid_irq, RF_SHAREABLE|RF_ACTIVE); if (scp->res_irq == NULL) { printf("xenpci Could not allocate irq.\n"); goto errexit; } scp->rid_memory = PCIR_BAR(1); scp->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &scp->rid_memory, RF_ACTIVE); if (scp->res_memory == NULL) { printf("xenpci Could not allocate memory bar.\n"); goto errexit; } scp->phys_next = rman_get_start(scp->res_memory); return (0); errexit: /* Cleanup anything we may have assigned. */ xenpci_deallocate_resources(dev); return (ENXIO); /* For want of a better idea. */ } /* * Allocate a physical address range from our mmio region. */ static int xenpci_alloc_space_int(struct xenpci_softc *scp, size_t sz, vm_paddr_t *pa) { if (scp->phys_next + sz > rman_get_end(scp->res_memory)) { return (ENOMEM); } *pa = scp->phys_next; scp->phys_next += sz; return (0); } /* * Allocate a physical address range from our mmio region. */ int xenpci_alloc_space(size_t sz, vm_paddr_t *pa) { device_t dev = devclass_get_device(xenpci_devclass, 0); if (dev) { return (xenpci_alloc_space_int(device_get_softc(dev), sz, pa)); } else { return (ENOMEM); } } static struct resource * xenpci_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { return (BUS_ALLOC_RESOURCE(nexus, child, type, rid, start, end, count, flags)); } static int xenpci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { return (BUS_RELEASE_RESOURCE(nexus, child, type, rid, r)); } static int xenpci_activate_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { return (BUS_ACTIVATE_RESOURCE(nexus, child, type, rid, r)); } static int xenpci_deactivate_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { return (BUS_DEACTIVATE_RESOURCE(nexus, child, type, rid, r)); } -/* - * Called very early in the resume sequence - reinitialise the various - * bits of Xen machinery including the hypercall page and the shared - * info page. - */ -void -xenpci_resume() -{ - device_t dev = devclass_get_device(xenpci_devclass, 0); - struct xenpci_softc *scp = device_get_softc(dev); - struct xen_add_to_physmap xatp; - - xenpci_resume_hypercall_stubs(dev, scp); - - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = shared_info_pa >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - panic("HYPERVISOR_memory_op failed"); - - pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa); - - xenpci_set_callback(dev); - - gnttab_resume(); - irq_resume(); -} - /* * Probe - just check device ID. */ static int xenpci_probe(device_t dev) { if (pci_get_devid(dev) != 0x00015853) return (ENXIO); device_set_desc(dev, "Xen Platform Device"); return (bus_generic_probe(dev)); } /* * Attach - find resources and talk to Xen. */ static int xenpci_attach(device_t dev) { - int error; struct xenpci_softc *scp = device_get_softc(dev); - struct xen_add_to_physmap xatp; - vm_offset_t shared_va; devclass_t dc; + int error; /* * Find and record nexus0. Since we are not really on the * PCI bus, all resource operations are directed to nexus * instead of through our parent. 
*/ if ((dc = devclass_find("nexus")) == 0 || (nexus = devclass_get_device(dc, 0)) == 0) { device_printf(dev, "unable to find nexus."); return (ENOENT); } error = xenpci_allocate_resources(dev); if (error) { device_printf(dev, "xenpci_allocate_resources failed(%d).\n", error); goto errexit; } - error = xenpci_init_hypercall_stubs(dev, scp); - if (error) { - device_printf(dev, "xenpci_init_hypercall_stubs failed(%d).\n", - error); - goto errexit; - } - - setup_xen_features(); - - xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa); - - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = shared_info_pa >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - panic("HYPERVISOR_memory_op failed"); - - shared_va = kva_alloc(PAGE_SIZE); - pmap_kenter(shared_va, shared_info_pa); - HYPERVISOR_shared_info = (void *) shared_va; - /* * Hook the irq up to evtchn */ - xenpci_irq_init(dev, scp); - xenpci_set_callback(dev); + error = xenpci_irq_init(dev, scp); + if (error) { + device_printf(dev, "xenpci_irq_init failed(%d).\n", + error); + goto errexit; + } return (bus_generic_attach(dev)); errexit: /* * Undo anything we may have done. */ xenpci_deallocate_resources(dev); return (error); } /* * Detach - reverse anything done by attach. */ static int xenpci_detach(device_t dev) { struct xenpci_softc *scp = device_get_softc(dev); device_t parent = device_get_parent(dev); /* * Take our interrupt handler out of the list of handlers * that can handle this irq. */ if (scp->intr_cookie != NULL) { if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq, scp->intr_cookie) != 0) device_printf(dev, "intr teardown failed.. continuing\n"); scp->intr_cookie = NULL; } /* * Deallocate any system resources we may have * allocated on behalf of this driver. */ return (xenpci_deallocate_resources(dev)); } +static int +xenpci_suspend(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + if (scp->intr_cookie != NULL) { + if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq, + scp->intr_cookie) != 0) + printf("intr teardown failed.. 
continuing\n"); + scp->intr_cookie = NULL; + } + + return (bus_generic_suspend(dev)); +} + +static int +xenpci_resume(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + BUS_SETUP_INTR(parent, dev, scp->res_irq, + INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL, + /*trap_frame*/NULL, &scp->intr_cookie); + xen_hvm_set_callback(dev); + return (bus_generic_resume(dev)); +} + static device_method_t xenpci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xenpci_probe), DEVMETHOD(device_attach, xenpci_attach), DEVMETHOD(device_detach, xenpci_detach), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), + DEVMETHOD(device_suspend, xenpci_suspend), + DEVMETHOD(device_resume, xenpci_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, xenpci_alloc_resource), DEVMETHOD(bus_release_resource, xenpci_release_resource), DEVMETHOD(bus_activate_resource, xenpci_activate_resource), DEVMETHOD(bus_deactivate_resource, xenpci_deactivate_resource), { 0, 0 } }; static driver_t xenpci_driver = { "xenpci", xenpci_methods, sizeof(struct xenpci_softc), }; DRIVER_MODULE(xenpci, pci, xenpci_driver, xenpci_devclass, 0, 0); diff --git a/sys/dev/xen/xenpci/xenpcivar.h b/sys/dev/xen/xenpci/xenpcivar.h index a57c080b31d3..527a291c8801 100644 --- a/sys/dev/xen/xenpci/xenpcivar.h +++ b/sys/dev/xen/xenpci/xenpcivar.h @@ -1,44 +1,43 @@ /* * Copyright (c) 2008 Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $FreeBSD$ */ /* * One of these per allocated device. */ struct xenpci_softc { int rid_ioport; int rid_memory; int rid_irq; struct resource* res_memory; /* Resource for mem range. */ struct resource* res_irq; /* Resource for irq range. 
*/ void *intr_cookie; vm_paddr_t phys_next; /* next page from mem range */ }; -extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp); extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa); -extern void xenpci_resume(void); -extern void xen_suspend(void); diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index 618436d830ac..adedbc4999f6 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -1,396 +1,415 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD$ */ /* * Interrupt entry points for external interrupts triggered by I/O APICs * as well as IPI handlers. */ #include "opt_smp.h" #include #include #include "assym.s" /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word * in the ISR. The handler determines the highest bit set in the ISR, * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. */ #define ISR_VEC(index, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ SET_KERNEL_SREGS ; \ cld ; \ FAKE_MCOUNT(TF_EIP(%esp)) ; \ movl lapic, %edx ; /* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ jz 1f ; \ addl $(32 * index),%eax ; \ pushl %esp ; \ pushl %eax ; /* pass the IRQ */ \ call lapic_handle_intr ; \ addl $8, %esp ; /* discard parameter */ \ 1: ; \ MEXITCOUNT ; \ jmp doreti /* * Handle "spurious INTerrupts". * Notes: * This is different than the "spurious INTerrupt" generated by an * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. 
*/ .text SUPERALIGN_TEXT IDTVEC(spuriousint) /* No EOI cycle used here */ iret ISR_VEC(1, apic_isr1) ISR_VEC(2, apic_isr2) ISR_VEC(3, apic_isr3) ISR_VEC(4, apic_isr4) ISR_VEC(5, apic_isr5) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) /* * Local APIC periodic timer handler. */ .text SUPERALIGN_TEXT IDTVEC(timerint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call lapic_handle_timer add $4, %esp MEXITCOUNT jmp doreti /* * Local APIC CMCI handler. */ .text SUPERALIGN_TEXT IDTVEC(cmcint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_cmc MEXITCOUNT jmp doreti /* * Local APIC error interrupt handler. */ .text SUPERALIGN_TEXT IDTVEC(errorint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_error MEXITCOUNT jmp doreti +#ifdef XENHVM +/* + * Xen event channel upcall interrupt handler. + * Only used when the hypervisor supports direct vector callbacks. + */ + .text + SUPERALIGN_TEXT +IDTVEC(xen_intr_upcall) + PUSH_FRAME + SET_KERNEL_SREGS + cld + FAKE_MCOUNT(TF_EIP(%esp)) + pushl %esp + call xen_intr_handle_upcall + add $4, %esp + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invltlb) pushl %eax pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs movl $KPSEL, %eax /* Private space selector */ movl %eax, %fs movl PCPU(CPUID), %eax popl %fs #ifdef COUNT_XINVLTLB_HITS incl xhits_gbl(,%eax,4) #endif #ifdef COUNT_IPIS movl ipi_invltlb_counts(,%eax,4),%eax incl (%eax) #endif #endif movl %cr3, %eax /* invalidate the TLB */ movl %eax, %cr3 movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popl %ds popl %eax iret /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT IDTVEC(invlpg) pushl %eax pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs movl $KPSEL, %eax /* Private space selector */ movl %eax, %fs movl PCPU(CPUID), %eax popl %fs #ifdef COUNT_XINVLTLB_HITS incl xhits_pg(,%eax,4) #endif #ifdef COUNT_IPIS movl ipi_invlpg_counts(,%eax,4),%eax incl (%eax) #endif #endif movl smp_tlb_addr1, %eax invlpg (%eax) /* invalidate single page */ movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popl %ds popl %eax iret /* * Page range TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invlrng) pushl %eax pushl %edx pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs movl $KPSEL, %eax /* Private space selector */ movl %eax, %fs movl PCPU(CPUID), %eax popl %fs #ifdef COUNT_XINVLTLB_HITS incl xhits_rng(,%eax,4) #endif #ifdef COUNT_IPIS movl ipi_invlrng_counts(,%eax,4),%eax incl (%eax) #endif #endif movl smp_tlb_addr1, %edx movl smp_tlb_addr2, %eax 1: invlpg (%edx) /* invalidate single page */ addl $PAGE_SIZE, %edx cmpl %eax, %edx jb 1b movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popl %ds popl %edx popl %eax iret /* * Invalidate cache. 
*/ .text SUPERALIGN_TEXT IDTVEC(invlcache) pushl %eax pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds #ifdef COUNT_IPIS pushl %fs movl $KPSEL, %eax /* Private space selector */ movl %eax, %fs movl PCPU(CPUID), %eax popl %fs movl ipi_invlcache_counts(,%eax,4),%eax incl (%eax) #endif wbinvd movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popl %ds popl %eax iret /* * Handler for IPIs sent via the per-cpu IPI bitmap. */ #ifndef XEN .text SUPERALIGN_TEXT IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME SET_KERNEL_SREGS cld movl lapic, %edx movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(TF_EIP(%esp)) call ipi_bitmap_handler MEXITCOUNT jmp doreti #endif /* * Executed by a CPU when it receives an IPI_STOP from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpustop) PUSH_FRAME SET_KERNEL_SREGS cld movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ call cpustop_handler POP_FRAME iret /* * Executed by a CPU when it receives an IPI_SUSPEND from another CPU. */ #ifndef XEN .text SUPERALIGN_TEXT IDTVEC(cpususpend) PUSH_FRAME SET_KERNEL_SREGS cld movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ call cpususpend_handler POP_FRAME jmp doreti_iret #endif /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. * * - Calls the generic rendezvous action function. */ .text SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME SET_KERNEL_SREGS cld #ifdef COUNT_IPIS movl PCPU(CPUID), %eax movl ipi_rendezvous_counts(,%eax,4), %eax incl (%eax) #endif call smp_rendezvous_action movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ POP_FRAME iret /* * Clean up when we lose out on the lazy context switch optimization. * ie: when we are about to release a PTD but a cpu is still borrowing it. */ SUPERALIGN_TEXT IDTVEC(lazypmap) PUSH_FRAME SET_KERNEL_SREGS cld call pmap_lazyfix_action movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ POP_FRAME iret #endif /* SMP */ diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 9a710e57f969..c43031644f8b 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1,3730 +1,3743 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atalk.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_xbox.h" #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif #ifdef XEN /* XEN includes */ -#include +#include #include -#include #include #include #include void Xhypervisor_callback(void); void failsafe_callback(void); extern trap_info_t trap_table[]; struct proc_ldt default_proc_ldt; extern int init_first; int running_xen = 1; extern unsigned long physfree; #endif /* XEN */ /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 
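The two macros just defined, CS_SECURE() and EFL_SECURE(), are what the sigreturn paths later in this file use to reject a forged signal context: a code selector is accepted only if its privilege level is ring 3, and a proposed %eflags is accepted only if every bit it changes lies inside PSL_USERCHANGE. The standalone sketch below shows the same checks; DEMO_USERCHANGE and DEMO_SEL_UPL are illustrative values, the real ones come from machine/psl.h and machine/segments.h.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for PSL_USERCHANGE and SEL_UPL. */
#define DEMO_USERCHANGE	0x00000cd5u	/* arithmetic flags, DF, OF */
#define DEMO_SEL_UPL	3u		/* requested privilege level 3 */

/* Accept a new eflags only if every changed bit is user-changeable. */
static int
efl_secure(uint32_t new_efl, uint32_t old_efl)
{
	return (((new_efl ^ old_efl) & ~DEMO_USERCHANGE) == 0);
}

/* Accept only a ring-3 code selector (the low two bits are the RPL). */
static int
cs_secure(uint32_t cs)
{
	return ((cs & 3) == DEMO_SEL_UPL);
}

int
main(void)
{
	uint32_t live = 0x00000202;	/* a typical user-mode eflags value */

	printf("toggle carry flag:  %d\n", efl_secure(live ^ 0x1, live));
	printf("raise IOPL to 3:    %d\n", efl_secure(live | 0x3000, live));
	printf("user %%cs (RPL 3):   %d\n", cs_secure(0x1b));
	printf("kernel %%cs (RPL 0): %d\n", cs_secure(0x08));
	return (0);
}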
int _udatasel, _ucodesel; u_int basemem; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif realmem = Maxmem; /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifndef XEN cpu_setregs(); #endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. 
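The comment above summarizes what the sendsig() family below does with the user stack. Concretely, each variant places the signal frame either at the top of the alternate signal stack (when one is active for the signal) or immediately below the interrupted %esp, and the native sendsig() additionally rounds the address down to a 16-byte boundary. A standalone sketch of that placement arithmetic; struct demo_sigframe and its size are illustrative, not the real struct sigframe layout.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_sigframe {		/* illustrative stand-in for struct sigframe */
	uint32_t sf_signum;
	uint32_t sf_siginfo;
	uint32_t sf_ucontext;
	uint8_t	 sf_uc[512];	/* saved context; size chosen for the demo */
};

/* Pick the frame address: alternate stack if active, else below user %esp. */
static uintptr_t
place_frame(uintptr_t user_esp, uintptr_t altstack_base, size_t altstack_size,
    int on_altstack)
{
	uintptr_t sp;

	if (on_altstack)
		sp = altstack_base + altstack_size - sizeof(struct demo_sigframe);
	else
		sp = user_esp - sizeof(struct demo_sigframe);
	return (sp & ~(uintptr_t)0xF);	/* 16-byte alignment, as sendsig() does */
}

int
main(void)
{
	printf("on the user stack: %#lx\n",
	    (unsigned long)place_frame(0xbfbfe123, 0, 0, 0));
	printf("on the alt stack:  %#lx\n",
	    (unsigned long)place_frame(0xbfbfe123, 0x20000000, 8192, 1));
	return (0);
}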
*/ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. 
*/ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. 
This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sf.sf_uc.uc_mcontext.mc_flags = 0; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. 
*/ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. 
Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. 
* Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct trapframe *regs; ucontext_t *ucp; int cs, eflags, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
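One detail of the vm86 branches above that is easy to miss: the user-proposed %eflags is never taken wholesale. Bits inside VM_USERCHANGE (or VME_USERCHANGE when VME is available) come from the user context, everything else is kept from the live trapframe, and PSL_VM is forced back on. The bit-merge idiom in isolation, with made-up values standing in for the real masks:

#include <stdint.h>
#include <stdio.h>

/* Take the masked bits from 'proposed' and keep the rest from 'live'. */
static uint32_t
merge_flags(uint32_t live, uint32_t proposed, uint32_t mask)
{
	return ((live & ~mask) | (proposed & mask));
}

int
main(void)
{
	/* Illustrative values; the kernel uses VM_USERCHANGE/VME_USERCHANGE. */
	uint32_t live = 0x00023202, proposed = 0x00000a97, mask = 0x00000cd5;

	printf("merged eflags: %#010x\n",
	    (unsigned)merge_flags(live, proposed, mask));
	return (0);
}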
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* Not applicable */ } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { uint64_t tsc1, tsc2; uint64_t acnt, mcnt, perf; register_t reg; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if ((cpu_feature & CPUID_TSC) == 0) return (EOPNOTSUPP); /* * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, * DELAY(9) based logic fails. */ if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); } #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); if (tsc_is_invariant) { wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); tsc1 = rdtsc(); DELAY(1000); mcnt = rdmsr(MSR_MPERF); acnt = rdmsr(MSR_APERF); tsc2 = rdtsc(); intr_restore(reg); perf = 1000 * acnt / mcnt; *rate = (tsc2 - tsc1) * perf; } else { tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); *rate = (tsc2 - tsc1) * 1000; } #ifdef SMP if (smp_cpus > 1) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } #endif return (0); } #ifdef XEN +static void +idle_block(void) +{ + + HYPERVISOR_sched_op(SCHEDOP_block, 0); +} + void cpu_halt(void) { HYPERVISOR_shutdown(SHUTDOWN_poweroff); } int scheduler_running; static void cpu_idle_hlt(sbintime_t sbt) { scheduler_running = 1; enable_intr(); idle_block(); } #else /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) halt(); } #endif void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ TUNABLE_INT("machdep.idle_mwait", &idle_mwait); SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, 0, "Use MONITOR/MWAIT for short idle"); #define STATE_RUNNING 0x0 #define STATE_MWAIT 0x1 #define STATE_SLEEPING 0x2 static void cpu_idle_acpi(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* See comments in cpu_idle_hlt(). 
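cpu_est_clockrate() above brackets a 1000-microsecond DELAY() with TSC reads, so multiplying the observed delta by 1000 converts ticks-per-millisecond into Hz; on invariant-TSC CPUs the APERF/MPERF ratio is folded in so the reported rate reflects the effective (possibly throttled) frequency rather than the reference one. A worked example of just the arithmetic, with made-up counter readings:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Pretend readings taken around a 1000us delay (values are made up). */
	uint64_t tsc1 = 1000000, tsc2 = 3400000;	/* 2.4M TSC ticks per ms */
	uint64_t aperf = 1800000, mperf = 2400000;	/* ran at 75% of reference */
	uint64_t perf, rate;

	/* Plain path: ticks per millisecond times 1000 gives Hz. */
	rate = (tsc2 - tsc1) * 1000;
	printf("plain estimate:      %llu Hz\n", (unsigned long long)rate);

	/* Invariant-TSC path: scale by APERF/MPERF (the 1000 is folded in). */
	perf = 1000 * aperf / mperf;
	rate = (tsc2 - tsc1) * perf;
	printf("APERF/MPERF scaled:  %llu Hz\n", (unsigned long long)rate);
	return (0);
}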
*/ disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(sbt); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } #ifndef XEN static void cpu_idle_hlt(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* * Since we may be in a critical section from cpu_idle(), if * an interrupt fires during that critical section we may have * a pending preemption. If the CPU halts, then that thread * may not execute until a later interrupt awakens the CPU. * To handle this race, check for a runnable thread after * disabling interrupts and immediately return if one is * found. Also, we must absolutely guarentee that hlt is * the next instruction after sti. This ensures that any * interrupt that fires after the call to disable_intr() will * immediately awaken the CPU from hlt. Finally, please note * that on x86 this works fine because of interrupts enabled only * after the instruction following sti takes place, while IF is set * to 1 immediately, allowing hlt instruction to acknowledge the * interrupt. */ disable_intr(); if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } #endif /* * MWAIT cpu power states. Lower 4 bits are sub-states. */ #define MWAIT_C0 0xf0 #define MWAIT_C1 0x00 #define MWAIT_C2 0x10 #define MWAIT_C3 0x20 #define MWAIT_C4 0x30 static void cpu_idle_mwait(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_MWAIT; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) { enable_intr(); *state = STATE_RUNNING; return; } cpu_monitor(state, 0, 0); if (*state == STATE_MWAIT) __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else enable_intr(); *state = STATE_RUNNING; } static void cpu_idle_spin(sbintime_t sbt) { int *state; int i; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_RUNNING; /* * The sched_runnable() call is racy but as long as there is * a loop missing it one time will have just a little impact if any * (and it is much better than missing the check at all). */ for (i = 0; i < 1000; i++) { if (sched_runnable()) return; cpu_spinwait(); } } /* * C1E renders the local APIC timer dead, so we disable it by * reading the Interrupt Pending Message register and clearing * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). * * Reference: * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" * #32559 revision 3.00+ */ #define MSR_AMDK8_IPM 0xc0010055 #define AMDK8_SMIONCMPHALT (1ULL << 27) #define AMDK8_C1EONCMPHALT (1ULL << 28) #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) static void cpu_probe_amdc1e(void) { /* * Detect the presence of C1E capability mostly on latest * dual-cores (or future) k8 family. */ if (cpu_vendor_id == CPU_VENDOR_AMD && (cpu_id & 0x00000f00) == 0x00000f00 && (cpu_id & 0x0fff0000) >= 0x00040000) { cpu_ident_amdc1e = 1; } } #ifdef XEN void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt; #else void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; #endif void cpu_idle(int busy) { #ifndef XEN uint64_t msr; #endif sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); #if defined(MP_WATCHDOG) && !defined(XEN) ap_watchdog(PCPU_GET(cpuid)); #endif #ifndef XEN /* If we are busy - try to use fast methods. */ if (busy) { if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { cpu_idle_mwait(busy); goto out; } } #endif /* If we have time - switch timers into idle mode. 
*/ if (!busy) { critical_enter(); sbt = cpu_idleclock(); } #ifndef XEN /* Apply AMD APIC timer C1E workaround. */ if (cpu_ident_amdc1e && cpu_disable_deep_sleep) { msr = rdmsr(MSR_AMDK8_IPM); if (msr & AMDK8_CMPHALT) wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } #endif /* Call main idle method. */ cpu_idle_fn(sbt); /* Switch timers mack into active mode. */ if (!busy) { cpu_activeclock(); critical_exit(); } #ifndef XEN out: #endif CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; int *state; pcpu = pcpu_find(cpu); state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ if (*state == STATE_SLEEPING) return (0); if (*state == STATE_MWAIT) *state = STATE_RUNNING; return (1); } /* * Ordered by speed/power consumption. */ struct { void *id_fn; char *id_name; } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, { cpu_idle_hlt, "hlt" }, { cpu_idle_acpi, "acpi" }, { NULL, NULL } }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; p += sprintf(p, "%s%s", p != avail ? ", " : "", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); static int idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; char *p; int i; p = "unknown"; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; if (strcmp(idle_tbl[i].id_name, buf)) continue; cpu_idle_fn = idle_tbl[i].id_fn; return (0); } return (EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = imgp->ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. 
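The machdep.idle sysctl above is a small dispatch table: the handler walks idle_tbl[], skips entries whose prerequisite is missing (MONITOR/MWAIT support for "mwait", a registered cpu_idle_hook for "acpi"), and installs the matching function in cpu_idle_fn, returning EINVAL otherwise. A stripped-down sketch of that pattern; the id_available flag is an illustrative stand-in for the CPUID and hook checks.

#include <stdio.h>
#include <string.h>

static void idle_spin(void) { }
static void idle_hlt(void) { }
static void idle_acpi(void) { }

static struct {
	void	(*id_fn)(void);
	const char *id_name;
	int	id_available;	/* stand-in for the CPUID2_MON / hook checks */
} demo_idle_tbl[] = {
	{ idle_spin, "spin", 1 },
	{ idle_hlt,  "hlt",  1 },
	{ idle_acpi, "acpi", 0 },	/* e.g. no ACPI idle hook registered */
	{ NULL, NULL, 0 }
};

static void (*demo_idle_fn)(void) = idle_hlt;

/* Mimic the sysctl write path: select an idle routine by name. */
static int
select_idle(const char *name)
{
	int i;

	for (i = 0; demo_idle_tbl[i].id_name != NULL; i++) {
		if (!demo_idle_tbl[i].id_available)
			continue;
		if (strcmp(demo_idle_tbl[i].id_name, name) != 0)
			continue;
		demo_idle_fn = demo_idle_tbl[i].id_fn;
		return (0);
	}
	return (-1);		/* the kernel handler returns EINVAL here */
}

int
main(void)
{
	printf("select \"spin\": %d\n", select_idle("spin"));
	printf("spin installed: %d\n", demo_idle_fn == idle_spin);
	printf("select \"acpi\": %d (unavailable in this demo)\n",
	    select_idle("acpi"));
	return (0);
}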
*/ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; #ifdef XEN union descriptor *gdt; union descriptor *ldt; #else union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. 
* * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, #ifndef XEN /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 
BIOS 16-bit interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, #endif /* !XEN */ }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), +#endif +#ifdef XENHVM + IDTVEC(xen_intr_upcall), #endif IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. 
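setidt() above packs the 32-bit handler address into the gate's two 16-bit offset fields (gd_looffset and gd_hioffset), and db_show_idt() reassembles it with the reverse shift-and-or when dumping the IDT. A tiny standalone sketch of that split/reassemble round trip with a made-up address:

#include <stdint.h>
#include <stdio.h>

struct demo_gate {		/* only the two offset fields, for illustration */
	uint16_t gd_looffset;
	uint16_t gd_hioffset;
};

int
main(void)
{
	uint32_t handler = 0xc0a1b2c3;	/* made-up kernel virtual address */
	struct demo_gate g;
	uint32_t reassembled;

	g.gd_looffset = (uint16_t)handler;		/* low 16 bits */
	g.gd_hioffset = (uint16_t)(handler >> 16);	/* high 16 bits */
	reassembled = ((uint32_t)g.gd_hioffset << 16) | g.gd_looffset;

	printf("%#010x -> lo %#06x hi %#06x -> %#010x\n",
	    (unsigned)handler, (unsigned)g.gd_looffset,
	    (unsigned)g.gd_hioffset, (unsigned)reassembled);
	return (0);
}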
*/ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #ifndef XEN static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); if (smap->length == 0) return (1); #ifndef PAE if (smap->base > 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); return (1); } #endif /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (smap->base + smap->length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && smap->base + smap->length == physmap[insert_idx]) { physmap[insert_idx] = smap->base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && smap->base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += smap->length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = smap->base; physmap[insert_idx + 1] = smap->base + smap->length; return (1); } static void basemem_setup(void) { vm_paddr_t pa; pt_entry_t *pte; int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) 
* * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } #endif /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long physmem_tunable, memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size; #ifndef XEN int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase, *smapend; u_int32_t smapsize; caddr_t kmdp; #endif has_smap = 0; #if defined(XEN) Maxmem = xen_start_info->nr_pages - init_first; physmem = Maxmem; basemem = 0; physmap[0] = init_first << PAGE_SHIFT; physmap[1] = ptoa(Maxmem) - round_page(msgbufsize); physmap_idx = 0; #else #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this. */ physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. */ physmap_idx = 0; smapbase = NULL; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); if (kmdp != NULL) smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { /* * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); has_smap = 1; for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, &physmap_idx)) break; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. 
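The comment above introduces the INT 15h/E820 probe that follows: each call returns one address-range descriptor plus a continuation value in %ebx, and a returned continuation of zero means the map is complete, which is exactly the do/while condition below. A standalone sketch of that loop shape, with a canned table standing in for the BIOS call:

#include <stdint.h>
#include <stdio.h>

struct demo_smap {		/* shape of one E820/SMAP descriptor */
	uint64_t base;
	uint64_t length;
	uint32_t type;		/* 1 == usable RAM */
};

/* Canned "BIOS": hand back one entry per call plus the next continuation. */
static uint32_t
fake_e820(uint32_t cont, struct demo_smap *out)
{
	static const struct demo_smap map[] = {
		{ 0x00000000, 0x0009fc00, 1 },
		{ 0x000f0000, 0x00010000, 2 },
		{ 0x00100000, 0x3fef0000, 1 },
	};

	*out = map[cont];
	return (cont + 1 < 3 ? cont + 1 : 0);	/* zero terminates, like %ebx */
}

int
main(void)
{
	struct demo_smap e;
	uint32_t cont = 0;

	do {
		cont = fake_e820(cont, &e);
		printf("base=%#011llx len=%#011llx type=%u\n",
		    (unsigned long long)e.base, (unsigned long long)e.length,
		    (unsigned)e.type);
	} while (cont != 0);
	return (0);
}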
*/ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: #endif /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 
0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; #ifndef XEN /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. 
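/*
 * Illustrative sketch (not part of this patch): the per-page probe above
 * reduces to writing a few bit patterns through a volatile pointer,
 * checking that each one reads back, and restoring the original word.
 * The demo_* name is a hypothetical stand-in.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
demo_word_tests_good(volatile uint32_t *p)
{
	static const uint32_t patterns[] = {
		0xaaaaaaaa,	/* alternating 1's and 0's */
		0x55555555,	/* alternating 0's and 1's */
		0xffffffff,	/* all 1's */
		0x00000000	/* all 0's */
	};
	uint32_t saved = *p;
	bool good = true;
	unsigned i;

	for (i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++) {
		*p = patterns[i];
		if (*p != patterns[i])
			good = false;
	}
	*p = saved;		/* restore the original contents */
	return (good);
}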
*/ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); #else phys_avail[0] = physfree; phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; dump_avail[0] = 0; dump_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; #endif /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); PT_UPDATES_FLUSH(); } #ifdef XEN #define MTOPSIZE (1<<(14 + PAGE_SHIFT)) void init386(first) int first; { unsigned long gdtmachpfn; int error, gsel_tss, metadata_missing, x, pa; size_t kstack0_sz; struct pcpu *pc; struct callback_register event = { .type = CALLBACKTYPE_event, .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback }, }; struct callback_register failsafe = { .type = CALLBACKTYPE_failsafe, .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback }, }; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = KSTACK_PAGES; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. 
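/*
 * Illustrative sketch (not part of this patch): the phys_avail[] updates
 * above accumulate good pages into half-open [start, end) pairs.  While
 * pages stay contiguous the current end slot is simply bumped by one
 * page; a gap opens a new pair.  Standalone version with a demo_ page
 * size; the caller seeds the first pair and handles array overflow, as
 * getmemsize() does.
 */
#include <stdint.h>

#define DEMO_PAGE_SIZE	4096u

static uint64_t demo_avail[16];		/* start/end pairs, end slot at odd index */

static int
demo_add_good_page(int end_idx, uint64_t pa)
{
	if (demo_avail[end_idx] == pa) {
		/* Continuation of the current chunk: extend its end. */
		demo_avail[end_idx] += DEMO_PAGE_SIZE;
	} else {
		/* Gap: open a new [start, end) pair. */
		end_idx++;
		demo_avail[end_idx++] = pa;			/* start */
		demo_avail[end_idx] = pa + DEMO_PAGE_SIZE;	/* end */
	}
	return (end_idx);
}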
*/ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (xen_start_info->mod_start) { preload_metadata = (caddr_t)xen_start_info->mod_start; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if ((caddr_t)xen_start_info->cmd_line) kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); boothowto |= xen_boothowto(kern_envp); /* Init basic tunables, hz etc */ init_param1(); /* * XEN occupies a portion of the upper virtual address space * At its base it manages an array mapping machine page frames * to physical page frames - hence we need to be able to * access 4GB - (64MB - 4MB + 64k) */ gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW); bzero(gdt, PAGE_SIZE); for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V); PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); lgdt(&r_gdt); gdtset = 1; if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { panic("set_trap_table failed - error %d\n", error); } error = HYPERVISOR_callback_op(CALLBACKOP_register, &event); if (error == 0) error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); #if CONFIG_XEN_COMPAT <= 0x030002 if (error == -ENOXENSYS) HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback, GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); #endif pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; physfree += DPCPU_SIZE; init_first += DPCPU_SIZE / PAGE_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW); bzero(ldt, PAGE_SIZE); ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); default_proc_ldt.ldt_base = (caddr_t)ldt; default_proc_ldt.ldt_len = 6; _default_ldt = (int)&default_proc_ldt; PCPU_SET(currentldt, _default_ldt); PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); #if defined(XEN_PRIVILEGED) /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). 
*/ i8254_init(); #endif /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + kstack0_sz - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), PCPU_GET(common_tss.tss_esp0)); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, msgbufsize); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #ifdef PAE thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0]; thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; cpu_probe_amdc1e(); } #else void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x, pa; size_t kstack0_sz; struct pcpu *pc; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = KSTACK_PAGES; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. 
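/*
 * Illustrative sketch (not part of this patch): the GSEL() values used
 * throughout init386() are x86 segment selectors, i.e. a descriptor table
 * index shifted left by 3, a table-indicator bit (0 for the GDT) and a
 * requested privilege level in the two low bits.  The demo_* helpers are
 * hypothetical stand-ins that only encode and decode that layout.
 */
#include <stdint.h>

#define DEMO_KPL	0	/* kernel privilege level */
#define DEMO_UPL	3	/* user privilege level */

static inline uint16_t
demo_gdt_selector(unsigned index, unsigned rpl)
{
	return ((uint16_t)((index << 3) | (rpl & 3)));	/* TI bit left 0 for GDT */
}

static inline unsigned
demo_selector_index(uint16_t sel)
{
	return (sel >> 3);
}

static inline unsigned
demo_selector_rpl(uint16_t sel)
{
	return (sel & 3);
}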
*/ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, 
&IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif +#ifdef XENHVM + setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL, + GSEL(GCODE_SEL, SEL_KPL)); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. */ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
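/*
 * Illustrative sketch (not part of this patch): the 0x80000000 and
 * 0x80000084 values the XBOX probe above writes to port 0xcf8 are PCI
 * configuration mechanism #1 addresses: an enable bit plus
 * bus/device/function/register fields, with the data then read from port
 * 0xcfc.  The demo_* encoder below is a hypothetical stand-in showing how
 * those address words are built.
 */
#include <stdint.h>

static inline uint32_t
demo_pci_cfg_addr(unsigned bus, unsigned dev, unsigned func, unsigned reg)
{
	return (0x80000000u |			/* enable bit */
	    ((bus & 0xffu) << 16) |
	    ((dev & 0x1fu) << 11) |
	    ((func & 0x7u) << 8) |
	    (reg & 0xfcu));			/* dword-aligned register offset */
}

/*
 * demo_pci_cfg_addr(0, 0, 0, 0)    == 0x80000000, bus 0 dev 0 func 0 reg 0
 * demo_pci_cfg_addr(0, 0, 0, 0x84) == 0x80000084, as used in the probe above
 */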
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + kstack0_sz - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, msgbufsize); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #ifdef PAE thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif } #endif void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. 
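/*
 * Illustrative sketch (not part of this patch): the spinlock_enter()/
 * spinlock_exit() pair above saves the interrupt state and disables
 * interrupts only on the outermost acquisition, and restores it only when
 * the nesting count drops back to zero.  A standalone model of that
 * counting scheme; demo_intr_disable()/demo_intr_restore() are
 * hypothetical stand-ins for the real primitives, and the critical
 * section bookkeeping is left out.
 */
#include <stdint.h>

struct demo_thread {
	int		spinlock_count;
	uintptr_t	saved_flags;
};

static uintptr_t demo_intr_disable(void) { return (0x200); }	/* stand-in */
static void demo_intr_restore(uintptr_t flags) { (void)flags; }	/* stand-in */

static void
demo_spinlock_enter(struct demo_thread *td)
{
	if (td->spinlock_count == 0) {
		/* Outermost entry: remember the state to restore later. */
		td->saved_flags = demo_intr_disable();
	}
	td->spinlock_count++;
}

static void
demo_spinlock_exit(struct demo_thread *td)
{
	td->spinlock_count--;
	if (td->spinlock_count == 0)
		demo_intr_restore(td->saved_flags);
}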
*/ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = 
penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); #ifdef DEV_NPX npxgetregs(td); #else bzero(fpregs, sizeof(*fpregs)); #endif #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm, (struct save87 *)fpregs); else #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_user_save.sv_xmm); else #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, sizeof(*fpregs)); #ifdef DEV_NPX npxuserinited(td); #endif return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
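/*
 * Illustrative sketch (not part of this patch): the mc_fsbase/mc_gsbase
 * reconstruction in get_mcontext() above works because the descriptor
 * structure keeps a 32-bit segment base as a 24-bit low field plus an
 * 8-bit high field.  The demo_* helpers are hypothetical stand-ins that
 * just split and rejoin such a base.
 */
#include <stdint.h>

struct demo_seg_base {
	uint32_t lobase : 24;	/* base address bits 0..23 */
	uint32_t hibase : 8;	/* base address bits 24..31 */
};

static inline struct demo_seg_base
demo_split_base(uint32_t base)
{
	struct demo_seg_base d;

	d.lobase = base & 0xffffff;
	d.hibase = base >> 24;
	return (d);
}

static inline uint32_t
demo_join_base(struct demo_seg_base d)
{
	return ((uint32_t)d.hibase << 24 | d.lobase);
}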
*/ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); #else mcp->mc_ownedfp = npxgetregs(td); bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { #ifdef DEV_NPX #ifdef CPU_ENABLE_SSE if (cpu_fxsr) ((union savefpu *)&mcp->mc_fpstate)->sv_xmm.sv_env. en_mxcsr &= cpu_mxcsr_mask; #endif npxsetregs(td, (union savefpu *)&mcp->mc_fpstate); #endif } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. 
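/*
 * Illustrative sketch (not part of this patch): what the DBREG_DR7_*()
 * checks in the loop just below are looking at.  Following the IA-32
 * debug register layout, breakpoint i has an enable pair in DR7 bits
 * 2i/2i+1, a 2-bit access type at bit 16 + 4i and a 2-bit length at
 * bit 18 + 4i; the value 0x2 in either field is the encoding the code
 * refuses to program.  The demo_* names are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>

static inline bool
demo_dr7_enabled(uint32_t dr7, int i)
{
	return ((dr7 & (0x3u << (i * 2))) != 0);	/* local or global enable */
}

static inline unsigned
demo_dr7_access(uint32_t dr7, int i)
{
	return ((dr7 >> (16 + i * 4)) & 0x3);
}

static inline unsigned
demo_dr7_len(uint32_t dr7, int i)
{
	return ((dr7 >> (18 + i * 4)) & 0x3);
}

static bool
demo_dr7_is_acceptable(uint32_t dr7)
{
	int i;

	for (i = 0; i < 4; i++)
		if (demo_dr7_access(dr7, i) == 0x2 ||
		    demo_dr7_len(dr7, i) == 0x2)
			return (false);
	return (true);
}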
*/ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 19e67cf48815..479168bbbea9 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -1,1587 +1,1596 @@ /*- * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" #if !defined(lint) #if !defined(SMP) #error How did you get here? #endif #ifndef DEV_APIC #error The apic device is required for SMP, add "device apic" to your config file. #endif #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) #error SMP not supported with CPU_DISABLE_CMPXCHG #endif #endif /* not lint */ #include #include #include #include /* cngetc() */ #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#ifdef XENHVM +#include +#endif + #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) /* * this code MUST be enabled here and in mpboot.s. * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) && !defined(PC98) #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) #define CHECK_INIT(D); \ CHECK_WRITE(0x34, (D)); \ CHECK_WRITE(0x35, (D)); \ CHECK_WRITE(0x36, (D)); \ CHECK_WRITE(0x37, (D)); \ CHECK_WRITE(0x38, (D)); \ CHECK_WRITE(0x39, (D)); #define CHECK_PRINT(S); \ printf("%s: %d, %d, %d, %d, %d, %d\n", \ (S), \ CHECK_READ(0x34), \ CHECK_READ(0x35), \ CHECK_READ(0x36), \ CHECK_READ(0x37), \ CHECK_READ(0x38), \ CHECK_READ(0x39)); #else /* CHECK_POINTS */ #define CHECK_INIT(D) #define CHECK_PRINT(S) #define CHECK_WRITE(A, D) #endif /* CHECK_POINTS */ /* lock region used by kernel profiling */ int mcount_lock; int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern struct pcpu __pcpu[]; /* AP uses this during bootstrap. Do not staticize. */ char *bootSTK; static int bootAP; /* Free these after use */ void *bootstacks[MAXCPU]; static void *dpcpu; struct pcb stoppcbs[MAXCPU]; struct pcb **susppcbs = NULL; /* Variables needed for SMP tlb shootdown. 
*/ vm_offset_t smp_tlb_addr1; vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; #ifdef COUNT_IPIS /* Interrupt counts. */ static u_long *ipi_preempt_counts[MAXCPU]; static u_long *ipi_ast_counts[MAXCPU]; u_long *ipi_invltlb_counts[MAXCPU]; u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif /* * Local data and functions. */ static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready = 0; /* * Store data from cpu_add() until later in the boot when we actually setup * the APs. */ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; int cpu_hyperthread:1; } static cpu_info[MAX_APIC_ID + 1]; int cpu_apic_ids[MAXCPU]; int apic_cpuids[MAX_APIC_ID + 1]; /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; static int cpu_logical; /* logical cpus per core */ static int cpu_cores; /* cores per package */ static void assign_cpu_ids(void); static void install_ap_tramp(void); static void set_interrupt_apic_ids(void); static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ static int hyperthreading_allowed = 1; static void mem_range_AP_init(void) { if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) mem_range_softc.mr_op->initAP(&mem_range_softc); } static void topo_probe_amd(void) { int core_id_bits; int id; /* AMD processors do not support HTT. */ cpu_logical = 1; if ((amd_feature2 & AMDID2_CMP) == 0) { cpu_cores = 1; return; } core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >> AMDID_COREID_SIZE_SHIFT; if (core_id_bits == 0) { cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; return; } /* Fam 10h and newer should get here. */ for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. */ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. */ if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits)) continue; cpu_cores++; } } /* * Round up to the next power of two, if necessary, and then * take log2. * Returns -1 if argument is zero. */ static __inline int mask_width(u_int x) { return (fls(x << (1 - powerof2(x))) - 1); } static void topo_probe_0x4(void) { u_int p[4]; int pkg_id_bits; int core_id_bits; int max_cores; int max_logical; int id; /* Both zero and one here mean one logical processor per package. */ max_logical = (cpu_feature & CPUID_HTT) != 0 ? (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; if (max_logical <= 1) return; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. Further, we count number of * logical processors that belong to the same core * as BSP thus deducing number of threads per core. */ if (cpu_high >= 0x4) { cpuid_count(0x04, 0, p); max_cores = ((p[0] >> 26) & 0x3f) + 1; } else max_cores = 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; pkg_id_bits = core_id_bits + mask_width(max_cores); for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. 
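/*
 * Illustrative sketch (not part of this patch): mask_width() above
 * computes ceil(log2(x)), the number of low APIC-ID bits a field of x
 * items occupies, by shifting non-powers-of-two left once before taking
 * fls().  The demo_* version below is an equivalent, more explicit
 * formulation with the same -1 result for zero.
 */
static int
demo_mask_width(unsigned x)
{
	int width = 0;

	if (x == 0)
		return (-1);
	/* Count how many doublings of 1 are needed to reach or pass x. */
	while ((1ULL << width) < x)
		width++;
	return (width);
}

/*
 * demo_mask_width(1) == 0, demo_mask_width(2) == 1, demo_mask_width(3) == 2,
 * demo_mask_width(6) == 3: a 6-way field needs 3 APIC-ID bits.
 */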
*/ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. */ if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits)) continue; cpu_cores++; /* Check if logical CPU has the same package and core IDs. */ if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits)) cpu_logical++; } KASSERT(cpu_cores >= 1 && cpu_logical >= 1, ("topo_probe_0x4 couldn't find BSP")); cpu_cores /= cpu_logical; hyperthreading_cpus = cpu_logical; } static void topo_probe_0xb(void) { u_int p[4]; int bits; int cnt; int i; int logical; int type; int x; /* We only support three levels for now. */ for (i = 0; i < 3; i++) { cpuid_count(0x0b, i, p); /* Fall back if CPU leaf 11 doesn't really exist. */ if (i == 0 && p[1] == 0) { topo_probe_0x4(); return; } bits = p[0] & 0x1f; logical = p[1] &= 0xffff; type = (p[2] >> 8) & 0xff; if (type == 0 || logical == 0) break; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. */ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_disabled) continue; if (x >> bits == boot_cpu_id >> bits) cnt++; } if (type == CPUID_TYPE_SMT) cpu_logical = cnt; else if (type == CPUID_TYPE_CORE) cpu_cores = cnt; } if (cpu_logical == 0) cpu_logical = 1; cpu_cores /= cpu_logical; } /* * Both topology discovery code and code that consumes topology * information assume top-down uniformity of the topology. * That is, all physical packages must be identical and each * core in a package must have the same number of threads. * Topology information is queried only on BSP, on which this * code runs and for which it can query CPUID information. * Then topology is extrapolated on all packages using the * uniformity assumption. */ static void topo_probe(void) { static int cpu_topo_probed = 0; if (cpu_topo_probed) return; CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) topo_probe_amd(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) { /* * See Intel(R) 64 Architecture Processor * Topology Enumeration article for details. * * Note that 0x1 <= cpu_high < 4 case should be * compatible with topo_probe_0x4() logic when * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) * or it should trigger the fallback otherwise. */ if (cpu_high >= 0xb) topo_probe_0xb(); else if (cpu_high >= 0x1) topo_probe_0x4(); } /* * Fallback: assume each logical CPU is in separate * physical package. That is, no multi-core, no SMT. */ if (cpu_cores == 0 || cpu_logical == 0) cpu_cores = cpu_logical = 1; cpu_topo_probed = 1; } struct cpu_group * cpu_topo(void) { int cg_flags; /* * Determine whether any threading flags are * necessry. */ topo_probe(); if (cpu_logical > 1 && hyperthreading_cpus) cg_flags = CG_FLAG_HTT; else if (cpu_logical > 1) cg_flags = CG_FLAG_SMT; else cg_flags = 0; if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { printf("WARNING: Non-uniform processors.\n"); printf("WARNING: Using suboptimal topology.\n"); return (smp_topo_none()); } /* * No multi-core or hyper-threaded. */ if (cpu_logical * cpu_cores == 1) return (smp_topo_none()); /* * Only HTT no multi-core. */ if (cpu_logical > 1 && cpu_cores == 1) return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags)); /* * Only multi-core no HTT. */ if (cpu_cores > 1 && cpu_logical == 1) return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags)); /* * Both HTT and multi-core. 
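/*
 * Illustrative sketch (not part of this patch): the topology probes above
 * rely on the layered APIC-ID layout, with the SMT ID in the lowest bits,
 * the core ID above that and the package ID above that, so two logical
 * CPUs share a package (or a core) exactly when their IDs agree above a
 * given bit position.  The demo_* helper is a hypothetical stand-in for
 * that comparison.
 */
#include <stdbool.h>

static inline bool
demo_same_domain(unsigned apic_id_a, unsigned apic_id_b, int low_bits)
{
	/* Ignore the low 'low_bits' bits and compare what is left. */
	return ((apic_id_a >> low_bits) == (apic_id_b >> low_bits));
}

/*
 * demo_same_domain(id, boot_cpu_id, pkg_id_bits)  -> same package as BSP
 * demo_same_domain(id, boot_cpu_id, core_id_bits) -> same core as BSP
 */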
*/ return (smp_topo_2level(CG_SHARE_L2, cpu_cores, CG_SHARE_L1, cpu_logical, cg_flags)); } /* * Calculate usable address in base memory for AP trampoline code. */ u_int mp_bootaddress(u_int basemem) { boot_address = trunc_page(basemem); /* round down to 4k boundary */ if ((basemem - boot_address) < bootMP_size) boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ return boot_address; } void cpu_add(u_int apic_id, char boot_cpu) { if (apic_id > MAX_APIC_ID) { panic("SMP: APIC ID %d too high", apic_id); return; } KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", apic_id)); cpu_info[apic_id].cpu_present = 1; if (boot_cpu) { KASSERT(boot_cpu_id == -1, ("CPU %d claims to be BSP, but CPU %d already is", apic_id, boot_cpu_id)); boot_cpu_id = apic_id; cpu_info[apic_id].cpu_bsp = 1; } if (mp_ncpus < MAXCPU) { mp_ncpus++; mp_maxid = mp_ncpus - 1; } if (bootverbose) printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : "AP"); } void cpu_mp_setmaxid(void) { /* * mp_maxid should be already set by calls to cpu_add(). * Just sanity check its value here. */ if (mp_ncpus == 0) KASSERT(mp_maxid == 0, ("%s: mp_ncpus is zero, but mp_maxid is not", __func__)); else if (mp_ncpus == 1) mp_maxid = 0; else KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } int cpu_mp_probe(void) { /* * Always record BSP in CPU map so that the mbuf init code works * correctly. */ CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup * the variables to represent a system with a single CPU * with an id of 0. */ mp_ncpus = 1; return (0); } /* At least one CPU was found. */ if (mp_ncpus == 1) { /* * One CPU was found, so this must be a UP system with * an I/O APIC. */ mp_maxid = 0; return (0); } /* At least two CPUs were found. */ return (1); } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for lazy pmap release */ setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. 
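/*
 * Illustrative sketch (not part of this patch): mp_bootaddress() above
 * carves the AP trampoline out of the top of base memory by rounding the
 * limit down to a page boundary and stepping back one more page when the
 * leftover tail is too small to hold the boot code.  Standalone version
 * with an explicit demo_ page size.
 */
#include <stdint.h>

#define DEMO_PAGE_SIZE	4096u

static uint32_t
demo_trampoline_addr(uint32_t basemem_top, uint32_t tramp_size)
{
	uint32_t addr = basemem_top & ~(DEMO_PAGE_SIZE - 1);	/* trunc_page */

	if (basemem_top - addr < tramp_size)
		addr -= DEMO_PAGE_SIZE;		/* tail too small, drop a page */
	return (addr);
}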
*/ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); set_interrupt_apic_ids(); } /* * Print various information about the SMP system hardware and setup. */ void cpu_mp_announce(void) { const char *hyperthread; int i; printf("FreeBSD/SMP: %d package(s) x %d core(s)", mp_ncpus / (cpu_cores * cpu_logical), cpu_cores); if (hyperthreading_cpus > 1) printf(" x %d HTT threads", cpu_logical); else if (cpu_logical > 1) printf(" x %d SMT threads", cpu_logical); printf("\n"); /* List active CPUs first. */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1; i < mp_ncpus; i++) { if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread, cpu_apic_ids[i]); } /* List disabled CPUs last. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled) continue; if (cpu_info[i].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread, i); } } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; vm_offset_t addr; int gsel_tss; int x, myid; u_int cpuid, cr0; /* bootAP is set in start_ap() to our ID. */ myid = bootAP; /* Get per-cpu data */ pc = &__pcpu[myid]; /* prime data page for it to use */ pcpu_init(pc, myid, sizeof(struct pcpu)); dpcpu_init(dpcpu, myid); pc->pc_apic_id = cpu_apic_ids[myid]; pc->pc_prvspace = pc; pc->pc_curthread = 0; gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) &gdt[myid * NGDT]; lgdt(&r_gdt); /* does magic intra-segment return */ lidt(&r_idt); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); CHECK_WRITE(0x38, 5); /* Disable local APIC just to be sure. */ lapic_disable(); /* signal our startup to the BSP. */ mp_naps++; CHECK_WRITE(0x39, 6); /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); /* BSP may have changed PTD while we were waiting */ invltlb(); for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) invlpg(addr); #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif /* Initialize the PAT MSR if present. */ pmap_init_pat(); /* set up CPU registers and state */ cpu_setregs(); /* set up FPU state on the AP */ npxinit(); /* set up SSE registers */ enable_sse(); +#ifdef XENHVM + /* register vcpu_info area */ + xen_hvm_init_cpu(); +#endif + #ifdef PAE /* Enable the PTE no-execute bit. 
*/ if ((amd_feature & AMDID_NX) != 0) { uint64_t msr; msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); } #endif /* A quick check from sanity claus */ cpuid = PCPU_GET(cpuid); if (PCPU_GET(apic_id) != lapic_id()) { printf("SMP: cpuid = %d\n", cpuid); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); panic("cpuid mismatch! boom!!"); } /* Initialize curthread. */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); mca_init(); mtx_lock_spin(&ap_boot_mtx); /* Init local apic for irq's */ lapic_setup(1); /* Set memory range attributes for this CPU to match the BSP */ mem_range_AP_init(); smp_cpus++; CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); printf("SMP: AP CPU #%d Launched!\n", cpuid); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) CPU_SET(cpuid, &logical_cpus_mask); if (bootverbose) lapic_dump("AP"); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); smp_active = 1; /* historic */ } mtx_unlock_spin(&ap_boot_mtx); /* Wait until all the AP's are up. */ while (smp_started == 0) ia32_pause(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); /* Enter the scheduler. */ sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } /******************************************************************* * local functions and data */ /* * We tell the I/O APIC code about all the CPUs we want to receive * interrupts. If we don't want certain CPUs to receive IRQs we * can simply not tell the I/O APIC code about them in this function. * We also do not tell it about the BSP since it tells itself about * the BSP internally to work with UP kernels and on UP machines. */ static void set_interrupt_apic_ids(void) { u_int i, apic_id; for (i = 0; i < MAXCPU; i++) { apic_id = cpu_apic_ids[i]; if (apic_id == -1) continue; if (cpu_info[apic_id].cpu_bsp) continue; if (cpu_info[apic_id].cpu_disabled) continue; /* Don't let hyperthreads service interrupts. */ if (hyperthreading_cpus > 1 && apic_id % hyperthreading_cpus != 0) continue; intr_add_cpu(i); } } /* * Assign logical CPU IDs to local APICs. */ static void assign_cpu_ids(void) { u_int i; TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", &hyperthreading_allowed); /* Check for explicitly disabled CPUs. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) continue; if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; /* * Don't use HT CPU if it has been disabled by a * tunable. */ if (hyperthreading_allowed == 0) { cpu_info[i].cpu_disabled = 1; continue; } } /* Don't use this CPU if it has been disabled by a tunable. */ if (resource_disabled("lapic", i)) { cpu_info[i].cpu_disabled = 1; continue; } } if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { hyperthreading_cpus = 0; cpu_logical = 1; } /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. * * To minimize confusion for userland, we attempt to number * CPUs such that all threads and cores in a package are * grouped together. For now we assume that the BSP is always * the first thread in a package and just start adding APs * starting with the BSP's APIC ID. 
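/*
 * Illustrative sketch (not part of this patch): the assignment loop that
 * follows walks the APIC-ID space circularly, starting just after the
 * BSP's ID and wrapping at the top, which is what groups the threads and
 * cores of a package together as the comment above describes.  The scan
 * order in isolation, with a hypothetical small ID space:
 */
#include <stdio.h>

#define DEMO_MAX_APIC_ID	7	/* arbitrary, for the demo only */

static void
demo_scan_order(int bsp_id)
{
	int i;

	/* Visit every ID except the BSP's, wrapping past DEMO_MAX_APIC_ID. */
	for (i = bsp_id + 1; i != bsp_id;
	    i == DEMO_MAX_APIC_ID ? (i = 0) : i++)
		printf("%d ", i);
	printf("\n");		/* bsp_id = 2 prints: 3 4 5 6 7 0 1 */
}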
*/ mp_ncpus = 1; cpu_apic_ids[0] = boot_cpu_id; apic_cpuids[boot_cpu_id] = 0; for (i = boot_cpu_id + 1; i != boot_cpu_id; i == MAX_APIC_ID ? i = 0 : i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || cpu_info[i].cpu_disabled) continue; if (mp_ncpus < MAXCPU) { cpu_apic_ids[mp_ncpus] = i; apic_cpuids[i] = mp_ncpus; mp_ncpus++; } else cpu_info[i].cpu_disabled = 1; } KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } /* * start each AP in our list */ /* Lowest 1MB is already mapped: don't touch*/ #define TMPMAP_START 1 static int start_all_aps(void) { #ifndef PC98 u_char mpbiosreason; #endif u_int32_t mpbioswarmvec; int apic_id, cpu, i; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* install the AP 1st level boot code */ install_ap_tramp(); /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); #ifndef PC98 outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); #endif /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ for (i = TMPMAP_START; i < NKPT; i++) PTD[i] = PTD[KPTDI + i]; invltlb(); /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up a boot stack data page */ bootstacks[cpu] = (char *)kmem_malloc(kernel_arena, KSTACK_PAGES * PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); #ifndef PC98 outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ #endif bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 4; bootAP = cpu; /* attempt to start the Application Processor */ CHECK_INIT(99); /* setup checkpoints */ if (!start_ap(apic_id)) { printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); CHECK_PRINT("trace"); /* show checkpoints */ /* better panic as the AP may be running loose */ printf("panic y/n? [y] "); if (cngetc() != 'n') panic("bye-bye"); } CHECK_PRINT("trace"); /* show checkpoints */ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; #ifndef PC98 outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); #endif /* Undo V==P hack from above */ for (i = TMPMAP_START; i < NKPT; i++) PTD[i] = 0; pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); /* number of APs actually started */ return mp_naps; } /* * load the 1st level AP boot code into base memory. */ /* targets for relocation */ extern void bigJump(void); extern void bootCodeSeg(void); extern void bootDataSeg(void); extern void MPentry(void); extern u_int MP_GDT; extern u_int mp_gdtbase; static void install_ap_tramp(void) { int x; int size = *(int *) ((u_long) & bootMP_size); vm_offset_t va = boot_address + KERNBASE; u_char *src = (u_char *) ((u_long) bootMP); u_char *dst = (u_char *) va; u_int boot_base = (u_int) bootMP; u_int8_t *dst8; u_int16_t *dst16; u_int32_t *dst32; KASSERT (size <= PAGE_SIZE, ("'size' do not fit into PAGE_SIZE, as expected.")); pmap_kenter(va, boot_address); pmap_invalidate_page (kernel_pmap, va); for (x = 0; x < size; ++x) *dst++ = *src++; /* * modify addresses in code we just moved to basemem. unfortunately we * need fairly detailed info about mpboot.s for this to work. changes * to mpboot.s might require changes here. 
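/*
 * Illustrative sketch (not part of this patch): the warm-boot vector
 * poked at WARMBOOT_OFF/WARMBOOT_SEG in start_all_aps() above is a
 * real-mode offset:segment pair, so a page-aligned trampoline below 1MB
 * is expressed as segment = address >> 4 with offset 0 (the physical
 * address being segment * 16 + offset).  The demo_* helpers are
 * hypothetical stand-ins for that conversion.
 */
#include <stdint.h>

struct demo_rm_ptr {
	uint16_t offset;	/* stored at the WARMBOOT_OFF slot */
	uint16_t segment;	/* stored at the WARMBOOT_SEG slot */
};

static inline struct demo_rm_ptr
demo_to_realmode(uint32_t phys)
{
	struct demo_rm_ptr p;

	p.segment = (uint16_t)(phys >> 4);	/* 16-byte paragraphs */
	p.offset = (uint16_t)(phys & 0xf);
	return (p);
}

static inline uint32_t
demo_to_physical(struct demo_rm_ptr p)
{
	return ((uint32_t)p.segment * 16 + p.offset);
}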
*/ /* boot code is located in KERNEL space */ dst = (u_char *) va; /* modify the lgdt arg */ dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); /* modify the ljmp target for MPentry() */ dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); *dst32 = ((u_int) MPentry - KERNBASE); /* modify the target for boot code segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; /* modify the target for boot data segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } #ifdef COUNT_XINVLTLB_HITS u_int xhits_gbl[MAXCPU]; u_int xhits_pg[MAXCPU]; u_int xhits_rng[MAXCPU]; static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, sizeof(xhits_gbl), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, sizeof(xhits_pg), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, sizeof(xhits_rng), "IU", ""); u_int ipi_global; u_int ipi_page; u_int ipi_range; u_int ipi_range_size; SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, 0, ""); u_int ipi_masked_global; u_int ipi_masked_page; u_int ipi_masked_range; u_int ipi_masked_range_size; SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, &ipi_masked_global, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, &ipi_masked_page, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, &ipi_masked_range, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ /* * Init and startup IPI. */ void ipi_startup(int apic_id, int vector) { /* * first we do an INIT IPI: this INIT IPI might be run, resetting * and running the target CPU. OR this INIT IPI might be latched (P5 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be * ignored. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); lapic_ipi_wait(-1); DELAY(10000); /* wait ~10mS */ /* * next we do a STARTUP IPI: the previous INIT IPI might still be * latched, (P5 bug) this 1st STARTUP would then terminate * immediately, and the previously started INIT IPI would continue. 
OR * the previous INIT IPI has already run. and this STARTUP IPI will * run. OR the previous INIT IPI was ignored. and this STARTUP IPI * will run. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ /* * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is * recognized after hardware RESET or INIT IPI. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ } /* * Send an IPI to specified CPU handling the bitmap logic. */ static void ipi_send_cpu(int cpu, u_int ipi) { u_int bitmap, old_pending, new_pending; KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; do { old_pending = cpu_ipi_pending[cpu]; new_pending = old_pending | bitmap; } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending)); if (old_pending) return; } lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); } /* * Flush the TLB on all other CPU's */ static void smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) return; /* no other cpus */ if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); ipi_all_but_self(vector); while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } static void smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; if (CPU_ISFULLSET(&mask)) { if (othercpus < 1) return; } else { CPU_CLR(PCPU_GET(cpuid), &mask); if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); if (CPU_ISFULLSET(&mask)) { ncpu = othercpus; ipi_all_but_self(vector); } else { ncpu = 0; while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, vector); ipi_send_cpu(cpu, vector); ncpu++; } } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } void smp_cache_flush(void) { if (smp_started) smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); } void smp_invltlb(void) { if (smp_started) { smp_tlb_shootdown(IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_global++; #endif } } void smp_invlpg(vm_offset_t addr) { if (smp_started) { smp_tlb_shootdown(IPI_INVLPG, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_page++; #endif } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_range++; ipi_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void smp_masked_invltlb(cpuset_t mask) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_global++; #endif } } void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); #ifdef 
COUNT_XINVLTLB_HITS ipi_masked_page++; #endif } } void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_masked_range++; ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void ipi_bitmap_handler(struct trapframe frame) { struct trapframe *oldframe; struct thread *td; int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; critical_enter(); td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = &frame; ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); if (ipi_bitmap & (1 << IPI_PREEMPT)) { #ifdef COUNT_IPIS (*ipi_preempt_counts[cpu])++; #endif sched_preempt(td); } if (ipi_bitmap & (1 << IPI_AST)) { #ifdef COUNT_IPIS (*ipi_ast_counts[cpu])++; #endif /* Nothing to do for AST */ } if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { #ifdef COUNT_IPIS (*ipi_hardclock_counts[cpu])++; #endif hardclockintr(); } td->td_intr_frame = oldframe; td->td_intr_nesting_level--; critical_exit(); } /* * send an IPI to a set of cpus. */ void ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } /* * send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } /* * send an IPI to all CPUs EXCEPT myself */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(other_cpus, ipi); return; } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int ipi_nmi_handler() { u_int cpuid; /* * As long as there is not a simple way to know about a NMI's * source, if the bitmask for the current CPU is present in * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) return (1); CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); cpustop_handler(); return (0); } /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. */ void cpustop_handler(void) { u_int cpu; cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); cpustop_restartfunc = NULL; } } /* * Handle an IPI_SUSPEND by saving our current context and spinning until we * are resumed. 
*/ void cpususpend_handler(void) { u_int cpu; cpu = PCPU_GET(cpuid); if (savectx(susppcbs[cpu])) { wbinvd(); CPU_SET_ATOMIC(cpu, &suspended_cpus); } else { pmap_init_pat(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); /* Indicate that we are resumed */ CPU_CLR_ATOMIC(cpu, &suspended_cpus); } /* Wait for resume */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); /* Resume MCA and local APIC */ mca_resume(); lapic_setup(0); CPU_CLR_ATOMIC(cpu, &started_cpus); } /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. */ static void release_aps(void *dummy __unused) { if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); while (smp_started == 0) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. */ static void mp_ipi_intrcnt(void *dummy) { char buf[64]; int i; CPU_FOREACH(i) { snprintf(buf, sizeof(buf), "cpu%d:invltlb", i); intrcnt_add(buf, &ipi_invltlb_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlrng", i); intrcnt_add(buf, &ipi_invlrng_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlpg", i); intrcnt_add(buf, &ipi_invlpg_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlcache", i); intrcnt_add(buf, &ipi_invlcache_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:preempt", i); intrcnt_add(buf, &ipi_preempt_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:ast", i); intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i); intrcnt_add(buf, &ipi_lazypmap_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); #endif diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h index a0e622e8785e..5d1f52249de5 100644 --- a/sys/i386/include/apicvar.h +++ b/sys/i386/include/apicvar.h @@ -1,231 +1,232 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_APICVAR_H_ #define _MACHINE_APICVAR_H_ /* * Local && I/O APIC variable definitions. */ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define MAX_APIC_ID 0xfe #define APIC_ID_ALL 0xff /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) #define IPI_LAZYPMAP (APIC_IPI_INTS + 5) /* Lazy pmap release. 
*/ /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 6) /* IPIs handled by IPI_BITMAPED_VECTOR (XXX ups is there a better place?) */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */ #define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (Does not use the APIC's interrupt fifo) */ #define APIC_SPURIOUS_INT 255 #define LVT_LINT0 0 #define LVT_LINT1 1 #define LVT_TIMER 2 #define LVT_ERROR 3 #define LVT_PMC 4 #define LVT_THERMAL 5 #define LVT_CMCI 6 #define LVT_MAX LVT_CMCI #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI /* * An APIC enumerator is a psuedo bus driver that enumerates APIC's including * CPU's and I/O APIC's. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint); extern vm_paddr_t lapic_paddr; extern int apic_cpuids[]; u_int apic_alloc_vector(u_int apic_id, u_int irq); u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); void apic_disable_vector(u_int apic_id, u_int vector); void apic_enable_vector(u_int apic_id, u_int vector); void apic_free_vector(u_int apic_id, u_int vector, u_int irq); u_int apic_idt_to_irq(u_int apic_id, u_int vector); void apic_register_enumerator(struct apic_enumerator *enumerator); u_int apic_cpuid(u_int apic_id); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); void lapic_create(u_int apic_id, int boot_cpu); void lapic_disable(void); void lapic_disable_pmc(void); void lapic_dump(const char *str); void lapic_enable_cmc(void); int lapic_enable_pmc(void); void lapic_eoi(void); int lapic_id(void); void lapic_init(vm_paddr_t addr); int lapic_intr_pending(u_int vector); void lapic_ipi_raw(register_t icrlo, u_int dest); void lapic_ipi_vectored(u_int vector, int dest); int lapic_ipi_wait(int delay); void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); void lapic_reenable_pmc(void); void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); int lapic_set_lvt_mask(u_int 
apic_id, u_int lvt, u_char masked); int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode); int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(int boot); +void xen_intr_handle_upcall(struct trapframe *frame); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h index b3dd122301e5..6bbe378a9e71 100644 --- a/sys/i386/include/intr_machdep.h +++ b/sys/i386/include/intr_machdep.h @@ -1,164 +1,182 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid - * confusion since 255 is used in PCI to indicate an invalid IRQ. + * IRQ values from 256 to 767 are used by MSI. When running under the Xen + * Hypervisor, IRQ values from 768 to 4863 are available for binding to + * event channel events. We leave 255 unused to avoid confusion since 255 is + * used in PCI to indicate an invalid IRQ. 
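 *
 * For illustration only (a sketch, not part of this interface), assuming
 * a XENHVM kernel, an IRQ number could be classified with the macros
 * defined below roughly as follows:
 *
 *	static __inline int
 *	irq_is_msi(int irq)
 *	{
 *		return (irq >= FIRST_MSI_INT &&
 *		    irq < FIRST_MSI_INT + NUM_MSI_INTS);
 *	}
 *
 *	static __inline int
 *	irq_is_evtchn(int irq)
 *	{
 *		return (irq >= FIRST_EVTCHN_INT && irq <= LAST_EVTCHN_INT);
 *	}
 *
 * i.e. 0-254 map to ISA and PCI interrupt pins, 256-767 to MSI slots,
 * and 768-4863 to Xen event channel ports.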
*/ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) +#ifdef XENHVM +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT \ + (FIRST_MSI_INT + NUM_MSI_INTS) +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#elif defined(XEN) +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT 0 +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#else /* !XEN && !XENHVM */ +#define NUM_EVTCHN_INTS 0 +#endif +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 9 counters for each CPU for IPI counters for SMP. */ #ifdef SMP #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 9) * MAXCPU) #else #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif #ifndef LOCORE typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; }; struct trapframe; extern struct mtx icu_lock; extern int elcr_found; #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. 
*/ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(void); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int* irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__MACHINE_INTR_MACHDEP_H__ */ diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index 3606d12dd1be..3a684bfde245 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -1,281 +1,269 @@ /*- * Copyright (c) Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #ifndef _SYS_CDEFS_H_ #error "sys/cdefs.h is a prerequisite for this file" #endif #include #include /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. * The reason for doing it via a struct is so that an array of pointers * to each CPU's data can be set up for things like "check curproc on all * other processors" */ #if defined(XEN) || defined(XENHVM) #ifndef NR_VIRQS #define NR_VIRQS 24 #endif #ifndef NR_IPIS #define NR_IPIS 2 #endif #endif #if defined(XEN) /* These are peridically updated in shared_info, and then copied here. 
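 * For illustration only (this is the usual Xen pvclock convention and is
 * not shown in this change): a consumer of these fields typically
 * reconstructs system time along the lines of
 *
 *	delta = rdtsc() - tsc_timestamp;
 *	delta = (tsc_shift >= 0) ? delta << tsc_shift : delta >> -tsc_shift;
 *	ns    = system_timestamp + ((delta * tsc_to_nsec_mul) >> 32);
 *
 * where the multiply is a widening 64x32 multiply in real implementations
 * so the intermediate product cannot overflow, and "version" is checked
 * before and after the read to detect a concurrent update.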
*/ struct shadow_time_info { uint64_t tsc_timestamp; /* TSC at last update of time vals. */ uint64_t system_timestamp; /* Time, in nanosecs, since boot. */ uint32_t tsc_to_nsec_mul; uint32_t tsc_to_usec_mul; int tsc_shift; uint32_t version; }; #define PCPU_XEN_FIELDS \ ; \ u_int pc_cr3; /* track cr3 for R1/R3*/ \ vm_paddr_t *pc_pdir_shadow; \ uint64_t pc_processed_system_time; \ struct shadow_time_info pc_shadow_time; \ - int pc_resched_irq; \ - int pc_callfunc_irq; \ - int pc_virq_to_irq[NR_VIRQS]; \ - int pc_ipi_to_irq[NR_IPIS]; \ - char __pad[77] + char __pad[189] -#elif defined(XENHVM) - -#define PCPU_XEN_FIELDS \ - ; \ - unsigned int pc_last_processed_l1i; \ - unsigned int pc_last_processed_l2i; \ - char __pad[229] - -#else /* !XEN && !XENHVM */ +#else /* !XEN */ #define PCPU_XEN_FIELDS \ ; \ char __pad[237] #endif #define PCPU_MD_FIELDS \ char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct i386tss pc_common_tss; \ struct segment_descriptor pc_common_tssd; \ struct segment_descriptor *pc_tss_gdt; \ struct segment_descriptor *pc_fsgs_gdt; \ int pc_currentldt; \ u_int pc_acpi_id; /* ACPI CPU id */ \ u_int pc_apic_id; \ int pc_private_tss; /* Flag indicating private tss*/\ u_int pc_cmci_mask /* MCx banks for CMCI */ \ PCPU_XEN_FIELDS #ifdef _KERNEL #ifdef lint extern struct pcpu *pcpup; #define PCPU_GET(member) (pcpup->pc_ ## member) #define PCPU_ADD(member, val) (pcpup->pc_ ## member += (val)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&pcpup->pc_ ## member) #define PCPU_SET(member, val) (pcpup->pc_ ## member = (val)) #elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) /* * Evaluates to the byte offset of the per-cpu variable name. */ #define __pcpu_offset(name) \ __offsetof(struct pcpu, name) /* * Evaluates to the type of the per-cpu variable name. */ #define __pcpu_type(name) \ __typeof(((struct pcpu *)0)->name) /* * Evaluates to the address of the per-cpu variable name. */ #define __PCPU_PTR(name) __extension__ ({ \ __pcpu_type(name) *__p; \ \ __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \ : "=r" (__p) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ "i" (__pcpu_offset(name))); \ \ __p; \ }) /* * Evaluates to the value of the per-cpu variable name. */ #define __PCPU_GET(name) __extension__ ({ \ __pcpu_type(name) __res; \ struct __s { \ u_char __b[MIN(sizeof(__res), 4)]; \ } __s; \ \ if (sizeof(__res) == 1 || sizeof(__res) == 2 || \ sizeof(__res) == 4) { \ __asm __volatile("mov %%fs:%1,%0" \ : "=r" (__s) \ : "m" (*(struct __s *)(__pcpu_offset(name)))); \ *(struct __s *)(void *)&__res = __s; \ } else { \ __res = *__PCPU_PTR(name); \ } \ __res; \ }) /* * Adds a value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. */ #define __PCPU_ADD(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__val), 4)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("add %1,%%fs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else \ *__PCPU_PTR(name) += __val; \ } while (0) /* * Increments the value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. 
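 *
 * For illustration (not part of this change), callers normally go through
 * the PCPU_INC() wrapper defined further below, e.g. PCPU_INC(cnt.v_intr)
 * in an interrupt path, which bumps this CPU's counter with a single
 * %fs-relative incl instruction.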
*/ #define __PCPU_INC(name) do { \ CTASSERT(sizeof(__pcpu_type(name)) == 1 || \ sizeof(__pcpu_type(name)) == 2 || \ sizeof(__pcpu_type(name)) == 4); \ if (sizeof(__pcpu_type(name)) == 1) { \ __asm __volatile("incb %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 2) { \ __asm __volatile("incw %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 4) { \ __asm __volatile("incl %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } \ } while (0) /* * Sets the value of the per-cpu variable name to value val. */ #define __PCPU_SET(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__val), 4)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("mov %1,%%fs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else { \ *__PCPU_PTR(name) = __val; \ } \ } while (0) #define PCPU_GET(member) __PCPU_GET(pc_ ## member) #define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) #define PCPU_INC(member) __PCPU_INC(pc_ ## member) #define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) #define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) #define OFFSETOF_CURTHREAD 0 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnull-dereference" #endif static __inline __pure2 struct thread * __curthread(void) { struct thread *td; __asm("movl %%fs:%1,%0" : "=r" (td) : "m" (*(char *)OFFSETOF_CURTHREAD)); return (td); } #ifdef __clang__ #pragma clang diagnostic pop #endif #define curthread (__curthread()) #define OFFSETOF_CURPCB 16 static __inline __pure2 struct pcb * __curpcb(void) { struct pcb *pcb; __asm("movl %%fs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB)); return (pcb); } #define curpcb (__curpcb()) #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */ #error "this file needs to be ported to your compiler" #endif /* lint, etc. */ #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */ diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index 94b63ca14bb2..0303c604b52b 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -1,465 +1,467 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ /* * Page-directory and page-table entries follow this format, with a few * of the fields not present here and there, depending on a lot of things. */ /* ---- Intel Nomenclature ---- */ #define PG_V 0x001 /* P Valid */ #define PG_RW 0x002 /* R/W Read/Write */ #define PG_U 0x004 /* U/S User/Supervisor */ #define PG_NC_PWT 0x008 /* PWT Write through */ #define PG_NC_PCD 0x010 /* PCD Cache disable */ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ #define PG_PTE_PAT 0x080 /* PAT PAT index */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ #define PG_PDE_PAT 0x1000 /* PAT PAT index */ #ifdef PAE #define PG_NX (1ull<<63) /* No-execute */ #endif /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ #define PG_MANAGED PG_AVAIL2 #ifdef PAE #define PG_FRAME (0x000ffffffffff000ull) #define PG_PS_FRAME (0x000fffffffe00000ull) #else #define PG_FRAME (~PAGE_MASK) #define PG_PS_FRAME (0xffc00000) #endif #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* Page level cache control fields used to determine the PAT type */ #define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD) #define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD) /* * Promotion to a 2 or 4MB (PDE) page mapping requires that the corresponding * 4KB (PTE) page mappings have identical settings for the following fields: */ #define PG_PTE_PROMOTE (PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \ PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V) /* * Page Protection Exception bits */ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ #define PGEX_RSV 0x08 /* reserved PTE field is non-zero */ #define PGEX_I 0x10 /* during an instruction fetch */ /* * Size of Kernel address space. This is the number of page table pages * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). * For PAE, the page table page unit size is 2MB. 
This means that 512 pages * is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE. */ #ifndef KVA_PAGES #ifdef PAE #define KVA_PAGES 512 #else #define KVA_PAGES 256 #endif #endif /* * Pte related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< #include #include #include #include #ifdef PAE typedef uint64_t pdpt_entry_t; typedef uint64_t pd_entry_t; typedef uint64_t pt_entry_t; #define PTESHIFT (3) #define PDESHIFT (3) #else typedef uint32_t pd_entry_t; typedef uint32_t pt_entry_t; #define PTESHIFT (2) #define PDESHIFT (2) #endif /* * Address of current address space page table maps and directories. */ #ifdef _KERNEL extern pt_entry_t PTmap[]; extern pd_entry_t PTD[]; extern pd_entry_t PTDpde[]; #ifdef PAE extern pdpt_entry_t *IdlePDPT; #endif extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ /* * Translate a virtual address to the kernel virtual address of its page table * entry (PTE). This can be used recursively. If the address of a PTE as * previously returned by this macro is itself given as the argument, then the * address of the page directory entry (PDE) that maps the PTE will be * returned. * * This macro may be used before pmap_bootstrap() is called. */ #define vtopte(va) (PTmap + i386_btop(va)) /* * Translate a virtual address to its physical address. * * This macro may be used before pmap_bootstrap() is called. */ #define vtophys(va) pmap_kextract((vm_offset_t)(va)) #if defined(XEN) #include -#include + +#include + #include #include extern pt_entry_t pg_nx; #define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M) #define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma))) #define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m))) #define VTOM(va) xpmap_ptom(VTOP(va)) static __inline vm_paddr_t pmap_kextract_ma(vm_offset_t va) { vm_paddr_t ma; if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) { ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1)); } else { ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK); } return ma; } static __inline vm_paddr_t pmap_kextract(vm_offset_t va) { return xpmap_mtop(pmap_kextract_ma(va)); } #define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va))) vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va); void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa); void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len); void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len); static __inline pt_entry_t pte_load_store(pt_entry_t *ptep, pt_entry_t v) { pt_entry_t r; r = *ptep; PT_SET_VA(ptep, v, TRUE); return (r); } static __inline pt_entry_t pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v) { pt_entry_t r; r = *ptep; PT_SET_VA_MA(ptep, v, TRUE); return (r); } #define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL) #define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte) #define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) #define pde_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) #elif !defined(XEN) /* * KPTmap is a linear mapping of the kernel page table. It differs from the * recursive mapping in two ways: (1) it only provides access to kernel page * table pages, and not user page table pages, and (2) it provides access to * a kernel page table page after the corresponding virtual addresses have * been promoted to a 2/4MB page mapping. * * KPTmap is first initialized by locore to support just NPKT page table * pages. Later, it is reinitialized by pmap_bootstrap() to allow for * expansion of the kernel page table. 
*/ extern pt_entry_t *KPTmap; /* * Extract from the kernel page table the physical address that is mapped by * the given virtual address "va". * * This function may be used before pmap_bootstrap() is called. */ static __inline vm_paddr_t pmap_kextract(vm_offset_t va) { vm_paddr_t pa; if ((pa = PTD[va >> PDRSHIFT]) & PG_PS) { pa = (pa & PG_PS_FRAME) | (va & PDRMASK); } else { /* * Beware of a concurrent promotion that changes the PDE at * this point! For example, vtopte() must not be used to * access the PTE because it would use the new PDE. It is, * however, safe to use the old PDE because the page table * page is preserved by the promotion. */ pa = KPTmap[i386_btop(va)]; pa = (pa & PG_FRAME) | (va & PAGE_MASK); } return (pa); } #endif #if !defined(XEN) #define PT_UPDATES_FLUSH() #endif #if defined(PAE) && !defined(XEN) #define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new) #define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte) #define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0) #define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte) extern pt_entry_t pg_nx; #elif !defined(PAE) && !defined(XEN) #define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new) #define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte) #define pte_load_clear(ptep) atomic_swap_int(ptep, 0) #define pte_store(ptep, pte) do { \ *(u_int *)(ptep) = (u_int)(pte); \ } while (0) #endif /* PAE */ #define pte_clear(ptep) pte_store(ptep, 0) #define pde_store(pdep, pde) pte_store(pdep, pde) #endif /* _KERNEL */ /* * Pmap stuff */ struct pv_entry; struct pv_chunk; struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pat_mode; }; struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ #ifdef PAE pdpt_entry_t *pm_pdpt; /* KVA of page director pointer table */ #endif struct vm_radix pm_root; /* spare page table pages */ }; typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) #define PMAP_LOCK_ASSERT(pmap, type) \ mtx_assert(&(pmap)->pm_mtx, (type)) #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) #define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ NULL, MTX_DEF | MTX_DUPOK) #define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx) #define PMAP_MTX(pmap) (&(pmap)->pm_mtx) #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) #endif /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_list. */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* * pv_entries are allocated in chunks per-process. This avoids the * need to track per-pmap assignments. */ #define _NPCM 11 #define _NPCPV 336 struct pv_chunk { pmap_t pc_pmap; TAILQ_ENTRY(pv_chunk) pc_list; uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */ TAILQ_ENTRY(pv_chunk) pc_lru; struct pv_entry pc_pventry[_NPCPV]; }; #ifdef _KERNEL extern caddr_t CADDR1; extern pt_entry_t *CMAP1; extern vm_paddr_t phys_avail[]; extern vm_paddr_t dump_avail[]; extern int pseflag; extern int pgeflag; extern char *ptvmmap; /* poor name! 
*/ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; #define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) #define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz)) /* * Only the following functions or macros may be used before pmap_bootstrap() * is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and * vtopte(). */ void pmap_bootstrap(vm_paddr_t); int pmap_cache_bits(int mode, boolean_t is_pde); int pmap_change_attr(vm_offset_t, vm_size_t, int); void pmap_init_pat(void); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void *pmap_kenter_temporary(vm_paddr_t pa, int i); void pmap_kremove(vm_offset_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); void *pmap_mapdev(vm_paddr_t, vm_size_t); void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); boolean_t pmap_page_is_mapped(vm_page_t m); void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); void pmap_unmapdev(vm_offset_t, vm_size_t); pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2; void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); void pmap_invalidate_cache(void); void pmap_invalidate_cache_pages(vm_page_t *pages, int count); void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ diff --git a/sys/i386/include/xen/xen-os.h b/sys/i386/include/xen/xen-os.h index 257202ec9f94..e15d66870062 100644 --- a/sys/i386/include/xen/xen-os.h +++ b/sys/i386/include/xen/xen-os.h @@ -1,367 +1,291 @@ -/****************************************************************************** - * os.h +/***************************************************************************** + * i386/xen/xen-os.h * - * random collection of macros and definition + * Random collection of macros and definition + * + * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * $FreeBSD$ */ -#ifndef _XEN_OS_H_ -#define _XEN_OS_H_ -#include +#ifndef _MACHINE_XEN_XEN_OS_H_ +#define _MACHINE_XEN_XEN_OS_H_ #ifdef PAE #define CONFIG_X86_PAE #endif -#ifdef LOCORE -#define __ASSEMBLY__ -#endif - -#if !defined(__XEN_INTERFACE_VERSION__) -#define __XEN_INTERFACE_VERSION__ 0x00030208 -#endif - -#define GRANT_REF_INVALID 0xffffffff - -#include - /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ /* Force a proper event-channel callback from Xen. */ void force_evtchn_callback(void); -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__ ( "rep;nop" : : : "memory" ); +} +#define cpu_relax() rep_nop() -#ifndef vtophys -#include -#include -#include -#endif +#ifndef XENHVM +void xc_printf(const char *fmt, ...); -extern int gdtset; #ifdef SMP +extern int gdtset; + #include /* XXX for pcpu.h */ #include /* XXX for PCPU_GET */ static inline int smp_processor_id(void) { - if (likely(gdtset)) + if (__predict_true(gdtset)) return PCPU_GET(cpuid); return 0; } #else #define smp_processor_id() 0 #endif -#ifndef NULL -#define NULL (void *)0 -#endif - #ifndef PANIC_IF -#define PANIC_IF(exp) if (unlikely(exp)) {printk("panic - %s: %s:%d\n",#exp, __FILE__, __LINE__); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} -#endif - -extern shared_info_t *HYPERVISOR_shared_info; - -/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented - a mechanism by which the user can annotate likely branch directions and - expect the blocks to be reordered appropriately. Define __builtin_expect - to nothing for earlier compilers. */ - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__ ( "rep;nop" : : : "memory" ); -} -#define cpu_relax() rep_nop() - - -#if __GNUC__ == 2 && __GNUC_MINOR__ < 96 -#define __builtin_expect(x, expected_value) (x) +#define PANIC_IF(exp) if (__predict_false(exp)) {printf("panic - %s: %s:%d\n",#exp, __FILE__, __LINE__); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} #endif -#define per_cpu(var, cpu) (pcpu_find((cpu))->pc_ ## var) - -/* crude memory allocator for memory allocation early in - * boot +/* + * Crude memory allocator for memory allocation early in boot. */ void *bootmem_alloc(unsigned int size); void bootmem_free(void *ptr, unsigned int size); -#include - -void printk(const char *fmt, ...); - -/* some function prototypes */ -void trap_init(void); - -#ifndef XENHVM - /* * STI/CLI equivalents. These basically set and clear the virtual * event_enable flag in the shared_info structure. Note that when * the enable bit is set, there may be pending events to be handled. * We may therefore call into do_hypervisor_callback() directly. 
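 *
 * For illustration only (not part of this change), a typical caller
 * brackets a short critical section with the save/restore pair defined
 * below:
 *
 *	unsigned long flags;
 *
 *	local_irq_save(flags);		(mask upcalls, remember old mask)
 *	... manipulate shared or per-CPU state ...
 *	local_irq_restore(flags);	(restore mask; if events became
 *					 pending, force_evtchn_callback()
 *					 delivers them now)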
*/ - #define __cli() \ do { \ vcpu_info_t *_vcpu; \ _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ _vcpu->evtchn_upcall_mask = 1; \ barrier(); \ } while (0) #define __sti() \ do { \ vcpu_info_t *_vcpu; \ barrier(); \ _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ _vcpu->evtchn_upcall_mask = 0; \ barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + if (__predict_false(_vcpu->evtchn_upcall_pending)) \ force_evtchn_callback(); \ } while (0) #define __restore_flags(x) \ do { \ vcpu_info_t *_vcpu; \ barrier(); \ _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + if (__predict_false(_vcpu->evtchn_upcall_pending)) \ force_evtchn_callback(); \ } \ } while (0) /* * Add critical_{enter, exit}? * */ #define __save_and_cli(x) \ do { \ vcpu_info_t *_vcpu; \ _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ (x) = _vcpu->evtchn_upcall_mask; \ _vcpu->evtchn_upcall_mask = 1; \ barrier(); \ } while (0) #define cli() __cli() #define sti() __sti() #define save_flags(x) __save_flags(x) #define restore_flags(x) __restore_flags(x) #define save_and_cli(x) __save_and_cli(x) #define local_irq_save(x) __save_and_cli(x) #define local_irq_restore(x) __restore_flags(x) #define local_irq_disable() __cli() #define local_irq_enable() __sti() #define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));} #define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } #define spin_lock_irqsave mtx_lock_irqsave #define spin_unlock_irqrestore mtx_unlock_irqrestore -#endif - -#ifndef xen_mb -#define xen_mb() mb() -#endif -#ifndef xen_rmb -#define xen_rmb() rmb() -#endif -#ifndef xen_wmb -#define xen_wmb() wmb() -#endif -#ifdef SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - +#endif /* !XENHVM */ /* This is a barrier for the compiler only, NOT the processor! */ #define barrier() __asm__ __volatile__("": : :"memory") #define LOCK_PREFIX "" #define LOCK "" #define ADDR (*(volatile long *) addr) /* * Make sure gcc doesn't try to be clever and move things around * on us. We need to use _exactly_ the address the user gave us, * not some alias that contains the same information. 
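 *
 * For illustration only (hypothetical call, not part of this change):
 * the bit operations below all go through ADDR, so a call such as
 *
 *	if (test_and_clear_bit(port,
 *	    &HYPERVISOR_shared_info->evtchn_pending[0]))
 *		... handle event channel "port" ...
 *
 * operates on the word that actually holds bit "port" rather than on a
 * compiler-introduced alias.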
*/ typedef struct { volatile int counter; } atomic_t; - - #define xen_xchg(ptr,v) \ ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((volatile struct __xchg_dummy *)(x)) static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) { switch (size) { case 1: __asm__ __volatile__("xchgb %b0,%1" :"=q" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; case 2: __asm__ __volatile__("xchgw %w0,%1" :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; case 4: __asm__ __volatile__("xchgl %0,%1" :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; } return x; } /** * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static __inline int test_and_clear_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( LOCK_PREFIX "btrl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"Ir" (nr) : "memory"); return oldbit; } static __inline int constant_test_bit(int nr, const volatile void * addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } static __inline int variable_test_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( "btl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit) :"m" (ADDR),"Ir" (nr)); return oldbit; } #define test_bit(nr,addr) \ (__builtin_constant_p(nr) ? \ constant_test_bit((nr),(addr)) : \ variable_test_bit((nr),(addr))) /** * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * This function is atomic and may not be reordered. See __set_bit() * if you do not require the atomic guarantees. * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __inline__ void set_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btsl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } /** * clear_bit - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and may not be reordered. However, it does * not contain a memory barrier, so if it is used for locking purposes, * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ static __inline__ void clear_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btrl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } /** * atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. Note that the guaranteed * useful range of an atomic_t is only 24 bits. */ static __inline__ void atomic_inc(atomic_t *v) { __asm__ __volatile__( LOCK "incl %0" :"=m" (v->counter) :"m" (v->counter)); } #define rdtscll(val) \ __asm__ __volatile__("rdtsc" : "=A" (val)) - - -/* - * Kernel pointers have redundant information, so we can use a - * scheme where we can return either an error code or a dentry - * pointer with the same return value. - * - * This should be a per-architecture thing, to allow different - * error and pointer decisions. 
- */ -#define IS_ERR_VALUE(x) unlikely((x) > (unsigned long)-1000L) - -static inline void *ERR_PTR(long error) -{ - return (void *) error; -} - -static inline long PTR_ERR(const void *ptr) -{ - return (long) ptr; -} - -static inline long IS_ERR(const void *ptr) -{ - return IS_ERR_VALUE((unsigned long)ptr); -} - #endif /* !__ASSEMBLY__ */ -#endif /* _OS_H_ */ +#endif /* _MACHINE_XEN_XEN_OS_H_ */ diff --git a/sys/i386/include/xen/xenfunc.h b/sys/i386/include/xen/xenfunc.h index 47f04057aa68..f02ee1212e32 100644 --- a/sys/i386/include/xen/xenfunc.h +++ b/sys/i386/include/xen/xenfunc.h @@ -1,78 +1,82 @@ /*- * Copyright (c) 2004, 2005 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _XEN_XENFUNC_H_ #define _XEN_XENFUNC_H_ -#include +#include #include + +#include + #include #include + #include #define BKPT __asm__("int3"); #define XPQ_CALL_DEPTH 5 #define XPQ_CALL_COUNT 2 #define PG_PRIV PG_AVAIL3 typedef struct { unsigned long pt_ref; unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH]; } pteinfo_t; extern pteinfo_t *pteinfo_list; #ifdef XENDEBUG_LOW #define __PRINTK(x) printk x #else #define __PRINTK(x) #endif char *xen_setbootenv(char *cmd_line); int xen_boothowto(char *envp); void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line); #ifdef INVARIANTS #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__) #else #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0) #endif void xen_update_descriptor(union descriptor *, union descriptor *); extern struct mtx balloon_lock; #if 0 #define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags) #define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags) #else #define balloon_lock(__flags) __flags = 1 #define balloon_unlock(__flags) __flags = 0 #endif #endif /* _XEN_XENFUNC_H_ */ diff --git a/sys/i386/include/xen/xenvar.h b/sys/i386/include/xen/xenvar.h index b1a0a4d8191b..2742613249df 100644 --- a/sys/i386/include/xen/xenvar.h +++ b/sys/i386/include/xen/xenvar.h @@ -1,120 +1,121 @@ /*- * Copyright (c) 2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef XENVAR_H_ #define XENVAR_H_ #include #if defined(XEN) #define XBOOTUP 0x1 #define XPMAP 0x2 extern int xendebug_flags; #ifndef NOXENDEBUG -#define XENPRINTF printk +/* Print directly to the Xen console during debugging. */ +#define XENPRINTF xc_printf #else #define XENPRINTF printf #endif extern xen_pfn_t *xen_phys_machine; extern xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; extern xen_pfn_t *xen_pfn_to_mfn_frame_list_list; #if 0 #define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__) #define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__) #define TRACE_DEBUG(argflags, _f, _a...) \ if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a); #else #define TRACE_ENTER #define TRACE_EXIT #define TRACE_DEBUG(argflags, _f, _a...) 
#endif extern xen_pfn_t *xen_machine_phys; /* Xen starts physical pages after the 4MB ISA hole - * FreeBSD doesn't */ #undef ADD_ISA_HOLE /* XXX */ #ifdef ADD_ISA_HOLE #define ISA_INDEX_OFFSET 1024 #define ISA_PDR_OFFSET 1 #else #define ISA_INDEX_OFFSET 0 #define ISA_PDR_OFFSET 0 #endif #define PFNTOMFN(i) (xen_phys_machine[(i)]) #define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)]) #define VTOP(x) ((((uintptr_t)(x))) - KERNBASE) #define PTOV(x) (((uintptr_t)(x)) + KERNBASE) #define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT) #define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT) #define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT) #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT) #define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT) void xpq_init(void); #define BITS_PER_LONG 32 #define NR_CPUS XEN_LEGACY_MAX_VCPUS #define BITS_TO_LONGS(bits) \ (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] int xen_create_contiguous_region(vm_page_t pages, int npages); void xen_destroy_contiguous_region(void * addr, int npages); #elif defined(XENHVM) #if !defined(PAE) #define vtomach(va) pmap_kextract((vm_offset_t) (va)) #endif #define PFNTOMFN(pa) (pa) #define MFNTOPFN(ma) (ma) #define set_phys_to_machine(pfn, mfn) ((void)0) #define phys_to_machine_mapping_valid(pfn) (TRUE) #endif /* !XEN && !XENHVM */ #endif diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 3b86d8418666..622e5b7f15bb 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -1,1118 +1,1118 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef XEN -#include +#include #include #endif #ifdef DEV_ISA #include #endif #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) #ifdef CPU_ENABLE_SSE #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) #endif #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fp_divide_by_0(void); void frstor(caddr_t addr); #ifdef CPU_ENABLE_SSE void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void stmxcsr(u_int *csr); #endif #endif /* __GNUCLIKE_ASM && !lint */ #ifdef XEN #define start_emulating() (HYPERVISOR_fpu_taskswitch(1)) #define stop_emulating() (HYPERVISOR_fpu_taskswitch(0)) #else #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() #endif #ifdef CPU_ENABLE_SSE #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? 
\ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) #else /* CPU_ENABLE_SSE */ #define GET_FPU_CW(thread) \ (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) \ (savefpu)->sv_87.sv_env.en_cw = (value) #endif /* CPU_ENABLE_SSE */ typedef u_char bool_t; #ifdef CPU_ENABLE_SSE static void fpu_clean_state(void); #endif static void fpusave(union savefpu *); static void fpurstor(union savefpu *); static int npx_attach(device_t dev); static void npx_identify(driver_t *driver, device_t parent); static int npx_probe(device_t dev); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); static volatile u_int npx_traps_while_probing; static union savefpu npx_initialstate; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Identify routine. Create a connection point on our parent for probing. */ static void npx_identify(driver, parent) driver_t *driver; device_t parent; { device_t child; child = BUS_ADD_CHILD(parent, 0, "npx", 0); if (child == NULL) panic("npx_identify"); } /* * Probe routine. Set flags to tell npxattach() what to do. Set up an * interrupt handler if npx needs to use interrupts. */ static int npx_probe(device_t dev) { struct gate_descriptor save_idt_npxtrap; u_short control, status; device_set_desc(dev, "math processor"); /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; device_quiet(dev); return (0); } save_idt_npxtrap = idt[IDT_MF]; setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ stop_emulating(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. */ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); #ifdef FPU_ERROR_BROKEN /* * FPU error signal doesn't work on some CPU * accelerator board. */ hw_float = 1; return (0); #endif npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. 
*/ hw_float = 1; goto cleanup; } device_printf(dev, "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. */ device_printf(dev, "WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float ? 0 : ENXIO); } /* * Attach routine - announce which it is, and wire into system */ static int npx_attach(device_t dev) { npxinit(); critical_enter(); stop_emulating(); fpusave(&npx_initialstate); start_emulating(); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; bzero(npx_initialstate.sv_xmm.sv_fp, sizeof(npx_initialstate.sv_xmm.sv_fp)); bzero(npx_initialstate.sv_xmm.sv_xmm, sizeof(npx_initialstate.sv_xmm.sv_xmm)); /* XXX might need even more zeroing. */ } else #endif bzero(npx_initialstate.sv_87.sv_ac, sizeof(npx_initialstate.sv_87.sv_ac)); critical_exit(); return (0); } /* * Initialize floating point unit. */ void npxinit(void) { static union savefpu dummy; register_t saveintr; u_short control; if (!hw_float) return; /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. npxsave() initializes * the fpu and sets fpcurthread = NULL as important side effects. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); npxsave(&dummy); stop_emulating(); #ifdef CPU_ENABLE_SSE /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ if (cpu_fxsr) fninit(); #endif control = __INITIAL_NPXCW__; fldcw(control); start_emulating(); intr_restore(saveintr); } /* * Free coprocessor (if we have it). */ void npxexit(td) struct thread *td; { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxsave(curpcb->pcb_save); critical_exit(); #ifdef NPX_DEBUG if (hw_float) { u_int masked_exceptions; masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). */ if (masked_exceptions & 0x0d) log(LOG_ERR, "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", td->td_proc->p_pid, td->td_proc->p_comm, masked_exceptions); } #endif } int npxformat() { if (!hw_float) return (_MC_FPFMT_NODEV); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) return (_MC_FPFMT_XMM); #endif return (_MC_FPFMT_387); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". 
* * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ 
FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. 
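 *
 * Worked example (hypothetical register values, for illustration only):
 * with a control word of 0x127b, only the zero-divide exception is
 * unmasked, so (~control & 0x3f) | 0x40 = 0x44.  If the status word is
 * 0x0024 (DZ and IMP latched), the table index computed below is
 * 0x24 & 0x44 = 0x04, and fpetable[0x04] is FPE_FLTDIV: the unmasked
 * zero-divide bit wins and the masked inexact-result bit is ignored.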
*/ int npxtrap_x87(void) { u_short control, status; if (!hw_float) { printf( "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. */ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } #ifdef CPU_ENABLE_SSE int npxtrap_sse(void) { u_int mxcsr; if (!hw_float) { printf( "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } #endif /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; int npxdna(void) { if (!hw_float) return (0); critical_enter(); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); critical_exit(); return (1); } if (PCPU_GET(fpcurthread) != NULL) { printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("npxdna"); } stop_emulating(); /* * Record new context early in case frstor causes an IRQ13. */ PCPU_SET(fpcurthread, curthread); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpu_clean_state(); #endif if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. */ fpurstor(&npx_initialstate); if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(curpcb->pcb_initial_npxcw); curpcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(curpcb)) curpcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { /* * The following fpurstor() may cause an IRQ13 when the * state being restored has a pending error. The error will * appear to have been triggered by the current (npx) user * instruction even when that instruction is a no-wait * instruction that should not trigger an error (e.g., * fnclex). On at least one 486 system all of the no-wait * instructions are broken the same as frstor, so our * treatment does not amplify the breakage. On at least * one 386/Cyrix 387 system, fnclex works correctly while * frstor and fnsave are broken, so our treatment breaks * fnclex if it is the first FPU instruction after a context * switch. */ fpurstor(curpcb->pcb_save); } critical_exit(); return (1); } /* * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by * no-wait npx instructions. See the Intel application note AP-578 for * details. This doesn't cause any additional complications here. IRQ13's * are inherently asynchronous unless the CPU is frozen to deliver them -- * one that started in userland may be delivered many instructions later, * after the process has entered the kernel. 
It may even be delivered after * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in * the same way as a very-late-arriving non-spurious IRQ13 from user mode: * it is normally ignored at first because we set fpcurthread to NULL; it is * normally retriggered in npxdna() after return to user mode. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. * * A previous version of npxsave() went to great lengths to excecute fnsave * with interrupts enabled in case executing it froze the CPU. This case * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply * spurious freezes. */ void npxsave(addr) union savefpu *addr; { stop_emulating(); fpusave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } void npxdrop() { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. */ #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int npxgetregs(struct thread *td) { struct pcb *pcb; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(&npx_initialstate, &pcb->pcb_user_save, sizeof(npx_initialstate)); SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw); npxuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread)) { fpusave(&pcb->pcb_user_save); #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); critical_exit(); return (_MC_FPOWNED_FPU); } else { critical_exit(); return (_MC_FPOWNED_PCB); } } void npxuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } void npxsetregs(struct thread *td, union savefpu *addr) { struct pcb *pcb; if (!hw_float) return; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif fnclex(); /* As in npxdrop(). */ if (((uintptr_t)addr & 0xf) != 0) { bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); fpurstor(&pcb->pcb_user_save); } else fpurstor(addr); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); npxuserinited(td); } } static void fpusave(addr) union savefpu *addr; { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fxsave(addr); else #endif fnsave(addr); } #ifdef CPU_ENABLE_SSE /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). 
*/ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } #endif /* CPU_ENABLE_SSE */ static void fpurstor(addr) union savefpu *addr; { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fxrstor(addr); else #endif frstor(addr); } static device_method_t npx_methods[] = { /* Device interface */ DEVMETHOD(device_identify, npx_identify), DEVMETHOD(device_probe, npx_probe), DEVMETHOD(device_attach, npx_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npx_driver = { "npx", npx_methods, 1, /* no softc */ }; static devclass_t npx_devclass; /* * We prefer to attach to the root nexus so that the usual case (exception 16) * doesn't describe the processor as being `on isa'. */ DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); #ifndef PC98 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define XSAVE_AREA_ALIGN 64 #define FPU_KERN_CTX_NPXINITDONE 0x01 struct fpu_kern_ctx { union savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + sizeof(union savefpu); res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { /* XXXKIB clear the memory ? 
*/ free(ctx, M_FPUKERN_CTX); } static union savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((union savefpu *)p); } int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); ctx->flags = 0; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; return (0); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); pcb->pcb_save = ctx->prev; if (pcb->pcb_save == &pcb->pcb_user_save) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; pcb->pcb_flags &= ~PCB_KERNNPX; } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { struct pcb *pcb; pcb = curpcb; KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == &curpcb->pcb_user_save, ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); curpcb->pcb_flags |= PCB_KERNNPX; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX) != 0); } diff --git a/sys/i386/xen/clock.c b/sys/i386/xen/clock.c index a10b5462b7a1..524fa149207f 100644 --- a/sys/i386/xen/clock.c +++ b/sys/i386/xen/clock.c @@ -1,905 +1,917 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); /* #define DELAYDEBUG */ /* * Routines to handle clock hardware. */ #include "opt_ddb.h" #include "opt_clock.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) #include #endif #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) (!((y) % 4)) #define DAYSPERYEAR (28+30*4+31*7) #ifndef TIMER_FREQ #define TIMER_FREQ 1193182 #endif #ifdef CYC2NS_SCALE_FACTOR #undef CYC2NS_SCALE_FACTOR #endif #define CYC2NS_SCALE_FACTOR 10 /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 struct mtx clock_lock; #define RTC_LOCK_INIT \ mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_NOPROFILE) #define RTC_LOCK mtx_lock_spin(&clock_lock) #define RTC_UNLOCK mtx_unlock_spin(&clock_lock) int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int pscnt = 1; int psdiv = 1; int wall_cmos_clock; u_int timer_freq = TIMER_FREQ; static int independent_wallclock; static int xen_disable_rtc_set; static u_long cyc2ns_scale; static struct timespec shadow_tv; static uint32_t shadow_tv_version; /* XXX: lazy locking */ static uint64_t processed_system_time; /* stime (ns) at last processing. */ static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; +int ap_cpu_initclocks(int cpu); + SYSCTL_INT(_machdep, OID_AUTO, independent_wallclock, CTLFLAG_RW, &independent_wallclock, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, xen_disable_rtc_set, CTLFLAG_RW, &xen_disable_rtc_set, 1, ""); #define do_div(n,base) ({ \ unsigned long __upper, __low, __high, __mod, __base; \ __base = (base); \ __asm("":"=a" (__low), "=d" (__high):"A" (n)); \ __upper = __high; \ if (__high) { \ __upper = __high % (__base); \ __high = __high / (__base); \ } \ __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \ __asm("":"=A" (n):"a" (__low),"d" (__high)); \ __mod; \ }) #define NS_PER_TICK (1000000000ULL/hz) #define rdtscll(val) \ __asm__ __volatile__("rdtsc" : "=A" (val)) /* convert from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) * ns = cycles * (10^9 / (cpu_mhz * 10^6)) * ns = cycles * (10^3 / cpu_mhz) * * Then we use scaling math (suggested by george@mvista.com) to get: * ns = cycles * (10^3 * SC / cpu_mhz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div * into a shift. * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
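 *
 * Worked example (illustrative numbers only): with CYC2NS_SCALE_FACTOR = 10
 * and a hypothetical 2000 MHz TSC, cyc2ns_scale = (1000 << 10) / 2000 = 512,
 * so cycles_2_ns(cyc) = (cyc * 512) >> 10 = cyc / 2, i.e. 0.5 ns per cycle,
 * as expected for a 2 GHz clock.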
*/ static inline void set_cyc2ns_scale(unsigned long cpu_mhz) { cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) { return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; } /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. */ static inline uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift) { uint64_t product; uint32_t tmp1, tmp2; if ( shift < 0 ) delta >>= -shift; else delta <<= shift; __asm__ ( "mul %5 ; " "mov %4,%%eax ; " "mov %%edx,%4 ; " "mul %5 ; " "xor %5,%5 ; " "add %4,%%eax ; " "adc %5,%%edx ; " : "=A" (product), "=r" (tmp1), "=r" (tmp2) : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" (mul_frac) ); return product; } static uint64_t get_nsec_offset(struct shadow_time_info *shadow) { uint64_t now, delta; rdtscll(now); delta = now - shadow->tsc_timestamp; return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); } static void update_wallclock(void) { shared_info_t *s = HYPERVISOR_shared_info; do { shadow_tv_version = s->wc_version; rmb(); shadow_tv.tv_sec = s->wc_sec; shadow_tv.tv_nsec = s->wc_nsec; rmb(); } while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version)); } static void add_uptime_to_wallclock(void) { struct timespec ut; xen_fetch_uptime(&ut); timespecadd(&shadow_tv, &ut); } /* * Reads a consistent set of time-base values from Xen, into a shadow data * area. Must be called with the xtime_lock held for writing. */ static void __get_time_values_from_xen(void) { shared_info_t *s = HYPERVISOR_shared_info; struct vcpu_time_info *src; struct shadow_time_info *dst; uint32_t pre_version, post_version; + struct pcpu *pc; + pc = pcpu_find(smp_processor_id()); src = &s->vcpu_info[smp_processor_id()].time; - dst = &per_cpu(shadow_time, smp_processor_id()); + dst = &pc->pc_shadow_time; spinlock_enter(); do { pre_version = dst->version = src->version; rmb(); dst->tsc_timestamp = src->tsc_timestamp; dst->system_timestamp = src->system_time; dst->tsc_to_nsec_mul = src->tsc_to_system_mul; dst->tsc_shift = src->tsc_shift; rmb(); post_version = src->version; } while ((pre_version & 1) | (pre_version ^ post_version)); dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000; spinlock_exit(); } static inline int time_values_up_to_date(int cpu) { struct vcpu_time_info *src; struct shadow_time_info *dst; + struct pcpu *pc; src = &HYPERVISOR_shared_info->vcpu_info[cpu].time; - dst = &per_cpu(shadow_time, cpu); + pc = pcpu_find(cpu); + dst = &pc->pc_shadow_time; rmb(); return (dst->version == src->version); } static unsigned xen_get_timecount(struct timecounter *tc); static struct timecounter xen_timecounter = { xen_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "ixen", /* name */ 0 /* quality */ }; static struct eventtimer xen_et; struct xen_et_state { int mode; #define MODE_STOP 0 #define MODE_PERIODIC 1 #define MODE_ONESHOT 2 int64_t period; int64_t next; }; static DPCPU_DEFINE(struct xen_et_state, et_state); static int clkintr(void *arg) { int64_t now; int cpu = smp_processor_id(); - struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); + struct pcpu *pc = pcpu_find(cpu); + struct shadow_time_info *shadow = &pc->pc_shadow_time; struct xen_et_state *state = DPCPU_PTR(et_state); do { __get_time_values_from_xen(); now = shadow->system_timestamp + get_nsec_offset(shadow); } while (!time_values_up_to_date(cpu)); /* Process elapsed ticks since last call. 
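 * (Illustrative numbers, not from the source: in periodic mode with a
 * 10 ms period, if the timer upcall arrives 35 ms after state->next, the
 * loop below fires the event-timer callback four times and leaves
 * state->next 5 ms in the future before re-arming the hypervisor timer.)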
*/ processed_system_time = now; if (state->mode == MODE_PERIODIC) { while (now >= state->next) { state->next += state->period; if (xen_et.et_active) xen_et.et_event_cb(&xen_et, xen_et.et_arg); } HYPERVISOR_set_timer_op(state->next + 50000); } else if (state->mode == MODE_ONESHOT) { if (xen_et.et_active) xen_et.et_event_cb(&xen_et, xen_et.et_arg); } /* * Take synchronised time from Xen once a minute if we're not * synchronised ourselves, and we haven't chosen to keep an independent * time base. */ if (shadow_tv_version != HYPERVISOR_shared_info->wc_version && !independent_wallclock) { printf("[XEN] hypervisor wallclock nudged; nudging TOD.\n"); update_wallclock(); add_uptime_to_wallclock(); tc_setclock(&shadow_tv); } /* XXX TODO */ return (FILTER_HANDLED); } static uint32_t getit(void) { struct shadow_time_info *shadow; uint64_t time; uint32_t local_time_version; + struct pcpu *pc; - shadow = &per_cpu(shadow_time, smp_processor_id()); + pc = pcpu_find(smp_processor_id()); + shadow = &pc->pc_shadow_time; do { local_time_version = shadow->version; barrier(); time = shadow->system_timestamp + get_nsec_offset(shadow); if (!time_values_up_to_date(smp_processor_id())) __get_time_values_from_xen(/*cpu */); barrier(); } while (local_time_version != shadow->version); return (time); } /* * XXX: timer needs more SMP work. */ void i8254_init(void) { RTC_LOCK_INIT; } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, ticks_left; uint32_t tick, prev_tick; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). * * However, if ddb is active then use a fake counter since reading * the i8254 counter involves acquiring a lock. ddb must not go * locking for many reasons, but it calls here for at least atkbd * input. */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = tick - prev_tick; prev_tick = tick; if (delta < 0) { /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ /* delta += timer0_max_count; ??? 
*/ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } /* * Restore all the timers non-atomically (XXX: should be atomically). * * This function is called from pmtimer_resume() to restore all the timers. * This should not be necessary, but there are broken laptops that do not * restore all the timers on resume. */ void timer_restore(void) { struct xen_et_state *state = DPCPU_PTR(et_state); + struct pcpu *pc; /* Get timebases for new environment. */ __get_time_values_from_xen(); /* Reset our own concept of passage of system time. */ - processed_system_time = per_cpu(shadow_time, 0).system_timestamp; + pc = pcpu_find(0); + processed_system_time = pc->pc_shadow_time.system_timestamp; state->next = processed_system_time; } void startrtclock() { unsigned long long alarm; uint64_t __cpu_khz; uint32_t cpu_khz; struct vcpu_time_info *info; + struct pcpu *pc; + + pc = pcpu_find(0); /* initialize xen values */ __get_time_values_from_xen(); - processed_system_time = per_cpu(shadow_time, 0).system_timestamp; + processed_system_time = pc->pc_shadow_time.system_timestamp; __cpu_khz = 1000000ULL << 32; info = &HYPERVISOR_shared_info->vcpu_info[0].time; (void)do_div(__cpu_khz, info->tsc_to_system_mul); if ( info->tsc_shift < 0 ) cpu_khz = __cpu_khz << -info->tsc_shift; else cpu_khz = __cpu_khz >> info->tsc_shift; printf("Xen reported: %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = (2^32 * 1 / (clocks/us)) */ set_cyc2ns_scale(cpu_khz/1000); tsc_freq = cpu_khz * 1000; timer_freq = 1000000000LL; xen_timecounter.tc_frequency = timer_freq >> 9; tc_init(&xen_timecounter); rdtscll(alarm); } /* * RTC support routines */ static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } #ifdef XEN_PRIVILEGED_GUEST /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ static void domu_inittodr(time_t base) { unsigned long sec; int s, y; struct timespec ts; update_wallclock(); add_uptime_to_wallclock(); RTC_LOCK; if (base) { ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); } sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - shadow_tv.tv_sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ tc_setclock(&shadow_tv); } RTC_UNLOCK; } /* * Write system time back to RTC. */ static void domu_resettodr(void) { unsigned long tm; int s; dom0_op_t op; struct shadow_time_info *shadow; + struct pcpu *pc; - shadow = &per_cpu(shadow_time, smp_processor_id()); + pc = pcpu_find(smp_processor_id()); + shadow = &pc->pc_shadow_time; if (xen_disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); if ((xen_start_info->flags & SIF_INITDOMAIN) && !independent_wallclock) { op.cmd = DOM0_SETTIME; op.u.settime.secs = tm; op.u.settime.nsecs = 0; op.u.settime.system_time = shadow->system_timestamp; HYPERVISOR_dom0_op(&op); update_wallclock(); add_uptime_to_wallclock(); } else if (independent_wallclock) { /* notyet */ ; } } /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. 
*/ void inittodr(time_t base) { unsigned long sec, days; int year, month; int y, m, s; struct timespec ts; if (!(xen_start_info->flags & SIF_INITDOMAIN)) { domu_inittodr(base); return; } if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; if (!(xen_start_info->flags & SIF_INITDOMAIN)) { domu_resettodr(); return; } if (xen_disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm + 4) % 7 + 1); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. 
*/ writertc(RTC_STATUSB, RTCSB_24HR); rtcin(RTC_INTR); } #endif static int xen_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct xen_et_state *state = DPCPU_PTR(et_state); struct shadow_time_info *shadow; int64_t fperiod; + struct pcpu *pc; __get_time_values_from_xen(); if (period != 0) { state->mode = MODE_PERIODIC; state->period = (1000000000LLU * period) >> 32; } else { state->mode = MODE_ONESHOT; state->period = 0; } if (first != 0) fperiod = (1000000000LLU * first) >> 32; else fperiod = state->period; - shadow = &per_cpu(shadow_time, smp_processor_id()); + pc = pcpu_find(smp_processor_id()); + shadow = &pc->pc_shadow_time; state->next = shadow->system_timestamp + get_nsec_offset(shadow); state->next += fperiod; HYPERVISOR_set_timer_op(state->next + 50000); return (0); } static int xen_et_stop(struct eventtimer *et) { struct xen_et_state *state = DPCPU_PTR(et_state); state->mode = MODE_STOP; HYPERVISOR_set_timer_op(0); return (0); } /* * Start clocks running. */ void cpu_initclocks(void) { - unsigned int time_irq; + xen_intr_handle_t time_irq; int error; HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 0, NULL); - error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "cpu0:timer", + error = xen_intr_bind_virq(root_bus, VIRQ_TIMER, 0, clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq); if (error) panic("failed to register clock interrupt\n"); /* should fast clock be enabled ? */ bzero(&xen_et, sizeof(xen_et)); xen_et.et_name = "ixen"; xen_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; xen_et.et_quality = 600; xen_et.et_frequency = 1000000000; xen_et.et_min_period = 0x00400000LL; xen_et.et_max_period = (0xfffffffeLLU << 32) / xen_et.et_frequency; xen_et.et_start = xen_et_start; xen_et.et_stop = xen_et_stop; xen_et.et_priv = NULL; et_register(&xen_et); cpu_initclocks_bsp(); } int ap_cpu_initclocks(int cpu) { - char buf[MAXCOMLEN + 1]; - unsigned int time_irq; + xen_intr_handle_t time_irq; int error; HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL); - snprintf(buf, sizeof(buf), "cpu%d:timer", cpu); - error = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, buf, + error = xen_intr_bind_virq(root_bus, VIRQ_TIMER, cpu, clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq); if (error) panic("failed to register clock interrupt\n"); return (0); } static uint32_t xen_get_timecount(struct timecounter *tc) { uint64_t clk; struct shadow_time_info *shadow; - shadow = &per_cpu(shadow_time, smp_processor_id()); + struct pcpu *pc; + + pc = pcpu_find(smp_processor_id()); + shadow = &pc->pc_shadow_time; __get_time_values_from_xen(); clk = shadow->system_timestamp + get_nsec_offset(shadow); return (uint32_t)(clk >> 9); } /* Return system time offset by ticks */ uint64_t get_system_time(int ticks) { return processed_system_time + (ticks * NS_PER_TICK); } -void -idle_block(void) -{ - - HYPERVISOR_sched_op(SCHEDOP_block, 0); -} - int timer_spkr_acquire(void) { return (0); } int timer_spkr_release(void) { return (0); } void timer_spkr_setfreq(int freq) { } diff --git a/sys/i386/xen/exception.s b/sys/i386/xen/exception.s index e965ffd026d3..95f1c0e6703f 100644 --- a/sys/i386/xen/exception.s +++ b/sys/i386/xen/exception.s @@ -1,494 +1,494 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_apic.h" #include "opt_npx.h" #include #include #include #include "assym.s" #define SEL_RPL_MASK 0x0002 #define __HYPERVISOR_iret 23 /* Offsets into shared_info_t. */ #define evtchn_upcall_pending /* 0 */ #define evtchn_upcall_mask 1 #define sizeof_vcpu_shift 6 #ifdef SMP #define GET_VCPU_INFO(reg) movl PCPU(CPUID),reg ; \ shl $sizeof_vcpu_shift,reg ; \ addl HYPERVISOR_shared_info,reg #else #define GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg #endif #define __DISABLE_INTERRUPTS(reg) movb $1,evtchn_upcall_mask(reg) #define __ENABLE_INTERRUPTS(reg) movb $0,evtchn_upcall_mask(reg) #define DISABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \ __DISABLE_INTERRUPTS(reg) #define ENABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \ __ENABLE_INTERRUPTS(reg) #define __TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) #define POPA \ popl %edi; \ popl %esi; \ popl %ebp; \ popl %ebx; \ popl %ebx; \ popl %edx; \ popl %ecx; \ popl %eax; .text /*****************************************************************************/ /* Trap handling */ /*****************************************************************************/ /* * Trap and fault vector routines. * * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on * the stack that mostly looks like an interrupt, but does not disable * interrupts. A few of the traps we are use are interrupt gates, * SDT_SYS386IGT, which are nearly the same thing except interrupts are * disabled on entry. * * The cpu will push a certain amount of state onto the kernel stack for * the current process. The amount of state depends on the type of trap * and whether the trap crossed rings or not. See i386/include/frame.h. * At the very least the current EFLAGS (status register, which includes * the interrupt disable state prior to the trap), the code segment register, * and the return instruction pointer are pushed by the cpu. The cpu * will also push an 'error' code for certain traps. We push a dummy * error code for those traps where the cpu doesn't in order to maintain * a consistent frame. We also push a contrived 'trap number'. * * The cpu does not push the general registers, we must do that, and we * must restore them prior to calling 'iret'. 
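Put together, the CPU pushes and the software pushes described here build one fixed record per trap; a simplified sketch of its shape, lowest address (most recently pushed) first. The authoritative layout is struct trapframe in i386/include/frame.h, so the struct below is illustrative only:

	/* Illustrative layout; see <machine/frame.h> for the real definition. */
	struct trapframe_sketch {
		int	tf_fs, tf_es, tf_ds;		/* pushed by the stubs      */
		int	tf_edi, tf_esi, tf_ebp, tf_isp;	/* pushal                   */
		int	tf_ebx, tf_edx, tf_ecx, tf_eax;
		int	tf_trapno;			/* contrived trap number    */
		int	tf_err;				/* real or dummy error code */
		int	tf_eip, tf_cs, tf_eflags;	/* pushed by the cpu        */
		int	tf_esp, tf_ss;			/* only on a ring crossing  */
	};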
The cpu adjusts the %cs and * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we * must load them with appropriate values for supervisor mode operation. */ MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) #define TRAP(a) pushl $(a) ; jmp alltraps IDTVEC(div) pushl $0; TRAP(T_DIVIDE) IDTVEC(dbg) pushl $0; TRAP(T_TRCTRAP) IDTVEC(nmi) pushl $0; TRAP(T_NMI) IDTVEC(bpt) pushl $0; TRAP(T_BPTFLT) IDTVEC(ofl) pushl $0; TRAP(T_OFLOW) IDTVEC(bnd) pushl $0; TRAP(T_BOUND) IDTVEC(ill) pushl $0; TRAP(T_PRIVINFLT) IDTVEC(dna) pushl $0; TRAP(T_DNA) IDTVEC(fpusegm) pushl $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) IDTVEC(missing) TRAP(T_SEGNPFLT) IDTVEC(stk) TRAP(T_STKFLT) IDTVEC(prot) TRAP(T_PROTFLT) IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(mchk) pushl $0; TRAP(T_MCHK) IDTVEC(rsvd) pushl $0; TRAP(T_RESERVED) IDTVEC(fpu) pushl $0; TRAP(T_ARITHTRAP) IDTVEC(align) TRAP(T_ALIGNFLT) IDTVEC(xmm) pushl $0; TRAP(T_XMMFLT) IDTVEC(hypervisor_callback) pushl $0; pushl $0; pushal pushl %ds pushl %es pushl %fs upcall_with_regs_pushed: SET_KERNEL_SREGS FAKE_MCOUNT(TF_EIP(%esp)) call_evtchn_upcall: movl TF_EIP(%esp),%eax cmpl $scrit,%eax jb 10f cmpl $ecrit,%eax jb critical_region_fixup 10: pushl %esp - call evtchn_do_upcall + call xen_intr_handle_upcall addl $4,%esp /* * Return via doreti to handle ASTs. */ MEXITCOUNT jmp doreti hypervisor_callback_pending: DISABLE_INTERRUPTS(%esi) /* cli */ jmp 10b /* * alltraps entry point. Interrupts are enabled if this was a trap * gate (TGT), else disabled if this was an interrupt gate (IGT). * Note that int0x80_syscall is a trap gate. Only page faults * use an interrupt gate. */ SUPERALIGN_TEXT .globl alltraps .type alltraps,@function alltraps: pushal pushl %ds pushl %es pushl %fs alltraps_with_regs_pushed: SET_KERNEL_SREGS FAKE_MCOUNT(TF_EIP(%esp)) calltrap: push %esp call trap add $4, %esp /* * Return via doreti to handle ASTs. */ MEXITCOUNT jmp doreti /* * SYSCALL CALL GATE (old entry point for a.out binaries) * * The intersegment call has been set up to specify one dummy parameter. * * This leaves a place to put eflags so that the call frame can be * converted to a trap frame. Note that the eflags is (semi-)bogusly * pushed into (what will be) tf_err and then copied later into the * final spot. It has to be done this way because esp can't be just * temporarily altered for the pushfl - an interrupt might come in * and clobber the saved cs/eip. */ SUPERALIGN_TEXT IDTVEC(lcall_syscall) pushfl /* save eflags */ popl 8(%esp) /* shuffle into tf_eflags */ pushl $7 /* sizeof "lcall 7,0" */ subl $4,%esp /* skip over tf_trapno */ pushal pushl %ds pushl %es pushl %fs SET_KERNEL_SREGS FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call syscall add $4, %esp MEXITCOUNT jmp doreti /* * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) * * Even though the name says 'int0x80', this is actually a TGT (trap gate) * rather then an IGT (interrupt gate). Thus interrupts are enabled on * entry just as they are for a normal syscall. */ SUPERALIGN_TEXT IDTVEC(int0x80_syscall) pushl $2 /* sizeof "int 0x80" */ pushl $0xBEEF /* for debug */ pushal pushl %ds pushl %es pushl %fs SET_KERNEL_SREGS FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call syscall add $4, %esp MEXITCOUNT jmp doreti ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ call fork_exit addl $12,%esp /* cut from syscall */ /* * Return via doreti to handle ASTs. 
*/ MEXITCOUNT jmp doreti /* * To efficiently implement classification of trap and interrupt handlers * for profiling, there must be only trap handlers between the labels btrap * and bintr, and only interrupt handlers between the labels bintr and * eintr. This is implemented (partly) by including files that contain * some of the handlers. Before including the files, set up a normal asm * environment so that the included files doen't need to know that they are * included. */ .data .p2align 4 .text SUPERALIGN_TEXT MCOUNT_LABEL(bintr) #ifdef DEV_APIC .data .p2align 4 .text SUPERALIGN_TEXT #include #endif .data .p2align 4 .text SUPERALIGN_TEXT #include .text MCOUNT_LABEL(eintr) /* * void doreti(struct trapframe) * * Handle return from interrupts, traps and syscalls. */ .text SUPERALIGN_TEXT .type doreti,@function doreti: FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */ doreti_next: #ifdef notyet /* * Check if ASTs can be handled now. PSL_VM must be checked first * since segment registers only have an RPL in non-VM86 mode. */ testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */ jz doreti_notvm86 movl PCPU(CURPCB),%ecx testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */ jz doreti_ast /* can handle ASTS now if not */ jmp doreti_exit doreti_notvm86: #endif testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ jz doreti_exit /* can't handle ASTs now if not */ doreti_ast: /* * Check for ASTs atomically with returning. Disabling CPU * interrupts provides sufficient locking even in the SMP case, * since we will be informed of any new ASTs by an IPI. */ DISABLE_INTERRUPTS(%esi) /* cli */ movl PCPU(CURTHREAD),%eax testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) je doreti_exit ENABLE_INTERRUPTS(%esi) /* sti */ pushl %esp /* pass a pointer to the trapframe */ call ast add $4,%esp jmp doreti_ast /* * doreti_exit: pop registers, iret. * * The segment register pop is a special case, since it may * fault if (for example) a sigreturn specifies bad segment * registers. The fault is handled in trap.c. */ doreti_exit: ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti) .globl scrit scrit: __TEST_PENDING(%esi) jnz hypervisor_callback_pending /* More to go */ MEXITCOUNT .globl doreti_popl_fs doreti_popl_fs: popl %fs .globl doreti_popl_es doreti_popl_es: popl %es .globl doreti_popl_ds doreti_popl_ds: popl %ds /* * This is important: as nothing is atomic over here (we can get * interrupted any time), we use the critical_region_fixup() in * order to figure out where out stack is. Therefore, do NOT use * 'popal' here without fixing up the table! */ POPA addl $8,%esp .globl doreti_iret doreti_iret: jmp hypercall_page + (__HYPERVISOR_iret * 32) .globl ecrit ecrit: /* * doreti_iret_fault and friends. Alternative return code for * the case where we get a fault in the doreti_exit code * above. trap() (i386/i386/trap.c) catches this specific * case, sends the process a signal and continues in the * corresponding place in the code below. */ ALIGN_TEXT .globl doreti_iret_fault doreti_iret_fault: subl $8,%esp pushal pushl %ds .globl doreti_popl_ds_fault doreti_popl_ds_fault: pushl %es .globl doreti_popl_es_fault doreti_popl_es_fault: pushl %fs .globl doreti_popl_fs_fault doreti_popl_fs_fault: movl $0,TF_ERR(%esp) /* XXX should be the error code */ movl $T_PROTFLT,TF_TRAPNO(%esp) jmp alltraps_with_regs_pushed /* # [How we do the fixup]. We want to merge the current stack frame with the # just-interrupted frame. 
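In C terms the fixup below is one backwards copy: the n register slots that doreti_exit had already popped still exist, freshly re-saved, at the bottom of the new upcall frame, so they are copied up to sit directly beneath the unpopped remainder of the interrupted frame and %esp is pointed at the result, leaving a single complete frame to be popped from the start. A sketch, assuming n comes from critical_fixup_table and 0x40 is the size of a fully pushed register area:

	#include <stdint.h>
	#include <string.h>

	/* Illustrative C equivalent of critical_region_fixup; 'sp' is the
	 * stack pointer at upcall time, 'n' the table entry (bytes already
	 * popped from the interrupted frame). */
	static uintptr_t
	merge_frames_sketch(uintptr_t sp, size_t n)
	{
		char *dst = (char *)(sp + 0x40 - n);

		memmove(dst, (const void *)sp, n);	/* slide newest n bytes up */
		return ((uintptr_t)dst);		/* merged stack pointer    */
	}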
How we do this depends on where in the critical # region the interrupted handler was executing, and so how many saved # registers are in each frame. We do this quickly using the lookup table # 'critical_fixup_table'. For each byte offset in the critical region, it # provides the number of bytes which have already been popped from the # interrupted stack frame. */ .globl critical_region_fixup critical_region_fixup: addl $critical_fixup_table-scrit,%eax movzbl (%eax),%eax # %eax contains num bytes popped movl %esp,%esi add %eax,%esi # %esi points at end of src region movl %esp,%edi add $0x40,%edi # %edi points at end of dst region movl %eax,%ecx shr $2,%ecx # convert bytes to words je 16f # skip loop if nothing to copy 15: subl $4,%esi # pre-decrementing copy loop subl $4,%edi movl (%esi),%eax movl %eax,(%edi) loop 15b 16: movl %edi,%esp # final %edi is top of merged stack jmp hypervisor_callback_pending critical_fixup_table: .byte 0x0,0x0,0x0 #testb $0x1,(%esi) .byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea .byte 0x0,0x0 #pop %fs .byte 0x04 #pop %es .byte 0x08 #pop %ds .byte 0x0c #pop %edi .byte 0x10 #pop %esi .byte 0x14 #pop %ebp .byte 0x18 #pop %ebx .byte 0x1c #pop %ebx .byte 0x20 #pop %edx .byte 0x24 #pop %ecx .byte 0x28 #pop %eax .byte 0x2c,0x2c,0x2c #add $0x8,%esp #if 0 .byte 0x34 #iret #endif .byte 0x34,0x34,0x34,0x34,0x34 #HYPERVISOR_iret /* # Hypervisor uses this for application faults while it executes.*/ ENTRY(failsafe_callback) pushal call xen_failsafe_handler /*# call install_safe_pf_handler */ movl 28(%esp),%ebx 1: movl %ebx,%ds movl 32(%esp),%ebx 2: movl %ebx,%es movl 36(%esp),%ebx 3: movl %ebx,%fs movl 40(%esp),%ebx 4: movl %ebx,%gs /*# call install_normal_pf_handler */ popal addl $12,%esp iret diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 05531cbdca02..407731a7eada 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -1,1250 +1,1269 @@ /*- * Copyright (c) 1996, by Steve Passe * Copyright (c) 2008, by Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_mp_watchdog.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" #if !defined(lint) #if !defined(SMP) #error How did you get here? 
#endif #ifndef DEV_APIC #error The apic device is required for SMP, add "device apic" to your config file. #endif #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) #error SMP not supported with CPU_DISABLE_CMPXCHG #endif #endif /* not lint */ #include #include #include #include /* cngetc() */ #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern struct pcpu __pcpu[]; static int bootAP; static union descriptor *bootAPgdt; -static char resched_name[NR_CPUS][15]; -static char callfunc_name[NR_CPUS][15]; - /* Free these after use */ void *bootstacks[MAXCPU]; struct pcb stoppcbs[MAXCPU]; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; typedef void call_data_func_t(uintptr_t , uintptr_t); static u_int logical_cpus; static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready = 0; /* * Store data from cpu_add() until later in the boot when we actually setup * the APs. */ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; } static cpu_info[MAX_APIC_ID + 1]; int cpu_apic_ids[MAXCPU]; int apic_cpuids[MAX_APIC_ID + 1]; /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; static int cpu_logical; static int cpu_cores; static void assign_cpu_ids(void); static void set_interrupt_apic_ids(void); int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; static cpuset_t hyperthreading_cpus_mask; extern void Xhypervisor_callback(void); extern void failsafe_callback(void); extern void pmap_lazyfix_action(void); +extern int ap_cpu_initclocks(int cpu); + +DPCPU_DEFINE(xen_intr_handle_t, ipi_port[NR_IPIS]); +DPCPU_DEFINE(struct vcpu_info *, vcpu_info); struct cpu_group * cpu_topo(void) { if (cpu_cores == 0) cpu_cores = 1; if (cpu_logical == 0) cpu_logical = 1; if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { printf("WARNING: Non-uniform processors.\n"); printf("WARNING: Using suboptimal topology.\n"); return (smp_topo_none()); } /* * No multi-core or hyper-threaded. */ if (cpu_logical * cpu_cores == 1) return (smp_topo_none()); /* * Only HTT no multi-core. */ if (cpu_logical > 1 && cpu_cores == 1) return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); /* * Only multi-core no HTT. */ if (cpu_cores > 1 && cpu_logical == 1) return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); /* * Both HTT and multi-core. */ return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); } /* * Calculate usable address in base memory for AP trampoline code. 
*/ u_int mp_bootaddress(u_int basemem) { return (basemem); } void cpu_add(u_int apic_id, char boot_cpu) { if (apic_id > MAX_APIC_ID) { panic("SMP: APIC ID %d too high", apic_id); return; } KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", apic_id)); cpu_info[apic_id].cpu_present = 1; if (boot_cpu) { KASSERT(boot_cpu_id == -1, ("CPU %d claims to be BSP, but CPU %d already is", apic_id, boot_cpu_id)); boot_cpu_id = apic_id; cpu_info[apic_id].cpu_bsp = 1; } if (mp_ncpus < MAXCPU) mp_ncpus++; if (bootverbose) printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : "AP"); } void cpu_mp_setmaxid(void) { mp_maxid = MAXCPU - 1; } int cpu_mp_probe(void) { /* * Always record BSP in CPU map so that the mbuf init code works * correctly. */ CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup * the variables to represent a system with a single CPU * with an id of 0. */ mp_ncpus = 1; return (0); } /* At least one CPU was found. */ if (mp_ncpus == 1) { /* * One CPU was found, so this must be a UP system with * an I/O APIC. */ return (0); } /* At least two CPUs were found. */ return (1); } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); cpu_apic_ids[0] = boot_cpu_id; apic_cpuids[boot_cpu_id] = 0; assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); /* Setup the initial logical CPUs info. */ logical_cpus = 0; CPU_ZERO(&logical_cpus_mask); if (cpu_feature & CPUID_HTT) logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; set_interrupt_apic_ids(); } static void iv_rendezvous(uintptr_t a, uintptr_t b) { smp_rendezvous_action(); } static void iv_invltlb(uintptr_t a, uintptr_t b) { xen_tlb_flush(); } static void iv_invlpg(uintptr_t a, uintptr_t b) { xen_invlpg(a); } static void iv_invlrng(uintptr_t a, uintptr_t b) { vm_offset_t start = (vm_offset_t)a; vm_offset_t end = (vm_offset_t)b; while (start < end) { xen_invlpg(start); start += PAGE_SIZE; } } static void iv_invlcache(uintptr_t a, uintptr_t b) { wbinvd(); atomic_add_int(&smp_tlb_wait, 1); } static void iv_lazypmap(uintptr_t a, uintptr_t b) { pmap_lazyfix_action(); atomic_add_int(&smp_tlb_wait, 1); } /* * These start from "IPI offset" APIC_IPI_INTS */ static call_data_func_t *ipi_vectors[6] = { iv_rendezvous, iv_invltlb, iv_invlpg, iv_invlrng, iv_invlcache, iv_lazypmap, }; /* * Reschedule call back. Nothing to do, * all the work is done automatically when * we return from the interrupt. 
*/ static int smp_reschedule_interrupt(void *unused) { int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); if (ipi_bitmap & (1 << IPI_PREEMPT)) { #ifdef COUNT_IPIS (*ipi_preempt_counts[cpu])++; #endif sched_preempt(curthread); } if (ipi_bitmap & (1 << IPI_AST)) { #ifdef COUNT_IPIS (*ipi_ast_counts[cpu])++; #endif /* Nothing to do for AST */ } return (FILTER_HANDLED); } struct _call_data { uint16_t func_id; uint16_t wait; uintptr_t arg1; uintptr_t arg2; atomic_t started; atomic_t finished; }; static struct _call_data *call_data; static int smp_call_function_interrupt(void *unused) { call_data_func_t *func; uintptr_t arg1 = call_data->arg1; uintptr_t arg2 = call_data->arg2; int wait = call_data->wait; atomic_t *started = &call_data->started; atomic_t *finished = &call_data->finished; /* We only handle function IPIs, not bitmap IPIs */ if (call_data->func_id < APIC_IPI_INTS || call_data->func_id > IPI_BITMAP_VECTOR) panic("invalid function id %u", call_data->func_id); func = ipi_vectors[call_data->func_id - APIC_IPI_INTS]; /* * Notify initiating CPU that I've grabbed the data and am * about to execute the function */ mb(); atomic_inc(started); /* * At this point the info structure may be out of scope unless wait==1 */ (*func)(arg1, arg2); if (wait) { mb(); atomic_inc(finished); } atomic_add_int(&smp_tlb_wait, 1); return (FILTER_HANDLED); } /* * Print various information about the SMP system hardware and setup. */ void cpu_mp_announce(void) { int i, x; /* List CPUs */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) continue; if (cpu_info[x].cpu_disabled) printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); else { KASSERT(i < mp_ncpus, ("mp_ncpus and actual cpus are out of whack")); printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); } } } static int -xen_smp_intr_init(unsigned int cpu) +xen_smp_cpu_init(unsigned int cpu) { int rc; - unsigned int irq; - - per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; + xen_intr_handle_t irq_handle; - sprintf(resched_name[cpu], "resched%u", cpu); - rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, - cpu, - resched_name[cpu], - smp_reschedule_interrupt, - INTR_TYPE_TTY, &irq); + DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], NULL); + DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], NULL); - printf("[XEN] IPI cpu=%d irq=%d vector=RESCHEDULE_VECTOR (%d)\n", - cpu, irq, RESCHEDULE_VECTOR); - - per_cpu(resched_irq, cpu) = irq; - - sprintf(callfunc_name[cpu], "callfunc%u", cpu); - rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, - cpu, - callfunc_name[cpu], - smp_call_function_interrupt, - INTR_TYPE_TTY, &irq); + /* + * The PCPU variable pc_device is not initialized on i386 PV, + * so we have to use the root_bus device in order to setup + * the IPIs. 
+ */ + rc = xen_intr_bind_ipi(root_bus, RESCHEDULE_VECTOR, + cpu, smp_reschedule_interrupt, INTR_TYPE_TTY, &irq_handle); if (rc < 0) goto fail; - per_cpu(callfunc_irq, cpu) = irq; + xen_intr_describe(irq_handle, "resched%u", cpu); + DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], irq_handle); - printf("[XEN] IPI cpu=%d irq=%d vector=CALL_FUNCTION_VECTOR (%d)\n", - cpu, irq, CALL_FUNCTION_VECTOR); + printf("[XEN] IPI cpu=%d port=%d vector=RESCHEDULE_VECTOR (%d)\n", + cpu, xen_intr_port(irq_handle), RESCHEDULE_VECTOR); + + rc = xen_intr_bind_ipi(root_bus, CALL_FUNCTION_VECTOR, + cpu, smp_call_function_interrupt, INTR_TYPE_TTY, &irq_handle); + if (rc < 0) + goto fail; + xen_intr_describe(irq_handle, "callfunc%u", cpu); + DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], irq_handle); + + printf("[XEN] IPI cpu=%d port=%d vector=CALL_FUNCTION_VECTOR (%d)\n", + cpu, xen_intr_port(irq_handle), CALL_FUNCTION_VECTOR); - if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0)) goto fail; return 0; fail: - if (per_cpu(resched_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(resched_irq, cpu)); - if (per_cpu(callfunc_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(callfunc_irq, cpu)); + xen_intr_unbind(DPCPU_ID_GET(cpu, ipi_port[RESCHEDULE_VECTOR])); + DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], NULL); + xen_intr_unbind(DPCPU_ID_GET(cpu, ipi_port[CALL_FUNCTION_VECTOR])); + DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], NULL); return rc; } static void xen_smp_intr_init_cpus(void *unused) { int i; for (i = 0; i < mp_ncpus; i++) - xen_smp_intr_init(i); + xen_smp_cpu_init(i); +} + +static void +xen_smp_intr_setup_cpus(void *unused) +{ + int i; + + for (i = 0; i < mp_ncpus; i++) + DPCPU_ID_SET(i, vcpu_info, + &HYPERVISOR_shared_info->vcpu_info[i]); } #define MTOPSIZE (1<<(14 + PAGE_SHIFT)) /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { vm_offset_t addr; u_int cpuid; int gsel_tss; /* bootAP is set in start_ap() to our ID. */ PCPU_SET(currentldt, _default_ldt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); #if 0 gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; #endif PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); #if 0 PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); #endif PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ /* * signal our startup to the BSP. */ mp_naps++; /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); /* BSP may have changed PTD while we were waiting */ invltlb(); for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) invlpg(addr); /* set up FPU state on the AP */ npxinit(); #if 0 /* set up SSE registers */ enable_sse(); #endif #if 0 && defined(PAE) /* Enable the PTE no-execute bit. */ if ((amd_feature & AMDID_NX) != 0) { uint64_t msr; msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); } #endif #if 0 /* A quick check from sanity claus */ if (PCPU_GET(apic_id) != lapic_id()) { printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); panic("cpuid mismatch! boom!!"); } #endif /* Initialize curthread. 
*/ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); mtx_lock_spin(&ap_boot_mtx); #if 0 /* Init local apic for irq's */ lapic_setup(1); #endif smp_cpus++; cpuid = PCPU_GET(cpuid); CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); printf("SMP: AP CPU #%d Launched!\n", cpuid); /* Determine if we are a logical CPU. */ if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) CPU_SET(cpuid, &logical_cpus_mask); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) CPU_SET(cpuid, &hyperthreading_cpus_mask); #if 0 if (bootverbose) lapic_dump("AP"); #endif if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); smp_active = 1; /* historic */ } mtx_unlock_spin(&ap_boot_mtx); /* wait until all the AP's are up */ while (smp_started == 0) ia32_pause(); PCPU_SET(curthread, PCPU_GET(idlethread)); /* Start per-CPU event timers. */ cpu_initclocks_ap(); /* enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } /******************************************************************* * local functions and data */ /* * We tell the I/O APIC code about all the CPUs we want to receive * interrupts. If we don't want certain CPUs to receive IRQs we * can simply not tell the I/O APIC code about them in this function. * We also do not tell it about the BSP since it tells itself about * the BSP internally to work with UP kernels and on UP machines. */ static void set_interrupt_apic_ids(void) { u_int i, apic_id; for (i = 0; i < MAXCPU; i++) { apic_id = cpu_apic_ids[i]; if (apic_id == -1) continue; if (cpu_info[apic_id].cpu_bsp) continue; if (cpu_info[apic_id].cpu_disabled) continue; /* Don't let hyperthreads service interrupts. */ if (hyperthreading_cpus > 1 && apic_id % hyperthreading_cpus != 0) continue; intr_add_cpu(i); } } /* * Assign logical CPU IDs to local APICs. */ static void assign_cpu_ids(void) { u_int i; /* Check for explicitly disabled CPUs. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) continue; /* Don't use this CPU if it has been disabled by a tunable. */ if (resource_disabled("lapic", i)) { cpu_info[i].cpu_disabled = 1; continue; } } /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 has already been assigned to the BSP, * so we only have to assign IDs for APs. 
*/ mp_ncpus = 1; for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || cpu_info[i].cpu_disabled) continue; if (mp_ncpus < MAXCPU) { cpu_apic_ids[mp_ncpus] = i; apic_cpuids[i] = mp_ncpus; mp_ncpus++; } else cpu_info[i].cpu_disabled = 1; } KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } /* * start each AP in our list */ /* Lowest 1MB is already mapped: don't touch*/ #define TMPMAP_START 1 int start_all_aps(void) { int x,apic_id, cpu; struct pcpu *pc; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; bootAP = cpu; bootAPgdt = gdt + (512*cpu); /* Get per-cpu data */ pc = &__pcpu[bootAP]; pcpu_init(pc, bootAP, sizeof(struct pcpu)); dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO), bootAP); pc->pc_apic_id = cpu_apic_ids[bootAP]; pc->pc_prvspace = pc; pc->pc_curthread = 0; gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW); bzero(bootAPgdt, PAGE_SIZE); for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); #ifdef notyet if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); #ifdef CONFIG_ACPI if (acpiid != 0xff) x86_acpiid_to_apicid[acpiid] = apicid; #endif } #endif /* attempt to start the Application Processor */ if (!start_ap(cpu)) { printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); /* better panic as the AP may be running loose */ printf("panic y/n? [y] "); if (cngetc() != 'n') panic("bye-bye"); } CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); /* number of APs actually started */ return mp_naps; } extern uint8_t *pcpu_boot_stack; extern trap_info_t trap_table[]; static void smp_trap_init(trap_info_t *trap_ctxt) { const trap_info_t *t = trap_table; for (t = trap_table; t->address; t++) { trap_ctxt[t->vector].flags = t->flags; trap_ctxt[t->vector].cs = t->cs; trap_ctxt[t->vector].address = t->address; } } extern struct rwlock pvh_global_lock; extern int nkpt; static void cpu_initialize_context(unsigned int cpu) { /* vcpu_guest_context_t is too large to allocate on the stack. 
* Hence we allocate statically and protect it with a lock */ vm_page_t m[NPGPTD + 2]; static vcpu_guest_context_t ctxt; vm_offset_t boot_stack; vm_offset_t newPTD; vm_paddr_t ma[NPGPTD]; int i; /* * Page 0,[0-3] PTD * Page 1, [4] boot stack * Page [5] PDPT * */ for (i = 0; i < NPGPTD + 2; i++) { m[i] = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); pmap_zero_page(m[i]); } boot_stack = kva_alloc(PAGE_SIZE); newPTD = kva_alloc(NPGPTD * PAGE_SIZE); ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V; #ifdef PAE pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); for (i = 0; i < NPGPTD; i++) { ((vm_paddr_t *)boot_stack)[i] = ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V; } #endif /* * Copy cpu0 IdlePTD to new IdlePTD - copying only * kernel mappings */ pmap_qenter(newPTD, m, 4); memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), nkpt*sizeof(vm_paddr_t)); pmap_qremove(newPTD, 4); kva_free(newPTD, 4 * PAGE_SIZE); /* * map actual idle stack to boot_stack */ pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1])); rw_wlock(&pvh_global_lock); for (i = 0; i < 4; i++) { int pdir = (PTDPTDI + i) / NPDEPG; int curoffset = (PTDPTDI + i) % NPDEPG; xen_queue_pt_update((vm_paddr_t) ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), ma[i]); } PT_UPDATES_FLUSH(); rw_wunlock(&pvh_global_lock); memset(&ctxt, 0, sizeof(ctxt)); ctxt.flags = VGCF_IN_KERNEL; ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.eip = (unsigned long)init_secondary; ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); smp_trap_init(ctxt.trap_ctxt); ctxt.ldt_ents = 0; ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); ctxt.gdt_ents = 512; #ifdef __i386__ ctxt.user_regs.esp = boot_stack + PAGE_SIZE; ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.kernel_sp = boot_stack + PAGE_SIZE; ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]); #else /* __x86_64__ */ ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.kernel_sp = idle->thread.rsp0; ctxt.event_callback_eip = (unsigned long)hypervisor_callback; ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; ctxt.syscall_callback_eip = (unsigned long)system_call; ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); #endif printf("gdtpfn=%lx pdptpfn=%lx\n", ctxt.gdt_frames[0], ctxt.ctrlreg[3] >> PAGE_SHIFT); PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); DELAY(3000); PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. 
*/ int cpus; static int start_ap(int apic_id) { int ms; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; cpu_initialize_context(apic_id); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } +static void +ipi_pcpu(int cpu, u_int ipi) +{ + KASSERT((ipi <= NR_IPIS), ("invalid IPI")); + xen_intr_signal(DPCPU_ID_GET(cpu, ipi_port[ipi])); +} + /* * send an IPI to a specific CPU. */ static void ipi_send_cpu(int cpu, u_int ipi) { u_int bitmap, old_pending, new_pending; if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; do { old_pending = cpu_ipi_pending[cpu]; new_pending = old_pending | bitmap; } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending)); if (!old_pending) ipi_pcpu(cpu, RESCHEDULE_VECTOR); } else { KASSERT(call_data != NULL, ("call_data not set")); ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); } } /* * Flush the TLB on all other CPU's */ static void smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; struct _call_data data; ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) return; /* no other cpus */ if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); KASSERT(call_data == NULL, ("call_data isn't null?!")); call_data = &data; call_data->func_id = vector; call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); ipi_all_but_self(vector); while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; mtx_unlock_spin(&smp_ipi_mtx); } static void smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { int cpu, ncpu, othercpus; struct _call_data data; othercpus = mp_ncpus - 1; if (CPU_ISFULLSET(&mask)) { if (othercpus < 1) return; } else { CPU_CLR(PCPU_GET(cpuid), &mask); if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); KASSERT(call_data == NULL, ("call_data isn't null?!")); call_data = &data; call_data->func_id = vector; call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); if (CPU_ISFULLSET(&mask)) { ncpu = othercpus; ipi_all_but_self(vector); } else { ncpu = 0; while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, vector); ipi_send_cpu(cpu, vector); ncpu++; } } while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; mtx_unlock_spin(&smp_ipi_mtx); } void smp_cache_flush(void) { if (smp_started) smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); } void smp_invltlb(void) { if (smp_started) { smp_tlb_shootdown(IPI_INVLTLB, 0, 0); } } void smp_invlpg(vm_offset_t addr) { if (smp_started) { smp_tlb_shootdown(IPI_INVLPG, addr, 0); } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); } } void smp_masked_invltlb(cpuset_t mask) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); } } void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); } } void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); } } /* * send an IPI to a set of cpus. 
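The bitmap path above coalesces the cheap IPIs: a sender only pays for an event-channel signal when it sets the first bit in an otherwise empty word, and the receiver drains every pending bit in one pass. A condensed sketch pairing the sender (ipi_send_cpu() above) with the receiver (smp_reschedule_interrupt() earlier in this file); the local variables are assumed to be in scope:

	/* Sender: publish the bit, kick the vCPU only on the 0 -> non-zero edge. */
	do {
		old = cpu_ipi_pending[cpu];
		new = old | (1u << ipi);
	} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old, new));
	if (old == 0)
		ipi_pcpu(cpu, RESCHEDULE_VECTOR);

	/* Receiver: claim the whole word atomically, then act on each bit. */
	pending = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
	if (pending & (1u << IPI_PREEMPT))
		sched_preempt(curthread);
	if (pending & (1u << IPI_AST))
		;	/* nothing to do; the AST is noticed on the way out */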
*/ void ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } /* * send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } /* * send an IPI to all CPUs EXCEPT myself */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); ipi_selected(other_cpus, ipi); } int ipi_nmi_handler() { u_int cpuid; /* * As long as there is not a simple way to know about a NMI's * source, if the bitmask for the current CPU is present in * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) return (1); CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); cpustop_handler(); return (0); } /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. */ void cpustop_handler(void) { int cpu; cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); cpustop_restartfunc = NULL; } } /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. */ static void release_aps(void *dummy __unused) { if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); while (smp_started == 0) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); -SYSINIT(start_ipis, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL); +SYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL); +SYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL); diff --git a/sys/i386/xen/mptable.c b/sys/i386/xen/mptable.c index 0c1efe849b4e..74cb9ab1a512 100644 --- a/sys/i386/xen/mptable.c +++ b/sys/i386/xen/mptable.c @@ -1,107 +1,107 @@ /*- * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include static int mptable_probe(void); static int mptable_probe_cpus(void); static void mptable_register(void *dummy); static int mptable_setup_local(void); static int mptable_setup_io(void); static struct apic_enumerator mptable_enumerator = { "MPTable", mptable_probe, mptable_probe_cpus, mptable_setup_local, mptable_setup_io }; static int mptable_probe(void) { return (-100); } static int mptable_probe_cpus(void) { int i, rc; for (i = 0; i < MAXCPU; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) cpu_add(i, (i == 0)); } return (0); } /* * Initialize the local APIC on the BSP. */ static int mptable_setup_local(void) { return (0); } static int mptable_setup_io(void) { return (0); } static void mptable_register(void *dummy __unused) { apic_register_enumerator(&mptable_enumerator); } SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register, NULL); diff --git a/sys/i386/xen/xen_clock_util.c b/sys/i386/xen/xen_clock_util.c index c14a627a122a..c12451551648 100644 --- a/sys/i386/xen/xen_clock_util.c +++ b/sys/i386/xen/xen_clock_util.c @@ -1,101 +1,102 @@ /*- * Copyright (c) 2009 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include #include + #include #include #include #include -#include #include #include #include #include #include /* * Read the current hypervisor start time (wall clock) from Xen. */ void xen_fetch_wallclock(struct timespec *ts) { shared_info_t *s = HYPERVISOR_shared_info; uint32_t ts_version; do { ts_version = s->wc_version; rmb(); ts->tv_sec = s->wc_sec; ts->tv_nsec = s->wc_nsec; rmb(); } while ((s->wc_version & 1) | (ts_version ^ s->wc_version)); } /* * Read the current hypervisor system uptime value from Xen. */ void xen_fetch_uptime(struct timespec *ts) { shared_info_t *s = HYPERVISOR_shared_info; struct vcpu_time_info *src; struct shadow_time_info dst; uint32_t pre_version, post_version; src = &s->vcpu_info[smp_processor_id()].time; spinlock_enter(); do { pre_version = dst.version = src->version; rmb(); dst.system_timestamp = src->system_time; rmb(); post_version = src->version; } while ((pre_version & 1) | (pre_version ^ post_version)); spinlock_exit(); ts->tv_sec = dst.system_timestamp / 1000000000; ts->tv_nsec = dst.system_timestamp % 1000000000; } diff --git a/sys/i386/xen/xen_machdep.c b/sys/i386/xen/xen_machdep.c index 9b5edd384cf9..7049be6d561a 100644 --- a/sys/i386/xen/xen_machdep.c +++ b/sys/i386/xen/xen_machdep.c @@ -1,1260 +1,1270 @@ /* * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christian Limpach. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
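The retry loops in xen_fetch_wallclock() and xen_fetch_uptime() above are lock-free reads against a hypervisor writer: the version field is bumped to an odd value while an update is in progress and to the next even value when it completes, so the reader keeps retrying until it sees a stable, even version on both sides of the copy. The general shape, sketched with stand-in field names:

	/* Version-counter (seqlock-style) read; 'src->version' and
	 * 'src->payload' are illustrative stand-ins for the shared-info
	 * time fields read above. */
	do {
		ver = src->version;
		rmb();			/* version before payload      */
		snapshot = src->payload;
		rmb();			/* payload before the re-check */
	} while ((src->version & 1) != 0 || ver != src->version);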
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); int xendebug_flags; start_info_t *xen_start_info; shared_info_t *HYPERVISOR_shared_info; xen_pfn_t *xen_machine_phys = machine_to_phys_mapping; xen_pfn_t *xen_phys_machine; xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; xen_pfn_t *xen_pfn_to_mfn_frame_list_list; int preemptable, init_first; extern unsigned int avail_space; +int xen_vector_callback_enabled = 0; +enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN; void ni_cli(void); void ni_sti(void); void ni_cli(void) { CTR0(KTR_SPARE2, "ni_cli disabling interrupts"); __asm__("pushl %edx;" "pushl %eax;" ); __cli(); __asm__("popl %eax;" "popl %edx;" ); } void ni_sti(void) { __asm__("pushl %edx;" "pushl %esi;" "pushl %eax;" ); __sti(); __asm__("popl %eax;" "popl %esi;" "popl %edx;" ); } +void +force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} + /* * Modify the cmd_line by converting ',' to NULLs so that it is in a format * suitable for the static env vars. */ char * xen_setbootenv(char *cmd_line) { char *cmd_line_next; /* Skip leading spaces */ for (; *cmd_line == ' '; cmd_line++); - printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line); + xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line); for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;); return cmd_line; } static struct { const char *ev; int mask; } howto_names[] = { {"boot_askname", RB_ASKNAME}, {"boot_single", RB_SINGLE}, {"boot_nosync", RB_NOSYNC}, {"boot_halt", RB_ASKNAME}, {"boot_serial", RB_SERIAL}, {"boot_cdrom", RB_CDROM}, {"boot_gdb", RB_GDB}, {"boot_gdb_pause", RB_RESERVED1}, {"boot_verbose", RB_VERBOSE}, {"boot_multicons", RB_MULTIPLE}, {NULL, 0} }; int xen_boothowto(char *envp) { int i, howto = 0; /* get equivalents from the environment */ for (i = 0; howto_names[i].ev != NULL; i++) if (getenv(howto_names[i].ev) != NULL) howto |= howto_names[i].mask; return howto; } -#define PRINTK_BUFSIZE 1024 +#define XC_PRINTF_BUFSIZE 1024 void -printk(const char *fmt, ...) +xc_printf(const char *fmt, ...) 
{ __va_list ap; int retval; - static char buf[PRINTK_BUFSIZE]; + static char buf[XC_PRINTF_BUFSIZE]; va_start(ap, fmt); - retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap); + retval = vsnprintf(buf, XC_PRINTF_BUFSIZE - 1, fmt, ap); va_end(ap); buf[retval] = 0; (void)HYPERVISOR_console_write(buf, retval); } #define XPQUEUE_SIZE 128 struct mmu_log { char *file; int line; }; #ifdef SMP /* per-cpu queues and indices */ #ifdef INVARIANTS static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE]; #endif static int xpq_idx[XEN_LEGACY_MAX_VCPUS]; static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE]; #define XPQ_QUEUE_LOG xpq_queue_log[vcpu] #define XPQ_QUEUE xpq_queue[vcpu] #define XPQ_IDX xpq_idx[vcpu] #define SET_VCPU() int vcpu = smp_processor_id() #else static mmu_update_t xpq_queue[XPQUEUE_SIZE]; #ifdef INVARIANTS static struct mmu_log xpq_queue_log[XPQUEUE_SIZE]; #endif static int xpq_idx = 0; #define XPQ_QUEUE_LOG xpq_queue_log #define XPQ_QUEUE xpq_queue #define XPQ_IDX xpq_idx #define SET_VCPU() #endif /* !SMP */ #define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1); #if 0 static void xen_dump_queue(void) { int _xpq_idx = XPQ_IDX; int i; if (_xpq_idx <= 1) return; - printk("xen_dump_queue(): %u entries\n", _xpq_idx); + xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx); for (i = 0; i < _xpq_idx; i++) { - printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); + xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, + XPQ_QUEUE[i].ptr); } } #endif static __inline void _xen_flush_queue(void) { SET_VCPU(); int _xpq_idx = XPQ_IDX; int error, i; #ifdef INVARIANTS if (__predict_true(gdtset)) CRITICAL_ASSERT(curthread); #endif XPQ_IDX = 0; /* Make sure index is cleared first to avoid double updates. */ error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE, _xpq_idx, NULL, DOMID_SELF); #if 0 if (__predict_true(gdtset)) for (i = _xpq_idx; i > 0;) { if (i >= 3) { CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx " "ptr: %lx val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff), (XPQ_QUEUE[i-2].val & 0xffffffff), (XPQ_QUEUE[i-2].ptr & 0xffffffff), (XPQ_QUEUE[i-3].val & 0xffffffff), (XPQ_QUEUE[i-3].ptr & 0xffffffff)); i -= 3; } else if (i == 2) { CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff), (XPQ_QUEUE[i-2].val & 0xffffffff), (XPQ_QUEUE[i-2].ptr & 0xffffffff)); i = 0; } else { CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff)); i = 0; } } #endif if (__predict_false(error < 0)) { for (i = 0; i < _xpq_idx; i++) printf("val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); panic("Failed to execute MMU updates: %d", error); } } void xen_flush_queue(void) { SET_VCPU(); if (__predict_true(gdtset)) critical_enter(); if (XPQ_IDX != 0) _xen_flush_queue(); if (__predict_true(gdtset)) critical_exit(); } static __inline void xen_increment_idx(void) { SET_VCPU(); XPQ_IDX++; if (__predict_false(XPQ_IDX == XPQUEUE_SIZE)) xen_flush_queue(); } void xen_check_queue(void) { #ifdef INVARIANTS SET_VCPU(); KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX)); #endif } void xen_invlpg(vm_offset_t va) { struct mmuext_op op; op.cmd = MMUEXT_INVLPG_ALL; op.arg1.linear_addr = va & ~PAGE_MASK; PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_load_cr3(u_int val) { struct mmuext_op op; #ifdef INVARIANTS SET_VCPU(); KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", 
XPQ_IDX)); #endif op.cmd = MMUEXT_NEW_BASEPTR; op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } #ifdef KTR static __inline u_int rebp(void) { u_int data; __asm __volatile("movl 4(%%ebp),%0" : "=r" (data)); return (data); } #endif u_int read_eflags(void) { vcpu_info_t *_vcpu; u_int eflags; eflags = _read_eflags(); _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; if (_vcpu->evtchn_upcall_mask) eflags &= ~PSL_I; return (eflags); } void write_eflags(u_int eflags) { u_int intr; CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags); intr = ((eflags & PSL_I) == 0); __restore_flags(intr); _write_eflags(eflags); } void xen_cli(void) { CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp()); __cli(); } void xen_sti(void) { CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp()); __sti(); } u_int xen_rcr2(void) { return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2); } void _xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line) { SET_VCPU(); if (__predict_true(gdtset)) critical_enter(); XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; XPQ_QUEUE[XPQ_IDX].val = pfn; #ifdef INVARIANTS XPQ_QUEUE_LOG[XPQ_IDX].file = file; XPQ_QUEUE_LOG[XPQ_IDX].line = line; #endif xen_increment_idx(); if (__predict_true(gdtset)) critical_exit(); } extern struct rwlock pvh_global_lock; void _xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line) { SET_VCPU(); if (__predict_true(gdtset)) rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((ptr & 7) == 0, ("misaligned update")); if (__predict_true(gdtset)) critical_enter(); XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE; XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val; #ifdef INVARIANTS XPQ_QUEUE_LOG[XPQ_IDX].file = file; XPQ_QUEUE_LOG[XPQ_IDX].line = line; #endif xen_increment_idx(); if (__predict_true(gdtset)) critical_exit(); } void xen_pgdpt_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L3_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pgd_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L2_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pgd_unpin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_UNPIN_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pt_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L1_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pt_unpin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_UNPIN_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_set_ldt(vm_paddr_t ptr, unsigned long len) { struct mmuext_op op; op.cmd = MMUEXT_SET_LDT; op.arg1.linear_addr = ptr; op.arg2.nr_ents = len; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_tlb_flush(void) { struct mmuext_op op; op.cmd = MMUEXT_TLB_FLUSH_LOCAL; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_update_descriptor(union descriptor *table, union descriptor *entry) { vm_paddr_t pa; pt_entry_t *ptp; ptp = vtopte((vm_offset_t)table); pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK); if 
(HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry)) panic("HYPERVISOR_update_descriptor failed\n"); } #if 0 /* * Bitmap is indexed by page number. If bit is set, the page is part of a * xen_create_contiguous_region() area of memory. */ unsigned long *contiguous_bitmap; static void contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages) { unsigned long start_off, end_off, curr_idx, end_idx; curr_idx = first_page / BITS_PER_LONG; start_off = first_page & (BITS_PER_LONG-1); end_idx = (first_page + nr_pages) / BITS_PER_LONG; end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); if (curr_idx == end_idx) { contiguous_bitmap[curr_idx] |= ((1UL<> PAGE_SHIFT; mfn = PFNTOMFN(pfn); PFNTOMFN(pfn) = INVALID_P2M_ENTRY; PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Get a new contiguous memory extent. */ reservation.extent_order = order; /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not * running with a broxen driver XXXEN */ reservation.address_bits = 31; if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1) goto fail; /* 3. Map the new extent in place of old pages. */ for (i = 0; i < (1 << order); i++) { int pfn; pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; xen_machphys_update(mfn+i, pfn); PFNTOMFN(pfn) = mfn+i; } xen_tlb_flush(); #if 0 contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order); #endif balloon_unlock(flags); return 0; fail: reservation.extent_order = 0; reservation.address_bits = 0; for (i = 0; i < (1 << order); i++) { int pfn; pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_memory_op( XENMEM_increase_reservation, &reservation) != 1); xen_machphys_update(mfn, pfn); PFNTOMFN(pfn) = mfn; } xen_tlb_flush(); balloon_unlock(flags); return ENOMEM; } void xen_destroy_contiguous_region(void *addr, int npages) { unsigned long mfn, i, flags, order, pfn0; struct xen_memory_reservation reservation = { .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF }; set_xen_guest_handle(reservation.extent_start, &mfn); pfn0 = vtophys(addr) >> PAGE_SHIFT; #if 0 scrub_pages(vstart, 1 << order); #endif /* can currently only handle power of two allocation */ PANIC_IF(ffs(npages) != fls(npages)); /* 0. determine order */ order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages); balloon_lock(flags); #if 0 contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order); #endif /* 1. Zap current PTEs, giving away the underlying pages. */ for (i = 0; i < (1 << order); i++) { int pfn; uint64_t new_val = 0; pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0)); PFNTOMFN(pfn) = INVALID_P2M_ENTRY; PANIC_IF(HYPERVISOR_memory_op( XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Map new pages in place of old pages. 
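xen_create_contiguous_region() above only accepts power-of-two requests: it hands the npages scattered machine frames back to Xen one at a time, then asks for a single machine-contiguous extent of the same size below the 31-bit limit the code requests (reservation.address_bits = 31) for legacy DMA. For npages = 8 the arithmetic is:

        order  = fls(npages) - 1 = fls(8) - 1 = 3
        extent = 1 << order = 8 pages = 32 KiB with 4 KiB pages

On failure it walks the same range again, re-populating the PFNs it gave away one page at a time before returning ENOMEM.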
*/ for (i = 0; i < (1 << order); i++) { int pfn; uint64_t new_val; pfn = pfn0 + i; PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1); new_val = mfn << PAGE_SHIFT; PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE), new_val, PG_KERNEL)); xen_machphys_update(mfn, pfn); PFNTOMFN(pfn) = mfn; } xen_tlb_flush(); balloon_unlock(flags); } extern vm_offset_t proc0kstack; extern int vm86paddr, vm86phystk; char *bootmem_start, *bootmem_current, *bootmem_end; pteinfo_t *pteinfo_list; void initvalues(start_info_t *startinfo); struct xenstore_domain_interface; extern struct xenstore_domain_interface *xen_store; char *console_page; void * bootmem_alloc(unsigned int size) { char *retptr; retptr = bootmem_current; PANIC_IF(retptr + size > bootmem_end); bootmem_current += size; return retptr; } void bootmem_free(void *ptr, unsigned int size) { char *tptr; tptr = ptr; PANIC_IF(tptr != bootmem_current - size || bootmem_current - size < bootmem_start); bootmem_current -= size; } #if 0 static vm_paddr_t xpmap_mtop2(vm_paddr_t mpa) { return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT) ) | (mpa & ~PG_FRAME); } static pd_entry_t xpmap_get_bootpde(vm_paddr_t va) { return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22]; } static pd_entry_t xpmap_get_vbootpde(vm_paddr_t va) { pd_entry_t pde; pde = xpmap_get_bootpde(va); if ((pde & PG_V) == 0) return (pde & ~PG_FRAME); return (pde & ~PG_FRAME) | (xpmap_mtop2(pde & PG_FRAME) + KERNBASE); } static pt_entry_t 8* xpmap_get_bootptep(vm_paddr_t va) { pd_entry_t pde; pde = xpmap_get_vbootpde(va); if ((pde & PG_V) == 0) return (void *)-1; #define PT_MASK 0x003ff000 /* page table address bits */ return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]); } static pt_entry_t xpmap_get_bootpte(vm_paddr_t va) { return xpmap_get_bootptep(va)[0]; } #endif #ifdef ADD_ISA_HOLE static void shift_phys_machine(unsigned long *phys_machine, int nr_pages) { unsigned long *tmp_page, *current_page, *next_page; int i; tmp_page = bootmem_alloc(PAGE_SIZE); current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long)); next_page = current_page - (PAGE_SIZE/sizeof(unsigned long)); bcopy(phys_machine, tmp_page, PAGE_SIZE); while (current_page > phys_machine) { /* save next page */ bcopy(next_page, tmp_page, PAGE_SIZE); /* shift down page */ bcopy(current_page, next_page, PAGE_SIZE); /* finish swap */ bcopy(tmp_page, current_page, PAGE_SIZE); current_page -= (PAGE_SIZE/sizeof(unsigned long)); next_page -= (PAGE_SIZE/sizeof(unsigned long)); } bootmem_free(tmp_page, PAGE_SIZE); for (i = 0; i < nr_pages; i++) { xen_machphys_update(phys_machine[i], i); } memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE); } #endif /* ADD_ISA_HOLE */ /* * Build a directory of the pages that make up our Physical to Machine * mapping table. The Xen suspend/restore code uses this to find our * mapping table. 
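bootmem_alloc() and bootmem_free() above form a minimal bump allocator over the fixed window carved out later in initvalues(); there is no free list, so releases must come back in strict LIFO order or the PANIC_IF in bootmem_free() fires. A hypothetical use:

        char *p;

        p = bootmem_alloc(PAGE_SIZE);   /* bootmem_current advances by PAGE_SIZE */
        /* ... early-boot use of p ... */
        bootmem_free(p, PAGE_SIZE);     /* legal only while p is the newest allocation */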
*/ static void init_frame_list_list(void *arg) { unsigned long nr_pages = xen_start_info->nr_pages; #define FPP (PAGE_SIZE/sizeof(xen_pfn_t)) int i, j, k; xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); for (i = 0, j = 0, k = -1; i < nr_pages; i += FPP, j++) { if ((j & (FPP - 1)) == 0) { k++; xen_pfn_to_mfn_frame_list[k] = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); xen_pfn_to_mfn_frame_list_list[k] = VTOMFN(xen_pfn_to_mfn_frame_list[k]); j = 0; } xen_pfn_to_mfn_frame_list[k][j] = VTOMFN(&xen_phys_machine[i]); } HYPERVISOR_shared_info->arch.max_pfn = nr_pages; HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = VTOMFN(xen_pfn_to_mfn_frame_list_list); } SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL); extern unsigned long physfree; int pdir, curoffset; extern int nkpt; extern uint32_t kernbase; void initvalues(start_info_t *startinfo) { vm_offset_t cur_space, cur_space_pt; struct physdev_set_iopl set_iopl; int l3_pages, l2_pages, l1_pages, offset; vm_paddr_t console_page_ma, xen_store_ma; vm_offset_t tmpva; vm_paddr_t shinfo; #ifdef PAE vm_paddr_t IdlePDPTma, IdlePDPTnewma; vm_paddr_t IdlePTDnewma[4]; pd_entry_t *IdlePDPTnew, *IdlePTDnew; vm_paddr_t IdlePTDma[4]; #else vm_paddr_t IdlePTDma[1]; #endif unsigned long i; int ncpus = MAXCPU; nkpt = min( min( max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt), NPGPTD*NPDEPG - KPTDI), (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); #ifdef notyet /* * need to install handler */ HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify); #endif xen_start_info = startinfo; xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list; IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE); l1_pages = 0; #ifdef PAE l3_pages = 1; l2_pages = 0; IdlePDPT = (pd_entry_t *)startinfo->pt_base; IdlePDPTma = VTOM(startinfo->pt_base); for (i = (KERNBASE >> 30); (i < 4) && (IdlePDPT[i] != 0); i++) l2_pages++; /* * Note that only one page directory has been allocated at this point. * Thus, if KERNBASE */ for (i = 0; i < l2_pages; i++) IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE); l2_pages = (l2_pages == 0) ? 
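init_frame_list_list() above publishes the PFN-to-MFN table to the hypervisor as a two-level directory that the suspend/restore code can walk. Assuming 4 KiB pages and the 32-bit xen_pfn_t used here, the fan-out is:

        FPP                  = PAGE_SIZE / sizeof(xen_pfn_t) = 4096 / 4 = 1024
        one frame-list page  = FPP * FPP = 1,048,576 guest pages (4 GiB of RAM)
        frame_list_list page = up to FPP frame-list pages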
1 : l2_pages; #else l3_pages = 0; l2_pages = 1; #endif for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT); (i>PDRSHIFT)); i++) { if (IdlePTD[i] == 0) break; l1_pages++; } /* number of pages allocated after the pts + 1*/; cur_space = xen_start_info->pt_base + (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE; - printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space); + xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space, + cur_space); - printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n", + xc_printf("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n", KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base), xen_start_info->nr_pt_frames); xendebug_flags = 0; /* 0xffffffff; */ #ifdef ADD_ISA_HOLE shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages); #endif XENPRINTF("IdlePTD %p\n", IdlePTD); XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx " "mod_start: 0x%lx mod_len: 0x%lx\n", xen_start_info->nr_pages, xen_start_info->shared_info, xen_start_info->flags, xen_start_info->pt_base, xen_start_info->mod_start, xen_start_info->mod_len); #ifdef PAE IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE; bzero(IdlePDPTnew, PAGE_SIZE); IdlePDPTnewma = VTOM(IdlePDPTnew); IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE; bzero(IdlePTDnew, 4*PAGE_SIZE); for (i = 0; i < 4; i++) IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE); /* * L3 * * Copy the 4 machine addresses of the new PTDs in to the PDPT * */ for (i = 0; i < 4; i++) IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V; __asm__("nop;"); /* * * re-map the new PDPT read-only */ PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V); /* * * Unpin the current PDPT */ xen_pt_unpin(IdlePDPTma); #endif /* PAE */ /* Map proc0's KSTACK */ proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE); - printk("proc0kstack=%u\n", proc0kstack); + xc_printf("proc0kstack=%u\n", proc0kstack); /* vm86/bios stack */ cur_space += PAGE_SIZE; /* Map space for the vm86 region */ vm86paddr = (vm_offset_t)cur_space; cur_space += (PAGE_SIZE * 3); /* allocate 4 pages for bootmem allocator */ bootmem_start = bootmem_current = (char *)cur_space; cur_space += (4 * PAGE_SIZE); bootmem_end = (char *)cur_space; /* allocate pages for gdt */ gdt = (union descriptor *)cur_space; cur_space += PAGE_SIZE*ncpus; /* allocate page for ldt */ ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE; cur_space += PAGE_SIZE; /* unmap remaining pages from initial chunk * */ for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<> 18)), ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK), l1_pages*sizeof(pt_entry_t)); for (i = 0; i < 4; i++) { PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE, IdlePTDnewma[i] | PG_V); } xen_load_cr3(VTOP(IdlePDPTnew)); xen_pgdpt_pin(VTOM(IdlePDPTnew)); /* allocate remainder of nkpt pages */ cur_space_pt = cur_space; for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt; i++, cur_space += PAGE_SIZE) { pdir = (offset + i) / NPDEPG; curoffset = ((offset + i) % NPDEPG); if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS) break; /* * make sure that all the initial page table pages * have been zeroed */ PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW); bzero((char *)cur_space, PAGE_SIZE); PT_SET_MA(cur_space, (vm_paddr_t)0); xen_pt_pin(VTOM(cur_space)); xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + curoffset*sizeof(vm_paddr_t)), VTOM(cur_space) | PG_KERNEL); PT_UPDATES_FLUSH(); } for (i = 0; i < 4; i++) { pdir = (PTDPTDI 
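The rebuild above follows the ordering Xen imposes on PV page tables: a page may only be pinned as a page table once the guest holds no writable mapping of it, and all later modifications go through mmu_update hypercalls (the queued updates used here). The per-page sequence from the nkpt loop, reduced to its essentials (va and pde_ma are placeholder names):

        PT_SET_MA(va, VTOM(va) | PG_V | PG_RW); /* writable just long enough to zero it */
        bzero((void *)va, PAGE_SIZE);
        PT_SET_MA(va, (vm_paddr_t)0);           /* drop the writable mapping ... */
        xen_pt_pin(VTOM(va));                   /* ... so Xen accepts it as an L1 table */
        xen_queue_pt_update(pde_ma, VTOM(va) | PG_KERNEL);
        PT_UPDATES_FLUSH();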
+ i) / NPDEPG; curoffset = (PTDPTDI + i) % NPDEPG; xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + curoffset*sizeof(vm_paddr_t)), IdlePTDnewma[i] | PG_V); } PT_UPDATES_FLUSH(); IdlePTD = IdlePTDnew; IdlePDPT = IdlePDPTnew; IdlePDPTma = IdlePDPTnewma; HYPERVISOR_shared_info = (shared_info_t *)cur_space; cur_space += PAGE_SIZE; xen_store = (struct xenstore_domain_interface *)cur_space; cur_space += PAGE_SIZE; console_page = (char *)cur_space; cur_space += PAGE_SIZE; /* * shared_info is an unsigned long so this will randomly break if * it is allocated above 4GB - I guess people are used to that * sort of thing with Xen ... sigh */ shinfo = xen_start_info->shared_info; PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL); - printk("#4\n"); + xc_printf("#4\n"); xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT); PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL); console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT); PT_SET_MA(console_page, console_page_ma | PG_KERNEL); - printk("#5\n"); + xc_printf("#5\n"); set_iopl.iopl = 1; PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl)); - printk("#6\n"); + xc_printf("#6\n"); #if 0 /* add page table for KERNBASE */ xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_KERNEL); xen_flush_queue(); #ifdef PAE xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_V | PG_A); #else xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_V | PG_A); #endif xen_flush_queue(); cur_space += PAGE_SIZE; - printk("#6\n"); + xc_printf("#6\n"); #endif /* 0 */ #ifdef notyet if (xen_start_info->flags & SIF_INITDOMAIN) { /* Map first megabyte */ for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD); xen_flush_queue(); } #endif /* * re-map kernel text read-only * */ for (i = (((vm_offset_t)&btext) & ~PAGE_MASK); i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE) PT_SET_MA(i, VTOM(i) | PG_V | PG_A); - printk("#7\n"); + xc_printf("#7\n"); physfree = VTOP(cur_space); init_first = physfree >> PAGE_SHIFT; IdlePTD = (pd_entry_t *)VTOP(IdlePTD); IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT); setup_xen_features(); - printk("#8, proc0kstack=%u\n", proc0kstack); + xc_printf("#8, proc0kstack=%u\n", proc0kstack); } trap_info_t trap_table[] = { { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, { 1, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, { 3, 3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, /* This is UPL on Linux and KPL on BSD */ { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, { 7, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, /* * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, * no handler for double fault */ { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, {14, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, {17, 0, GSEL(GCODE_SEL, 
SEL_KPL), (unsigned long) &IDTVEC(align)}, {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, { 0, 0, 0, 0 } }; /* Perform a multicall and check that individual calls succeeded. */ int HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls) { int ret = 0; int i; /* Perform the multicall. */ PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls)); /* Check the results of individual hypercalls. */ for (i = 0; i < nr_calls; i++) - if (unlikely(call_list[i].result < 0)) + if (__predict_false(call_list[i].result < 0)) ret++; - if (unlikely(ret > 0)) + if (__predict_false(ret > 0)) panic("%d multicall(s) failed: cpu %d\n", ret, smp_processor_id()); /* If we didn't panic already, everything succeeded. */ return (0); } /********** CODE WORTH KEEPING ABOVE HERE *****************/ void xen_failsafe_handler(void); void xen_failsafe_handler(void) { panic("xen_failsafe_handler called!\n"); } void xen_handle_thread_switch(struct pcb *pcb); /* This is called by cpu_switch() when switching threads. */ /* The pcb arg refers to the process control block of the */ /* next thread which is to run */ void xen_handle_thread_switch(struct pcb *pcb) { uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0]; uint32_t *b = (uint32_t *)&pcb->pcb_fsd; multicall_entry_t mcl[3]; int i = 0; /* Notify Xen of task switch */ mcl[i].op = __HYPERVISOR_stack_switch; mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL); mcl[i++].args[1] = (unsigned long)pcb; /* Check for update of fsd */ if (*a != *b || *(a+1) != *(b+1)) { mcl[i].op = __HYPERVISOR_update_descriptor; *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; } a += 2; b += 2; /* Check for update of gsd */ if (*a != *b || *(a+1) != *(b+1)) { mcl[i].op = __HYPERVISOR_update_descriptor; *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; } (void)HYPERVISOR_multicall(mcl, i); } diff --git a/sys/i386/xen/xen_rtc.c b/sys/i386/xen/xen_rtc.c index 8e1e0175959f..8dc3ecbe96fe 100644 --- a/sys/i386/xen/xen_rtc.c +++ b/sys/i386/xen/xen_rtc.c @@ -1,144 +1,146 @@ /*- * Copyright (c) 2009 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
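xen_handle_thread_switch() above batches the kernel stack switch and any changed %fs/%gs descriptors into one multicall, so a context switch costs at most a single trap into the hypervisor, and HYPERVISOR_multicall() panics if the batch or any individual entry failed. The filling pattern, reduced to the one unconditional entry (values mirror the code above):

        multicall_entry_t mcl[1];
        int n = 0;

        mcl[n].op = __HYPERVISOR_stack_switch;
        mcl[n].args[0] = GSEL(GDATA_SEL, SEL_KPL);      /* kernel stack segment */
        mcl[n++].args[1] = (unsigned long)pcb;          /* kernel stack pointer */

        (void)HYPERVISOR_multicall(mcl, n);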
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include #include +#include +#include +#include + #include #include + #include -#include -#include #include -#include -#include #include #include #include "clock_if.h" static int xen_rtc_probe(device_t dev) { device_set_desc(dev, "Xen Hypervisor Clock"); printf("[XEN] xen_rtc_probe: probing Hypervisor RTC clock\n"); if (! HYPERVISOR_shared_info) { device_printf(dev, "No hypervisor shared page found; RTC can not start.\n"); return (EINVAL); } return (0); } static int xen_rtc_attach(device_t dev) { printf("[XEN] xen_rtc_attach: attaching Hypervisor RTC clock\n"); clock_register(dev, 1000000); return(0); } static int xen_rtc_settime(device_t dev __unused, struct timespec *ts) { device_printf(dev, "[XEN] xen_rtc_settime\n"); /* * Don't return EINVAL here; just silently fail if the domain isn't privileged enough * to set the TOD. */ return(0); } /* * The Xen time structures document the hypervisor start time and the * uptime-since-hypervisor-start (in nsec.) They need to be combined * in order to calculate a TOD clock. */ static int xen_rtc_gettime(device_t dev, struct timespec *ts) { struct timespec w_ts, u_ts; device_printf(dev, "[XEN] xen_rtc_gettime\n"); xen_fetch_wallclock(&w_ts); device_printf(dev, "[XEN] xen_rtc_gettime: wallclock %ld sec; %ld nsec\n", (long int) w_ts.tv_sec, (long int) w_ts.tv_nsec); xen_fetch_uptime(&u_ts); device_printf(dev, "[XEN] xen_rtc_gettime: uptime %ld sec; %ld nsec\n", (long int) u_ts.tv_sec, (long int) u_ts.tv_nsec); timespecclear(ts); timespecadd(ts, &w_ts); timespecadd(ts, &u_ts); device_printf(dev, "[XEN] xen_rtc_gettime: TOD %ld sec; %ld nsec\n", (long int) ts->tv_sec, (long int) ts->tv_nsec); return(0); } static void xen_rtc_identify(driver_t *drv, device_t parent) { BUS_ADD_CHILD(parent, 0, "rtc", 0); } static device_method_t xen_rtc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xen_rtc_probe), DEVMETHOD(device_attach, xen_rtc_attach), DEVMETHOD(device_identify, xen_rtc_identify), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* clock interface */ DEVMETHOD(clock_gettime, xen_rtc_gettime), DEVMETHOD(clock_settime, xen_rtc_settime), { 0, 0 } }; static driver_t xen_rtc_driver = { "rtc", xen_rtc_methods, 0 }; static devclass_t xen_rtc_devclass; DRIVER_MODULE(rtc, nexus, xen_rtc_driver, xen_rtc_devclass, 0, 0); diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index 687b42caf09c..68e0858ac611 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -1,375 +1,380 @@ /*- * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. 
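xen_rtc_gettime() above reconstructs time-of-day from the two quantities the hypervisor exports: the wall-clock value at hypervisor start and the nanosecond-resolution uptime since then; timespecclear() plus the two timespecadd() calls perform the addition, carry included. For example (illustrative numbers):

        wallclock   1357000000 s  500000000 ns
        uptime    +       3600 s  250000000 ns
        TOD         1357003600 s  750000000 ns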
and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernel.h 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS_KERNEL_H_ #define _SYS_KERNEL_H_ #include #ifdef _KERNEL /* for intrhook below */ #include /* Global variables for the kernel. */ /* 1.1 */ extern char kernelname[MAXPATHLEN]; extern int tick; /* usec per tick (1000000 / hz) */ extern int hz; /* system clock's frequency */ extern int psratio; /* ratio: prof / stat */ extern int stathz; /* statistics clock's frequency */ extern int profhz; /* profiling clock's frequency */ extern int profprocs; /* number of process's profiling */ extern volatile int ticks; #endif /* _KERNEL */ /* * Enumerated types for known system startup interfaces. * * Startup occurs in ascending numeric order; the list entries are * sorted prior to attempting startup to guarantee order. Items * of the same level are arbitrated for order based on the 'order' * element. * * These numbers are arbitrary and are chosen ONLY for ordering; the * enumeration values are explicit rather than implicit to provide * for binary compatibility with inserted elements. * * The SI_SUB_LAST value must have the highest lexical value. * * The SI_SUB_SWAP values represent a value used by * the BSD 4.4Lite but not by FreeBSD; it is maintained in dependent * order to support porting. 
*/ enum sysinit_sub_id { SI_SUB_DUMMY = 0x0000000, /* not executed; for linker*/ SI_SUB_DONE = 0x0000001, /* processed*/ SI_SUB_TUNABLES = 0x0700000, /* establish tunable values */ SI_SUB_COPYRIGHT = 0x0800001, /* first use of console*/ SI_SUB_SETTINGS = 0x0880000, /* check and recheck settings */ SI_SUB_MTX_POOL_STATIC = 0x0900000, /* static mutex pool */ SI_SUB_LOCKMGR = 0x0980000, /* lockmgr locks */ SI_SUB_VM = 0x1000000, /* virtual memory system init*/ SI_SUB_KMEM = 0x1800000, /* kernel memory*/ SI_SUB_KVM_RSRC = 0x1A00000, /* kvm operational limits*/ + SI_SUB_HYPERVISOR = 0x1A40000, /* + * Hypervisor detection and + * virtualization support + * setup. + */ SI_SUB_WITNESS = 0x1A80000, /* witness initialization */ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */ SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ SI_SUB_RACCT = 0x2110000, /* resource accounting */ SI_SUB_RANDOM = 0x2120000, /* random number generator */ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */ SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */ SI_SUB_VNET = 0x21E0000, /* vnet 0 */ SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/ SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/ SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */ SI_SUB_RUN_QUEUE = 0x2400000, /* set up run queue*/ SI_SUB_KTRACE = 0x2480000, /* ktrace */ SI_SUB_OPENSOLARIS = 0x2490000, /* OpenSolaris compatibility */ SI_SUB_CYCLIC = 0x24A0000, /* Cyclic timers */ SI_SUB_AUDIT = 0x24C0000, /* audit */ SI_SUB_CREATE_INIT = 0x2500000, /* create init process*/ SI_SUB_SCHED_IDLE = 0x2600000, /* required idle procs */ SI_SUB_MBUF = 0x2700000, /* mbuf subsystem */ SI_SUB_INTR = 0x2800000, /* interrupt threads */ SI_SUB_SOFTINTR = 0x2800001, /* start soft interrupt thread */ SI_SUB_ACL = 0x2900000, /* start for filesystem ACLs */ SI_SUB_DEVFS = 0x2F00000, /* devfs ready for devices */ SI_SUB_INIT_IF = 0x3000000, /* prep for net interfaces */ SI_SUB_NETGRAPH = 0x3010000, /* Let Netgraph initialize */ SI_SUB_DTRACE = 0x3020000, /* DTrace subsystem */ SI_SUB_DTRACE_PROVIDER = 0x3048000, /* DTrace providers */ SI_SUB_DTRACE_ANON = 0x308C000, /* DTrace anon enabling */ SI_SUB_DRIVERS = 0x3100000, /* Let Drivers initialize */ SI_SUB_CONFIGURE = 0x3800000, /* Configure devices */ SI_SUB_VFS = 0x4000000, /* virtual filesystem*/ SI_SUB_CLOCKS = 0x4800000, /* real time and stat clocks*/ SI_SUB_CLIST = 0x5800000, /* clists*/ SI_SUB_SYSV_SHM = 0x6400000, /* System V shared memory*/ SI_SUB_SYSV_SEM = 0x6800000, /* System V semaphores*/ SI_SUB_SYSV_MSG = 0x6C00000, /* System V message queues*/ SI_SUB_P1003_1B = 0x6E00000, /* P1003.1B realtime */ SI_SUB_PSEUDO = 0x7000000, /* pseudo devices*/ SI_SUB_EXEC = 0x7400000, /* execve() handlers */ SI_SUB_PROTO_BEGIN = 0x8000000, /* VNET initialization */ SI_SUB_PROTO_IF = 0x8400000, /* interfaces*/ SI_SUB_PROTO_DOMAININIT = 0x8600000, /* domain registration system */ SI_SUB_PROTO_DOMAIN = 0x8800000, /* domains (address families?)*/ SI_SUB_PROTO_IFATTACHDOMAIN = 0x8800001, /* domain dependent data init*/ SI_SUB_PROTO_END = 0x8ffffff, /* VNET helper functions */ SI_SUB_KPROF = 0x9000000, /* kernel profiling*/ SI_SUB_KICK_SCHEDULER = 0xa000000, /* start the timeout 
events*/ SI_SUB_INT_CONFIG_HOOKS = 0xa800000, /* Interrupts enabled config */ SI_SUB_ROOT_CONF = 0xb000000, /* Find root devices */ SI_SUB_DUMP_CONF = 0xb200000, /* Find dump devices */ SI_SUB_RAID = 0xb380000, /* Configure GEOM classes */ SI_SUB_SWAP = 0xc000000, /* swap */ SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/ SI_SUB_SYSCALLS = 0xd800000, /* register system calls */ SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */ SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/ SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/ SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/ SI_SUB_KTHREAD_BUF = 0xea00000, /* buffer daemon*/ SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/ SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/ SI_SUB_SMP = 0xf000000, /* start the APs*/ SI_SUB_RACCTD = 0xf100000, /* start raccd*/ SI_SUB_LAST = 0xfffffff /* final initialization */ }; /* * Some enumerated orders; "ANY" sorts last. */ enum sysinit_elem_order { SI_ORDER_FIRST = 0x0000000, /* first*/ SI_ORDER_SECOND = 0x0000001, /* second*/ SI_ORDER_THIRD = 0x0000002, /* third*/ SI_ORDER_FOURTH = 0x0000003, /* fourth*/ SI_ORDER_MIDDLE = 0x1000000, /* somewhere in the middle */ SI_ORDER_ANY = 0xfffffff /* last*/ }; /* * A system initialization call instance * * At the moment there is one instance of sysinit. We probably do not * want two which is why this code is if'd out, but we definitely want * to discern SYSINIT's which take non-constant data pointers and * SYSINIT's which take constant data pointers, * * The C_* macros take functions expecting const void * arguments * while the non-C_* macros take functions expecting just void * arguments. * * With -Wcast-qual on, the compiler issues warnings: * - if we pass non-const data or functions taking non-const data * to a C_* macro. * * - if we pass const data to the normal macros * * However, no warning is issued if we pass a function taking const data * through a normal non-const macro. This is ok because the function is * saying it won't modify the data so we don't care whether the data is * modifiable or not. */ typedef void (*sysinit_nfunc_t)(void *); typedef void (*sysinit_cfunc_t)(const void *); struct sysinit { enum sysinit_sub_id subsystem; /* subsystem identifier*/ enum sysinit_elem_order order; /* init order within subsystem*/ sysinit_cfunc_t func; /* function */ const void *udata; /* multiplexer/argument */ }; /* * Default: no special processing * * The C_ version of SYSINIT is for data pointers to const * data ( and functions taking data pointers to const data ). * At the moment it is no different from SYSINIT and thus * still results in warnings. * * The casts are necessary to have the compiler produce the * correct warnings when -Wcast-qual is used. 
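Boot runs the sysinit_sub_id levels in ascending numeric order, with SI_ORDER_* breaking ties within a level, so the new SI_SUB_HYPERVISOR slot (0x1A40000) executes after SI_SUB_KVM_RSRC and before SI_SUB_WITNESS. A hypothetical consumer registering work at that stage (the function and its body are illustrative, not part of this change):

        static void
        hypervisor_probe(void *arg __unused)
        {
                /* e.g. probe CPUID hypervisor leaves, set vm_guest. */
        }
        SYSINIT(hypervisor_probe, SI_SUB_HYPERVISOR, SI_ORDER_FIRST,
            hypervisor_probe, NULL);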
* */ #define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_init = { \ subsystem, \ order, \ func, \ (ident) \ }; \ DATA_SET(sysinit_set,uniquifier ## _sys_init) #define SYSINIT(uniquifier, subsystem, order, func, ident) \ C_SYSINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) /* * Called on module unload: no special processing */ #define C_SYSUNINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_uninit = { \ subsystem, \ order, \ func, \ (ident) \ }; \ DATA_SET(sysuninit_set,uniquifier ## _sys_uninit) #define SYSUNINIT(uniquifier, subsystem, order, func, ident) \ C_SYSUNINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) void sysinit_add(struct sysinit **set, struct sysinit **set_end); /* * Infrastructure for tunable 'constants'. Value may be specified at compile * time or kernel load time. Rules relating tunables together can be placed * in a SYSINIT function at SI_SUB_TUNABLES with SI_ORDER_ANY. * * WARNING: developers should never use the reserved suffixes specified in * loader.conf(5) for any tunables or conflicts will result. */ /* * int * please avoid using for new tunables! */ extern void tunable_int_init(void *); struct tunable_int { const char *path; int *var; }; #define TUNABLE_INT(path, var) \ static struct tunable_int __CONCAT(__tunable_int_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int_init, \ &__CONCAT(__tunable_int_, __LINE__)) #define TUNABLE_INT_FETCH(path, var) getenv_int((path), (var)) /* * long */ extern void tunable_long_init(void *); struct tunable_long { const char *path; long *var; }; #define TUNABLE_LONG(path, var) \ static struct tunable_long __CONCAT(__tunable_long_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_long_init,\ &__CONCAT(__tunable_long_, __LINE__)) #define TUNABLE_LONG_FETCH(path, var) getenv_long((path), (var)) /* * unsigned long */ extern void tunable_ulong_init(void *); struct tunable_ulong { const char *path; unsigned long *var; }; #define TUNABLE_ULONG(path, var) \ static struct tunable_ulong __CONCAT(__tunable_ulong_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_ulong_init, \ &__CONCAT(__tunable_ulong_, __LINE__)) #define TUNABLE_ULONG_FETCH(path, var) getenv_ulong((path), (var)) /* * quad */ extern void tunable_quad_init(void *); struct tunable_quad { const char *path; quad_t *var; }; #define TUNABLE_QUAD(path, var) \ static struct tunable_quad __CONCAT(__tunable_quad_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_quad_init, \ &__CONCAT(__tunable_quad_, __LINE__)) #define TUNABLE_QUAD_FETCH(path, var) getenv_quad((path), (var)) extern void tunable_str_init(void *); struct tunable_str { const char *path; char *var; int size; }; #define TUNABLE_STR(path, var, size) \ static struct tunable_str __CONCAT(__tunable_str_, __LINE__) = { \ (path), \ (var), \ (size), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_str_init, \ &__CONCAT(__tunable_str_, __LINE__)) #define TUNABLE_STR_FETCH(path, var, size) \ getenv_string((path), (var), (size)) struct intr_config_hook { TAILQ_ENTRY(intr_config_hook) 
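The TUNABLE_* macros above come in two forms: the declarative one registers a SYSINIT at SI_SUB_TUNABLES that copies a loader environment variable into a kernel variable once at boot, while the *_FETCH form reads it imperatively at the call site. A hypothetical use (the knob name is made up for illustration):

        static int xen_debug = 0;
        TUNABLE_INT("debug.xen.level", &xen_debug);     /* fetched at SI_SUB_TUNABLES */

        /* ... or, from any later initialization code: */
        TUNABLE_INT_FETCH("debug.xen.level", &xen_debug);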
ich_links; void (*ich_func)(void *arg); void *ich_arg; }; int config_intrhook_establish(struct intr_config_hook *hook); void config_intrhook_disestablish(struct intr_config_hook *hook); #endif /* !_SYS_KERNEL_H_*/ diff --git a/sys/x86/include/segments.h b/sys/x86/include/segments.h index 74066ef48e79..0d6a282c3c8f 100644 --- a/sys/x86/include/segments.h +++ b/sys/x86/include/segments.h @@ -1,286 +1,287 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 * $FreeBSD$ */ #ifndef _X86_SEGMENTS_H_ #define _X86_SEGMENTS_H_ /* * X86 Segmentation Data Structures and definitions */ /* * Selectors */ #define SEL_RPL_MASK 3 /* requester priv level */ #define ISPL(s) ((s)&3) /* priority level of a selector */ #ifdef XEN #define SEL_KPL 1 /* kernel priority level */ #else #define SEL_KPL 0 /* kernel priority level */ #endif #define SEL_UPL 3 /* user priority level */ #define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */ #define SEL_LDT 4 /* local descriptor table */ #define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */ #define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local selector */ #define GSEL(s,r) (((s)<<3) | r) /* a global selector */ /* * User segment descriptors (%cs, %ds etc for i386 apps. 64 bit wide) * For long-mode apps, %cs only has the conforming bit in sd_type, the sd_dpl, * sd_p, sd_l and sd_def32 which must be zero). %ds only has sd_p. 
*/ struct segment_descriptor { unsigned sd_lolimit:16; /* segment extent (lsb) */ unsigned sd_lobase:24; /* segment base address (lsb) */ unsigned sd_type:5; /* segment type */ unsigned sd_dpl:2; /* segment descriptor priority level */ unsigned sd_p:1; /* segment descriptor present */ unsigned sd_hilimit:4; /* segment extent (msb) */ unsigned sd_xx:2; /* unused */ unsigned sd_def32:1; /* default 32 vs 16 bit size */ unsigned sd_gran:1; /* limit granularity (byte/page units)*/ unsigned sd_hibase:8; /* segment base address (msb) */ } __packed; struct user_segment_descriptor { unsigned sd_lolimit:16; /* segment extent (lsb) */ unsigned sd_lobase:24; /* segment base address (lsb) */ unsigned sd_type:5; /* segment type */ unsigned sd_dpl:2; /* segment descriptor priority level */ unsigned sd_p:1; /* segment descriptor present */ unsigned sd_hilimit:4; /* segment extent (msb) */ unsigned sd_xx:1; /* unused */ unsigned sd_long:1; /* long mode (cs only) */ unsigned sd_def32:1; /* default 32 vs 16 bit size */ unsigned sd_gran:1; /* limit granularity (byte/page units)*/ unsigned sd_hibase:8; /* segment base address (msb) */ } __packed; #define USD_GETBASE(sd) (((sd)->sd_lobase) | (sd)->sd_hibase << 24) #define USD_SETBASE(sd, b) (sd)->sd_lobase = (b); \ (sd)->sd_hibase = ((b) >> 24); #define USD_GETLIMIT(sd) (((sd)->sd_lolimit) | (sd)->sd_hilimit << 16) #define USD_SETLIMIT(sd, l) (sd)->sd_lolimit = (l); \ (sd)->sd_hilimit = ((l) >> 16); #ifdef __i386__ /* * Gate descriptors (e.g. indirect descriptors) */ struct gate_descriptor { unsigned gd_looffset:16; /* gate offset (lsb) */ unsigned gd_selector:16; /* gate segment selector */ unsigned gd_stkcpy:5; /* number of stack wds to cpy */ unsigned gd_xx:3; /* unused */ unsigned gd_type:5; /* segment type */ unsigned gd_dpl:2; /* segment descriptor priority level */ unsigned gd_p:1; /* segment descriptor present */ unsigned gd_hioffset:16; /* gate offset (msb) */ } __packed; /* * Generic descriptor */ union descriptor { struct segment_descriptor sd; struct gate_descriptor gd; }; #else /* * Gate descriptors (e.g. indirect descriptors, trap, interrupt etc. 128 bit) * Only interrupt and trap gates have gd_ist. 
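Because the descriptor layout splits the 32-bit base across the 24-bit sd_lobase and 8-bit sd_hibase fields (and the 20-bit limit across 16 + 4 bits), the USD_* accessors above shift and mask accordingly. Worked example for base 0x12345678:

        USD_SETBASE:  sd_lobase = 0x345678 (low 24 bits), sd_hibase = 0x12 (high 8 bits)
        USD_GETBASE:  0x345678 | (0x12 << 24) = 0x12345678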
*/ struct gate_descriptor { uint64_t gd_looffset:16; /* gate offset (lsb) */ uint64_t gd_selector:16; /* gate segment selector */ uint64_t gd_ist:3; /* IST table index */ uint64_t gd_xx:5; /* unused */ uint64_t gd_type:5; /* segment type */ uint64_t gd_dpl:2; /* segment descriptor priority level */ uint64_t gd_p:1; /* segment descriptor present */ uint64_t gd_hioffset:48; /* gate offset (msb) */ uint64_t sd_xx1:32; } __packed; /* * Generic descriptor */ union descriptor { struct user_segment_descriptor sd; struct gate_descriptor gd; }; #endif /* system segments and gate types */ #define SDT_SYSNULL 0 /* system null */ #define SDT_SYS286TSS 1 /* system 286 TSS available */ #define SDT_SYSLDT 2 /* system local descriptor table */ #define SDT_SYS286BSY 3 /* system 286 TSS busy */ #define SDT_SYS286CGT 4 /* system 286 call gate */ #define SDT_SYSTASKGT 5 /* system task gate */ #define SDT_SYS286IGT 6 /* system 286 interrupt gate */ #define SDT_SYS286TGT 7 /* system 286 trap gate */ #define SDT_SYSNULL2 8 /* system null again */ #define SDT_SYS386TSS 9 /* system 386 TSS available */ #define SDT_SYSTSS 9 /* system available 64 bit TSS */ #define SDT_SYSNULL3 10 /* system null again */ #define SDT_SYS386BSY 11 /* system 386 TSS busy */ #define SDT_SYSBSY 11 /* system busy 64 bit TSS */ #define SDT_SYS386CGT 12 /* system 386 call gate */ #define SDT_SYSCGT 12 /* system 64 bit call gate */ #define SDT_SYSNULL4 13 /* system null again */ #define SDT_SYS386IGT 14 /* system 386 interrupt gate */ #define SDT_SYSIGT 14 /* system 64 bit interrupt gate */ #define SDT_SYS386TGT 15 /* system 386 trap gate */ #define SDT_SYSTGT 15 /* system 64 bit trap gate */ /* memory segment types */ #define SDT_MEMRO 16 /* memory read only */ #define SDT_MEMROA 17 /* memory read only accessed */ #define SDT_MEMRW 18 /* memory read write */ #define SDT_MEMRWA 19 /* memory read write accessed */ #define SDT_MEMROD 20 /* memory read only expand dwn limit */ #define SDT_MEMRODA 21 /* memory read only expand dwn limit accessed */ #define SDT_MEMRWD 22 /* memory read write expand dwn limit */ #define SDT_MEMRWDA 23 /* memory read write expand dwn limit accessed*/ #define SDT_MEME 24 /* memory execute only */ #define SDT_MEMEA 25 /* memory execute only accessed */ #define SDT_MEMER 26 /* memory execute read */ #define SDT_MEMERA 27 /* memory execute read accessed */ #define SDT_MEMEC 28 /* memory execute only conforming */ #define SDT_MEMEAC 29 /* memory execute only accessed conforming */ #define SDT_MEMERC 30 /* memory execute read conforming */ #define SDT_MEMERAC 31 /* memory execute read accessed conforming */ /* * Size of IDT table */ #define NIDT 256 /* 32 reserved, 0x80 syscall, most are h/w */ #define NRSVIDT 32 /* reserved entries for cpu exceptions */ /* * Entries in the Interrupt Descriptor Table (IDT) */ #define IDT_DE 0 /* #DE: Divide Error */ #define IDT_DB 1 /* #DB: Debug */ #define IDT_NMI 2 /* Nonmaskable External Interrupt */ #define IDT_BP 3 /* #BP: Breakpoint */ #define IDT_OF 4 /* #OF: Overflow */ #define IDT_BR 5 /* #BR: Bound Range Exceeded */ #define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */ #define IDT_NM 7 /* #NM: No Math Coprocessor */ #define IDT_DF 8 /* #DF: Double Fault */ #define IDT_FPUGP 9 /* Coprocessor Segment Overrun */ #define IDT_TS 10 /* #TS: Invalid TSS */ #define IDT_NP 11 /* #NP: Segment Not Present */ #define IDT_SS 12 /* #SS: Stack Segment Fault */ #define IDT_GP 13 /* #GP: General Protection Fault */ #define IDT_PF 14 /* #PF: Page Fault */ #define IDT_MF 16 /* #MF: FPU 
Floating-Point Error */ #define IDT_AC 17 /* #AC: Alignment Check */ #define IDT_MC 18 /* #MC: Machine Check */ #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ #define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ +#define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */ #if defined(__i386__) || defined(__ia64__) /* * Entries in the Global Descriptor Table (GDT) * Note that each 4 entries share a single 32 byte L1 cache line. * Some of the fast syscall instructions require a specific order here. */ #define GNULL_SEL 0 /* Null Descriptor */ #define GPRIV_SEL 1 /* SMP Per-Processor Private Data */ #define GUFS_SEL 2 /* User %fs Descriptor (order critical: 1) */ #define GUGS_SEL 3 /* User %gs Descriptor (order critical: 2) */ #define GCODE_SEL 4 /* Kernel Code Descriptor (order critical: 1) */ #define GDATA_SEL 5 /* Kernel Data Descriptor (order critical: 2) */ #define GUCODE_SEL 6 /* User Code Descriptor (order critical: 3) */ #define GUDATA_SEL 7 /* User Data Descriptor (order critical: 4) */ #define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */ #define GPROC0_SEL 9 /* Task state process slot zero and up */ #define GLDT_SEL 10 /* Default User LDT */ #define GUSERLDT_SEL 11 /* User LDT */ #define GPANIC_SEL 12 /* Task state to consider panic from */ #define GBIOSCODE32_SEL 13 /* BIOS interface (32bit Code) */ #define GBIOSCODE16_SEL 14 /* BIOS interface (16bit Code) */ #define GBIOSDATA_SEL 15 /* BIOS interface (Data) */ #define GBIOSUTIL_SEL 16 /* BIOS interface (Utility) */ #define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */ #define GNDIS_SEL 18 /* For the NDIS layer */ #ifdef XEN #define NGDT 9 #else #define NGDT 19 #endif /* * Entries in the Local Descriptor Table (LDT) */ #define LSYS5CALLS_SEL 0 /* forced by intel BCS */ #define LSYS5SIGR_SEL 1 #define L43BSDCALLS_SEL 2 /* notyet */ #define LUCODE_SEL 3 #define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */ #define LUDATA_SEL 5 /* separate stack, es,fs,gs sels ? */ /* #define LPOSIXCALLS_SEL 5*/ /* notyet */ #define LBSDICALLS_SEL 16 /* BSDI system call gate */ #define NLDT (LBSDICALLS_SEL + 1) #else /* !__i386__ && !__ia64__ */ /* * Entries in the Global Descriptor Table (GDT) */ #define GNULL_SEL 0 /* Null Descriptor */ #define GNULL2_SEL 1 /* Null Descriptor */ #define GUFS32_SEL 2 /* User 32 bit %fs Descriptor */ #define GUGS32_SEL 3 /* User 32 bit %gs Descriptor */ #define GCODE_SEL 4 /* Kernel Code Descriptor */ #define GDATA_SEL 5 /* Kernel Data Descriptor */ #define GUCODE32_SEL 6 /* User 32 bit code Descriptor */ #define GUDATA_SEL 7 /* User 32/64 bit Data Descriptor */ #define GUCODE_SEL 8 /* User 64 bit Code Descriptor */ #define GPROC0_SEL 9 /* TSS for entering kernel etc */ /* slot 10 is second half of GPROC0_SEL */ #define GUSERLDT_SEL 11 /* LDT */ /* slot 12 is second half of GUSERLDT_SEL */ #define NGDT 13 #endif /* __i386__ || __ia64__ */ #endif /* !_X86_SEGMENTS_H_ */ diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 42ffa48f7365..ac651cdcd64d 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -1,1516 +1,1524 @@ /*- * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Local APIC support on Pentium and later processors. */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define SDT_APICT SDT_SYSIGT #define GSEL_APIC 0 #else #define SDT_APIC SDT_SYS386IGT #define SDT_APICT SDT_SYS386TGT #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) #endif /* Sanity checks on IDT vectors. */ CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* Magic IRQ values for the timer and syscalls. */ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) +#define IRQ_EVTCHN (NUM_IO_INTS + 4) /* * Support for local APICs. Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts * from I/O devices and then forward them on to the local APICs. * * Local APICs can also send interrupts to each other thus providing the * mechanism for IPIs. */ struct lvt { u_int lvt_edgetrigger:1; u_int lvt_activehi:1; u_int lvt_masked:1; u_int lvt_active:1; u_int lvt_mode:16; u_int lvt_vector:8; }; struct lapic { struct lvt la_lvts[LVT_MAX + 1]; u_int la_id:8; u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; u_long *la_timer_count; u_long la_timer_period; u_int la_timer_mode; uint32_t lvt_timer_cache; /* Include IDT_SYSCALL to make indexing easier. */ int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static lapics[MAX_APIC_ID + 1]; /* Global defaults for local APIC LVT entries. 
*/ static struct lvt lvts[LVT_MAX + 1] = { { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ }; static inthand_t *ioint_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1), /* 32 - 63 */ IDTVEC(apic_isr2), /* 64 - 95 */ IDTVEC(apic_isr3), /* 96 - 127 */ IDTVEC(apic_isr4), /* 128 - 159 */ IDTVEC(apic_isr5), /* 160 - 191 */ IDTVEC(apic_isr6), /* 192 - 223 */ IDTVEC(apic_isr7), /* 224 - 255 */ }; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; extern inthand_t IDTVEC(rsvd); volatile lapic_t *lapic; vm_paddr_t lapic_paddr; static u_long lapic_timer_divisor; static struct eventtimer lapic_et; static void lapic_enable(void); static void lapic_resume(struct pic *pic); static void lapic_timer_oneshot(struct lapic *, u_int count, int enable_int); static void lapic_timer_periodic(struct lapic *, u_int count, int enable_int); static void lapic_timer_stop(struct lapic *); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period); static int lapic_et_stop(struct eventtimer *et); struct pic lapic_pic = { .pic_resume = lapic_resume }; static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value) { struct lvt *lvt; KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin)); if (la->la_lvts[pin].lvt_active) lvt = &la->la_lvts[pin]; else lvt = &lvts[pin]; value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | APIC_LVT_VECTOR); if (lvt->lvt_edgetrigger == 0) value |= APIC_LVT_TM; if (lvt->lvt_activehi == 0) value |= APIC_LVT_IIPP_INTALO; if (lvt->lvt_masked) value |= APIC_LVT_M; value |= lvt->lvt_mode; switch (lvt->lvt_mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: if (!lvt->lvt_edgetrigger) { printf("lapic%u: Forcing LINT%u to edge trigger\n", la->la_id, pin); value |= APIC_LVT_TM; } /* Use a vector of 0. */ break; case APIC_LVT_DM_FIXED: value |= lvt->lvt_vector; break; default: panic("bad APIC LVT delivery mode: %#x\n", value); } return (value); } /* * Map the local APIC and setup necessary interrupt vectors. */ void lapic_init(vm_paddr_t addr) { u_int regs[4]; int i, arat; /* Map the local APIC and setup the spurious interrupt handler. */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic_paddr = addr; lapic = pmap_mapdev(addr, sizeof(lapic_t)); setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Local APIC error interrupt. */ setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC); /* XXX: Thermal interrupt */ /* Local APIC CMCI. 
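The ioint_handlers[] table above assigns one apic_isrN entry point to each 32-vector block of the in-service register, and the entry point reports vectors back to lapic_handle_intr() as 32 * block + bit. For example, vector 77:

        block = 77 / 32 = 2        ->  apic_isr2 (vectors 64-95)
        bit   = 77 % 32 = 13
        vector passed on = 32 * 2 + 13 = 77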
*/ setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC); if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { arat = 0; /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { do_cpuid(0x06, regs); if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; lapic_et.et_quality -= 200; } lapic_et.et_frequency = 0; /* We don't know frequency yet, so trying to guess. */ lapic_et.et_min_period = 0x00001000LL; lapic_et.et_max_period = SBT_1S; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; et_register(&lapic_et); } } /* * Create a local APIC instance. */ void lapic_create(u_int apic_id, int boot_cpu) { int i; if (apic_id > MAX_APIC_ID) { printf("APIC: Ignoring local APIC with ID %d\n", apic_id); if (boot_cpu) panic("Can't ignore BSP"); return; } KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", apic_id)); /* * Assume no local LVT overrides and a cluster of 0 and * intra-cluster ID of 0. */ lapics[apic_id].la_present = 1; lapics[apic_id].la_id = apic_id; for (i = 0; i <= LVT_MAX; i++) { lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) lapics[apic_id].la_ioint_irqs[i] = -1; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif +#ifdef XENHVM + lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; +#endif #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } /* * Dump contents of local APIC registers */ void lapic_dump(const char* str) { uint32_t maxlvt; maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n", lapic->id, lapic->version, lapic->ldr, lapic->dfr); printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error); if (maxlvt >= LVT_PMC) printf(" pmc: 0x%08x", lapic->lvt_pcint); printf("\n"); if (maxlvt >= LVT_CMCI) printf(" cmci: 0x%08x\n", lapic->lvt_cmci); } void lapic_setup(int boot) { struct lapic *la; u_int32_t maxlvt; register_t saveintr; char buf[MAXCOMLEN + 1]; la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); saveintr = intr_disable(); maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); /* Setup spurious vector and enable the local APIC. */ lapic_enable(); /* Program LINT[01] LVT entries. */ lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0); lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1); /* Program the PMC LVT entry if present. */ if (maxlvt >= LVT_PMC) lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint); /* Program timer LVT and setup handler. 
*/ la->lvt_timer_cache = lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer); if (boot) { snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); intrcnt_add(buf, &la->la_timer_count); } /* Setup the timer if configured. */ if (la->la_timer_mode != 0) { KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", lapic_id())); lapic_timer_set_divisor(lapic_timer_divisor); if (la->la_timer_mode == 1) lapic_timer_periodic(la, la->la_timer_period, 1); else lapic_timer_oneshot(la, la->la_timer_period, 1); } /* Program error LVT and clear any existing errors. */ lapic->lvt_error = lvt_mode(la, LVT_ERROR, lapic->lvt_error); lapic->esr = 0; /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ if (maxlvt >= LVT_CMCI) lapic->lvt_cmci = lvt_mode(la, LVT_CMCI, lapic->lvt_cmci); intr_restore(saveintr); } void lapic_reenable_pmc(void) { #ifdef HWPMC_HOOKS uint32_t value; value = lapic->lvt_pcint; value &= ~APIC_LVT_M; lapic->lvt_pcint = value; #endif } #ifdef HWPMC_HOOKS static void lapic_update_pmc(void *dummy) { struct lapic *la; la = &lapics[lapic_id()]; lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint); } #endif int lapic_enable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (lapic == NULL) return (0); /* Fail if the PMC LVT is not present. */ maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < LVT_PMC) return (0); lvts[LVT_PMC].lvt_masked = 0; #ifdef SMP /* * If hwpmc was loaded at boot time then the APs may not be * started yet. In that case, don't forward the request to * them as they will program the lvt when they start. */ if (smp_started) smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); else #endif lapic_update_pmc(NULL); return (1); #else return (0); #endif } void lapic_disable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (lapic == NULL) return; /* Fail if the PMC LVT is not present. */ maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < LVT_PMC) return; lvts[LVT_PMC].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); #endif smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #endif } static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lapic *la; u_long value; la = &lapics[PCPU_GET(apic_id)]; if (et->et_frequency == 0) { /* Start off with a divisor of 2 (power on reset default). */ lapic_timer_divisor = 2; /* Try to calibrate the local APIC timer. 
*/ do { lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(la, APIC_TIMER_MAX_COUNT, 0); DELAY(1000000); value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer; if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; } while (lapic_timer_divisor <= 128); if (lapic_timer_divisor > 128) panic("lapic: Divisor too big"); if (bootverbose) printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); et->et_frequency = value; et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; } if (la->la_timer_mode == 0) lapic_timer_set_divisor(lapic_timer_divisor); if (period != 0) { la->la_timer_mode = 1; la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32; lapic_timer_periodic(la, la->la_timer_period, 1); } else { la->la_timer_mode = 2; la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32; lapic_timer_oneshot(la, la->la_timer_period, 1); } return (0); } static int lapic_et_stop(struct eventtimer *et) { struct lapic *la = &lapics[PCPU_GET(apic_id)]; la->la_timer_mode = 0; lapic_timer_stop(la); return (0); } void lapic_disable(void) { uint32_t value; /* Software disable the local APIC. */ value = lapic->svr; value &= ~APIC_SVR_SWEN; lapic->svr = value; } static void lapic_enable(void) { u_int32_t value; /* Program the spurious vector to enable the local APIC. */ value = lapic->svr; value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); lapic->svr = value; } /* Reset the local APIC on the BSP during resume. */ static void lapic_resume(struct pic *pic) { lapic_setup(0); } int lapic_id(void) { KASSERT(lapic != NULL, ("local APIC is not mapped")); return (lapic->id >> APIC_ID_SHIFT); } int lapic_intr_pending(u_int vector) { volatile u_int32_t *irr; /* * The IRR registers are an array of 128-bit registers each of * which only describes 32 interrupts in the low 32 bits.. Thus, * we divide the vector by 32 to get the 128-bit index. We then * multiply that index by 4 to get the equivalent index from * treating the IRR as an array of 32-bit registers. Finally, we * modulus the vector by 32 to determine the individual bit to * test. */ irr = &lapic->irr0; return (irr[(vector / 32) * 4] & 1 << (vector % 32)); } void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { struct lapic *la; KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", __func__, apic_id)); KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", __func__, cluster)); KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, ("%s: intra cluster id %u too big", __func__, cluster_id)); la = &lapics[apic_id]; la->la_cluster = cluster; la->la_cluster_id = cluster_id; } int lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) { if (pin > LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_masked = masked; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_masked = masked; lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u %s\n", pin, masked ? 
"masked" : "unmasked"); return (0); } int lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) { struct lvt *lvt; if (pin > LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvt = &lvts[pin]; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lvt = &lapics[apic_id].la_lvts[pin]; lvt->lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } lvt->lvt_mode = mode; switch (mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: lvt->lvt_edgetrigger = 1; lvt->lvt_activehi = 1; if (mode == APIC_LVT_DM_EXTINT) lvt->lvt_masked = 1; else lvt->lvt_masked = 0; break; default: panic("Unsupported delivery mode: 0x%x\n", mode); } if (bootverbose) { printf(" Routing "); switch (mode) { case APIC_LVT_DM_NMI: printf("NMI"); break; case APIC_LVT_DM_SMI: printf("SMI"); break; case APIC_LVT_DM_INIT: printf("INIT"); break; case APIC_LVT_DM_EXTINT: printf("ExtINT"); break; } printf(" -> LINT%u\n", pin); } return (0); } int lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) { if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_active = 1; lapics[apic_id].la_lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u polarity: %s\n", pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } int lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) { if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u trigger: %s\n", pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Adjust the TPR of the current CPU so that it blocks all interrupts below * the passed in vector. */ void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR lapic->tpr = vector; #else u_int32_t tpr; tpr = lapic->tpr & ~APIC_TPR_PRIO; tpr |= vector; lapic->tpr = tpr; #endif } void lapic_eoi(void) { lapic->eoi = 0; } void lapic_handle_intr(int vector, struct trapframe *frame) { struct intsrc *isrc; isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), vector)); intr_execute_handlers(isrc, frame); } void lapic_handle_timer(struct trapframe *frame) { struct lapic *la; struct trapframe *oldframe; struct thread *td; /* Send EOI first thing. */ lapic_eoi(); #if defined(SMP) && !defined(SCHED_ULE) /* * Don't do any accounting for the disabled HTT cores, since it * will provide misleading numbers for the userland. * * No locking is necessary here, since even if we lose the race * when hlt_cpus_mask changes it is not a big deal, really. * * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask * and unlike other schedulers it actually schedules threads to * those CPUs. 
*/ if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif /* Look up our local APIC structure for the tick counters. */ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } critical_exit(); } static void lapic_timer_set_divisor(u_int divisor) { KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) / sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor)); lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1]; } static void lapic_timer_oneshot(struct lapic *la, u_int count, int enable_int) { u_int32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT; if (enable_int) value &= ~APIC_LVT_M; lapic->lvt_timer = value; lapic->icr_timer = count; } static void lapic_timer_periodic(struct lapic *la, u_int count, int enable_int) { u_int32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_PERIODIC; if (enable_int) value &= ~APIC_LVT_M; lapic->lvt_timer = value; lapic->icr_timer = count; } static void lapic_timer_stop(struct lapic *la) { u_int32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVT_M; lapic->lvt_timer = value; } void lapic_handle_cmc(void) { lapic_eoi(); cmc_intr(); } /* * Called from the mca_init() to activate the CMC interrupt if this CPU is * responsible for monitoring any MC banks for CMC events. Since mca_init() * is called prior to lapic_setup() during boot, this just needs to unmask * this CPU's LVT_CMCI entry. */ void lapic_enable_cmc(void) { u_int apic_id; #ifdef DEV_ATPIC if (lapic == NULL) return; #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[LVT_CMCI].lvt_masked = 0; lapics[apic_id].la_lvts[LVT_CMCI].lvt_active = 1; if (bootverbose) printf("lapic%u: CMCI unmasked\n", apic_id); } void lapic_handle_error(void) { u_int32_t esr; /* * Read the contents of the error status register. Write to * the register first before reading from it to force the APIC * to update its value to indicate any errors that have * occurred since the previous write to the register. */ lapic->esr = 0; esr = lapic->esr; printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); } u_int apic_cpuid(u_int apic_id) { #ifdef SMP return apic_cpuids[apic_id]; #else return 0; #endif } /* Request a free IDT vector to be used by the specified IRQ. */ u_int apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple * algorithm to find the first free vector. */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { if (lapics[apic_id].la_ioint_irqs[vector] != -1) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); return (0); } /* * Request 'count' free contiguous IDT vectors to be used by 'count' * IRQs. 'count' must be a power of two and the vectors will be * aligned on a boundary of 'align'. If the request cannot be * satisfied, 0 is returned. 
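 *
 * Editor's example (illustrative, hypothetical IRQ numbers): a device
 * asking for a 4-message MSI group needs count = 4 and align = 4.  If
 * the first three offsets in the I/O vector range are already taken, the
 * first-fit scan below skips offset 3 (not aligned) and hands back the
 * aligned run starting at offset 4:
 *
 *	u_int irqs[4] = { 256, 257, 258, 259 };	// example MSI IRQ numbers
 *	u_int vector;
 *
 *	vector = apic_alloc_vectors(apic_id, irqs, 4, 4);
 *	if (vector == 0)
 *		return (ENOSPC);		// no aligned run available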
*/ u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; KASSERT(powerof2(count), ("bad count")); KASSERT(powerof2(align), ("bad align")); KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif /* * Search for 'count' free vectors. As with apic_alloc_vector(), * this just uses a simple first fit algorithm. */ run = 0; first = 0; mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ if (lapics[apic_id].la_ioint_irqs[vector] != -1) { run = 0; first = 0; continue; } /* Start a new run if run == 0 and vector is aligned. */ if (run == 0) { if ((vector & (align - 1)) != 0) continue; first = vector; } run++; /* Keep looping if the run isn't long enough yet. */ if (run < count) continue; /* Found a run, assign IRQs and return the first vector. */ for (vector = 0; vector < count; vector++) lapics[apic_id].la_ioint_irqs[first + vector] = irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); return (0); } /* * Enable a vector for a particular apic_id. Since all lapics share idt * entries and ioint_handlers this enables the vector on all lapics. lapics * which do not have the vector configured would report spurious interrupts * should it fire. */ void apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } void apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet /* * We can not currently clear the idt entry because other cpus * may have a valid vector at this offset. */ setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); #endif } /* Release an APIC vector when it's no longer in use. */ void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif /* * Bind us to the cpu that owned the vector before freeing it so * we don't lose an interrupt delivery race. */ td = curthread; if (!rebooting) { thread_lock(td); if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); thread_unlock(td); } mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); sched_unbind(td); thread_unlock(td); } } /* Map an IDT vector (APIC) to an IRQ (interrupt source). 
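 *
 * Editor's note: this is the inverse of the vector allocators above.  The
 * lookup performed on behalf of lapic_handle_intr() is a plain index into
 * the owning CPU's table, with free slots (-1) clamped to IRQ 0:
 *
 *	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
 *	if (irq < 0)
 *		irq = 0;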
*/ u_int apic_idt_to_irq(u_int apic_id, u_int vector) { int irq; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; return (irq); } #ifdef DDB /* * Dump data about APIC IDT vector mappings. */ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) verbose = 2; else if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { if (lapics[apic_id].la_present == 0) continue; db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == -1 || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) continue; +#endif +#ifdef XENHVM + if (irq == IRQ_EVTCHN) + continue; #endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); else if (irq < NUM_IO_INTS) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); else db_dump_intr_event(isrc->is_event, verbose == 2); } else db_printf("IRQ %u ???\n", irq); } } } static void dump_mask(const char *prefix, uint32_t v, int base) { int i, first; first = 1; for (i = 0; i < 32; i++) if (v & (1 << i)) { if (first) { db_printf("%s:", prefix); first = 0; } db_printf(" %02x", base + i); } if (!first) db_printf("\n"); } /* Show info from the lapic regs for this CPU. */ DB_SHOW_COMMAND(lapic, db_show_lapic) { uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); v = lapic->version; db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); v = lapic->svr; db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? "enabled" : "disabled"); db_printf("TPR = %02x\n", lapic->tpr); #define dump_field(prefix, index) \ dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index, \ index * 32) db_printf("In-service Interrupts:\n"); dump_field(isr, 0); dump_field(isr, 1); dump_field(isr, 2); dump_field(isr, 3); dump_field(isr, 4); dump_field(isr, 5); dump_field(isr, 6); dump_field(isr, 7); db_printf("TMR Interrupts:\n"); dump_field(tmr, 0); dump_field(tmr, 1); dump_field(tmr, 2); dump_field(tmr, 3); dump_field(tmr, 4); dump_field(tmr, 5); dump_field(tmr, 6); dump_field(tmr, 7); db_printf("IRR Interrupts:\n"); dump_field(irr, 0); dump_field(irr, 1); dump_field(irr, 2); dump_field(irr, 3); dump_field(irr, 4); dump_field(irr, 5); dump_field(irr, 6); dump_field(irr, 7); #undef dump_field } #endif /* * APIC probing support code. This includes code to manage enumerators. 
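 *
 * Editor's sketch (hypothetical enumerator, illustrative only): the MP
 * Table and ACPI MADT code register themselves here from early SYSINITs,
 * and apic_init() keeps the enumerator whose probe value is closest to
 * zero; a positive return drops it from consideration.  A new enumerator
 * would look roughly like:
 *
 *	static int foo_probe(void)       { return (-100); }  // low priority
 *	static int foo_probe_cpus(void)  { return (0); }
 *	static int foo_setup_local(void) { return (0); }
 *	static int foo_setup_io(void)    { return (0); }
 *
 *	static struct apic_enumerator foo_enumerator = {
 *		.apic_name = "foo",
 *		.apic_probe = foo_probe,
 *		.apic_probe_cpus = foo_probe_cpus,
 *		.apic_setup_local = foo_setup_local,
 *		.apic_setup_io = foo_setup_io,
 *	};
 *
 *	apic_register_enumerator(&foo_enumerator);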
*/ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; void apic_register_enumerator(struct apic_enumerator *enumerator) { #ifdef INVARIANTS struct apic_enumerator *apic_enum; SLIST_FOREACH(apic_enum, &enumerators, apic_next) { if (apic_enum == enumerator) panic("%s: Duplicate register of %s", __func__, enumerator->apic_name); } #endif SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); } /* * We have to look for CPU's very, very early because certain subsystems * want to know how many CPU's we have extremely early on in the boot * process. */ static void apic_init(void *dummy __unused) { struct apic_enumerator *enumerator; #ifndef __amd64__ uint64_t apic_base; #endif int retval, best; /* We only support built in local APICs. */ if (!(cpu_feature & CPUID_APIC)) return; /* Don't probe if APIC mode is disabled. */ if (resource_disabled("apic", 0)) return; /* Probe all the enumerators to find the best match. */ best_enum = NULL; best = 0; SLIST_FOREACH(enumerator, &enumerators, apic_next) { retval = enumerator->apic_probe(); if (retval > 0) continue; if (best_enum == NULL || best < retval) { best_enum = enumerator; best = retval; } } if (best_enum == NULL) { if (bootverbose) printf("APIC: Could not find any APICs.\n"); #ifndef DEV_ATPIC panic("running without device atpic requires a local APIC"); #endif return; } if (bootverbose) printf("APIC: Using the %s enumerator.\n", best_enum->apic_name); #ifndef __amd64__ /* * To work around an errata, we disable the local APIC on some * CPUs during early startup. We need to turn the local APIC back * on on such CPUs now. */ if (cpu == CPU_686 && cpu_vendor_id == CPU_VENDOR_INTEL && (cpu_id & 0xff0) == 0x610) { apic_base = rdmsr(MSR_APICBASE); apic_base |= APICBASE_ENABLED; wrmsr(MSR_APICBASE, apic_base); } #endif /* Probe the CPU's in the system. */ retval = best_enum->apic_probe_cpus(); if (retval != 0) printf("%s: Failed to probe CPUs: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); /* * Setup the local APIC. We have to do this prior to starting up the APs * in the SMP case. */ static void apic_setup_local(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* Initialize the local APIC. */ retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); /* * Setup the I/O APICs. */ static void apic_setup_io(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* * Local APIC must be registered before other PICs and pseudo PICs * for proper suspend/resume order. */ #ifndef XEN intr_register_pic(&lapic_pic); #endif retval = best_enum->apic_setup_io(); if (retval != 0) printf("%s: Failed to setup I/O APICs: returned %d\n", best_enum->apic_name, retval); #ifdef XEN return; #endif /* * Finish setting up the local APIC on the BSP once we know how to * properly program the LINT pins. */ lapic_setup(1); if (bootverbose) lapic_dump("BSP"); /* Enable the MSI "pic". */ msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL); #ifdef SMP /* * Inter Processor Interrupt functions. The lapic_ipi_*() functions are * private to the MD code. The public interface for the rest of the * kernel is defined in mp_machdep.c. 
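 *
 * Editor's sketch (illustrative): the usual consumer is the MI SMP code,
 * which funnels into lapic_ipi_vectored().  Poking one CPU with an AST,
 * for example, reduces to:
 *
 *	// dest is a raw APIC ID, or APIC_IPI_DEST_SELF/_ALL/_OTHERS
 *	lapic_ipi_vectored(IPI_AST, cpu_apic_ids[cpu]);
 *
 * lapic_ipi_wait() below spins until the previous command has left
 * APIC_DELSTAT_PEND, and lapic_ipi_raw() writes ICR_HI (destination)
 * before ICR_LO (vector, delivery mode, trigger) so the command is
 * dispatched with the intended target in place.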
*/ int lapic_ipi_wait(int delay) { int x, incr; /* * Wait delay loops for IPI to be sent. This is highly bogus * since this is sensitive to CPU clock speed. If delay is * -1, we wait forever. */ if (delay == -1) { incr = 0; delay = 1; } else incr = 1; for (x = 0; x < delay; x += incr) { if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); ia32_pause(); } return (0); } void lapic_ipi_raw(register_t icrlo, u_int dest) { register_t value, saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(lapic != NULL, ("%s called too early", __func__)); KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used. */ saveintr = intr_disable(); if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { value = lapic->icr_hi; value &= ~APIC_ID_MASK; value |= dest << APIC_ID_SHIFT; lapic->icr_hi = value; } /* Program the contents of the IPI and dispatch it. */ value = lapic->icr_lo; value &= APIC_ICRLO_RESV_MASK; value |= icrlo; lapic->icr_lo = value; intr_restore(saveintr); } #define BEFORE_SPIN 1000000 #ifdef DETECT_DEADLOCK #define AFTER_SPIN 1000 #endif void lapic_ipi_vectored(u_int vector, int dest) { register_t icrlo, destfield; KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE; /* * IPI_STOP_HARD is just a "fake" vector used to send a NMI. * Use special rules regard NMI if passed, otherwise specify * the vector. */ if (vector == IPI_STOP_HARD) icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT; else icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: icrlo |= APIC_DEST_SELF; break; case APIC_IPI_DEST_ALL: icrlo |= APIC_DEST_ALLISELF; break; case APIC_IPI_DEST_OTHERS: icrlo |= APIC_DEST_ALLESELF; break; default: KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid destination 0x%x", __func__, dest)); destfield = dest; } /* Wait for an earlier IPI to finish. */ if (!lapic_ipi_wait(BEFORE_SPIN)) { if (panicstr != NULL) return; else panic("APIC: Previous IPI is stuck"); } lapic_ipi_raw(icrlo, destfield); #ifdef DETECT_DEADLOCK /* Wait for IPI to be delivered. */ if (!lapic_ipi_wait(AFTER_SPIN)) { #ifdef needsattention /* * XXX FIXME: * * The above function waits for the message to actually be * delivered. It breaks out after an arbitrary timeout * since the message should eventually be delivered (at * least in theory) and that if it wasn't we would catch * the failure with the check above when the next IPI is * sent. * * We could skip this wait entirely, EXCEPT it probably * protects us from other routines that assume that the * message was delivered and acted upon when this function * returns. */ printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until mesage is sent without a timeout. */ while (lapic->icr_lo & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } #endif /* DETECT_DEADLOCK */ } #endif /* SMP */ diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c new file mode 100644 index 000000000000..0730d941afd9 --- /dev/null +++ b/sys/x86/xen/hvm.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2008 Citrix Systems, Inc. + * Copyright (c) 2012 Spectra Logic Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); + +DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); +DPCPU_DEFINE(struct vcpu_info *, vcpu_info); + +/*-------------------------------- Global Data -------------------------------*/ +/** + * If non-zero, the hypervisor has been configured to use a direct + * IDT event callback for interrupt injection. + */ +int xen_vector_callback_enabled; + +/*------------------ Hypervisor Access Shared Memory Regions -----------------*/ +/** Hypercall table accessed via HYPERVISOR_*_op() methods. */ +char *hypercall_stubs; +shared_info_t *HYPERVISOR_shared_info; +enum xen_domain_type xen_domain_type = XEN_NATIVE; + +static uint32_t +xen_hvm_cpuid_base(void) +{ + uint32_t base, regs[4]; + + for (base = 0x40000000; base < 0x40010000; base += 0x100) { + do_cpuid(base, regs); + if (!memcmp("XenVMMXenVMM", ®s[1], 12) + && (regs[0] - base) >= 2) + return (base); + } + return (0); +} + +/* + * Allocate and fill in the hypcall page. + */ +static int +xen_hvm_init_hypercall_stubs(void) +{ + uint32_t base, regs[4]; + int i; + + base = xen_hvm_cpuid_base(); + if (!base) + return (ENXIO); + + if (hypercall_stubs == NULL) { + do_cpuid(base + 1, regs); + printf("XEN: Hypervisor version %d.%d detected.\n", + regs[0] >> 16, regs[0] & 0xffff); + } + + /* + * Find the hypercall pages. 
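 *
 * Editor's note (illustrative): for the Xen CPUID leaves located above,
 * leaf base+2 returns the number of hypercall pages in EAX and the MSR
 * used to install them in EBX, so the loop below amounts to:
 *
 *	do_cpuid(base + 2, regs);
 *	npages = regs[0];		// CPUID.(base+2):EAX, page count
 *	msr    = regs[1];		// CPUID.(base+2):EBX, install MSR
 *	for (i = 0; i < npages; i++)
 *		wrmsr(msr, vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
 *
 * Each write hands the hypervisor the physical address of one page with
 * that page's index packed into the low bits.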
+ */ + do_cpuid(base + 2, regs); + + if (hypercall_stubs == NULL) { + size_t call_region_size; + + call_region_size = regs[0] * PAGE_SIZE; + hypercall_stubs = malloc(call_region_size, M_XENHVM, M_NOWAIT); + if (hypercall_stubs == NULL) + panic("Unable to allocate Xen hypercall region"); + } + + for (i = 0; i < regs[0]; i++) + wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); + + return (0); +} + +static void +xen_hvm_init_shared_info_page(void) +{ + struct xen_add_to_physmap xatp; + + if (HYPERVISOR_shared_info == NULL) { + HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT); + if (HYPERVISOR_shared_info == NULL) + panic("Unable to allocate Xen shared info page"); + } + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + panic("HYPERVISOR_memory_op failed"); +} + +/* + * Tell the hypervisor how to contact us for event channel callbacks. + */ +void +xen_hvm_set_callback(device_t dev) +{ + struct xen_hvm_param xhp; + int irq; + + xhp.domid = DOMID_SELF; + xhp.index = HVM_PARAM_CALLBACK_IRQ; + if (xen_feature(XENFEAT_hvm_callback_vector)) { + int error; + + xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); + error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); + if (error == 0) { + xen_vector_callback_enabled = 1; + return; + } + printf("Xen HVM callback vector registration failed (%d). " + "Falling back to emulated device interrupt\n", + error); + } + xen_vector_callback_enabled = 0; + if (dev == NULL) { + /* + * Called from early boot or resume. + * xenpci will invoke us again later. + */ + return; + } + + irq = pci_get_irq(dev); + if (irq < 16) { + xhp.value = HVM_CALLBACK_GSI(irq); + } else { + u_int slot; + u_int pin; + + slot = pci_get_slot(dev); + pin = pci_get_intpin(dev) - 1; + xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); + } + + if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp)) + panic("Can't set evtchn callback"); +} + +#define XEN_MAGIC_IOPORT 0x10 +enum { + XMI_MAGIC = 0x49d2, + XMI_UNPLUG_IDE_DISKS = 0x01, + XMI_UNPLUG_NICS = 0x02, + XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 +}; + +static void +xen_hvm_disable_emulated_devices(void) +{ + if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) + return; + + if (bootverbose) + printf("XEN: Disabling emulated block and network devices\n"); + outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS); +} + +void +xen_hvm_suspend(void) +{ +} + +void +xen_hvm_resume(void) +{ + xen_hvm_init_hypercall_stubs(); + xen_hvm_init_shared_info_page(); +} + +static void +xen_hvm_init(void *dummy __unused) +{ + if (xen_hvm_init_hypercall_stubs() != 0) + return; + + xen_domain_type = XEN_HVM_DOMAIN; + setup_xen_features(); + xen_hvm_init_shared_info_page(); + xen_hvm_set_callback(NULL); + xen_hvm_disable_emulated_devices(); +} + +void xen_hvm_init_cpu(void) +{ + int cpu = PCPU_GET(acpi_id); + struct vcpu_info *vcpu_info; + struct vcpu_register_vcpu_info info; + int rc; + + vcpu_info = DPCPU_PTR(vcpu_local_info); + info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; + info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); + + rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + if (rc) { + DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]); + } else { + DPCPU_SET(vcpu_info, vcpu_info); + } +} + +SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL); +SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL); diff --git 
a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c new file mode 100644 index 000000000000..83bf4873f97d --- /dev/null +++ b/sys/x86/xen/xen_intr.c @@ -0,0 +1,1126 @@ +/****************************************************************************** + * xen_intr.c + * + * Xen event and interrupt services for x86 PV and HVM guests. + * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005, Intel Corporation + * Copyright (c) 2012, Spectra Logic Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services"); + +/** + * Per-cpu event channel processing state. + */ +struct xen_intr_pcpu_data { + /** + * The last event channel bitmap section (level one bit) processed. + * This is used to ensure we scan all ports before + * servicing an already servied port again. + */ + u_int last_processed_l1i; + + /** + * The last event channel processed within the event channel + * bitmap being scanned. + */ + u_int last_processed_l2i; + + /** Pointer to this CPU's interrupt statistic counter. */ + u_long *evtchn_intrcnt; + + /** + * A bitmap of ports that can be serviced from this CPU. + * A set bit means interrupt handling is enabled. + */ + u_long evtchn_enabled[sizeof(u_long) * 8]; +}; + +/* + * Start the scan at port 0 by initializing the last scanned + * location as the highest numbered event channel port. + */ +DPCPU_DEFINE(struct xen_intr_pcpu_data, xen_intr_pcpu) = { + .last_processed_l1i = LONG_BIT - 1, + .last_processed_l2i = LONG_BIT - 1 +}; + +DPCPU_DECLARE(struct vcpu_info *, vcpu_info); + +#define is_valid_evtchn(x) ((x) != 0) + +struct xenisrc { + struct intsrc xi_intsrc; + enum evtchn_type xi_type; + int xi_cpu; /* VCPU for delivery. */ + int xi_vector; /* Global isrc vector number. */ + evtchn_port_t xi_port; + int xi_pirq; + int xi_virq; + u_int xi_close:1; /* close on unbind? */ + u_int xi_needs_eoi:1; + u_int xi_shared:1; /* Shared with other domains. 
*/ +}; + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +static void xen_intr_suspend(struct pic *); +static void xen_intr_resume(struct pic *); +static void xen_intr_enable_source(struct intsrc *isrc); +static void xen_intr_disable_source(struct intsrc *isrc, int eoi); +static void xen_intr_eoi_source(struct intsrc *isrc); +static void xen_intr_enable_intr(struct intsrc *isrc); +static void xen_intr_disable_intr(struct intsrc *isrc); +static int xen_intr_vector(struct intsrc *isrc); +static int xen_intr_source_pending(struct intsrc *isrc); +static int xen_intr_config_intr(struct intsrc *isrc, + enum intr_trigger trig, enum intr_polarity pol); +static int xen_intr_assign_cpu(struct intsrc *isrc, u_int apic_id); + +static void xen_intr_pirq_enable_source(struct intsrc *isrc); +static void xen_intr_pirq_disable_source(struct intsrc *isrc, int eoi); +static void xen_intr_pirq_eoi_source(struct intsrc *isrc); +static void xen_intr_pirq_enable_intr(struct intsrc *isrc); + +/** + * PIC interface for all event channel port types except physical IRQs. + */ +struct pic xen_intr_pic = { + .pic_enable_source = xen_intr_enable_source, + .pic_disable_source = xen_intr_disable_source, + .pic_eoi_source = xen_intr_eoi_source, + .pic_enable_intr = xen_intr_enable_intr, + .pic_disable_intr = xen_intr_disable_intr, + .pic_vector = xen_intr_vector, + .pic_source_pending = xen_intr_source_pending, + .pic_suspend = xen_intr_suspend, + .pic_resume = xen_intr_resume, + .pic_config_intr = xen_intr_config_intr, + .pic_assign_cpu = xen_intr_assign_cpu +}; + +/** + * PIC interface for all event channel representing + * physical interrupt sources. + */ +struct pic xen_intr_pirq_pic = { + .pic_enable_source = xen_intr_pirq_enable_source, + .pic_disable_source = xen_intr_pirq_disable_source, + .pic_eoi_source = xen_intr_pirq_eoi_source, + .pic_enable_intr = xen_intr_pirq_enable_intr, + .pic_disable_intr = xen_intr_disable_intr, + .pic_vector = xen_intr_vector, + .pic_source_pending = xen_intr_source_pending, + .pic_suspend = xen_intr_suspend, + .pic_resume = xen_intr_resume, + .pic_config_intr = xen_intr_config_intr, + .pic_assign_cpu = xen_intr_assign_cpu +}; + +static struct mtx xen_intr_isrc_lock; +static int xen_intr_isrc_count; +static struct xenisrc *xen_intr_port_to_isrc[NR_EVENT_CHANNELS]; + +/*------------------------- Private Functions --------------------------------*/ +/** + * Disable signal delivery for an event channel port on the + * specified CPU. + * + * \param port The event channel port to mask. + * + * This API is used to manage the port<=>CPU binding of event + * channel handlers. + * + * \note This operation does not preclude reception of an event + * for this event channel on another CPU. To mask the + * event channel globally, use evtchn_mask(). + */ +static inline void +evtchn_cpu_mask_port(u_int cpu, evtchn_port_t port) +{ + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + clear_bit(port, pcpu->evtchn_enabled); +} + +/** + * Enable signal delivery for an event channel port on the + * specified CPU. + * + * \param port The event channel port to unmask. + * + * This API is used to manage the port<=>CPU binding of event + * channel handlers. + * + * \note This operation does not guarantee that event delivery + * is enabled for this event channel port. The port must + * also be globally enabled. See evtchn_unmask(). 
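 *
 * Editor's note: combined with the global mask kept in the shared info
 * page, a port is actually serviced on a CPU only when all three bits
 * agree, which is the per-word test xen_intr_active_ports() applies
 * later in this file:
 *
 *	active = sh->evtchn_pending[idx]	// Xen has raised the event
 *	    & ~sh->evtchn_mask[idx]		// not globally masked
 *	    & pcpu->evtchn_enabled[idx];	// routed to this CPU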
+ */ +static inline void +evtchn_cpu_unmask_port(u_int cpu, evtchn_port_t port) +{ + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + set_bit(port, pcpu->evtchn_enabled); +} + +/** + * Allocate and register a per-cpu Xen upcall interrupt counter. + * + * \param cpu The cpu for which to register this interrupt count. + */ +static void +xen_intr_intrcnt_add(u_int cpu) +{ + char buf[MAXCOMLEN + 1]; + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + if (pcpu->evtchn_intrcnt != NULL) + return; + + snprintf(buf, sizeof(buf), "cpu%d:xen", cpu); + intrcnt_add(buf, &pcpu->evtchn_intrcnt); +} + +/** + * Search for an already allocated but currently unused Xen interrupt + * source object. + * + * \param type Restrict the search to interrupt sources of the given + * type. + * + * \return A pointer to a free Xen interrupt source object or NULL. + */ +static struct xenisrc * +xen_intr_find_unused_isrc(enum evtchn_type type) +{ + int isrc_idx; + + KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn isrc lock not held")); + + for (isrc_idx = 0; isrc_idx < xen_intr_isrc_count; isrc_idx ++) { + struct xenisrc *isrc; + u_int vector; + + vector = FIRST_EVTCHN_INT + isrc_idx; + isrc = (struct xenisrc *)intr_lookup_source(vector); + if (isrc != NULL + && isrc->xi_type == EVTCHN_TYPE_UNBOUND) { + KASSERT(isrc->xi_intsrc.is_handlers == 0, + ("Free evtchn still has handlers")); + isrc->xi_type = type; + return (isrc); + } + } + return (NULL); +} + +/** + * Allocate a Xen interrupt source object. + * + * \param type The type of interrupt source to create. + * + * \return A pointer to a newly allocated Xen interrupt source + * object or NULL. + */ +static struct xenisrc * +xen_intr_alloc_isrc(enum evtchn_type type) +{ + static int warned; + struct xenisrc *isrc; + int vector; + + KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn alloc lock not held")); + + if (xen_intr_isrc_count > NR_EVENT_CHANNELS) { + if (!warned) { + warned = 1; + printf("xen_intr_alloc: Event channels exhausted.\n"); + } + return (NULL); + } + vector = FIRST_EVTCHN_INT + xen_intr_isrc_count; + xen_intr_isrc_count++; + + mtx_unlock(&xen_intr_isrc_lock); + isrc = malloc(sizeof(*isrc), M_XENINTR, M_WAITOK | M_ZERO); + isrc->xi_intsrc.is_pic = &xen_intr_pic; + isrc->xi_vector = vector; + isrc->xi_type = type; + intr_register_source(&isrc->xi_intsrc); + mtx_lock(&xen_intr_isrc_lock); + + return (isrc); +} + +/** + * Attempt to free an active Xen interrupt source object. + * + * \param isrc The interrupt source object to release. + * + * \returns EBUSY if the source is still in use, otherwise 0. + */ +static int +xen_intr_release_isrc(struct xenisrc *isrc) +{ + + mtx_lock(&xen_intr_isrc_lock); + if (isrc->xi_intsrc.is_handlers != 0) { + mtx_unlock(&xen_intr_isrc_lock); + return (EBUSY); + } + evtchn_mask_port(isrc->xi_port); + evtchn_clear_port(isrc->xi_port); + + /* Rebind port to CPU 0. */ + evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); + evtchn_cpu_unmask_port(0, isrc->xi_port); + + if (isrc->xi_close != 0) { + struct evtchn_close close = { .port = isrc->xi_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + } + + xen_intr_port_to_isrc[isrc->xi_port] = NULL; + isrc->xi_cpu = 0; + isrc->xi_type = EVTCHN_TYPE_UNBOUND; + isrc->xi_port = 0; + mtx_unlock(&xen_intr_isrc_lock); + return (0); +} + +/** + * Associate an interrupt handler with an already allocated local Xen + * event channel port. 
+ * + * \param isrcp The returned Xen interrupt object associated with + * the specified local port. + * \param local_port The event channel to bind. + * \param type The event channel type of local_port. + * \param intr_owner The device making this bind request. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. + */ +static int +xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port, + enum evtchn_type type, device_t intr_owner, driver_filter_t filter, + driver_intr_t handler, void *arg, enum intr_type flags, + xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + int error; + + *isrcp = NULL; + if (port_handlep == NULL) { + device_printf(intr_owner, + "xen_intr_bind_isrc: Bad event handle\n"); + return (EINVAL); + } + + mtx_lock(&xen_intr_isrc_lock); + isrc = xen_intr_find_unused_isrc(type); + if (isrc == NULL) { + isrc = xen_intr_alloc_isrc(type); + if (isrc == NULL) { + mtx_unlock(&xen_intr_isrc_lock); + return (ENOSPC); + } + } + isrc->xi_port = local_port; + xen_intr_port_to_isrc[local_port] = isrc; + mtx_unlock(&xen_intr_isrc_lock); + + error = intr_add_handler(device_get_nameunit(intr_owner), + isrc->xi_vector, filter, handler, arg, + flags|INTR_EXCL, port_handlep); + if (error != 0) { + device_printf(intr_owner, + "xen_intr_bind_irq: intr_add_handler failed\n"); + xen_intr_release_isrc(isrc); + return (error); + } + *isrcp = isrc; + return (0); +} + +/** + * Lookup a Xen interrupt source object given an interrupt binding handle. + * + * \param handle A handle initialized by a previous call to + * xen_intr_bind_isrc(). + * + * \returns A pointer to the Xen interrupt source object associated + * with the given interrupt handle. NULL if no association + * currently exists. + */ +static struct xenisrc * +xen_intr_isrc(xen_intr_handle_t handle) +{ + struct intr_handler *ih; + + ih = handle; + if (ih == NULL || ih->ih_event == NULL) + return (NULL); + + return (ih->ih_event->ie_source); +} + +/** + * Determine the event channel ports at the given section of the + * event port bitmap which have pending events for the given cpu. + * + * \param pcpu The Xen interrupt pcpu data for the cpu being querried. + * \param sh The Xen shared info area. + * \param idx The index of the section of the event channel bitmap to + * inspect. + * + * \returns A u_long with bits set for every event channel with pending + * events. + */ +static inline u_long +xen_intr_active_ports(struct xen_intr_pcpu_data *pcpu, shared_info_t *sh, + u_int idx) +{ + return (sh->evtchn_pending[idx] + & ~sh->evtchn_mask[idx] + & pcpu->evtchn_enabled[idx]); +} + +/** + * Interrupt handler for processing all Xen event channel events. + * + * \param trap_frame The trap frame context for the current interrupt. 
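 *
 * Editor's note: pending events are tracked as a two-level bitmap.  The
 * vcpu's evtchn_pending_sel word selects which word of evtchn_pending[]
 * to scan, and the bit within that word names the port, so the body
 * below recovers the port number as:
 *
 *	port = (l1i * LONG_BIT) + l2i;
 *
 * e.g. on amd64 (LONG_BIT == 64) port 130 is bit 2 of word 2.  The
 * last_processed_l1i/l2i cursors make each scan resume just past the
 * last port serviced, so a chatty low-numbered port cannot starve the
 * higher ones.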
+ */ +void +xen_intr_handle_upcall(struct trapframe *trap_frame) +{ + u_int l1i, l2i, port, cpu; + u_long masked_l1, masked_l2; + struct xenisrc *isrc; + shared_info_t *s; + vcpu_info_t *v; + struct xen_intr_pcpu_data *pc; + u_long l1, l2; + + /* + * Disable preemption in order to always check and fire events + * on the right vCPU + */ + critical_enter(); + + cpu = PCPU_GET(cpuid); + pc = DPCPU_PTR(xen_intr_pcpu); + s = HYPERVISOR_shared_info; + v = DPCPU_GET(vcpu_info); + + if (xen_hvm_domain() && !xen_vector_callback_enabled) { + KASSERT((cpu == 0), ("Fired PCI event callback on wrong CPU")); + } + + v->evtchn_upcall_pending = 0; + +#if 0 +#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ + /* Clear master flag /before/ clearing selector flag. */ + wmb(); +#endif +#endif + + l1 = atomic_readandclear_long(&v->evtchn_pending_sel); + + l1i = pc->last_processed_l1i; + l2i = pc->last_processed_l2i; + (*pc->evtchn_intrcnt)++; + + while (l1 != 0) { + + l1i = (l1i + 1) % LONG_BIT; + masked_l1 = l1 & ((~0UL) << l1i); + + if (masked_l1 == 0) { + /* + * if we masked out all events, wrap around + * to the beginning. + */ + l1i = LONG_BIT - 1; + l2i = LONG_BIT - 1; + continue; + } + l1i = ffsl(masked_l1) - 1; + + do { + l2 = xen_intr_active_ports(pc, s, l1i); + + l2i = (l2i + 1) % LONG_BIT; + masked_l2 = l2 & ((~0UL) << l2i); + + if (masked_l2 == 0) { + /* if we masked out all events, move on */ + l2i = LONG_BIT - 1; + break; + } + l2i = ffsl(masked_l2) - 1; + + /* process port */ + port = (l1i * LONG_BIT) + l2i; + synch_clear_bit(port, &s->evtchn_pending[0]); + + isrc = xen_intr_port_to_isrc[port]; + if (__predict_false(isrc == NULL)) + continue; + + /* Make sure we are firing on the right vCPU */ + KASSERT((isrc->xi_cpu == PCPU_GET(cpuid)), + ("Received unexpected event on vCPU#%d, event bound to vCPU#%d", + PCPU_GET(cpuid), isrc->xi_cpu)); + + intr_execute_handlers(&isrc->xi_intsrc, trap_frame); + + /* + * If this is the final port processed, + * we'll pick up here+1 next time. + */ + pc->last_processed_l1i = l1i; + pc->last_processed_l2i = l2i; + + } while (l2i != LONG_BIT - 1); + + l2 = xen_intr_active_ports(pc, s, l1i); + if (l2 == 0) { + /* + * We handled all ports, so we can clear the + * selector bit. + */ + l1 &= ~(1UL << l1i); + } + } + critical_exit(); +} + +static int +xen_intr_init(void *dummy __unused) +{ + struct xen_intr_pcpu_data *pcpu; + int i; + + mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); + + /* + * Register interrupt count manually as we aren't + * guaranteed to see a call to xen_intr_assign_cpu() + * before our first interrupt. Also set the per-cpu + * mask of CPU#0 to enable all, since by default + * all event channels are bound to CPU#0. + */ + CPU_FOREACH(i) { + pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); + memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0, + sizeof(pcpu->evtchn_enabled)); + xen_intr_intrcnt_add(i); + } + + intr_register_pic(&xen_intr_pic); + + return (0); +} +SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_MIDDLE, xen_intr_init, NULL); + +/*--------------------------- Common PIC Functions ---------------------------*/ +/** + * Prepare this PIC for system suspension. + */ +static void +xen_intr_suspend(struct pic *unused) +{ +} + +/** + * Return this PIC to service after being suspended. + */ +static void +xen_intr_resume(struct pic *unused) +{ + u_int port; + + /* + * Mask events for all ports. They will be unmasked after + * drivers have re-registered their handlers. 
+ */ + for (port = 0; port < NR_EVENT_CHANNELS; port++) + evtchn_mask_port(port); +} + +/** + * Disable a Xen interrupt source. + * + * \param isrc The interrupt source to disable. + */ +static void +xen_intr_disable_intr(struct intsrc *base_isrc) +{ + struct xenisrc *isrc = (struct xenisrc *)base_isrc; + + evtchn_mask_port(isrc->xi_port); +} + +/** + * Determine the global interrupt vector number for + * a Xen interrupt source. + * + * \param isrc The interrupt source to query. + * + * \return The vector number corresponding to the given interrupt source. + */ +static int +xen_intr_vector(struct intsrc *base_isrc) +{ + struct xenisrc *isrc = (struct xenisrc *)base_isrc; + + return (isrc->xi_vector); +} + +/** + * Determine whether or not interrupt events are pending on the + * the given interrupt source. + * + * \param isrc The interrupt source to query. + * + * \returns 0 if no events are pending, otherwise non-zero. + */ +static int +xen_intr_source_pending(struct intsrc *isrc) +{ + /* + * EventChannels are edge triggered and never masked. + * There can be no pending events. + */ + return (0); +} + +/** + * Perform configuration of an interrupt source. + * + * \param isrc The interrupt source to configure. + * \param trig Edge or level. + * \param pol Active high or low. + * + * \returns 0 if no events are pending, otherwise non-zero. + */ +static int +xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol) +{ + /* Configuration is only possible via the evtchn apis. */ + return (ENODEV); +} + +/** + * Configure CPU affinity for interrupt source event delivery. + * + * \param isrc The interrupt source to configure. + * \param apic_id The apic id of the CPU for handling future events. + * + * \returns 0 if successful, otherwise an errno. + */ +static int +xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) +{ + struct evtchn_bind_vcpu bind_vcpu; + struct xenisrc *isrc; + u_int to_cpu, acpi_id; + int error; + +#ifdef XENHVM + if (xen_vector_callback_enabled == 0) + return (EOPNOTSUPP); +#endif + + to_cpu = apic_cpuid(apic_id); + acpi_id = pcpu_find(to_cpu)->pc_acpi_id; + xen_intr_intrcnt_add(to_cpu); + + mtx_lock(&xen_intr_isrc_lock); + isrc = (struct xenisrc *)base_isrc; + if (!is_valid_evtchn(isrc->xi_port)) { + mtx_unlock(&xen_intr_isrc_lock); + return (EINVAL); + } + + if ((isrc->xi_type == EVTCHN_TYPE_VIRQ) || + (isrc->xi_type == EVTCHN_TYPE_IPI)) { + /* + * Virtual IRQs are associated with a cpu by + * the Hypervisor at evtchn_bind_virq time, so + * all we need to do is update the per-CPU masks. + */ + evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); + isrc->xi_cpu = to_cpu; + evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port); + mtx_unlock(&xen_intr_isrc_lock); + return (0); + } + + bind_vcpu.port = isrc->xi_port; + bind_vcpu.vcpu = acpi_id; + + /* + * Allow interrupts to be fielded on the new VCPU before + * we ask the hypervisor to deliver them there. + */ + evtchn_cpu_unmask_port(to_cpu, isrc->xi_port); + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu); + if (isrc->xi_cpu != to_cpu) { + if (error == 0) { + /* Commit to new binding by removing the old one. */ + evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); + isrc->xi_cpu = to_cpu; + } else { + /* Roll-back to previous binding. 
*/ + evtchn_cpu_mask_port(to_cpu, isrc->xi_port); + } + } + mtx_unlock(&xen_intr_isrc_lock); + return (0); +} + +/*------------------- Virtual Interrupt Source PIC Functions -----------------*/ +/* + * Mask a level triggered interrupt source. + * + * \param isrc The interrupt source to mask (if necessary). + * \param eoi If non-zero, perform any necessary end-of-interrupt + * acknowledgements. + */ +static void +xen_intr_disable_source(struct intsrc *isrc, int eoi) +{ +} + +/* + * Unmask a level triggered interrupt source. + * + * \param isrc The interrupt source to unmask (if necessary). + */ +static void +xen_intr_enable_source(struct intsrc *isrc) +{ +} + +/* + * Perform any necessary end-of-interrupt acknowledgements. + * + * \param isrc The interrupt source to EOI. + */ +static void +xen_intr_eoi_source(struct intsrc *isrc) +{ +} + +/* + * Enable and unmask the interrupt source. + * + * \param isrc The interrupt source to enable. + */ +static void +xen_intr_enable_intr(struct intsrc *base_isrc) +{ + struct xenisrc *isrc = (struct xenisrc *)base_isrc; + + evtchn_unmask_port(isrc->xi_port); +} + +/*------------------ Physical Interrupt Source PIC Functions -----------------*/ +/* + * Mask a level triggered interrupt source. + * + * \param isrc The interrupt source to mask (if necessary). + * \param eoi If non-zero, perform any necessary end-of-interrupt + * acknowledgements. + */ +static void +xen_intr_pirq_disable_source(struct intsrc *base_isrc, int eoi) +{ + struct xenisrc *isrc; + + isrc = (struct xenisrc *)base_isrc; + evtchn_mask_port(isrc->xi_port); +} + +/* + * Unmask a level triggered interrupt source. + * + * \param isrc The interrupt source to unmask (if necessary). + */ +static void +xen_intr_pirq_enable_source(struct intsrc *base_isrc) +{ + struct xenisrc *isrc; + + isrc = (struct xenisrc *)base_isrc; + evtchn_unmask_port(isrc->xi_port); +} + +/* + * Perform any necessary end-of-interrupt acknowledgements. + * + * \param isrc The interrupt source to EOI. + */ +static void +xen_intr_pirq_eoi_source(struct intsrc *base_isrc) +{ + struct xenisrc *isrc; + + /* XXX Use shared page of flags for this. */ + isrc = (struct xenisrc *)base_isrc; + if (isrc->xi_needs_eoi != 0) { + struct physdev_eoi eoi = { .irq = isrc->xi_pirq }; + + (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + } +} + +/* + * Enable and unmask the interrupt source. + * + * \param isrc The interrupt source to enable. + */ +static void +xen_intr_pirq_enable_intr(struct intsrc *isrc) +{ +} + +/*--------------------------- Public Functions -------------------------------*/ +/*------- API comments for these methods can be found in xen/xenintr.h -------*/ +int +xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + int error; + + error = xen_intr_bind_isrc(&isrc, local_port, EVTCHN_TYPE_PORT, dev, + filter, handler, arg, flags, port_handlep); + if (error != 0) + return (error); + + /* + * The Event Channel API didn't open this port, so it is not + * responsible for closing it automatically on unbind. 
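 *
 * Editor's sketch of a caller (hypothetical driver names and softc,
 * illustrative only):
 *
 *	static xen_intr_handle_t sc_handle;
 *
 *	static int
 *	mydev_filter(void *arg)
 *	{
 *		// runs at interrupt time when the bound port fires
 *		return (FILTER_HANDLED);
 *	}
 *
 *	error = xen_intr_bind_local_port(dev, port_from_xenstore,
 *	    mydev_filter, NULL, sc, INTR_TYPE_BIO, &sc_handle);
 *	if (error != 0)
 *		return (error);
 *	...
 *	xen_intr_unbind(&sc_handle);	// releases the isrc; port stays open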
+ */ + isrc->xi_close = 0; + return (0); +} + +int +xen_intr_alloc_and_bind_local_port(device_t dev, u_int remote_domain, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + struct evtchn_alloc_unbound alloc_unbound; + int error; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, alloc_unbound.port, EVTCHN_TYPE_PORT, + dev, filter, handler, arg, flags, + port_handlep); + if (error != 0) { + evtchn_close_t close = { .port = alloc_unbound.port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + isrc->xi_close = 1; + return (0); +} + +int +xen_intr_bind_remote_port(device_t dev, u_int remote_domain, + u_int remote_port, driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + struct evtchn_bind_interdomain bind_interdomain; + int error; + + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, bind_interdomain.local_port, + EVTCHN_TYPE_PORT, dev, filter, handler, + arg, flags, port_handlep); + if (error) { + evtchn_close_t close = { .port = bind_interdomain.local_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + /* + * The Event Channel API opened this port, so it is + * responsible for closing it automatically on unbind. + */ + isrc->xi_close = 1; + return (0); +} + +int +xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + int acpi_id = pcpu_find(cpu)->pc_acpi_id; + struct xenisrc *isrc; + struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = acpi_id }; + int error; + + /* Ensure the target CPU is ready to handle evtchn interrupts. */ + xen_intr_intrcnt_add(cpu); + + isrc = NULL; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, dev, + filter, handler, arg, flags, port_handlep); + if (error == 0) + error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); + + if (error != 0) { + evtchn_close_t close = { .port = bind_virq.port }; + + xen_intr_unbind(*port_handlep); + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + if (isrc->xi_cpu != cpu) { + /* + * Too early in the boot process for the generic interrupt + * code to perform the binding. Update our event channel + * masks manually so events can't fire on the wrong cpu + * during AP startup. 
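 *
 * Editor's sketch of how this function is typically driven (VIRQ_DEBUG
 * and the filter name are illustrative):
 *
 *	static xen_intr_handle_t debug_handle;
 *
 *	error = xen_intr_bind_virq(dev, VIRQ_DEBUG, 0, xen_debug_filter,
 *	    NULL, NULL, INTR_TYPE_MISC, &debug_handle);
 *
 * The cpu argument (0 here) is a FreeBSD CPU id; it is converted to the
 * ACPI id Xen expects via pcpu_find(cpu)->pc_acpi_id above, and the port
 * is closed automatically on xen_intr_unbind() because xi_close is set
 * below.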
+ */ + xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); + } + + /* + * The Event Channel API opened this port, so it is + * responsible for closing it automatically on unbind. + */ + isrc->xi_close = 1; + return (0); +} + +int +xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu, + driver_filter_t filter, enum intr_type flags, + xen_intr_handle_t *port_handlep) +{ + int acpi_id = pcpu_find(cpu)->pc_acpi_id; + struct xenisrc *isrc; + struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id }; + int error; + + /* Ensure the target CPU is ready to handle evtchn interrupts. */ + xen_intr_intrcnt_add(cpu); + + isrc = NULL; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, bind_ipi.port, EVTCHN_TYPE_IPI, + dev, filter, NULL, NULL, flags, + port_handlep); + if (error == 0) + error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); + + if (error != 0) { + evtchn_close_t close = { .port = bind_ipi.port }; + + xen_intr_unbind(*port_handlep); + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + if (isrc->xi_cpu != cpu) { + /* + * Too early in the boot process for the generic interrupt + * code to perform the binding. Update our event channel + * masks manually so events can't fire on the wrong cpu + * during AP startup. + */ + xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); + } + + /* + * The Event Channel API opened this port, so it is + * responsible for closing it automatically on unbind. + */ + isrc->xi_close = 1; + return (0); +} + +int +xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) +{ + char descr[MAXCOMLEN + 1]; + struct xenisrc *isrc; + va_list ap; + + isrc = xen_intr_isrc(port_handle); + if (isrc == NULL) + return (EINVAL); + + va_start(ap, fmt); + vsnprintf(descr, sizeof(descr), fmt, ap); + va_end(ap); + return (intr_describe(isrc->xi_vector, port_handle, descr)); +} + +void +xen_intr_unbind(xen_intr_handle_t *port_handlep) +{ + struct intr_handler *handler; + struct xenisrc *isrc; + + handler = *port_handlep; + *port_handlep = NULL; + isrc = xen_intr_isrc(handler); + if (isrc == NULL) + return; + + intr_remove_handler(handler); + xen_intr_release_isrc(isrc); +} + +void +xen_intr_signal(xen_intr_handle_t handle) +{ + struct xenisrc *isrc; + + isrc = xen_intr_isrc(handle); + if (isrc != NULL) { + KASSERT(isrc->xi_type == EVTCHN_TYPE_PORT || + isrc->xi_type == EVTCHN_TYPE_IPI, + ("evtchn_signal on something other than a local port")); + struct evtchn_send send = { .port = isrc->xi_port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); + } +} + +evtchn_port_t +xen_intr_port(xen_intr_handle_t handle) +{ + struct xenisrc *isrc; + + isrc = xen_intr_isrc(handle); + if (isrc == NULL) + return (0); + + return (isrc->xi_port); +} diff --git a/sys/xen/evtchn.h b/sys/xen/evtchn.h index 721742f6de40..00fa67e0a4d1 100644 --- a/sys/xen/evtchn.h +++ b/sys/xen/evtchn.h @@ -1,94 +1,87 @@ /****************************************************************************** * evtchn.h * - * Communication via Xen event channels. - * Also definitions for the device that demuxes notifications to userspace. + * Interface to /dev/xen/evtchn. 
* - * Copyright (c) 2004, K A Fraser + * Copyright (c) 2003-2005, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. * * $FreeBSD$ */ -#ifndef __ASM_EVTCHN_H__ -#define __ASM_EVTCHN_H__ -#include -#include -#include -#include +#ifndef __XEN_EVTCHN_H__ +#define __XEN_EVTCHN_H__ /* - * LOW-LEVEL DEFINITIONS + * Bind a fresh port to VIRQ @virq. */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOWR('E', 4, struct ioctl_evtchn_bind_virq) +struct ioctl_evtchn_bind_virq { + unsigned int virq; + unsigned int port; +}; /* - * Unlike notify_remote_via_evtchn(), this is safe to use across - * save/restore. Notifications on a broken connection are silently dropped. + * Bind a fresh port to remote <@remote_domain, @remote_port>. */ -void notify_remote_via_irq(int irq); - - -/* Entry point for notifications into Linux subsystems. */ -void evtchn_do_upcall(struct trapframe *frame); - -/* Entry point for notifications into the userland character device. */ -void evtchn_device_upcall(int port); - -void mask_evtchn(int port); - -void unmask_evtchn(int port); - -#ifdef SMP -void rebind_evtchn_to_cpu(int port, unsigned int cpu); -#else -#define rebind_evtchn_to_cpu(port, cpu) ((void)0) -#endif - -static inline -int test_and_set_evtchn_mask(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - return synch_test_and_set_bit(port, s->evtchn_mask); -} - -static inline void -clear_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - synch_clear_bit(port, &s->evtchn_pending[0]); -} - -static inline void -notify_remote_via_evtchn(int port) -{ - struct evtchn_send send = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); -} +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOWR('E', 5, struct ioctl_evtchn_bind_interdomain) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; + unsigned int port; +}; /* - * Use these to access the event channel underlying the IRQ handle returned - * by bind_*_to_irqhandler(). + * Allocate a fresh port for binding to @remote_domain. 
*/ -int irq_to_evtchn_port(int irq); - -void ipi_pcpu(unsigned int cpu, int vector); +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOWR('E', 6, struct ioctl_evtchn_bind_unbound_port) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; + unsigned int port; +}; /* - * CHARACTER-DEVICE DEFINITIONS + * Unbind previously allocated @port. */ +#define IOCTL_EVTCHN_UNBIND \ + _IOW('E', 7, struct ioctl_evtchn_unbind) +struct ioctl_evtchn_unbind { + unsigned int port; +}; -#define PORT_NORMAL 0x0000 -#define PORT_EXCEPTION 0x8000 -#define PORTIDX_MASK 0x7fff - -/* /dev/xen/evtchn resides at device number major=10, minor=200 */ -#define EVTCHN_MINOR 200 +/* + * Send event to previously allocated @port. + */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOW('E', 8, struct ioctl_evtchn_notify) +struct ioctl_evtchn_notify { + unsigned int port; +}; -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ -#define EVTCHN_RESET _IO('E', 1) -/* EVTCHN_BIND: Bind to the specified event-channel port. */ -#define EVTCHN_BIND _IO('E', 2) -/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ -#define EVTCHN_UNBIND _IO('E', 3) +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IO('E', 9) -#endif /* __ASM_EVTCHN_H__ */ +#endif /* __XEN_EVTCHN_H__ */ diff --git a/sys/xen/evtchn/evtchn.c b/sys/xen/evtchn/evtchn.c deleted file mode 100644 index baff9aaa2c80..000000000000 --- a/sys/xen/evtchn/evtchn.c +++ /dev/null @@ -1,1141 +0,0 @@ -/****************************************************************************** - * evtchn.c - * - * Communication via Xen event channels. - * - * Copyright (c) 2002-2005, K A Fraser - * Copyright (c) 2005-2006 Kip Macy - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static inline unsigned long __ffs(unsigned long word) -{ - __asm__("bsfl %1,%0" - :"=r" (word) - :"rm" (word)); - return word; -} - -/* - * irq_mapping_update_lock: in order to allow an interrupt to occur in a critical - * section, to set pcpu->ipending (etc...) properly, we - * must be able to get the icu lock, so it can't be - * under witness. - */ -static struct mtx irq_mapping_update_lock; -MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_SPIN); - -static struct xenpic *xp; -struct xenpic_intsrc { - struct intsrc xp_intsrc; - void *xp_cookie; - uint8_t xp_vector; - boolean_t xp_masked; -}; - -struct xenpic { - struct pic *xp_dynirq_pic; - struct pic *xp_pirq_pic; - uint16_t xp_numintr; - struct xenpic_intsrc xp_pins[0]; -}; - -#define TODO printf("%s: not implemented!\n", __func__) - -/* IRQ <-> event-channel mappings. */ -static int evtchn_to_irq[NR_EVENT_CHANNELS]; - -/* Packed IRQ information: binding type, sub-type index, and event channel. */ -static uint32_t irq_info[NR_IRQS]; -/* Binding types. */ -enum { - IRQT_UNBOUND, - IRQT_PIRQ, - IRQT_VIRQ, - IRQT_IPI, - IRQT_LOCAL_PORT, - IRQT_CALLER_PORT, - _IRQT_COUNT - -}; - - -#define _IRQT_BITS 4 -#define _EVTCHN_BITS 12 -#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS) - -/* Constructor for packed IRQ information. 
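/*
 * A sketch of how a user program could drive the binding ioctls defined
 * above: allocate an unbound port a peer domain may connect to, poke it, and
 * unbind.  Note that in this change the in-kernel evtchn_ioctl handler in
 * evtchn_dev.c is still guarded by NOTYET, so this only illustrates the
 * interface the header describes; the installed header path is assumed.
 */
#include <sys/ioctl.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#include <xen/evtchn.h>

int
main(void)
{
	struct ioctl_evtchn_bind_unbound_port bind;
	struct ioctl_evtchn_notify notify;
	struct ioctl_evtchn_unbind unbind;
	int fd;

	fd = open("/dev/xen/evtchn", O_RDWR);
	if (fd < 0)
		err(1, "open /dev/xen/evtchn");

	bind.remote_domain = 0;		/* e.g. let the control domain connect */
	if (ioctl(fd, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind) != 0)
		err(1, "IOCTL_EVTCHN_BIND_UNBOUND_PORT");
	printf("allocated local port %u\n", bind.port);

	notify.port = bind.port;
	if (ioctl(fd, IOCTL_EVTCHN_NOTIFY, &notify) != 0)
		err(1, "IOCTL_EVTCHN_NOTIFY");

	unbind.port = bind.port;
	if (ioctl(fd, IOCTL_EVTCHN_UNBIND, &unbind) != 0)
		err(1, "IOCTL_EVTCHN_UNBIND");

	close(fd);
	return (0);
}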
*/ -static inline uint32_t -mk_irq_info(uint32_t type, uint32_t index, uint32_t evtchn) -{ - - return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn); -} - -/* Constructor for packed IRQ information. */ - -/* Convenient shorthand for packed representation of an unbound IRQ. */ -#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) - -/* - * Accessors for packed IRQ information. - */ - -static inline unsigned int evtchn_from_irq(int irq) -{ - return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1); -} - -static inline unsigned int index_from_irq(int irq) -{ - return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1); -} - -static inline unsigned int type_from_irq(int irq) -{ - return irq_info[irq] >> (32 - _IRQT_BITS); -} - - -/* IRQ <-> VIRQ mapping. */ - -/* IRQ <-> IPI mapping. */ -#ifndef NR_IPIS -#ifdef SMP -#error "NR_IPIS not defined" -#endif -#define NR_IPIS 1 -#endif - -/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */ -static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)]; - -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - -#define VALID_EVTCHN(_chn) ((_chn) != 0) - -#ifdef SMP - -static uint8_t cpu_evtchn[NR_EVENT_CHANNELS]; -static unsigned long cpu_evtchn_mask[XEN_LEGACY_MAX_VCPUS][NR_EVENT_CHANNELS/LONG_BIT]; - -#define active_evtchns(cpu,sh,idx) \ - ((sh)->evtchn_pending[idx] & \ - cpu_evtchn_mask[cpu][idx] & \ - ~(sh)->evtchn_mask[idx]) - -static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) -{ - clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); - set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); - cpu_evtchn[chn] = cpu; -} - -static void init_evtchn_cpu_bindings(void) -{ - /* By default all event channels notify CPU#0. */ - memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); - memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); -} - -#define cpu_from_evtchn(evtchn) (cpu_evtchn[evtchn]) - -#else - -#define active_evtchns(cpu,sh,idx) \ - ((sh)->evtchn_pending[idx] & \ - ~(sh)->evtchn_mask[idx]) -#define bind_evtchn_to_cpu(chn,cpu) ((void)0) -#define init_evtchn_cpu_bindings() ((void)0) -#define cpu_from_evtchn(evtchn) (0) - -#endif - - -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} - -void -evtchn_do_upcall(struct trapframe *frame) -{ - unsigned long l1, l2; - unsigned int l1i, l2i, port; - int irq, cpu; - shared_info_t *s; - vcpu_info_t *vcpu_info; - - cpu = PCPU_GET(cpuid); - s = HYPERVISOR_shared_info; - vcpu_info = &s->vcpu_info[cpu]; - - vcpu_info->evtchn_upcall_pending = 0; - - /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ - l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0); - - while (l1 != 0) { - l1i = __ffs(l1); - l1 &= ~(1 << l1i); - - while ((l2 = active_evtchns(cpu, s, l1i)) != 0) { - l2i = __ffs(l2); - - port = (l1i * LONG_BIT) + l2i; - if ((irq = evtchn_to_irq[port]) != -1) { - struct intsrc *isrc = intr_lookup_source(irq); - /* - * ack - */ - mask_evtchn(port); - clear_evtchn(port); - - intr_execute_handlers(isrc, frame); - } else { - evtchn_device_upcall(port); - } - } - } -} - -/* - * Send an IPI from the current CPU to the destination CPU. 
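/*
 * For reference, the legacy evtchn.c being removed here multiplexed all
 * binding state into one 32-bit irq_info word per IRQ: bits 31-28 hold the
 * binding type, bits 27-12 the type-specific index (e.g. the VIRQ or IPI
 * number), and bits 11-0 the event channel port.  A standalone round trip
 * of that packing, using the same shifts as the deleted helpers:
 */
#include <assert.h>
#include <stdint.h>

#define IRQT_VIRQ	2U			/* enum value in the removed code */
#define IRQT_BITS	4
#define EVTCHN_BITS	12
#define INDEX_BITS	(32 - IRQT_BITS - EVTCHN_BITS)

int
main(void)
{
	uint32_t info;

	/* mk_irq_info(IRQT_VIRQ, index 0, event channel 7) */
	info = (IRQT_VIRQ << (32 - IRQT_BITS)) | (0U << EVTCHN_BITS) | 7U;
	assert(info == 0x20000007);
	/* evtchn_from_irq(), index_from_irq(), type_from_irq() equivalents */
	assert((info & ((1U << EVTCHN_BITS) - 1)) == 7);
	assert(((info >> EVTCHN_BITS) & ((1U << INDEX_BITS) - 1)) == 0);
	assert((info >> (32 - IRQT_BITS)) == IRQT_VIRQ);
	return (0);
}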
- */ -void -ipi_pcpu(unsigned int cpu, int vector) -{ - int irq; - - irq = pcpu_find(cpu)->pc_ipi_to_irq[vector]; - - notify_remote_via_irq(irq); -} - -static int -find_unbound_irq(void) -{ - int dynirq, irq; - - for (dynirq = 0; dynirq < NR_IRQS; dynirq++) { - irq = dynirq_to_irq(dynirq); - if (irq_bindcount[irq] == 0) - break; - } - - if (irq == NR_IRQS) - panic("No available IRQ to bind to: increase NR_IRQS!\n"); - - return (irq); -} - -static int -bind_caller_port_to_irq(unsigned int caller_port, int * port) -{ - int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - - if ((irq = evtchn_to_irq[caller_port]) == -1) { - if ((irq = find_unbound_irq()) < 0) - goto out; - - evtchn_to_irq[caller_port] = irq; - irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port); - } - - irq_bindcount[irq]++; - *port = caller_port; - - out: - mtx_unlock_spin(&irq_mapping_update_lock); - return irq; -} - -static int -bind_local_port_to_irq(unsigned int local_port, int * port) -{ - int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - - KASSERT(evtchn_to_irq[local_port] == -1, - ("evtchn_to_irq inconsistent")); - - if ((irq = find_unbound_irq()) < 0) { - struct evtchn_close close = { .port = local_port }; - HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); - - goto out; - } - - evtchn_to_irq[local_port] = irq; - irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); - irq_bindcount[irq]++; - *port = local_port; - - out: - mtx_unlock_spin(&irq_mapping_update_lock); - return irq; -} - -static int -bind_listening_port_to_irq(unsigned int remote_domain, int * port) -{ - struct evtchn_alloc_unbound alloc_unbound; - int err; - - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = remote_domain; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - - return err ? : bind_local_port_to_irq(alloc_unbound.port, port); -} - -static int -bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port, int * port) -{ - struct evtchn_bind_interdomain bind_interdomain; - int err; - - bind_interdomain.remote_dom = remote_domain; - bind_interdomain.remote_port = remote_port; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - - return err ? 
: bind_local_port_to_irq(bind_interdomain.local_port, port); -} - -static int -bind_virq_to_irq(unsigned int virq, unsigned int cpu, int * port) -{ - struct evtchn_bind_virq bind_virq; - int evtchn = 0, irq; - - mtx_lock_spin(&irq_mapping_update_lock); - - if ((irq = pcpu_find(cpu)->pc_virq_to_irq[virq]) == -1) { - if ((irq = find_unbound_irq()) < 0) - goto out; - - bind_virq.virq = virq; - bind_virq.vcpu = cpu; - HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); - - evtchn = bind_virq.port; - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); - - pcpu_find(cpu)->pc_virq_to_irq[virq] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - - irq_bindcount[irq]++; - *port = evtchn; -out: - mtx_unlock_spin(&irq_mapping_update_lock); - - return irq; -} - - -static int -bind_ipi_to_irq(unsigned int ipi, unsigned int cpu, int * port) -{ - struct evtchn_bind_ipi bind_ipi; - int irq; - int evtchn = 0; - - mtx_lock_spin(&irq_mapping_update_lock); - - if ((irq = pcpu_find(cpu)->pc_ipi_to_irq[ipi]) == -1) { - if ((irq = find_unbound_irq()) < 0) - goto out; - - bind_ipi.vcpu = cpu; - HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); - evtchn = bind_ipi.port; - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); - - pcpu_find(cpu)->pc_ipi_to_irq[ipi] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - irq_bindcount[irq]++; - *port = evtchn; -out: - - mtx_unlock_spin(&irq_mapping_update_lock); - - return irq; -} - - -static void -unbind_from_irq(int irq) -{ - struct evtchn_close close; - int evtchn = evtchn_from_irq(irq); - int cpu; - - mtx_lock_spin(&irq_mapping_update_lock); - - if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { - close.port = evtchn; - HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); - - switch (type_from_irq(irq)) { - case IRQT_VIRQ: - cpu = cpu_from_evtchn(evtchn); - pcpu_find(cpu)->pc_virq_to_irq[index_from_irq(irq)] = -1; - break; - case IRQT_IPI: - cpu = cpu_from_evtchn(evtchn); - pcpu_find(cpu)->pc_ipi_to_irq[index_from_irq(irq)] = -1; - break; - default: - break; - } - - /* Closed ports are implicitly re-bound to VCPU0. 
*/ - bind_evtchn_to_cpu(evtchn, 0); - - evtchn_to_irq[evtchn] = -1; - irq_info[irq] = IRQ_UNBOUND; - } - - mtx_unlock_spin(&irq_mapping_update_lock); -} - -int -bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_caller_port_to_irq(caller_port, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, NULL, handler, arg, irqflags, - &xp->xp_pins[irq].xp_cookie); - - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - - if (irqp) - *irqp = irq; - - return (0); -} - -int -bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_listening_port_to_irq(remote_domain, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, NULL, handler, arg, irqflags, - &xp->xp_pins[irq].xp_cookie); - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - if (irqp) - *irqp = irq; - - return (0); -} - -int -bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, - driver_intr_t handler, void *arg, unsigned long irqflags, - unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, NULL, handler, arg, - irqflags, &xp->xp_pins[irq].xp_cookie); - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - - if (irqp) - *irqp = irq; - return (0); -} - -int -bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - const char *devname, driver_filter_t filter, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_virq_to_irq(virq, cpu, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, filter, handler, - arg, irqflags, &xp->xp_pins[irq].xp_cookie); - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - - if (irqp) - *irqp = irq; - return (0); -} - -int -bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, - const char *devname, driver_filter_t filter, - unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_ipi_to_irq(ipi, cpu, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, filter, NULL, - NULL, irqflags, &xp->xp_pins[irq].xp_cookie); - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - - if (irqp) - *irqp = irq; - return (0); -} - -void -unbind_from_irqhandler(unsigned int irq) -{ - intr_remove_handler(xp->xp_pins[irq].xp_cookie); - unbind_from_irq(irq); -} - -#if 0 -/* Rebind an evtchn so that it gets delivered to a specific cpu */ -static void -rebind_irq_to_cpu(unsigned irq, unsigned tcpu) -{ - evtchn_op_t op = { .cmd = EVTCHNOP_bind_vcpu }; - int evtchn; - - mtx_lock_spin(&irq_mapping_update_lock); - - evtchn = evtchn_from_irq(irq); - if 
(!VALID_EVTCHN(evtchn)) { - mtx_unlock_spin(&irq_mapping_update_lock); - return; - } - - /* Send future instances of this interrupt to other vcpu. */ - bind_vcpu.port = evtchn; - bind_vcpu.vcpu = tcpu; - - /* - * If this fails, it usually just indicates that we're dealing with a - * virq or IPI channel, which don't actually need to be rebound. Ignore - * it, but don't do the xenlinux-level rebind in that case. - */ - if (HYPERVISOR_event_channel_op(&op) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu); - - mtx_unlock_spin(&irq_mapping_update_lock); - -} - -static void set_affinity_irq(unsigned irq, cpumask_t dest) -{ - unsigned tcpu = ffs(dest) - 1; - rebind_irq_to_cpu(irq, tcpu); -} -#endif - -/* - * Interface to generic handling in intr_machdep.c - */ - - -/*------------ interrupt handling --------------------------------------*/ -#define TODO printf("%s: not implemented!\n", __func__) - - -static void xenpic_dynirq_enable_source(struct intsrc *isrc); -static void xenpic_dynirq_disable_source(struct intsrc *isrc, int); -static void xenpic_dynirq_eoi_source(struct intsrc *isrc); -static void xenpic_dynirq_enable_intr(struct intsrc *isrc); -static void xenpic_dynirq_disable_intr(struct intsrc *isrc); - -static void xenpic_pirq_enable_source(struct intsrc *isrc); -static void xenpic_pirq_disable_source(struct intsrc *isrc, int); -static void xenpic_pirq_eoi_source(struct intsrc *isrc); -static void xenpic_pirq_enable_intr(struct intsrc *isrc); - - -static int xenpic_vector(struct intsrc *isrc); -static int xenpic_source_pending(struct intsrc *isrc); -static void xenpic_suspend(struct pic* pic); -static void xenpic_resume(struct pic* pic); -static int xenpic_assign_cpu(struct intsrc *, u_int apic_id); - - -struct pic xenpic_dynirq_template = { - .pic_enable_source = xenpic_dynirq_enable_source, - .pic_disable_source = xenpic_dynirq_disable_source, - .pic_eoi_source = xenpic_dynirq_eoi_source, - .pic_enable_intr = xenpic_dynirq_enable_intr, - .pic_disable_intr = xenpic_dynirq_disable_intr, - .pic_vector = xenpic_vector, - .pic_source_pending = xenpic_source_pending, - .pic_suspend = xenpic_suspend, - .pic_resume = xenpic_resume -}; - -struct pic xenpic_pirq_template = { - .pic_enable_source = xenpic_pirq_enable_source, - .pic_disable_source = xenpic_pirq_disable_source, - .pic_eoi_source = xenpic_pirq_eoi_source, - .pic_enable_intr = xenpic_pirq_enable_intr, - .pic_vector = xenpic_vector, - .pic_source_pending = xenpic_source_pending, - .pic_suspend = xenpic_suspend, - .pic_resume = xenpic_resume, - .pic_assign_cpu = xenpic_assign_cpu -}; - - - -void -xenpic_dynirq_enable_source(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - - mtx_lock_spin(&irq_mapping_update_lock); - if (xp->xp_masked) { - irq = xenpic_vector(isrc); - unmask_evtchn(evtchn_from_irq(irq)); - xp->xp_masked = FALSE; - } - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_dynirq_disable_source(struct intsrc *isrc, int foo) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - - mtx_lock_spin(&irq_mapping_update_lock); - if (!xp->xp_masked) { - irq = xenpic_vector(isrc); - mask_evtchn(evtchn_from_irq(irq)); - xp->xp_masked = TRUE; - } - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_dynirq_enable_intr(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - mtx_lock_spin(&irq_mapping_update_lock); - xp->xp_masked = 0; - irq = 
xenpic_vector(isrc); - unmask_evtchn(evtchn_from_irq(irq)); - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_dynirq_disable_intr(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - mask_evtchn(evtchn_from_irq(irq)); - xp->xp_masked = 1; - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_dynirq_eoi_source(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - mtx_lock_spin(&irq_mapping_update_lock); - xp->xp_masked = 0; - irq = xenpic_vector(isrc); - unmask_evtchn(evtchn_from_irq(irq)); - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static int -xenpic_vector(struct intsrc *isrc) -{ - struct xenpic_intsrc *pin; - - pin = (struct xenpic_intsrc *)isrc; - //printf("xenpic_vector(): isrc=%p,vector=%u\n", pin, pin->xp_vector); - - return (pin->xp_vector); -} - -static int -xenpic_source_pending(struct intsrc *isrc) -{ - struct xenpic_intsrc *pin = (struct xenpic_intsrc *)isrc; - - /* XXXEN: TODO */ - printf("xenpic_source_pending(): vector=%x,masked=%x\n", - pin->xp_vector, pin->xp_masked); - -/* notify_remote_via_evtchn(pin->xp_vector); // XXX RS: Is this correct? */ - return 0; -} - -static void -xenpic_suspend(struct pic* pic) -{ - TODO; -} - -static void -xenpic_resume(struct pic* pic) -{ - TODO; -} - -static int -xenpic_assign_cpu(struct intsrc *isrc, u_int apic_id) -{ - TODO; - return (EOPNOTSUPP); -} - -void -notify_remote_via_irq(int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - notify_remote_via_evtchn(evtchn); - else - panic("invalid evtchn %d", irq); -} - -/* required for support of physical devices */ -static inline void -pirq_unmask_notify(int pirq) -{ - struct physdev_eoi eoi = { .irq = pirq }; - - if (unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0]))) { - (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); - } -} - -static inline void -pirq_query_unmask(int pirq) -{ - struct physdev_irq_status_query irq_status_query; - - irq_status_query.irq = pirq; - (void)HYPERVISOR_physdev_op(PHYSDEVOP_IRQ_STATUS_QUERY, &irq_status_query); - clear_bit(pirq, &pirq_needs_unmask_notify[0]); - if ( irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY ) - set_bit(pirq, &pirq_needs_unmask_notify[0]); -} - -/* - * On startup, if there is no action associated with the IRQ then we are - * probing. In this case we should not share with others as it will confuse us. - */ -#define probing_irq(_irq) (intr_lookup_source(irq) == NULL) - -static void -xenpic_pirq_enable_intr(struct intsrc *isrc) -{ - struct evtchn_bind_pirq bind_pirq; - int evtchn; - unsigned int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - goto out; - - bind_pirq.pirq = irq; - /* NB. We are happy to share unless we are probing. */ - bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; - - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { -#ifndef XEN_PRIVILEGED_GUEST - panic("unexpected pirq call"); -#endif - if (!probing_irq(irq)) /* Some failures are expected when probing. 
*/ - printf("Failed to obtain physical IRQ %d\n", irq); - mtx_unlock_spin(&irq_mapping_update_lock); - return; - } - evtchn = bind_pirq.port; - - pirq_query_unmask(irq_to_pirq(irq)); - - bind_evtchn_to_cpu(evtchn, 0); - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, evtchn); - - out: - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_pirq_enable_source(struct intsrc *isrc) -{ - int evtchn; - unsigned int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - goto done; - - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); - done: - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_pirq_disable_source(struct intsrc *isrc, int eoi) -{ - int evtchn; - unsigned int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - goto done; - - mask_evtchn(evtchn); - done: - mtx_unlock_spin(&irq_mapping_update_lock); -} - - -static void -xenpic_pirq_eoi_source(struct intsrc *isrc) -{ - int evtchn; - unsigned int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - goto done; - - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); - done: - mtx_unlock_spin(&irq_mapping_update_lock); -} - -int -irq_to_evtchn_port(int irq) -{ - return evtchn_from_irq(irq); -} - -void -mask_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - synch_set_bit(port, &s->evtchn_mask[0]); -} - -void -unmask_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - unsigned int cpu = PCPU_GET(cpuid); - vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; - - /* Slow path (hypercall) if this is a non-local port. */ - if (unlikely(cpu != cpu_from_evtchn(port))) { - struct evtchn_unmask unmask = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); - return; - } - - synch_clear_bit(port, &s->evtchn_mask); - - /* - * The following is basically the equivalent of 'hw_resend_irq'. Just - * like a real IO-APIC we 'lose the interrupt edge' if the channel is - * masked. - */ - if (synch_test_bit(port, &s->evtchn_pending) && - !synch_test_and_set_bit(port / LONG_BIT, - &vcpu_info->evtchn_pending_sel)) { - vcpu_info->evtchn_upcall_pending = 1; - if (!vcpu_info->evtchn_upcall_mask) - force_evtchn_callback(); - } -} - -void irq_resume(void) -{ - evtchn_op_t op; - int cpu, pirq, virq, ipi, irq, evtchn; - - struct evtchn_bind_virq bind_virq; - struct evtchn_bind_ipi bind_ipi; - - init_evtchn_cpu_bindings(); - - /* New event-channel space is not 'live' yet. */ - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) - mask_evtchn(evtchn); - - /* Check that no PIRQs are still bound. */ - for (pirq = 0; pirq < NR_PIRQS; pirq++) { - KASSERT(irq_info[pirq_to_irq(pirq)] == IRQ_UNBOUND, - ("pirq_to_irq inconsistent")); - } - - /* Secondary CPUs must have no VIRQ or IPI bindings. */ - for (cpu = 1; cpu < XEN_LEGACY_MAX_VCPUS; cpu++) { - for (virq = 0; virq < NR_VIRQS; virq++) { - KASSERT(pcpu_find(cpu)->pc_virq_to_irq[virq] == -1, - ("virq_to_irq inconsistent")); - } - for (ipi = 0; ipi < NR_IPIS; ipi++) { - KASSERT(pcpu_find(cpu)->pc_ipi_to_irq[ipi] == -1, - ("ipi_to_irq inconsistent")); - } - } - - /* No IRQ <-> event-channel mappings. 
*/ - for (irq = 0; irq < NR_IRQS; irq++) - irq_info[irq] &= ~0xFFFF; /* zap event-channel binding */ - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) - evtchn_to_irq[evtchn] = -1; - - /* Primary CPU: rebind VIRQs automatically. */ - for (virq = 0; virq < NR_VIRQS; virq++) { - if ((irq = pcpu_find(0)->pc_virq_to_irq[virq]) == -1) - continue; - - KASSERT(irq_info[irq] == mk_irq_info(IRQT_VIRQ, virq, 0), - ("irq_info inconsistent")); - - /* Get a new binding from Xen. */ - bind_virq.virq = virq; - bind_virq.vcpu = 0; - HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); - evtchn = bind_virq.port; - - /* Record the new mapping. */ - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); - - /* Ready for use. */ - unmask_evtchn(evtchn); - } - - /* Primary CPU: rebind IPIs automatically. */ - for (ipi = 0; ipi < NR_IPIS; ipi++) { - if ((irq = pcpu_find(0)->pc_ipi_to_irq[ipi]) == -1) - continue; - - KASSERT(irq_info[irq] == mk_irq_info(IRQT_IPI, ipi, 0), - ("irq_info inconsistent")); - - /* Get a new binding from Xen. */ - memset(&op, 0, sizeof(op)); - bind_ipi.vcpu = 0; - HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); - evtchn = bind_ipi.port; - - /* Record the new mapping. */ - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); - - /* Ready for use. */ - unmask_evtchn(evtchn); - } -} - -static void -evtchn_init(void *dummy __unused) -{ - int i, cpu; - struct xenpic_intsrc *pin, *tpin; - - - init_evtchn_cpu_bindings(); - - /* No VIRQ or IPI bindings. */ - for (cpu = 0; cpu < mp_ncpus; cpu++) { - for (i = 0; i < NR_VIRQS; i++) - pcpu_find(cpu)->pc_virq_to_irq[i] = -1; - for (i = 0; i < NR_IPIS; i++) - pcpu_find(cpu)->pc_ipi_to_irq[i] = -1; - } - - /* No event-channel -> IRQ mappings. */ - for (i = 0; i < NR_EVENT_CHANNELS; i++) { - evtchn_to_irq[i] = -1; - mask_evtchn(i); /* No event channels are 'live' right now. */ - } - - /* No IRQ -> event-channel mappings. */ - for (i = 0; i < NR_IRQS; i++) - irq_info[i] = IRQ_UNBOUND; - - xp = malloc(sizeof(struct xenpic) + NR_IRQS*sizeof(struct xenpic_intsrc), - M_DEVBUF, M_WAITOK); - - xp->xp_dynirq_pic = &xenpic_dynirq_template; - xp->xp_pirq_pic = &xenpic_pirq_template; - xp->xp_numintr = NR_IRQS; - bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_IRQS); - - - /* We need to register our PIC's beforehand */ - if (intr_register_pic(&xenpic_pirq_template)) - panic("XEN: intr_register_pic() failure"); - if (intr_register_pic(&xenpic_dynirq_template)) - panic("XEN: intr_register_pic() failure"); - - /* - * Initialize the dynamic IRQ's - we initialize the structures, but - * we do not bind them (bind_evtchn_to_irqhandle() does this) - */ - pin = xp->xp_pins; - for (i = 0; i < NR_DYNIRQS; i++) { - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - irq_bindcount[dynirq_to_irq(i)] = 0; - - tpin = &pin[dynirq_to_irq(i)]; - tpin->xp_intsrc.is_pic = xp->xp_dynirq_pic; - tpin->xp_vector = dynirq_to_irq(i); - - } - /* - * Now, we go ahead and claim every PIRQ there is. - */ - pin = xp->xp_pins; - for (i = 0; i < NR_PIRQS; i++) { - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - irq_bindcount[pirq_to_irq(i)] = 0; - -#ifdef RTC_IRQ - /* If not domain 0, force our RTC driver to fail its probe. 
*/ - if ((i == RTC_IRQ) && - !(xen_start_info->flags & SIF_INITDOMAIN)) - continue; -#endif - tpin = &pin[pirq_to_irq(i)]; - tpin->xp_intsrc.is_pic = xp->xp_pirq_pic; - tpin->xp_vector = pirq_to_irq(i); - - } -} - -SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_MIDDLE, evtchn_init, NULL); - diff --git a/sys/xen/evtchn/evtchn_dev.c b/sys/xen/evtchn/evtchn_dev.c index ab3bd173304f..9da26421e520 100644 --- a/sys/xen/evtchn/evtchn_dev.c +++ b/sys/xen/evtchn/evtchn_dev.c @@ -1,391 +1,358 @@ /****************************************************************************** * evtchn.c * * Xenolinux driver for receiving and demuxing event-channel signals. * * Copyright (c) 2004, K A Fraser */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include +#include -#include +#include +#include #include + #include -#include #include #include -#include -#include +#include typedef struct evtchn_sotfc { struct selinfo ev_rsel; } evtchn_softc_t; - -#ifdef linuxcrap -/* NB. This must be shared amongst drivers if more things go in /dev/xen */ -static devfs_handle_t xen_dev_dir; -#endif - /* Only one process may open /dev/xen/evtchn at any time. */ static unsigned long evtchn_dev_inuse; /* Notification ring, accessed via /dev/xen/evtchn. */ #define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */ #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) static uint16_t *ring; static unsigned int ring_cons, ring_prod, ring_overflow; /* Which ports is user-space bound to? */ static uint32_t bound_ports[32]; /* Unique address for processes to sleep on */ static void *evtchn_waddr = ˚ static struct mtx lock, upcall_lock; static d_read_t evtchn_read; static d_write_t evtchn_write; static d_ioctl_t evtchn_ioctl; static d_poll_t evtchn_poll; static d_open_t evtchn_open; static d_close_t evtchn_close; void -evtchn_device_upcall(int port) +evtchn_device_upcall(evtchn_port_t port) { mtx_lock(&upcall_lock); - mask_evtchn(port); - clear_evtchn(port); + evtchn_mask_port(port); + evtchn_clear_port(port); if ( ring != NULL ) { if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port; if ( ring_cons == ring_prod++ ) { wakeup(evtchn_waddr); } } else { ring_overflow = 1; } } mtx_unlock(&upcall_lock); } static void __evtchn_reset_buffer_ring(void) { /* Initialise the ring to empty. Clear errors. */ ring_cons = ring_prod = ring_overflow = 0; } static int evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) { int rc; unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0; count = uio->uio_resid; count &= ~1; /* even number of bytes */ if ( count == 0 ) { rc = 0; goto out; } if ( count > PAGE_SIZE ) count = PAGE_SIZE; for ( ; ; ) { if ( (c = ring_cons) != (p = ring_prod) ) break; if ( ring_overflow ) { rc = EFBIG; goto out; } if (sst != 0) { rc = EINTR; goto out; } /* PCATCH == check for signals before and after sleeping * PWAIT == priority of waiting on resource */ sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10); } /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) { bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t); bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t); } else { bytes1 = (p - c) * sizeof(uint16_t); bytes2 = 0; } /* Truncate chunks according to caller's maximum byte count. 
*/ if ( bytes1 > count ) { bytes1 = count; bytes2 = 0; } else if ( (bytes1 + bytes2) > count ) { bytes2 = count - bytes1; } if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) || ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) /* keeping this around as its replacement is not equivalent * copyout(&ring[0], &buf[bytes1], bytes2) */ { rc = EFAULT; goto out; } ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); rc = bytes1 + bytes2; out: return rc; } static int evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) { int rc, i, count; count = uio->uio_resid; uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); if ( kbuf == NULL ) return ENOMEM; count &= ~1; /* even number of bytes */ if ( count == 0 ) { rc = 0; goto out; } if ( count > PAGE_SIZE ) count = PAGE_SIZE; if ( uiomove(kbuf, count, uio) != 0 ) { rc = EFAULT; goto out; } mtx_lock_spin(&lock); for ( i = 0; i < (count/2); i++ ) if ( test_bit(kbuf[i], &bound_ports[0]) ) - unmask_evtchn(kbuf[i]); + evtchn_unmask_port(kbuf[i]); mtx_unlock_spin(&lock); rc = count; out: free(kbuf, M_DEVBUF); return rc; } static int evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int mode, struct thread *td __unused) { int rc = 0; +#ifdef NOTYET mtx_lock_spin(&lock); switch ( cmd ) { case EVTCHN_RESET: __evtchn_reset_buffer_ring(); break; case EVTCHN_BIND: if ( !synch_test_and_set_bit((uintptr_t)arg, &bound_ports[0]) ) unmask_evtchn((uintptr_t)arg); else rc = EINVAL; break; case EVTCHN_UNBIND: if ( synch_test_and_clear_bit((uintptr_t)arg, &bound_ports[0]) ) mask_evtchn((uintptr_t)arg); else rc = EINVAL; break; default: rc = ENOSYS; break; } mtx_unlock_spin(&lock); +#endif return rc; } static int evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) { evtchn_softc_t *sc; unsigned int mask = POLLOUT | POLLWRNORM; sc = dev->si_drv1; if ( ring_cons != ring_prod ) mask |= POLLIN | POLLRDNORM; else if ( ring_overflow ) mask = POLLERR; else selrecord(td, &sc->ev_rsel); return mask; } static int evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) { uint16_t *_ring; if (flag & O_NONBLOCK) return EBUSY; if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) return EBUSY; if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) return ENOMEM; mtx_lock_spin(&lock); ring = _ring; __evtchn_reset_buffer_ring(); mtx_unlock_spin(&lock); return 0; } static int evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) { int i; if (ring != NULL) { free(ring, M_DEVBUF); ring = NULL; } mtx_lock_spin(&lock); for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) - mask_evtchn(i); + evtchn_mask_port(i); mtx_unlock_spin(&lock); evtchn_dev_inuse = 0; return 0; } static struct cdevsw evtchn_devsw = { .d_version = D_VERSION, .d_open = evtchn_open, .d_close = evtchn_close, .d_read = evtchn_read, .d_write = evtchn_write, .d_ioctl = evtchn_ioctl, .d_poll = evtchn_poll, .d_name = "evtchn", }; /* XXX - if this device is ever supposed to support use by more than one process * this global static will have to go away */ static struct cdev *evtchn_dev; static int evtchn_dev_init(void *dummy __unused) { /* XXX I believe we don't need these leaving them here for now until we * have some semblance of it working */ mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); /* (DEVFS) create '/dev/misc/evtchn'. 
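/*
 * The character device above hands pending ports to userland as an array of
 * 16-bit port numbers: read(2) drains the 2048-entry notification ring
 * (failing with EFBIG once the ring has overflowed), and write(2)-ing the
 * same port numbers back unmasks them so further events can be delivered.
 * A sketch of a poll(2)-driven consumer loop; opening the device and binding
 * ports (see the ioctl definitions earlier) is assumed to have happened.
 */
#include <err.h>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

void
evtchn_consume(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	uint16_t ports[64];
	ssize_t n;
	int i;

	for (;;) {
		if (poll(&pfd, 1, -1) < 0)
			err(1, "poll");

		/* Reads always return an even number of bytes. */
		n = read(fd, ports, sizeof(ports));
		if (n < 0)
			err(1, "read");	/* EFBIG: ring overflowed */

		for (i = 0; i < n / 2; i++)
			printf("event pending on port %u\n", ports[i]);

		/* Re-enable delivery on everything just handled. */
		if (write(fd, ports, n) != n)
			err(1, "write");
	}
}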
*/ evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); - /* XXX I don't think we need any of this rubbish */ -#if 0 - if ( err != 0 ) - { - printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); - return err; - } - - /* (DEVFS) create directory '/dev/xen'. */ - xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); - - /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ - pos = devfs_generate_path(evtchn_miscdev.devfs_handle, - &link_dest[3], - sizeof(link_dest) - 3); - if ( pos >= 0 ) - strncpy(&link_dest[pos], "../", 3); - /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ - (void)devfs_mk_symlink(xen_dev_dir, - "evtchn", - DEVFS_FL_DEFAULT, - &link_dest[pos], - &symlink_handle, - NULL); - - /* (DEVFS) automatically destroy the symlink with its destination. */ - devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); -#endif if (bootverbose) printf("Event-channel device installed.\n"); return 0; } SYSINIT(evtchn_dev_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_dev_init, NULL); - - diff --git a/sys/xen/evtchn/evtchnvar.h b/sys/xen/evtchn/evtchnvar.h new file mode 100644 index 000000000000..8008d23297d3 --- /dev/null +++ b/sys/xen/evtchn/evtchnvar.h @@ -0,0 +1,105 @@ +/****************************************************************************** + * evtchn.h + * + * Data structures and definitions private to the FreeBSD implementation + * of the Xen event channel API. + * + * Copyright (c) 2004, K A Fraser + * Copyright (c) 2012, Spectra Logic Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef __XEN_EVTCHN_EVTCHNVAR_H__ +#define __XEN_EVTCHN_EVTCHNVAR_H__ + +#include +#include + +enum evtchn_type { + EVTCHN_TYPE_UNBOUND, + EVTCHN_TYPE_PIRQ, + EVTCHN_TYPE_VIRQ, + EVTCHN_TYPE_IPI, + EVTCHN_TYPE_PORT, + EVTCHN_TYPE_COUNT +}; + +/** Submit a port notification for delivery to a userland evtchn consumer */ +void evtchn_device_upcall(evtchn_port_t port); + +/** + * Disable signal delivery for an event channel port, returning its + * previous mask state. + * + * \param port The event channel port to query and mask. + * + * \returns 1 if event delivery was previously disabled. 
Otherwise 0. + */ +static inline int +evtchn_test_and_set_mask(evtchn_port_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + return synch_test_and_set_bit(port, s->evtchn_mask); +} + +/** + * Clear any pending event for the given event channel port. + * + * \param port The event channel port to clear. + */ +static inline void +evtchn_clear_port(evtchn_port_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_clear_bit(port, &s->evtchn_pending[0]); +} + +/** + * Disable signal delivery for an event channel port. + * + * \param port The event channel port to mask. + */ +static inline void +evtchn_mask_port(evtchn_port_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + synch_set_bit(port, &s->evtchn_mask[0]); +} + +/** + * Enable signal delivery for an event channel port. + * + * \param port The event channel port to enable. + */ +static inline void +evtchn_unmask_port(evtchn_port_t port) +{ + evtchn_unmask_t op = { .port = port }; + + HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op); +} + +#endif /* __XEN_EVTCHN_EVTCHNVAR_H__ */ diff --git a/sys/xen/features.c b/sys/xen/features.c index f28fe049177c..bbb28968935a 100644 --- a/sys/xen/features.c +++ b/sys/xen/features.c @@ -1,26 +1,26 @@ #include __FBSDID("$FreeBSD$"); #include #include -#include +#include #include #include uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32] /* __read_mostly */; void setup_xen_features(void) { xen_feature_info_t fi; int i, j; for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { fi.submap_idx = i; if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) break; for (j = 0; j < 32; j++) xen_features[i*32 + j] = !!(fi.submap & 1< __FBSDID("$FreeBSD$"); #include "opt_global.h" #include "opt_pmap.h" #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include #define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c)) /* External tools reserve first few grant table entries. 
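/*
 * The inline port helpers above capture the acknowledge/complete sequence
 * used elsewhere in this change: mask and clear a port before handing it
 * off (as evtchn_device_upcall() does), and unmask it once the consumer is
 * finished.  A condensed sketch of that pattern; the function names are
 * hypothetical and the evtchnvar.h environment is assumed.
 */
#include <xen/evtchn/evtchnvar.h>

static void
example_port_ack(evtchn_port_t port)
{
	/* Stop further upcalls for this port and retire the pending bit. */
	evtchn_mask_port(port);
	evtchn_clear_port(port);

	/* ... queue the port or run its handler ... */
}

static void
example_port_complete(evtchn_port_t port)
{
	/*
	 * Re-enable delivery; the EVTCHNOP_unmask hypercall also re-raises
	 * the event if it became pending again while masked.
	 */
	evtchn_unmask_port(port);
}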
*/ #define NR_RESERVED_ENTRIES 8 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; static unsigned int boot_max_nr_grant_frames; static int gnttab_free_count; static grant_ref_t gnttab_free_head; static struct mtx gnttab_list_lock; static grant_entry_t *shared; static struct gnttab_free_callback *gnttab_free_callback_list = NULL; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) static int get_free_entries(int count, int *entries) { int ref, error; grant_ref_t head; mtx_lock(&gnttab_list_lock); if ((gnttab_free_count < count) && ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { mtx_unlock(&gnttab_list_lock); return (error); } ref = head = gnttab_free_head; gnttab_free_count -= count; while (count-- > 1) head = gnttab_entry(head); gnttab_free_head = gnttab_entry(head); gnttab_entry(head) = GNTTAB_LIST_END; mtx_unlock(&gnttab_list_lock); *entries = ref; return (0); } static void do_free_callbacks(void) { struct gnttab_free_callback *callback, *next; callback = gnttab_free_callback_list; gnttab_free_callback_list = NULL; while (callback != NULL) { next = callback->next; if (gnttab_free_count >= callback->count) { callback->next = NULL; callback->fn(callback->arg); } else { callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; } callback = next; } } static inline void check_free_callbacks(void) { - if (unlikely(gnttab_free_callback_list != NULL)) + if (__predict_false(gnttab_free_callback_list != NULL)) do_free_callbacks(); } static void put_free_entry(grant_ref_t ref) { mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } /* * Public grant-issuing interface functions */ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); - if (unlikely(error)) + if (__predict_false(error)) return (error); shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); if (result) *result = ref; return (0); } void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) { shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); } int gnttab_query_foreign_access(grant_ref_t ref) { uint16_t nflags; nflags = shared[ref].flags; return (nflags & (GTF_reading|GTF_writing)); } int gnttab_end_foreign_access_ref(grant_ref_t ref) { uint16_t flags, nflags; nflags = shared[ref].flags; do { if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { printf("%s: WARNING: g.e. still in use!\n", __func__); return (0); } } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != flags); return (1); } void gnttab_end_foreign_access(grant_ref_t ref, void *page) { if (gnttab_end_foreign_access_ref(ref)) { put_free_entry(ref); if (page != NULL) { free(page, M_DEVBUF); } } else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. 
and page still in use!\n", __func__); } } void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) { grant_ref_t *last_ref; grant_ref_t head; grant_ref_t tail; head = GNTTAB_LIST_END; tail = *refs; last_ref = refs + count; while (refs != last_ref) { if (gnttab_end_foreign_access_ref(*refs)) { gnttab_entry(*refs) = head; head = *refs; } else { /* * XXX This needs to be fixed so that the ref * is placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. still in use!\n", __func__); count--; } refs++; } if (count != 0) { mtx_lock(&gnttab_list_lock); gnttab_free_count += count; gnttab_entry(tail) = gnttab_free_head; gnttab_free_head = head; mtx_unlock(&gnttab_list_lock); } } int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); - if (unlikely(error)) + if (__predict_false(error)) return (error); gnttab_grant_foreign_transfer_ref(ref, domid, pfn); *result = ref; return (0); } void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { shared[ref].frame = pfn; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_accept_transfer; } unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { unsigned long frame; uint16_t flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) return (0); cpu_relax(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { flags = shared[ref].flags; cpu_relax(); } /* Read the frame number /after/ reading completion status. 
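/*
 * A sketch of the foreign-access lifecycle implemented above, as a frontend
 * driver might use it: share one page read-only with a backend domain, then
 * revoke the grant when the protocol is torn down.  The backend domid and
 * the buffer come from a hypothetical negotiation (typically via xenstore);
 * deriving the frame with vtophys() is adequate for an HVM guest, and
 * <xen/gnttab.h> is the header updated by this change.
 */
#include <sys/param.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <xen/gnttab.h>

static int
share_page_with_backend(domid_t backend, void *page, grant_ref_t *refp)
{
	unsigned long frame;

	frame = vtophys(page) >> PAGE_SHIFT;
	return (gnttab_grant_foreign_access(backend, frame, /*readonly*/1, refp));
}

static void
unshare_page(grant_ref_t ref, void *page)
{
	/*
	 * Frees the reference (and the page) only once the remote domain has
	 * dropped its mapping; otherwise it warns and leaks, as the code
	 * above notes.
	 */
	gnttab_end_foreign_access(ref, page);
}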
*/ rmb(); frame = shared[ref].frame; KASSERT(frame != 0, ("grant table inconsistent")); return (frame); } unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) { unsigned long frame = gnttab_end_foreign_transfer_ref(ref); put_free_entry(ref); return (frame); } void gnttab_free_grant_reference(grant_ref_t ref) { put_free_entry(ref); } void gnttab_free_grant_references(grant_ref_t head) { grant_ref_t ref; int count = 1; if (head == GNTTAB_LIST_END) return; ref = head; while (gnttab_entry(ref) != GNTTAB_LIST_END) { ref = gnttab_entry(ref); count++; } mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) { int ref, error; error = get_free_entries(count, &ref); - if (unlikely(error)) + if (__predict_false(error)) return (error); *head = ref; return (0); } int gnttab_empty_grant_references(const grant_ref_t *private_head) { return (*private_head == GNTTAB_LIST_END); } int gnttab_claim_grant_reference(grant_ref_t *private_head) { grant_ref_t g = *private_head; - if (unlikely(g == GNTTAB_LIST_END)) + if (__predict_false(g == GNTTAB_LIST_END)) return (g); *private_head = gnttab_entry(g); return (g); } void gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) { gnttab_entry(release) = *private_head; *private_head = release; } void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count) { mtx_lock(&gnttab_list_lock); if (callback->next) goto out; callback->fn = fn; callback->arg = arg; callback->count = count; callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; check_free_callbacks(); out: mtx_unlock(&gnttab_list_lock); } void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) { struct gnttab_free_callback **pcb; mtx_lock(&gnttab_list_lock); for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { if (*pcb == callback) { *pcb = callback->next; break; } } mtx_unlock(&gnttab_list_lock); } static int grow_gnttab_list(unsigned int more_frames) { unsigned int new_nr_grant_frames, extra_entries, i; new_nr_grant_frames = nr_grant_frames + more_frames; extra_entries = more_frames * GREFS_PER_GRANT_FRAME; for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (!gnttab_list[i]) goto grow_nomem; } for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(i) = gnttab_free_head; gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; gnttab_free_count += extra_entries; nr_grant_frames = new_nr_grant_frames; check_free_callbacks(); return (0); grow_nomem: for ( ; i >= nr_grant_frames; i--) free(gnttab_list[i], M_DEVBUF); return (ENOMEM); } static unsigned int __max_nr_grant_frames(void) { struct gnttab_query_size query; int rc; query.dom = DOMID_SELF; rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); if ((rc < 0) || (query.status != GNTST_okay)) return (4); /* Legacy max supported number of frames */ return (query.max_nr_frames); } static inline unsigned int max_nr_grant_frames(void) { unsigned int xen_max = __max_nr_grant_frames(); if (xen_max > boot_max_nr_grant_frames) return (boot_max_nr_grant_frames); return (xen_max); } #ifdef notyet /* * XXX needed for backend support * */ static int 
map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { unsigned long **frames = (unsigned long **)data; set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); (*frames)++; return 0; } static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } #endif #ifndef XENHVM static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; u_long *frames; unsigned int nr_gframes = end_idx + 1; int i, rc; frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT); if (!frames) return (ENOMEM); setup.dom = DOMID_SELF; setup.nr_frames = nr_gframes; set_xen_guest_handle(setup.frame_list, frames); rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); if (rc == -ENOSYS) { free(frames, M_DEVBUF); return (ENOSYS); } KASSERT(!(rc || setup.status), ("unexpected result from grant_table_op")); if (shared == NULL) { vm_offset_t area; area = kva_alloc(PAGE_SIZE * max_nr_grant_frames()); KASSERT(area, ("can't allocate VM space for grant table")); shared = (grant_entry_t *)area; } for (i = 0; i < nr_gframes; i++) PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE, ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V); free(frames, M_DEVBUF); return (0); } int gnttab_resume(void) { if (max_nr_grant_frames() < nr_grant_frames) return (ENOSYS); return (gnttab_map(0, nr_grant_frames - 1)); } int gnttab_suspend(void) { int i; for (i = 0; i < nr_grant_frames; i++) pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE); return (0); } #else /* XENHVM */ #include static vm_paddr_t resume_frames; static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct xen_add_to_physmap xatp; unsigned int i = end_idx; /* * Loop backwards, so that the first hypercall has the largest index, * ensuring that the table will grow only once. 
*/ do { xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed to map gnttab"); } while (i-- > start_idx); if (shared == NULL) { vm_offset_t area; area = kva_alloc(PAGE_SIZE * max_nr_grant_frames()); KASSERT(area, ("can't allocate VM space for grant table")); shared = (grant_entry_t *)area; } for (i = start_idx; i <= end_idx; i++) { pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE, resume_frames + i * PAGE_SIZE); } return (0); } int gnttab_resume(void) { int error; unsigned int max_nr_gframes, nr_gframes; nr_gframes = nr_grant_frames; max_nr_gframes = max_nr_grant_frames(); if (max_nr_gframes < nr_gframes) return (ENOSYS); if (!resume_frames) { error = xenpci_alloc_space(PAGE_SIZE * max_nr_gframes, &resume_frames); if (error) { printf("error mapping gnttab share frames\n"); return (error); } } return (gnttab_map(0, nr_gframes - 1)); } #endif static int gnttab_expand(unsigned int req_entries) { int error; unsigned int cur, extra; cur = nr_grant_frames; extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / GREFS_PER_GRANT_FRAME); if (cur + extra > max_nr_grant_frames()) return (ENOSPC); error = gnttab_map(cur, cur + extra - 1); if (!error) error = grow_gnttab_list(extra); return (error); } int gnttab_init() { int i; unsigned int max_nr_glist_frames; unsigned int nr_init_grefs; if (!is_running_on_xen()) return (ENODEV); nr_grant_frames = 1; boot_max_nr_grant_frames = __max_nr_grant_frames(); /* Determine the maximum number of frames required for the * grant reference free list on the current hypervisor. */ max_nr_glist_frames = (boot_max_nr_grant_frames * GREFS_PER_GRANT_FRAME / (PAGE_SIZE / sizeof(grant_ref_t))); gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), M_DEVBUF, M_NOWAIT); if (gnttab_list == NULL) return (ENOMEM); for (i = 0; i < nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (gnttab_list[i] == NULL) goto ini_nomem; } if (gnttab_resume()) return (ENODEV); nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; if (bootverbose) printf("Grant table initialized\n"); return (0); ini_nomem: for (i--; i >= 0; i--) free(gnttab_list[i], M_DEVBUF); free(gnttab_list, M_DEVBUF); return (ENOMEM); } MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF); diff --git a/sys/xen/gnttab.h b/sys/xen/gnttab.h index 1741ec338720..d81e965fc43a 100644 --- a/sys/xen/gnttab.h +++ b/sys/xen/gnttab.h @@ -1,173 +1,172 @@ /****************************************************************************** * gnttab.h * * Two sets of functionality: * 1. Granting foreign access to our memory reservation. * 2. Accessing others' memory reservations via grant references. 
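/*
 * gnttab.c above also exports a batched allocation scheme, so a driver can
 * reserve a private pool of references up front (say one per ring slot) and
 * claim them from its hot path without touching the global free list lock.
 * A sketch of that usage; the pool size, domid handling, and names are
 * hypothetical.
 */
#include <sys/param.h>
#include <sys/errno.h>

#include <xen/gnttab.h>

#define MY_POOL_SIZE	32

static grant_ref_t gref_head;

static int
my_pool_init(void)
{
	/* Carve MY_POOL_SIZE references out of the global free list. */
	return (gnttab_alloc_grant_references(MY_POOL_SIZE, &gref_head));
}

static int
my_pool_grant(domid_t domid, unsigned long frame, grant_ref_t *refp)
{
	grant_ref_t ref;

	ref = gnttab_claim_grant_reference(&gref_head);
	if (ref == GNTTAB_LIST_END)
		return (ENOSPC);		/* private pool exhausted */
	gnttab_grant_foreign_access_ref(ref, domid, frame, /*readonly*/0);
	*refp = ref;
	return (0);
}

static void
my_pool_put(grant_ref_t ref)
{
	/*
	 * Only recycle the reference if the peer no longer has it mapped;
	 * a production driver would park still-busy references for later.
	 */
	if (gnttab_end_foreign_access_ref(ref) != 0)
		gnttab_release_grant_reference(&gref_head, ref);
}

static void
my_pool_fini(void)
{
	gnttab_free_grant_references(gref_head);
}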
* (i.e., mechanisms for both sender and recipient of grant references) * * Copyright (c) 2004-2005, K A Fraser * Copyright (c) 2005, Christopher Clark * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #ifndef __ASM_GNTTAB_H__ -#include - +#include #include -#include -#include #include +#include + #define GNTTAB_LIST_END GRANT_REF_INVALID struct gnttab_free_callback { struct gnttab_free_callback *next; void (*fn)(void *); void *arg; uint16_t count; }; int gnttab_init(void); /* * Allocate a grant table reference and return it in *result. Returns * zero on success or errno on error. */ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int flags, grant_ref_t *result); /* * End access through the given grant reference, iff the grant entry is no * longer in use. Return 1 if the grant entry was freed, 0 if it is still in * use. */ int gnttab_end_foreign_access_ref(grant_ref_t ref); /* * Eventually end access through the given grant reference, and once that * access has been ended, free the given page too. Access will be ended * immediately iff the grant entry is not in use, otherwise it will happen * some time later. page may be 0, in which case no freeing will occur. */ void gnttab_end_foreign_access(grant_ref_t ref, void *page); /* * Eventually end access through the given array of grant references. 
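As a usage illustration of gnttab_grant_foreign_access() and gnttab_end_foreign_access() as documented above (this is not code from the change; the peer id, frame, and page are placeholders, and on PV the frame must be a machine frame number):

static int
example_grant_page(domid_t peer, unsigned long frame, grant_ref_t *refp)
{

        /* flags == 0 requests a read/write grant. */
        return (gnttab_grant_foreign_access(peer, frame, 0, refp));
}

static void
example_ungrant_page(grant_ref_t ref, void *page)
{

        /*
         * Access ends immediately if the peer no longer maps the page,
         * otherwise some time later; 'page' is freed once access has
         * actually ended.
         */
        gnttab_end_foreign_access(ref, page);
}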
* Access will be ended immediately iff the grant entry is not in use, * otherwise it will happen some time later */ void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs); int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); int gnttab_query_foreign_access(grant_ref_t ref); /* * operations on reserved batches of grant references */ int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *pprivate_head); void gnttab_free_grant_reference(grant_ref_t ref); void gnttab_free_grant_references(grant_ref_t head); int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); void gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release); void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count); void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int flags); void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, unsigned long pfn); int gnttab_suspend(void); int gnttab_resume(void); #if 0 #include static inline void gnttab_set_map_op(struct gnttab_map_grant_ref *map, vm_paddr_t addr, uint32_t flags, grant_ref_t ref, domid_t domid) { if (flags & GNTMAP_contains_pte) map->host_addr = addr; else if (xen_feature(XENFEAT_auto_translated_physmap)) map->host_addr = vtophys(addr); else map->host_addr = addr; map->flags = flags; map->ref = ref; map->dom = domid; } static inline void gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, vm_paddr_t addr, uint32_t flags, grant_handle_t handle) { if (flags & GNTMAP_contains_pte) unmap->host_addr = addr; else if (xen_feature(XENFEAT_auto_translated_physmap)) unmap->host_addr = vtophys(addr); else unmap->host_addr = addr; unmap->handle = handle; unmap->dev_bus_addr = 0; } static inline void gnttab_set_replace_op(struct gnttab_unmap_and_replace *unmap, vm_paddr_t addr, vm_paddr_t new_addr, grant_handle_t handle) { if (xen_feature(XENFEAT_auto_translated_physmap)) { unmap->host_addr = vtophys(addr); unmap->new_addr = vtophys(new_addr); } else { unmap->host_addr = addr; unmap->new_addr = new_addr; } unmap->handle = handle; } #endif #endif /* __ASM_GNTTAB_H__ */ diff --git a/sys/xen/hvm.h b/sys/xen/hvm.h index 338a107ab84d..562aaf93ab05 100644 --- a/sys/xen/hvm.h +++ b/sys/xen/hvm.h @@ -1,94 +1,98 @@ /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef __XEN_HVM_H__ #define __XEN_HVM_H__ +#include +#include + #include /** * \brief Wrapper function to obtain a HVM parameter value. * * \param index HVM parameter index; see . * * \returns 0 on failure; the value of the parameter otherwise. */ static inline unsigned long hvm_get_parameter(int index) { struct xen_hvm_param xhv; int error; xhv.domid = DOMID_SELF; xhv.index = index; error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); if (error) { printf("%s: error %d trying to get %d\n", __func__, error, index); return (0); } return (xhv.value); } /** The callback method types for Hypervisor event delivery to our domain. */ enum { HVM_CB_TYPE_GSI, HVM_CB_TYPE_PCI_INTX, HVM_CB_TYPE_VECTOR, HVM_CB_TYPE_MASK = 0xFF, HVM_CB_TYPE_SHIFT = 56 }; /** Format for specifying a GSI type callback. */ enum { HVM_CB_GSI_GSI_MASK = 0xFFFFFFFF, HVM_CB_GSI_GSI_SHIFT = 0 }; #define HVM_CALLBACK_GSI(gsi) \ (((uint64_t)HVM_CB_TYPE_GSI << HVM_CB_TYPE_SHIFT) \ | ((gsi) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT) /** Format for specifying a virtual PCI interrupt line GSI style callback. */ enum { HVM_CB_PCI_INTX_INTPIN_MASK = 0x3, HVM_CB_PCI_INTX_INTPIN_SHIFT = 0, HVM_CB_PCI_INTX_SLOT_MASK = 0x1F, HVM_CB_PCI_INTX_SLOT_SHIFT = 11, }; #define HVM_CALLBACK_PCI_INTX(slot, pin) \ (((uint64_t)HVM_CB_TYPE_PCI_INTX << HVM_CB_TYPE_SHIFT) \ | (((slot) & HVM_CB_PCI_INTX_SLOT_MASK) << HVM_CB_PCI_INTX_SLOT_SHIFT) \ | (((pin) & HVM_CB_PCI_INTX_INTPIN_MASK) << HVM_CB_PCI_INTX_INTPIN_SHIFT)) /** Format for specifying a direct IDT vector injection style callback. */ enum { HVM_CB_VECTOR_VECTOR_MASK = 0xFFFFFFFF, HVM_CB_VECTOR_VECTOR_SHIFT = 0 }; #define HVM_CALLBACK_VECTOR(vector) \ (((uint64_t)HVM_CB_TYPE_VECTOR << HVM_CB_TYPE_SHIFT) \ | (((vector) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT)) void xen_hvm_set_callback(device_t); void xen_hvm_suspend(void); void xen_hvm_resume(void); +void xen_hvm_init_cpu(void); #endif /* __XEN_HVM_H__ */ diff --git a/sys/xen/interface/event_channel.h b/sys/xen/interface/event_channel.h index 07ff32194b40..65d19113fc23 100644 --- a/sys/xen/interface/event_channel.h +++ b/sys/xen/interface/event_channel.h @@ -1,294 +1,297 @@ /****************************************************************************** * event_channel.h * * Event channels between domains. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
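The HVM_CALLBACK_* macros above only build the encoded 64-bit value; installing it means writing the HVM_PARAM_CALLBACK_IRQ parameter. A minimal sketch mirroring hvm_get_parameter(), assuming the standard HVMOP_set_param and HVM_PARAM_CALLBACK_IRQ definitions from the Xen public headers (neither is shown in this diff):

static int
hvm_set_callback_vector(int vector)
{
        struct xen_hvm_param xhv;

        xhv.domid = DOMID_SELF;
        xhv.index = HVM_PARAM_CALLBACK_IRQ;
        xhv.value = HVM_CALLBACK_VECTOR(vector);
        return (HYPERVISOR_hvm_op(HVMOP_set_param, &xhv));
}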
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2003-2004, K A Fraser. */ #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ #include "xen.h" /* * `incontents 150 evtchn Event Channels * * Event channels are the basic primitive provided by Xen for event * notifications. An event is the Xen equivalent of a hardware * interrupt. They essentially store one bit of information, the event * of interest is signalled by transitioning this bit from 0 to 1. * * Notifications are received by a guest via an upcall from Xen, * indicating when an event arrives (setting the bit). Further * notifications are masked until the bit is cleared again (therefore, * guests must check the value of the bit after re-enabling event * delivery to ensure no missed notifications). * * Event notifications can be masked by setting a flag; this is * equivalent to disabling interrupts and can be used to ensure * atomicity of certain operations in the guest kernel. * * Event channels are represented by the evtchn_* fields in * struct shared_info and struct vcpu_info. */ /* * ` enum neg_errnoval * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) * ` * @cmd == EVTCHNOP_* (event-channel operation). * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). */ /* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ #define EVTCHNOP_bind_interdomain 0 #define EVTCHNOP_bind_virq 1 #define EVTCHNOP_bind_pirq 2 #define EVTCHNOP_close 3 #define EVTCHNOP_send 4 #define EVTCHNOP_status 5 #define EVTCHNOP_alloc_unbound 6 #define EVTCHNOP_bind_ipi 7 #define EVTCHNOP_bind_vcpu 8 #define EVTCHNOP_unmask 9 #define EVTCHNOP_reset 10 /* ` } */ +#ifndef __XEN_EVTCHN_PORT_DEFINED__ typedef uint32_t evtchn_port_t; DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); +#define __XEN_EVTCHN_PORT_DEFINED__ 1 +#endif /* * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as * accepting interdomain bindings from domain . A fresh port * is allocated in and returned as . * NOTES: * 1. If the caller is unprivileged then must be DOMID_SELF. * 2. may be DOMID_SELF, allowing loopback connections. */ struct evtchn_alloc_unbound { /* IN parameters */ domid_t dom, remote_dom; /* OUT parameters */ evtchn_port_t port; }; typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; /* * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between * the calling domain and . must identify * a port that is unbound and marked as accepting bindings from the calling * domain. A fresh port is allocated in the calling domain and returned as * . * NOTES: * 1. may be DOMID_SELF, allowing loopback connections. */ struct evtchn_bind_interdomain { /* IN parameters. */ domid_t remote_dom; evtchn_port_t remote_port; /* OUT parameters. */ evtchn_port_t local_port; }; typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; /* * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ on specified * vcpu. * NOTES: * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list * in xen.h for the classification of each VIRQ. * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be * re-bound via EVTCHNOP_bind_vcpu. * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. 
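A short usage sketch of EVTCHNOP_alloc_unbound, equivalent to the xenbus_alloc_evtchn() helper this patch removes from xenbus.c further down; error handling is simplified and the function name is hypothetical:

static int
example_alloc_unbound(domid_t remote_domain, evtchn_port_t *port)
{
        struct evtchn_alloc_unbound alloc_unbound;
        int err;

        alloc_unbound.dom = DOMID_SELF;
        alloc_unbound.remote_dom = remote_domain;
        err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
            &alloc_unbound);
        if (err != 0)
                return (-err);  /* hypercall failures are negative errno */
        *port = alloc_unbound.port;
        return (0);
}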
* The allocated event channel is bound to the specified vcpu and the * binding cannot be changed. */ struct evtchn_bind_virq { /* IN parameters. */ uint32_t virq; /* enum virq */ uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; }; typedef struct evtchn_bind_virq evtchn_bind_virq_t; /* * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ ). * NOTES: * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ struct evtchn_bind_pirq { /* IN parameters. */ uint32_t pirq; #define BIND_PIRQ__WILL_SHARE 1 uint32_t flags; /* BIND_PIRQ__* */ /* OUT parameters. */ evtchn_port_t port; }; typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; /* * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. * NOTES: * 1. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ struct evtchn_bind_ipi { uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; }; typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; /* * EVTCHNOP_close: Close a local event channel . If the channel is * interdomain then the remote end is placed in the unbound state * (EVTCHNSTAT_unbound), awaiting a new connection. */ struct evtchn_close { /* IN parameters. */ evtchn_port_t port; }; typedef struct evtchn_close evtchn_close_t; /* * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is . */ struct evtchn_send { /* IN parameters. */ evtchn_port_t port; }; typedef struct evtchn_send evtchn_send_t; /* * EVTCHNOP_status: Get the current status of the communication channel which * has an endpoint at . * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may obtain the status of an event * channel for which is not DOMID_SELF. */ struct evtchn_status { /* IN parameters */ domid_t dom; evtchn_port_t port; /* OUT parameters */ #define EVTCHNSTAT_closed 0 /* Channel is not in use. */ #define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ #define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ uint32_t status; uint32_t vcpu; /* VCPU to which this channel is bound. */ union { struct { domid_t dom; } unbound; /* EVTCHNSTAT_unbound */ struct { domid_t dom; evtchn_port_t port; } interdomain; /* EVTCHNSTAT_interdomain */ uint32_t pirq; /* EVTCHNSTAT_pirq */ uint32_t virq; /* EVTCHNSTAT_virq */ } u; }; typedef struct evtchn_status evtchn_status_t; /* * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an * event is pending. * NOTES: * 1. IPI-bound channels always notify the vcpu specified at bind time. * This binding cannot be changed. * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. * This binding cannot be changed. * 3. All other channels notify vcpu0 by default. This default is set when * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ struct evtchn_bind_vcpu { /* IN parameters. */ evtchn_port_t port; uint32_t vcpu; }; typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; /* * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ struct evtchn_unmask { /* IN parameters. 
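Likewise, signalling the remote end of a bound channel is a single hypercall on the evtchn_send structure defined above; a minimal, hypothetical helper:

static int
example_notify_remote(evtchn_port_t port)
{
        struct evtchn_send send;

        send.port = port;
        return (HYPERVISOR_event_channel_op(EVTCHNOP_send, &send));
}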
*/ evtchn_port_t port; }; typedef struct evtchn_unmask evtchn_unmask_t; /* * EVTCHNOP_reset: Close all event channels associated with specified domain. * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. */ struct evtchn_reset { /* IN parameters. */ domid_t dom; }; typedef struct evtchn_reset evtchn_reset_t; /* * ` enum neg_errnoval * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) * ` * Superceded by new event_channel_op() hypercall since 0x00030202. */ struct evtchn_op { uint32_t cmd; /* enum event_channel_op */ union { struct evtchn_alloc_unbound alloc_unbound; struct evtchn_bind_interdomain bind_interdomain; struct evtchn_bind_virq bind_virq; struct evtchn_bind_pirq bind_pirq; struct evtchn_bind_ipi bind_ipi; struct evtchn_close close; struct evtchn_send send; struct evtchn_status status; struct evtchn_bind_vcpu bind_vcpu; struct evtchn_unmask unmask; } u; }; typedef struct evtchn_op evtchn_op_t; DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ diff --git a/sys/xen/xen-os.h b/sys/xen/xen-os.h new file mode 100644 index 000000000000..95e8c6a3a4b7 --- /dev/null +++ b/sys/xen/xen-os.h @@ -0,0 +1,95 @@ +/****************************************************************************** + * xen/xen-os.h + * + * Random collection of macros and definition + * + * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef _XEN_XEN_OS_H_ +#define _XEN_XEN_OS_H_ + +#if !defined(__XEN_INTERFACE_VERSION__) +#define __XEN_INTERFACE_VERSION__ 0x00030208 +#endif + +#define GRANT_REF_INVALID 0xffffffff + +#ifdef LOCORE +#define __ASSEMBLY__ +#endif + +#include + +#include + +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ + +/* Force a proper event-channel callback from Xen. 
*/ +void force_evtchn_callback(void); + +extern int gdtset; + +extern shared_info_t *HYPERVISOR_shared_info; + +enum xen_domain_type { + XEN_NATIVE, /* running on bare hardware */ + XEN_PV_DOMAIN, /* running in a PV domain */ + XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ +}; + +extern enum xen_domain_type xen_domain_type; + +static inline int +xen_domain(void) +{ + return (xen_domain_type != XEN_NATIVE); +} + +static inline int +xen_pv_domain(void) +{ + return (xen_domain_type == XEN_PV_DOMAIN); +} + +static inline int +xen_hvm_domain(void) +{ + return (xen_domain_type == XEN_HVM_DOMAIN); +} + +#ifndef xen_mb +#define xen_mb() mb() +#endif +#ifndef xen_rmb +#define xen_rmb() rmb() +#endif +#ifndef xen_wmb +#define xen_wmb() wmb() +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif /* _XEN_XEN_OS_H_ */ diff --git a/sys/xen/xen_intr.h b/sys/xen/xen_intr.h index 2e753e65ecb3..109608ffa81f 100644 --- a/sys/xen/xen_intr.h +++ b/sys/xen/xen_intr.h @@ -1,103 +1,216 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */ +/****************************************************************************** + * xen_intr.h + * + * APIs for managing Xen event channel, virtual IRQ, and physical IRQ + * notifications. + * + * Copyright (c) 2004, K A Fraser + * Copyright (c) 2012, Spectra Logic Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ #ifndef _XEN_INTR_H_ #define _XEN_INTR_H_ -/* -* The flat IRQ space is divided into two regions: -* 1. A one-to-one mapping of real physical IRQs. This space is only used -* if we have physical device-access privilege. This region is at the -* start of the IRQ space so that existing device drivers do not need -* to be modified to translate physical IRQ numbers into our IRQ space. -* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These -* are bound using the provided bind/unbind functions. -* -* -* $FreeBSD$ -*/ - -#define PIRQ_BASE 0 -#define NR_PIRQS 128 - -#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) -#define NR_DYNIRQS 128 - -#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) - -#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) -#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) - -#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) -#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) - -/* - * Dynamic binding of event channels and VIRQ sources to guest IRQ space. 
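The xen_domain()/xen_pv_domain()/xen_hvm_domain() predicates added in xen-os.h above are meant for gating mode-specific code paths; a hedged sketch of typical use (the probe routine itself is hypothetical):

static int
xen_example_probe(void)
{

        if (!xen_domain())
                return (ENXIO);         /* not running under Xen at all */
        if (xen_hvm_domain())
                printf("Xen HVM guest\n");
        else if (xen_pv_domain())
                printf("Xen PV guest\n");
        return (0);
}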
+#ifndef __XEN_EVTCHN_PORT_DEFINED__ +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); +#define __XEN_EVTCHN_PORT_DEFINED__ 1 +#endif + +/** Registered Xen interrupt callback handle. */ +typedef void * xen_intr_handle_t; + +/** If non-zero, the hypervisor has been configured to use a direct vector */ +extern int xen_vector_callback_enabled; + +/** + * Associate an already allocated local event channel port an interrupt + * handler. + * + * \param dev The device making this bind request. + * \param local_port The event channel to bind. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ - -/* - * Bind a caller port event channel to an interrupt handler. If - * successful, the guest IRQ number is returned in *irqp. Return zero - * on success or errno otherwise. +int xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type irqflags, xen_intr_handle_t *handlep); + +/** + * Allocate a local event channel port, accessible by the specified + * remote/foreign domain and, if successful, associate the port with + * the specified interrupt handler. + * + * \param dev The device making this bind request. + * \param remote_domain Remote domain grant permission to signal the + * newly allocated local port. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); - -/* - * Bind a listening port to an interrupt handler. If successful, the - * guest IRQ number is returned in *irqp. Return zero on success or - * errno otherwise. +int xen_intr_alloc_and_bind_local_port(device_t dev, + u_int remote_domain, driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); + +/** + * Associate the specified interrupt handler with the remote event + * channel port specified by remote_domain and remote_port. + * + * \param dev The device making this bind request. + * \param remote_domain The domain peer for this event channel connection. + * \param remote_port Remote domain's local port number for this event + * channel port. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. 
+ * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); - -/* - * Bind a VIRQ to an interrupt handler. If successful, the guest IRQ - * number is returned in *irqp. Return zero on success or errno - * otherwise. +int xen_intr_bind_remote_port(device_t dev, u_int remote_domain, + evtchn_port_t remote_port, driver_filter_t filter, + driver_intr_t handler, void *arg, enum intr_type irqflags, + xen_intr_handle_t *handlep); + +/** + * Associate the specified interrupt handler with the specified Xen + * virtual interrupt source. + * + * \param dev The device making this bind request. + * \param virq The Xen virtual IRQ number for the Xen interrupt + * source being hooked. + * \param cpu The cpu on which interrupt events should be delivered. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - const char *devname, driver_filter_t filter, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp); - -/* - * Bind an IPI to an interrupt handler. If successful, the guest - * IRQ number is returned in *irqp. Return zero on success or errno - * otherwise. +int xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, + driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); + +/** + * Associate an interprocessor interrupt vector with an interrupt handler. + * + * \param dev The device making this bind request. + * \param ipi The interprocessor interrupt vector number of the + * interrupt source being hooked. + * \param cpu The cpu receiving the IPI. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, - const char *devname, driver_filter_t filter, - unsigned long irqflags, unsigned int *irqp); - -/* - * Bind an interdomain event channel to an interrupt handler. If - * successful, the guest IRQ number is returned in *irqp. Return zero - * on success or errno otherwise. +int xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu, + driver_filter_t filter, enum intr_type irqflags, + xen_intr_handle_t *handlep); + +/** + * Unbind an interrupt handler from its interrupt source. + * + * \param handlep A pointer to the opaque handle that was initialized + * at the time the interrupt source was bound. + * + * \returns 0 on success, otherwise an errno. + * + * \note The event channel, if any, that was allocated at bind time is + * closed upon successful return of this method. 
+ * + * \note It is always safe to call xen_intr_unbind() on a handle that + * has been initilized to NULL. */ -extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, - driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); - -/* - * Unbind an interrupt handler using the guest IRQ number returned - * when it was bound. +void xen_intr_unbind(xen_intr_handle_t *handle); + +/** + * Add a description to an interrupt handler. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \param fmt The sprintf compatible format string for the description, + * followed by optional sprintf arguments. + * + * \returns 0 on success, otherwise an errno. */ -extern void unbind_from_irqhandler(unsigned int irq); - -static __inline__ int irq_cannonicalize(unsigned int irq) -{ - return (irq == 2) ? 9 : irq; -} - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -extern void irq_suspend(void); -extern void irq_resume(void); - -extern void idle_block(void); -extern int ap_cpu_initclocks(int cpu); +int +xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +/** + * Signal the remote peer of an interrupt source associated with an + * event channel port. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \note For xen interrupt sources other than event channel ports, + * this method takes no action. + */ +void xen_intr_signal(xen_intr_handle_t handle); + +/** + * Get the local event channel port number associated with this interrupt + * source. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \returns 0 if the handle is invalid, otherwise positive port number. + */ +evtchn_port_t xen_intr_port(xen_intr_handle_t handle); #endif /* _XEN_INTR_H_ */ diff --git a/sys/xen/xenbus/xenbus.c b/sys/xen/xenbus/xenbus.c index 8887066c7e4b..c59d4aec4532 100644 --- a/sys/xen/xenbus/xenbus.c +++ b/sys/xen/xenbus/xenbus.c @@ -1,294 +1,259 @@ /****************************************************************************** * Copyright (C) 2005 XenSource Ltd * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
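Putting the new xen_intr interface together, a hedged sketch of how a consumer might use it; the softc layout, the filter, and the INTR_TYPE_NET choice are placeholders, not part of this change:

struct xen_example_softc {
        xen_intr_handle_t       xen_intr_handle;
};

static int
xen_example_filter(void *arg)
{

        /* Acknowledge the event and schedule any deferred work here. */
        return (FILTER_HANDLED);
}

static int
xen_example_bind(device_t dev, struct xen_example_softc *sc,
    evtchn_port_t local_port)
{
        int error;

        error = xen_intr_bind_local_port(dev, local_port, xen_example_filter,
            /*handler*/NULL, sc, INTR_TYPE_NET, &sc->xen_intr_handle);
        if (error != 0)
                return (error);
        /* Kick the peer on the other end of the channel. */
        xen_intr_signal(sc->xen_intr_handle);
        return (0);
}

static void
xen_example_unbind(struct xen_example_softc *sc)
{

        /*
         * Safe even on a NULL-initialized handle; also closes any event
         * channel that was allocated at bind time.
         */
        xen_intr_unbind(&sc->xen_intr_handle);
}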
*/ /** * \file xenbus.c * * \brief Client-facing interface for the Xenbus driver. * * In other words, the interface between the Xenbus and the device-specific * code, be it the frontend or the backend of that driver. */ #if 0 #define DPRINTK(fmt, args...) \ printk("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) #else #define DPRINTK(fmt, args...) ((void)0) #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include -#include +#include #include #include #include #include + #include MALLOC_DEFINE(M_XENBUS, "xenbus", "XenBus Support"); /*------------------------- Private Functions --------------------------------*/ /** * \brief Construct the error path corresponding to the given XenBus * device. * * \param dev The XenBus device for which we are constructing an error path. * * \return On success, the contructed error path. Otherwise NULL. * * It is the caller's responsibility to free any returned error path * node using the M_XENBUS malloc type. */ static char * error_path(device_t dev) { char *path_buffer = malloc(strlen("error/") + strlen(xenbus_get_node(dev)) + 1,M_XENBUS, M_WAITOK); strcpy(path_buffer, "error/"); strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev)); return (path_buffer); } /*--------------------------- Public Functions -------------------------------*/ /*-------- API comments for these methods can be found in xenbusvar.h --------*/ const char * xenbus_strstate(XenbusState state) { static const char *const name[] = { [ XenbusStateUnknown ] = "Unknown", [ XenbusStateInitialising ] = "Initialising", [ XenbusStateInitWait ] = "InitWait", [ XenbusStateInitialised ] = "Initialised", [ XenbusStateConnected ] = "Connected", [ XenbusStateClosing ] = "Closing", [ XenbusStateClosed ] = "Closed", }; return ((state < (XenbusStateClosed + 1)) ? 
name[state] : "INVALID"); } int xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch, xs_watch_cb_t *callback, uintptr_t callback_data) { int error; watch->node = path; watch->callback = callback; watch->callback_data = callback_data; error = xs_register_watch(watch); if (error) { watch->node = NULL; watch->callback = NULL; xenbus_dev_fatal(dev, error, "adding watch on %s", path); } return (error); } int xenbus_watch_path2(device_t dev, const char *path, const char *path2, struct xs_watch *watch, xs_watch_cb_t *callback, uintptr_t callback_data) { int error; char *state = malloc(strlen(path) + 1 + strlen(path2) + 1, M_XENBUS, M_WAITOK); strcpy(state, path); strcat(state, "/"); strcat(state, path2); error = xenbus_watch_path(dev, state, watch, callback, callback_data); if (error) { free(state,M_XENBUS); } return (error); } void xenbus_dev_verror(device_t dev, int err, const char *fmt, va_list ap) { int ret; unsigned int len; char *printf_buffer = NULL, *path_buffer = NULL; #define PRINTF_BUFFER_SIZE 4096 printf_buffer = malloc(PRINTF_BUFFER_SIZE,M_XENBUS, M_WAITOK); len = sprintf(printf_buffer, "%i ", err); ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); KASSERT(len + ret <= PRINTF_BUFFER_SIZE-1, ("xenbus error message too big")); device_printf(dev, "Error %s\n", printf_buffer); path_buffer = error_path(dev); if (path_buffer == NULL) { printf("xenbus: failed to write error node for %s (%s)\n", xenbus_get_node(dev), printf_buffer); goto fail; } if (xs_write(XST_NIL, path_buffer, "error", printf_buffer) != 0) { printf("xenbus: failed to write error node for %s (%s)\n", xenbus_get_node(dev), printf_buffer); goto fail; } fail: if (printf_buffer) free(printf_buffer,M_XENBUS); if (path_buffer) free(path_buffer,M_XENBUS); } void xenbus_dev_error(device_t dev, int err, const char *fmt, ...) { va_list ap; va_start(ap, fmt); xenbus_dev_verror(dev, err, fmt, ap); va_end(ap); } void xenbus_dev_vfatal(device_t dev, int err, const char *fmt, va_list ap) { xenbus_dev_verror(dev, err, fmt, ap); device_printf(dev, "Fatal error. Transitioning to Closing State\n"); xenbus_set_state(dev, XenbusStateClosing); } void xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); xenbus_dev_vfatal(dev, err, fmt, ap); va_end(ap); } int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp) { int error; error = gnttab_grant_foreign_access( xenbus_get_otherend_id(dev), ring_mfn, 0, refp); if (error) { xenbus_dev_fatal(dev, error, "granting access to ring page"); return (error); } return (0); } -int -xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port) -{ - struct evtchn_alloc_unbound alloc_unbound; - int err; - - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = xenbus_get_otherend_id(dev); - - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - - if (err) { - xenbus_dev_fatal(dev, -err, "allocating event channel"); - return (-err); - } - *port = alloc_unbound.port; - return (0); -} - -int -xenbus_free_evtchn(device_t dev, evtchn_port_t port) -{ - struct evtchn_close close; - int err; - - close.port = port; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); - if (err) { - xenbus_dev_error(dev, -err, "freeing event channel %d", port); - return (-err); - } - return (0); -} - XenbusState xenbus_read_driver_state(const char *path) { XenbusState result; int error; error = xs_gather(XST_NIL, path, "state", "%d", &result, NULL); if (error) result = XenbusStateClosed; return (result); } int xenbus_dev_is_online(device_t dev) { const char *path; int error; int value; path = xenbus_get_node(dev); error = xs_gather(XST_NIL, path, "online", "%d", &value, NULL); if (error != 0) { /* Default to not online. */ value = 0; } return (value); } void xenbus_localend_changed(device_t dev, const char *path) { } diff --git a/sys/xen/xenbus/xenbus_if.m b/sys/xen/xenbus/xenbus_if.m index 87d7c7fef9d2..fd9ae51ce1cf 100644 --- a/sys/xen/xenbus/xenbus_if.m +++ b/sys/xen/xenbus/xenbus_if.m @@ -1,63 +1,64 @@ #- # Copyright (c) 2008 Doug Rabson # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include #include -#include + +#include #include #include INTERFACE xenbus; /** * \brief Callback triggered when the state of the otherend * of a split device changes. * * \param _dev NewBus device_t for this XenBus device whose otherend's * state has changed.. * \param _newstate The new state of the otherend device. 
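Note on the removals above: with xenbus_alloc_evtchn() and xenbus_free_evtchn() gone, the equivalent lifecycle presumably moves to the xen_intr API declared earlier in this patch, where port allocation, binding, and (on unbind) closing the channel all go through one handle. A hedged fragment with placeholder names:

/* Attach path: allocate a local port the peer may bind to and hook it up. */
error = xen_intr_alloc_and_bind_local_port(dev, xenbus_get_otherend_id(dev),
    example_filter, /*handler*/NULL, sc, INTR_TYPE_BIO, &sc->xen_intr_handle);

/* Detach path: unbinding also closes the channel allocated at bind time. */
xen_intr_unbind(&sc->xen_intr_handle);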
*/ METHOD void otherend_changed { device_t _dev; enum xenbus_state _newstate; }; /** * \brief Callback triggered when the XenStore tree of the local end * of a split device changes. * * \param _dev NewBus device_t for this XenBus device whose otherend's * state has changed.. * \param _path The tree relative sub-path to the modified node. The empty * string indicates the root of the tree was destroyed. */ METHOD void localend_changed { device_t _dev; const char * _path; } DEFAULT xenbus_localend_changed; diff --git a/sys/xen/xenbus/xenbusb_front.c b/sys/xen/xenbus/xenbusb_front.c index 818e7f0a2cc6..145d52718868 100644 --- a/sys/xen/xenbus/xenbusb_front.c +++ b/sys/xen/xenbus/xenbusb_front.c @@ -1,196 +1,196 @@ /****************************************************************************** * Talks to Xen Store to figure out what devices we have. * * Copyright (C) 2009, 2010 Spectra Logic Corporation * Copyright (C) 2008 Doug Rabson * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Mike Wray, Hewlett-Packard * Copyright (C) 2005 XenSource Ltd * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ /** * \file xenbusb_front.c * * XenBus management of the NewBus bus containing the frontend instances of * Xen split devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include -#include #include +#include #include #include #include /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Probe for the existance of the XenBus front bus. * * \param dev NewBus device_t for this XenBus front bus instance. * * \return Always returns 0 indicating success. */ static int xenbusb_front_probe(device_t dev) { device_set_desc(dev, "Xen Frontend Devices"); return (0); } /** * \brief Attach the XenBus front bus. * * \param dev NewBus device_t for this XenBus front bus instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenbusb_front_attach(device_t dev) { return (xenbusb_attach(dev, "device", /*id_components*/1)); } /** * \brief Enumerate all devices of the given type on this bus. * * \param dev NewBus device_t for this XenBus front bus instance. * \param type String indicating the device sub-tree (e.g. "vfb", "vif") * to enumerate. * * \return On success, 0. 
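For reference, a split-device driver consumes the interface above by listing the generated method in its device_method_t table; the driver names here are hypothetical:

static void
xen_example_otherend_changed(device_t dev, enum xenbus_state newstate)
{

        if (newstate == XenbusStateConnected)
                device_printf(dev, "backend is now connected\n");
}

static device_method_t xen_example_methods[] = {
        /* XenBus interface */
        DEVMETHOD(xenbus_otherend_changed, xen_example_otherend_changed),

        { 0, 0 }
};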
Otherwise an errno value indicating the * type of failure. * * Devices that are found are entered into the NewBus hierarchy via * xenbusb_add_device(). xenbusb_add_device() ignores duplicate detects * and ignores duplicate devices, so it can be called unconditionally * for any device found in the XenStore. */ static int xenbusb_front_enumerate_type(device_t dev, const char *type) { struct xenbusb_softc *xbs; const char **dir; unsigned int i, count; int error; xbs = device_get_softc(dev); error = xs_directory(XST_NIL, xbs->xbs_node, type, &count, &dir); if (error) return (error); for (i = 0; i < count; i++) xenbusb_add_device(dev, type, dir[i]); free(dir, M_XENSTORE); return (0); } /** * \brief Determine and store the XenStore path for the other end of * a split device whose local end is represented by ivars. * * If successful, the xd_otherend_path field of the child's instance * variables will be updated. * * \param dev NewBus device_t for this XenBus front bus instance. * \param ivars Instance variables from the XenBus child device for * which to perform this function. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenbusb_front_get_otherend_node(device_t dev, struct xenbus_device_ivars *ivars) { char *otherend_path; int error; if (ivars->xd_otherend_path != NULL) { free(ivars->xd_otherend_path, M_XENBUS); ivars->xd_otherend_path = NULL; } error = xs_gather(XST_NIL, ivars->xd_node, "backend-id", "%i", &ivars->xd_otherend_id, "backend", NULL, &otherend_path, NULL); if (error == 0) { ivars->xd_otherend_path = strdup(otherend_path, M_XENBUS); ivars->xd_otherend_path_len = strlen(otherend_path); free(otherend_path, M_XENSTORE); } return (error); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenbusb_front_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xenbusb_identify), DEVMETHOD(device_probe, xenbusb_front_probe), DEVMETHOD(device_attach, xenbusb_front_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, xenbusb_resume), /* Bus Interface */ DEVMETHOD(bus_print_child, xenbusb_print_child), DEVMETHOD(bus_read_ivar, xenbusb_read_ivar), DEVMETHOD(bus_write_ivar, xenbusb_write_ivar), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), /* XenBus Bus Interface */ DEVMETHOD(xenbusb_enumerate_type, xenbusb_front_enumerate_type), DEVMETHOD(xenbusb_get_otherend_node, xenbusb_front_get_otherend_node), { 0, 0 } }; DEFINE_CLASS_0(xenbusb_front, xenbusb_front_driver, xenbusb_front_methods, sizeof(struct xenbusb_softc)); devclass_t xenbusb_front_devclass; DRIVER_MODULE(xenbusb_front, xenstore, xenbusb_front_driver, xenbusb_front_devclass, 0, 0); diff --git a/sys/xen/xenbus/xenbusvar.h b/sys/xen/xenbus/xenbusvar.h index 1c730fb900a5..ab5d01fa6b64 100644 --- a/sys/xen/xenbus/xenbusvar.h +++ b/sys/xen/xenbus/xenbusvar.h @@ -1,308 +1,275 @@ /****************************************************************************** * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 XenSource Ltd. 
* * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * $FreeBSD$ */ /** * \file xenbusvar.h * * \brief Datastructures and function declarations for usedby device * drivers operating on the XenBus. */ #ifndef _XEN_XENBUS_XENBUSVAR_H #define _XEN_XENBUS_XENBUSVAR_H #include #include #include #include #include #include -#include +#include #include #include #include #include /* XenBus allocations including XenStore data returned to clients. */ MALLOC_DECLARE(M_XENBUS); enum { /** * Path of this device node. */ XENBUS_IVAR_NODE, /** * The device type (e.g. vif, vbd). */ XENBUS_IVAR_TYPE, /** * The state of this device (not the otherend's state). */ XENBUS_IVAR_STATE, /** * Domain ID of the other end device. */ XENBUS_IVAR_OTHEREND_ID, /** * Path of the other end device. */ XENBUS_IVAR_OTHEREND_PATH }; /** * Simplified accessors for xenbus devices */ #define XENBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(xenbus, var, XENBUS, ivar, type) XENBUS_ACCESSOR(node, NODE, const char *) XENBUS_ACCESSOR(type, TYPE, const char *) XENBUS_ACCESSOR(state, STATE, enum xenbus_state) XENBUS_ACCESSOR(otherend_id, OTHEREND_ID, int) XENBUS_ACCESSOR(otherend_path, OTHEREND_PATH, const char *) /** * Return the state of a XenBus device. * * \param path The root XenStore path for the device. * * \return The current state of the device or XenbusStateClosed if no * state can be read. */ XenbusState xenbus_read_driver_state(const char *path); /** * Return the state of the "other end" (peer) of a XenBus device. * * \param dev The XenBus device whose peer to query. * * \return The current state of the peer device or XenbusStateClosed if no * state can be read. */ static inline XenbusState xenbus_get_otherend_state(device_t dev) { return (xenbus_read_driver_state(xenbus_get_otherend_path(dev))); } /** * Initialize and register a watch on the given path (client suplied storage). * * \param dev The XenBus device requesting the watch service. * \param path The XenStore path of the object to be watched. The * storage for this string must be stable for the lifetime * of the watch. * \param watch The watch object to use for this request. This object * must be stable for the lifetime of the watch. * \param callback The function to call when XenStore objects at or below * path are modified. * \param cb_data Client data that can be retrieved from the watch object * during the callback. * * \return On success, 0. 
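The XENBUS_ACCESSOR-generated getters declared above combine naturally with xs_gather() for reading per-device XenStore properties; a hedged sketch reading a made-up "feature-example" node under the device's own directory:

static int
xen_example_readprop(device_t dev, int *value)
{

        /* "feature-example" is an illustrative node name, not a real one. */
        return (xs_gather(XST_NIL, xenbus_get_node(dev),
            "feature-example", "%d", value, NULL));
}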
Otherwise an errno value indicating the * type of failure. * * \note On error, the device 'dev' will be switched to the XenbusStateClosing * state and the returned error is saved in the per-device error node * for dev in the XenStore. */ int xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch, xs_watch_cb_t *callback, uintptr_t cb_data); /** * Initialize and register a watch at path/path2 in the XenStore. * * \param dev The XenBus device requesting the watch service. * \param path The base XenStore path of the object to be watched. * \param path2 The tail XenStore path of the object to be watched. * \param watch The watch object to use for this request. This object * must be stable for the lifetime of the watch. * \param callback The function to call when XenStore objects at or below * path are modified. * \param cb_data Client data that can be retrieved from the watch object * during the callback. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * \note On error, \a dev will be switched to the XenbusStateClosing * state and the returned error is saved in the per-device error node * for \a dev in the XenStore. * * Similar to xenbus_watch_path, however the storage for the path to the * watched object is allocated from the heap and filled with "path '/' path2". * Should a call to this function succeed, it is the callers responsibility * to free watch->node using the M_XENBUS malloc type. */ int xenbus_watch_path2(device_t dev, const char *path, const char *path2, struct xs_watch *watch, xs_watch_cb_t *callback, uintptr_t cb_data); /** * Grant access to the given ring_mfn to the peer of the given device. * * \param dev The device granting access to the ring page. * \param ring_mfn The guest machine page number of the page to grant * peer access rights. * \param refp[out] The grant reference for the page. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * A successful call to xenbus_grant_ring should be paired with a call * to gnttab_end_foreign_access() when foregn access to this page is no * longer requried. * * \note On error, \a dev will be switched to the XenbusStateClosing * state and the returned error is saved in the per-device error node * for \a dev in the XenStore. */ int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp); -/** - * Allocate an event channel for the given XenBus device. - * - * \param dev The device for which to allocate the event channel. - * \param port[out] The port identifier for the allocated event channel. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * A successfully allocated event channel should be free'd using - * xenbus_free_evtchn(). - * - * \note On error, \a dev will be switched to the XenbusStateClosing - * state and the returned error is saved in the per-device error node - * for \a dev in the XenStore. - */ -int xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port); - -/** - * Free an existing event channel. - * - * \param dev The device which allocated this event channel. - * \param port The port identifier for the event channel to free. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * \note On error, \a dev will be switched to the XenbusStateClosing - * state and the returned error is saved in the per-device error node - * for \a dev in the XenStore. 
- */ -int xenbus_free_evtchn(device_t dev, evtchn_port_t port); - /** * Record the given errno, along with the given, printf-style, formatted * message in dev's device specific error node in the XenStore. * * \param dev The device which encountered the error. * \param err The errno value corresponding to the error. * \param fmt Printf format string followed by a variable number of * printf arguments. */ void xenbus_dev_error(device_t dev, int err, const char *fmt, ...) __attribute__((format(printf, 3, 4))); /** * va_list version of xenbus_dev_error(). * * \param dev The device which encountered the error. * \param err The errno value corresponding to the error. * \param fmt Printf format string. * \param ap Va_list of printf arguments. */ void xenbus_dev_verror(device_t dev, int err, const char *fmt, va_list ap) __attribute__((format(printf, 3, 0))); /** * Equivalent to xenbus_dev_error(), followed by * xenbus_set_state(dev, XenbusStateClosing). * * \param dev The device which encountered the error. * \param err The errno value corresponding to the error. * \param fmt Printf format string followed by a variable number of * printf arguments. */ void xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) __attribute__((format(printf, 3, 4))); /** * va_list version of xenbus_dev_fatal(). * * \param dev The device which encountered the error. * \param err The errno value corresponding to the error. * \param fmt Printf format string. * \param ap Va_list of printf arguments. */ void xenbus_dev_vfatal(device_t dev, int err, const char *fmt, va_list) __attribute__((format(printf, 3, 0))); /** * Convert a member of the xenbus_state enum into an ASCII string. * * /param state The XenBus state to lookup. * * /return A string representing state or, for unrecognized states, * the string "Unknown". */ const char *xenbus_strstate(enum xenbus_state state); /** * Return the value of a XenBus device's "online" node within the XenStore. * * \param dev The XenBus device to query. * * \return The value of the "online" node for the device. If the node * does not exist, 0 (offline) is returned. */ int xenbus_dev_is_online(device_t dev); /** * Default callback invoked when a change to the local XenStore sub-tree * for a device is modified. * * \param dev The XenBus device whose tree was modified. * \param path The tree relative sub-path to the modified node. The empty * string indicates the root of the tree was destroyed. */ void xenbus_localend_changed(device_t dev, const char *path); #include "xenbus_if.h" #endif /* _XEN_XENBUS_XENBUSVAR_H */ diff --git a/sys/xen/xenstore/xenstore.c b/sys/xen/xenstore/xenstore.c index a07a26266e43..d404862ab55d 100644 --- a/sys/xen/xenstore/xenstore.c +++ b/sys/xen/xenstore/xenstore.c @@ -1,1663 +1,1659 @@ /****************************************************************************** * xenstore.c * * Low-level kernel interface to the XenStore. 
* * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2009,2010 Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include -#include +#include #include #include #include #include #include #include #include #include #include /** * \file xenstore.c * \brief XenStore interface * * The XenStore interface is a simple storage system that is a means of * communicating state and configuration data between the Xen Domain 0 * and the various guest domains. All configuration data other than * a small amount of essential information required during the early * boot process of launching a Xen aware guest, is managed using the * XenStore. * * The XenStore is ASCII string based, and has a structure and semantics * similar to a filesystem. There are files and directories, the directories * able to contain files or other directories. The depth of the hierachy * is only limited by the XenStore's maximum path length. * * The communication channel between the XenStore service and other * domains is via two, guest specific, ring buffers in a shared memory * area. One ring buffer is used for communicating in each direction. * The grant table references for this shared memory are given to the * guest either via the xen_start_info structure for a fully para- * virtualized guest, or via HVM hypercalls for a hardware virtualized * guest. * * The XenStore communication relies on an event channel and thus * interrupts. For this reason, the attachment of the XenStore * relies on an interrupt driven configuration hook to hold off * boot processing until communication with the XenStore service * can be established. * * Several Xen services depend on the XenStore, most notably the * XenBus used to discover and manage Xen devices. These services * are implemented as NewBus child attachments to a bus exported * by this XenStore driver. */ static struct xs_watch *find_watch(const char *token); MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results"); /** * Pointer to shared memory communication structures allowing us * to communicate with the XenStore service. * * When operating in full PV mode, this pointer is set early in kernel * startup from within xen_machdep.c. 
In HVM mode, we use hypercalls * to get the guest frame number for the shared page and then map it * into kva. See xs_init() for details. */ struct xenstore_domain_interface *xen_store; /*-------------------------- Private Data Structures ------------------------*/ /** * Structure capturing messages received from the XenStore service. */ struct xs_stored_msg { TAILQ_ENTRY(xs_stored_msg) list; struct xsd_sockmsg hdr; union { /* Queued replies. */ struct { char *body; } reply; /* Queued watch events. */ struct { struct xs_watch *handle; const char **vec; u_int vec_size; } watch; } u; }; TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg); /** * Container for all XenStore related state. */ struct xs_softc { /** Newbus device for the XenStore. */ device_t xs_dev; /** * Lock serializing access to ring producer/consumer * indexes. Use of this lock guarantees that wakeups * of blocking readers/writers are not missed due to * races with the XenStore service. */ struct mtx ring_lock; /* * Mutex used to insure exclusive access to the outgoing * communication ring. We use a lock type that can be * held while sleeping so that xs_write() can block waiting * for space in the ring to free up, without allowing another * writer to come in and corrupt a partial message write. */ struct sx request_mutex; /** * A list of replies to our requests. * * The reply list is filled by xs_rcv_thread(). It * is consumed by the context that issued the request * to which a reply is made. The requester blocks in * xs_read_reply(). * * /note Only one requesting context can be active at a time. * This is guaranteed by the request_mutex and insures * that the requester sees replies matching the order * of its requests. */ struct xs_stored_msg_list reply_list; /** Lock protecting the reply list. */ struct mtx reply_lock; /** * List of registered watches. */ struct xs_watch_list registered_watches; /** Lock protecting the registered watches list. */ struct mtx registered_watches_lock; /** * List of pending watch callback events. */ struct xs_stored_msg_list watch_events; /** Lock protecting the watch calback list. */ struct mtx watch_events_lock; /** * Sleepable lock used to prevent VM suspension while a * xenstore transaction is outstanding. * * Each active transaction holds a shared lock on the * suspend mutex. Our suspend method blocks waiting * to acquire an exclusive lock. This guarantees that * suspend processing will only proceed once all active * transactions have been retired. */ struct sx suspend_mutex; /** * The processid of the xenwatch thread. */ pid_t xenwatch_pid; /** * Sleepable mutex used to gate the execution of XenStore * watch event callbacks. * * xenwatch_thread holds an exclusive lock on this mutex * while delivering event callbacks, and xenstore_unregister_watch() * uses an exclusive lock of this mutex to guarantee that no * callbacks of the just unregistered watch are pending * before returning to its caller. */ struct sx xenwatch_mutex; #ifdef XENHVM /** * The HVM guest pseudo-physical frame number. This is Xen's mapping * of the true machine frame number into our "physical address space". */ unsigned long gpfn; #endif /** * The event channel for communicating with the * XenStore service. */ int evtchn; - /** Interrupt number for our event channel. */ - u_int irq; + /** Handle for XenStore interrupts. */ + xen_intr_handle_t xen_intr_handle; /** * Interrupt driven config hook allowing us to defer * attaching children until interrupts (and thus communication * with the XenStore service) are available. 
*/ struct intr_config_hook xs_attachcb; }; /*-------------------------------- Global Data ------------------------------*/ static struct xs_softc xs; /*------------------------- Private Utility Functions -----------------------*/ /** * Count and optionally record pointers to a number of NUL terminated * strings in a buffer. * * \param strings A pointer to a contiguous buffer of NUL terminated strings. * \param dest An array to store pointers to each string found in strings. * \param len The length of the buffer pointed to by strings. * * \return A count of the number of strings found. */ static u_int extract_strings(const char *strings, const char **dest, u_int len) { u_int num; const char *p; for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) { if (dest != NULL) *dest++ = p; num++; } return (num); } /** * Convert a contiguous buffer containing a series of NUL terminated * strings into an array of pointers to strings. * * The returned pointer references the array of string pointers which * is followed by the storage for the string data. It is the client's * responsibility to free this storage. * * The storage addressed by strings is free'd prior to split returning. * * \param strings A pointer to a contiguous buffer of NUL terminated strings. * \param len The length of the buffer pointed to by strings. * \param num The number of strings found and returned in the strings * array. * * \return An array of pointers to the strings found in the input buffer. */ static const char ** split(char *strings, u_int len, u_int *num) { const char **ret; /* Protect against unterminated buffers. */ if (len > 0) strings[len - 1] = '\0'; /* Count the strings. */ *num = extract_strings(strings, /*dest*/NULL, len); /* Transfer to one big alloc for easy freeing by the caller. */ ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK); memcpy(&ret[*num], strings, len); free(strings, M_XENSTORE); /* Extract pointers to newly allocated array. */ strings = (char *)&ret[*num]; (void)extract_strings(strings, /*dest*/ret, len); return (ret); } /*------------------------- Public Utility Functions -------------------------*/ /*------- API comments for these methods can be found in xenstorevar.h -------*/ struct sbuf * xs_join(const char *dir, const char *name) { struct sbuf *sb; sb = sbuf_new_auto(); sbuf_cat(sb, dir); if (name[0] != '\0') { sbuf_putc(sb, '/'); sbuf_cat(sb, name); } sbuf_finish(sb); return (sb); } /*-------------------- Low Level Communication Management --------------------*/ /** * Interrupt handler for the XenStore event channel. * * XenStore reads and writes block on "xen_store" for buffer * space. Wakeup any blocking operations when the XenStore * service has modified the queues. */ static void xs_intr(void * arg __unused /*__attribute__((unused))*/) { /* * Hold ring lock across wakeup so that clients * cannot miss a wakeup. */ mtx_lock(&xs.ring_lock); wakeup(xen_store); mtx_unlock(&xs.ring_lock); } /** * Verify that the indexes for a ring are valid. * * The difference between the producer and consumer cannot * exceed the size of the ring. * * \param cons The consumer index for the ring to test. * \param prod The producer index for the ring to test. * * \retval 1 If indexes are in range. * \retval 0 If the indexes are out of range. */ static int xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) { return ((prod - cons) <= XENSTORE_RING_SIZE); } /** * Return a pointer to, and the length of, the contiguous * free region available for output in a ring buffer. 
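As a worked illustration of extract_strings() and split() above, consider a two-entry, XS_DIRECTORY-style reply; the buffer contents below are made up and split_example() is only a sketch of the calling convention.

/* Illustrative only: unpacking a two-entry reply buffer. */
static void
split_example(void)
{
	const char **ret;
	char *strings;
	u_int num;

	strings = malloc(8, M_XENSTORE, M_WAITOK);
	memcpy(strings, "vif\0vbd\0", 8);
	ret = split(strings, 8, &num);	/* split() frees 'strings' itself */
	/*
	 * num == 2; ret[0] points at "vif" and ret[1] at "vbd".  The
	 * pointer array and the copied string data share one allocation,
	 * so a single free() releases everything.
	 */
	free(ret, M_XENSTORE);
}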
* * \param cons The consumer index for the ring. * \param prod The producer index for the ring. * \param buf The base address of the ring's storage. * \param len The amount of contiguous storage available. * * \return A pointer to the start location of the free region. */ static void * xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, char *buf, uint32_t *len) { *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) *len = XENSTORE_RING_SIZE - (prod - cons); return (buf + MASK_XENSTORE_IDX(prod)); } /** * Return a pointer to, and the length of, the contiguous * data available to read from a ring buffer. * * \param cons The consumer index for the ring. * \param prod The producer index for the ring. * \param buf The base address of the ring's storage. * \param len The amount of contiguous data available to read. * * \return A pointer to the start location of the available data. */ static const void * xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, const char *buf, uint32_t *len) { *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); if ((prod - cons) < *len) *len = prod - cons; return (buf + MASK_XENSTORE_IDX(cons)); } /** * Transmit data to the XenStore service. * * \param tdata A pointer to the contiguous data to send. * \param len The amount of data to send. * * \return On success 0, otherwise an errno value indicating the * cause of failure. * * \invariant Called from thread context. * \invariant The buffer pointed to by tdata is at least len bytes * in length. * \invariant xs.request_mutex exclusively locked. */ static int xs_write_store(const void *tdata, unsigned len) { XENSTORE_RING_IDX cons, prod; const char *data = (const char *)tdata; int error; sx_assert(&xs.request_mutex, SX_XLOCKED); while (len != 0) { void *dst; u_int avail; /* Hold lock so we can't miss wakeups should we block. */ mtx_lock(&xs.ring_lock); cons = xen_store->req_cons; prod = xen_store->req_prod; if ((prod - cons) == XENSTORE_RING_SIZE) { /* * Output ring is full. Wait for a ring event. * * Note that the events from both queues * are combined, so being woken does not * guarantee that data exist in the read * ring. * * To simplify error recovery and the retry, * we specify PDROP so our lock is *not* held * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, "xbwrite", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); /* Try again. */ continue; } mtx_unlock(&xs.ring_lock); /* Verify queue sanity. */ if (!xs_check_indexes(cons, prod)) { xen_store->req_cons = xen_store->req_prod = 0; return (EIO); } dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail); if (avail > len) avail = len; memcpy(dst, data, avail); data += avail; len -= avail; /* * The store to the producer index, which indicates * to the other side that new data has arrived, must * be visible only after our copy of the data into the * ring has completed. */ wmb(); xen_store->req_prod += avail; /* - * notify_remote_via_evtchn implies mb(). The other side - * will see the change to req_prod at the time of the - * interrupt. + * xen_intr_signal() implies mb(). The other side will see + * the change to req_prod at the time of the interrupt. */ - notify_remote_via_evtchn(xs.evtchn); + xen_intr_signal(xs.xen_intr_handle); } return (0); } /** * Receive data from the XenStore service. * * \param tdata A pointer to the contiguous buffer to receive the data. * \param len The amount of data to receive. 
* * \return On success 0, otherwise an errno value indicating the * cause of failure. * * \invariant Called from thread context. * \invariant The buffer pointed to by tdata is at least len bytes * in length. * * \note xs_read does not perform any internal locking to guarantee * serial access to the incoming ring buffer. However, there * is only one context processing reads: xs_rcv_thread(). */ static int xs_read_store(void *tdata, unsigned len) { XENSTORE_RING_IDX cons, prod; char *data = (char *)tdata; int error; while (len != 0) { u_int avail; const char *src; /* Hold lock so we can't miss wakeups should we block. */ mtx_lock(&xs.ring_lock); cons = xen_store->rsp_cons; prod = xen_store->rsp_prod; if (cons == prod) { /* * Nothing to read. Wait for a ring event. * * Note that the events from both queues * are combined, so being woken does not * guarantee that data exist in the read * ring. * * To simplify error recovery and the retry, * we specify PDROP so our lock is *not* held * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, "xbread", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); continue; } mtx_unlock(&xs.ring_lock); /* Verify queue sanity. */ if (!xs_check_indexes(cons, prod)) { xen_store->rsp_cons = xen_store->rsp_prod = 0; return (EIO); } src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail); if (avail > len) avail = len; /* * Insure the data we read is related to the indexes * we read above. */ rmb(); memcpy(data, src, avail); data += avail; len -= avail; /* * Insure that the producer of this ring does not see * the ring space as free until after we have copied it * out. */ mb(); xen_store->rsp_cons += avail; /* - * notify_remote_via_evtchn implies mb(). The producer - * will see the updated consumer index when the event - * is delivered. + * xen_intr_signal() implies mb(). The producer will see + * the updated consumer index when the event is delivered. */ - notify_remote_via_evtchn(xs.evtchn); + xen_intr_signal(xs.xen_intr_handle); } return (0); } /*----------------------- Received Message Processing ------------------------*/ /** * Block reading the next message from the XenStore service and * process the result. * * \param type The returned type of the XenStore message received. * * \return 0 on success. Otherwise an errno value indicating the * type of failure encountered. 
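To make the chunk arithmetic above concrete, here is a sketch with illustrative index values; XENSTORE_RING_SIZE is assumed to be 1024 and the indexes are free-running, only masked when used as offsets into the ring storage.

static void
output_chunk_example(void)
{
	/* Illustrative values: a nearly full request ring that wraps. */
	XENSTORE_RING_IDX cons = 2040, prod = 3056;
	uint32_t len;
	char *dst;

	KASSERT(xs_check_indexes(cons, prod), ("indexes out of range"));
	dst = xs_get_output_chunk(cons, prod, xen_store->req, &len);
	/*
	 * MASK_XENSTORE_IDX(prod) == 1008, so dst == xen_store->req + 1008
	 * and len == min(1024 - 1008, 1024 - (prod - cons)) == 8: only 8
	 * bytes can be copied before req_prod is advanced (after a wmb())
	 * and the peer is notified with xen_intr_signal().
	 */
	(void)dst;
}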
*/ static int xs_process_msg(enum xsd_sockmsg_type *type) { struct xs_stored_msg *msg; char *body; int error; msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK); error = xs_read_store(&msg->hdr, sizeof(msg->hdr)); if (error) { free(msg, M_XENSTORE); return (error); } body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK); error = xs_read_store(body, msg->hdr.len); if (error) { free(body, M_XENSTORE); free(msg, M_XENSTORE); return (error); } body[msg->hdr.len] = '\0'; *type = msg->hdr.type; if (msg->hdr.type == XS_WATCH_EVENT) { msg->u.watch.vec = split(body, msg->hdr.len, &msg->u.watch.vec_size); mtx_lock(&xs.registered_watches_lock); msg->u.watch.handle = find_watch( msg->u.watch.vec[XS_WATCH_TOKEN]); if (msg->u.watch.handle != NULL) { mtx_lock(&xs.watch_events_lock); TAILQ_INSERT_TAIL(&xs.watch_events, msg, list); wakeup(&xs.watch_events); mtx_unlock(&xs.watch_events_lock); } else { free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } mtx_unlock(&xs.registered_watches_lock); } else { msg->u.reply.body = body; mtx_lock(&xs.reply_lock); TAILQ_INSERT_TAIL(&xs.reply_list, msg, list); wakeup(&xs.reply_list); mtx_unlock(&xs.reply_lock); } return (0); } /** * Thread body of the XenStore receive thread. * * This thread blocks waiting for data from the XenStore service * and processes and received messages. */ static void xs_rcv_thread(void *arg __unused) { int error; enum xsd_sockmsg_type type; for (;;) { error = xs_process_msg(&type); if (error) printf("XENSTORE error %d while reading message\n", error); } } /*---------------- XenStore Message Request/Reply Processing -----------------*/ /** * Filter invoked before transmitting any message to the XenStore service. * * The role of the filter may expand, but currently serves to manage * the interactions of messages with transaction state. * * \param request_msg_type The message type for the request. */ static inline void xs_request_filter(uint32_t request_msg_type) { if (request_msg_type == XS_TRANSACTION_START) sx_slock(&xs.suspend_mutex); } /** * Filter invoked after transmitting any message to the XenStore service. * * The role of the filter may expand, but currently serves to manage * the interactions of messages with transaction state. * * \param request_msg_type The message type for the original request. * \param reply_msg_type The message type for any received reply. * \param request_reply_error The error status from the attempt to send * the request or retrieve the reply. */ static inline void xs_reply_filter(uint32_t request_msg_type, uint32_t reply_msg_type, int request_reply_error) { /* * The count of transactions drops if we attempted * to end a transaction (even if that attempt fails * in error), we receive a transaction end acknowledgement, * or if our attempt to begin a transaction fails. */ if (request_msg_type == XS_TRANSACTION_END || (request_reply_error == 0 && reply_msg_type == XS_TRANSACTION_END) || (request_msg_type == XS_TRANSACTION_START && (request_reply_error != 0 || reply_msg_type == XS_ERROR))) sx_sunlock(&xs.suspend_mutex); } #define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0])) /** * Convert a XenStore error string into an errno number. * * \param errorstring The error string to convert. * * \return The errno best matching the input string. * * \note Unknown error strings are converted to EINVAL. 
*/ static int xs_get_error(const char *errorstring) { u_int i; for (i = 0; i < xsd_error_count; i++) { if (!strcmp(errorstring, xsd_errors[i].errstring)) return (xsd_errors[i].errnum); } log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s", errorstring); return (EINVAL); } /** * Block waiting for a reply to a message request. * * \param type The returned type of the reply. * \param len The returned body length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result) { struct xs_stored_msg *msg; char *body; int error; mtx_lock(&xs.reply_lock); while (TAILQ_EMPTY(&xs.reply_list)) { error = mtx_sleep(&xs.reply_list, &xs.reply_lock, PCATCH, "xswait", hz/10); if (error && error != EWOULDBLOCK) { mtx_unlock(&xs.reply_lock); return (error); } } msg = TAILQ_FIRST(&xs.reply_list); TAILQ_REMOVE(&xs.reply_list, msg, list); mtx_unlock(&xs.reply_lock); *type = msg->hdr.type; if (len) *len = msg->hdr.len; body = msg->u.reply.body; free(msg, M_XENSTORE); *result = body; return (0); } /** * Pass-thru interface for XenStore access by userland processes * via the XenStore device. * * Reply type and length data are returned by overwriting these * fields in the passed in request message. * * \param msg A properly formatted message to transmit to * the XenStore service. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating the cause * of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(result, M_XENSTORE); */ int xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result) { uint32_t request_type; int error; request_type = msg->type; xs_request_filter(request_type); sx_xlock(&xs.request_mutex); if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0) error = xs_read_reply(&msg->type, &msg->len, result); sx_xunlock(&xs.request_mutex); xs_reply_filter(request_type, msg->type, error); return (error); } /** * Send a message with an optionally muti-part body to the XenStore service. * * \param t The transaction to use for this request. * \param request_type The type of message to send. * \param iovec Pointers to the body sections of the request. * \param num_vecs The number of body sections in the request. * \param len The returned length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating * the cause of failure. 
* * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(*result, M_XENSTORE); */ static int xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type, const struct iovec *iovec, u_int num_vecs, u_int *len, void **result) { struct xsd_sockmsg msg; void *ret = NULL; u_int i; int error; msg.tx_id = t.id; msg.req_id = 0; msg.type = request_type; msg.len = 0; for (i = 0; i < num_vecs; i++) msg.len += iovec[i].iov_len; xs_request_filter(request_type); sx_xlock(&xs.request_mutex); error = xs_write_store(&msg, sizeof(msg)); if (error) { printf("xs_talkv failed %d\n", error); goto error_lock_held; } for (i = 0; i < num_vecs; i++) { error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len); if (error) { printf("xs_talkv failed %d\n", error); goto error_lock_held; } } error = xs_read_reply(&msg.type, len, &ret); error_lock_held: sx_xunlock(&xs.request_mutex); xs_reply_filter(request_type, msg.type, error); if (error) return (error); if (msg.type == XS_ERROR) { error = xs_get_error(ret); free(ret, M_XENSTORE); return (error); } /* Reply is either error or an echo of our request message type. */ KASSERT(msg.type == request_type, ("bad xenstore message type")); if (result) *result = ret; else free(ret, M_XENSTORE); return (0); } /** * Wrapper for xs_talkv allowing easy transmission of a message with * a single, contiguous, message body. * * \param t The transaction to use for this request. * \param request_type The type of message to send. * \param body The body of the request. * \param len The returned length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating * the cause of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(*result, M_XENSTORE); */ static int xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type, const char *body, u_int *len, void **result) { struct iovec iovec; iovec.iov_base = (void *)(uintptr_t)body; iovec.iov_len = strlen(body) + 1; return (xs_talkv(t, request_type, &iovec, 1, len, result)); } /*------------------------- XenStore Watch Support ---------------------------*/ /** * Transmit a watch request to the XenStore service. * * \param path The path in the XenStore to watch. * \param tocken A unique identifier for this watch. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_watch(const char *path, const char *token) { struct iovec iov[2]; iov[0].iov_base = (void *)(uintptr_t) path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)(uintptr_t) token; iov[1].iov_len = strlen(token) + 1; return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL)); } /** * Transmit an uwatch request to the XenStore service. * * \param path The path in the XenStore to watch. * \param tocken A unique identifier for this watch. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_unwatch(const char *path, const char *token) { struct iovec iov[2]; iov[0].iov_base = (void *)(uintptr_t) path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)(uintptr_t) token; iov[1].iov_len = strlen(token) + 1; return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL)); } /** * Convert from watch token (unique identifier) to the associated * internal tracking structure for this watch. * * \param tocken The unique identifier for the watch to find. 
* * \return A pointer to the found watch structure or NULL. */ static struct xs_watch * find_watch(const char *token) { struct xs_watch *i, *cmp; cmp = (void *)strtoul(token, NULL, 16); LIST_FOREACH(i, &xs.registered_watches, list) if (i == cmp) return (i); return (NULL); } /** * Thread body of the XenStore watch event dispatch thread. */ static void xenwatch_thread(void *unused) { struct xs_stored_msg *msg; for (;;) { mtx_lock(&xs.watch_events_lock); while (TAILQ_EMPTY(&xs.watch_events)) mtx_sleep(&xs.watch_events, &xs.watch_events_lock, PWAIT | PCATCH, "waitev", hz/10); mtx_unlock(&xs.watch_events_lock); sx_xlock(&xs.xenwatch_mutex); mtx_lock(&xs.watch_events_lock); msg = TAILQ_FIRST(&xs.watch_events); if (msg) TAILQ_REMOVE(&xs.watch_events, msg, list); mtx_unlock(&xs.watch_events_lock); if (msg != NULL) { /* * XXX There are messages coming in with a NULL * XXX callback. This deserves further investigation; * XXX the workaround here simply prevents the kernel * XXX from panic'ing on startup. */ if (msg->u.watch.handle->callback != NULL) msg->u.watch.handle->callback( msg->u.watch.handle, (const char **)msg->u.watch.vec, msg->u.watch.vec_size); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } sx_xunlock(&xs.xenwatch_mutex); } } /*----------- XenStore Configuration, Initialization, and Control ------------*/ /** * Setup communication channels with the XenStore service. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xs_init_comms(void) { int error; if (xen_store->rsp_prod != xen_store->rsp_cons) { log(LOG_WARNING, "XENSTORE response ring is not quiescent " "(%08x:%08x): fixing up\n", xen_store->rsp_cons, xen_store->rsp_prod); xen_store->rsp_cons = xen_store->rsp_prod; } - if (xs.irq) - unbind_from_irqhandler(xs.irq); + xen_intr_unbind(&xs.xen_intr_handle); - error = bind_caller_port_to_irqhandler(xs.evtchn, "xenstore", - xs_intr, NULL, INTR_TYPE_NET, &xs.irq); + error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn, + /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE, + &xs.xen_intr_handle); if (error) { log(LOG_WARNING, "XENSTORE request irq failed %i\n", error); return (error); } return (0); } /*------------------ Private Device Attachment Functions --------------------*/ static void xs_identify(driver_t *driver, device_t parent) { BUS_ADD_CHILD(parent, 0, "xenstore", 0); } /** * Probe for the existance of the XenStore. * * \param dev */ static int xs_probe(device_t dev) { /* * We are either operating within a PV kernel or being probed * as the child of the successfully attached xenpci device. * Thus we are in a Xen environment and there will be a XenStore. * Unconditionally return success. */ device_set_desc(dev, "XenStore"); return (0); } static void xs_attach_deferred(void *arg) { xs_dev_init(); bus_generic_probe(xs.xs_dev); bus_generic_attach(xs.xs_dev); config_intrhook_disestablish(&xs.xs_attachcb); } /** * Attach to the XenStore. * * This routine also prepares for the probe/attach of drivers that rely * on the XenStore. */ static int xs_attach(device_t dev) { int error; /* Allow us to get device_t from softc and vice-versa. */ xs.xs_dev = dev; device_set_softc(dev, &xs); /* * This seems to be a layering violation. The XenStore is just * one of many clients of the Grant Table facility. It happens * to be the first and a gating consumer to all other devices, * so this does work. A better place would be in the PV support * code for fully PV kernels and the xenpci driver for HVM kernels. 
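The interrupt plumbing introduced in this hunk reduces to three operations on the new xen_intr_handle_t; a condensed sketch of the lifecycle as this file uses it, with dev and evtchn standing in for xs.xs_dev and xs.evtchn and error handling elided.

/* Condensed from xs_init_comms() and the ring I/O paths above. */
static void
xen_intr_lifecycle(device_t dev, evtchn_port_t evtchn)
{
	xen_intr_handle_t handle;
	int error;

	error = xen_intr_bind_local_port(dev, evtchn, /*filter*/NULL, xs_intr,
	    /*arg*/NULL, INTR_TYPE_NET | INTR_MPSAFE, &handle);
	if (error == 0) {
		xen_intr_signal(handle);	/* notify the peer after index updates */
		xen_intr_unbind(&handle);	/* drop the binding, e.g. before re-binding */
	}
}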
*/ error = gnttab_init(); if (error != 0) { log(LOG_WARNING, "XENSTORE: Error initializing grant tables: %d\n", error); return (ENXIO); } /* Initialize the interface to xenstore. */ struct proc *p; #ifdef XENHVM xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE); #else xs.evtchn = xen_start_info->store_evtchn; #endif TAILQ_INIT(&xs.reply_list); TAILQ_INIT(&xs.watch_events); mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF); mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF); sx_init(&xs.xenwatch_mutex, "xenwatch"); sx_init(&xs.request_mutex, "xenstore request"); sx_init(&xs.suspend_mutex, "xenstore suspend"); mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF); mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF); - xs.irq = 0; /* Initialize the shared memory rings to talk to xenstored */ error = xs_init_comms(); if (error) return (error); error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID, 0, "xenwatch"); if (error) return (error); xs.xenwatch_pid = p->p_pid; error = kproc_create(xs_rcv_thread, NULL, NULL, RFHIGHPID, 0, "xenstore_rcv"); xs.xs_attachcb.ich_func = xs_attach_deferred; xs.xs_attachcb.ich_arg = NULL; config_intrhook_establish(&xs.xs_attachcb); return (error); } /** * Prepare for suspension of this VM by halting XenStore access after * all transactions and individual requests have completed. */ static int xs_suspend(device_t dev) { int error; /* Suspend child Xen devices. */ error = bus_generic_suspend(dev); if (error != 0) return (error); sx_xlock(&xs.suspend_mutex); sx_xlock(&xs.request_mutex); return (0); } /** * Resume XenStore operations after this VM is resumed. */ static int xs_resume(device_t dev __unused) { struct xs_watch *watch; char token[sizeof(watch) * 2 + 1]; xs_init_comms(); sx_xunlock(&xs.request_mutex); /* * No need for registered_watches_lock: the suspend_mutex * is sufficient. */ LIST_FOREACH(watch, &xs.registered_watches, list) { sprintf(token, "%lX", (long)watch); xs_watch(watch->node, token); } sx_xunlock(&xs.suspend_mutex); /* Resume child Xen devices. */ bus_generic_resume(dev); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenstore_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xs_identify), DEVMETHOD(device_probe, xs_probe), DEVMETHOD(device_attach, xs_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xs_suspend), DEVMETHOD(device_resume, xs_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD_END }; DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0); static devclass_t xenstore_devclass; #ifdef XENHVM DRIVER_MODULE(xenstore, xenpci, xenstore_driver, xenstore_devclass, 0, 0); #else DRIVER_MODULE(xenstore, nexus, xenstore_driver, xenstore_devclass, 0, 0); #endif /*------------------------------- Sysctl Data --------------------------------*/ /* XXX Shouldn't the node be somewhere else? 
*/ SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, ""); SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); /*-------------------------------- Public API --------------------------------*/ /*------- API comments for these methods can be found in xenstorevar.h -------*/ int xs_directory(struct xs_transaction t, const char *dir, const char *node, u_int *num, const char ***result) { struct sbuf *path; char *strings; u_int len = 0; int error; path = xs_join(dir, node); error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len, (void **)&strings); sbuf_delete(path); if (error) return (error); *result = split(strings, len, num); return (0); } int xs_exists(struct xs_transaction t, const char *dir, const char *node) { const char **d; int error, dir_n; error = xs_directory(t, dir, node, &dir_n, &d); if (error) return (0); free(d, M_XENSTORE); return (1); } int xs_read(struct xs_transaction t, const char *dir, const char *node, u_int *len, void **result) { struct sbuf *path; void *ret; int error; path = xs_join(dir, node); error = xs_single(t, XS_READ, sbuf_data(path), len, &ret); sbuf_delete(path); if (error) return (error); *result = ret; return (0); } int xs_write(struct xs_transaction t, const char *dir, const char *node, const char *string) { struct sbuf *path; struct iovec iovec[2]; int error; path = xs_join(dir, node); iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path); iovec[0].iov_len = sbuf_len(path) + 1; iovec[1].iov_base = (void *)(uintptr_t) string; iovec[1].iov_len = strlen(string); error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL); sbuf_delete(path); return (error); } int xs_mkdir(struct xs_transaction t, const char *dir, const char *node) { struct sbuf *path; int ret; path = xs_join(dir, node); ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL); sbuf_delete(path); return (ret); } int xs_rm(struct xs_transaction t, const char *dir, const char *node) { struct sbuf *path; int ret; path = xs_join(dir, node); ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL); sbuf_delete(path); return (ret); } int xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node) { struct xs_transaction local_xbt; struct sbuf *root_path_sbuf; struct sbuf *cur_path_sbuf; char *root_path; char *cur_path; const char **dir; int error; int empty; retry: root_path_sbuf = xs_join(base, node); cur_path_sbuf = xs_join(base, node); root_path = sbuf_data(root_path_sbuf); cur_path = sbuf_data(cur_path_sbuf); dir = NULL; local_xbt.id = 0; if (xbt.id == 0) { error = xs_transaction_start(&local_xbt); if (error != 0) goto out; xbt = local_xbt; } empty = 0; while (1) { u_int count; u_int i; error = xs_directory(xbt, cur_path, "", &count, &dir); if (error) goto out; for (i = 0; i < count; i++) { error = xs_rm(xbt, cur_path, dir[i]); if (error == ENOTEMPTY) { struct sbuf *push_dir; /* * Descend to clear out this sub directory. * We'll return to cur_dir once push_dir * is empty. */ push_dir = xs_join(cur_path, dir[i]); sbuf_delete(cur_path_sbuf); cur_path_sbuf = push_dir; cur_path = sbuf_data(cur_path_sbuf); break; } else if (error != 0) { goto out; } } free(dir, M_XENSTORE); dir = NULL; if (i == count) { char *last_slash; /* Directory is empty. It is now safe to remove. */ error = xs_rm(xbt, cur_path, ""); if (error != 0) goto out; if (!strcmp(cur_path, root_path)) break; /* Return to processing the parent directory. 
*/ last_slash = strrchr(cur_path, '/'); KASSERT(last_slash != NULL, ("xs_rm_tree: mangled path %s", cur_path)); *last_slash = '\0'; } } out: sbuf_delete(cur_path_sbuf); sbuf_delete(root_path_sbuf); if (dir != NULL) free(dir, M_XENSTORE); if (local_xbt.id != 0) { int terror; terror = xs_transaction_end(local_xbt, /*abort*/error != 0); xbt.id = 0; if (terror == EAGAIN && error == 0) goto retry; } return (error); } int xs_transaction_start(struct xs_transaction *t) { char *id_str; int error; error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL, (void **)&id_str); if (error == 0) { t->id = strtoul(id_str, NULL, 0); free(id_str, M_XENSTORE); } return (error); } int xs_transaction_end(struct xs_transaction t, int abort) { char abortstr[2]; if (abort) strcpy(abortstr, "F"); else strcpy(abortstr, "T"); return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL)); } int xs_scanf(struct xs_transaction t, const char *dir, const char *node, int *scancountp, const char *fmt, ...) { va_list ap; int error, ns; char *val; error = xs_read(t, dir, node, NULL, (void **) &val); if (error) return (error); va_start(ap, fmt); ns = vsscanf(val, fmt, ap); va_end(ap); free(val, M_XENSTORE); /* Distinctive errno. */ if (ns == 0) return (ERANGE); if (scancountp) *scancountp = ns; return (0); } int xs_vprintf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, va_list ap) { struct sbuf *sb; int error; sb = sbuf_new_auto(); sbuf_vprintf(sb, fmt, ap); sbuf_finish(sb); error = xs_write(t, dir, node, sbuf_data(sb)); sbuf_delete(sb); return (error); } int xs_printf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, ...) { va_list ap; int error; va_start(ap, fmt); error = xs_vprintf(t, dir, node, fmt, ap); va_end(ap); return (error); } int xs_gather(struct xs_transaction t, const char *dir, ...) { va_list ap; const char *name; int error; va_start(ap, dir); error = 0; while (error == 0 && (name = va_arg(ap, char *)) != NULL) { const char *fmt = va_arg(ap, char *); void *result = va_arg(ap, void *); char *p; error = xs_read(t, dir, name, NULL, (void **) &p); if (error) break; if (fmt) { if (sscanf(p, fmt, result) == 0) error = EINVAL; free(p, M_XENSTORE); } else *(char **)result = p; } va_end(ap); return (error); } int xs_register_watch(struct xs_watch *watch) { /* Pointer in ascii is the token. */ char token[sizeof(watch) * 2 + 1]; int error; sprintf(token, "%lX", (long)watch); sx_slock(&xs.suspend_mutex); mtx_lock(&xs.registered_watches_lock); KASSERT(find_watch(token) == NULL, ("watch already registered")); LIST_INSERT_HEAD(&xs.registered_watches, watch, list); mtx_unlock(&xs.registered_watches_lock); error = xs_watch(watch->node, token); /* Ignore errors due to multiple registration. 
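The watch token built by xs_register_watch() here, and parsed by find_watch() earlier, is nothing more than the xs_watch pointer printed in hex. A small illustrative check of that round trip; token_roundtrip() is hypothetical and exists only to show the encoding.

/* Illustrative only: how the ASCII token maps back to the watch. */
static void
token_roundtrip(struct xs_watch *watch)
{
	char token[sizeof(watch) * 2 + 1];

	sprintf(token, "%lX", (long)watch);	/* e.g. "FFFFF80002C3D400" */
	/* find_watch() recovers the pointer with strtoul(token, NULL, 16). */
	KASSERT((struct xs_watch *)strtoul(token, NULL, 16) == watch,
	    ("watch token failed to round trip"));
}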
*/ if (error == EEXIST) error = 0; if (error != 0) { mtx_lock(&xs.registered_watches_lock); LIST_REMOVE(watch, list); mtx_unlock(&xs.registered_watches_lock); } sx_sunlock(&xs.suspend_mutex); return (error); } void xs_unregister_watch(struct xs_watch *watch) { struct xs_stored_msg *msg, *tmp; char token[sizeof(watch) * 2 + 1]; int error; sprintf(token, "%lX", (long)watch); sx_slock(&xs.suspend_mutex); mtx_lock(&xs.registered_watches_lock); if (find_watch(token) == NULL) { mtx_unlock(&xs.registered_watches_lock); sx_sunlock(&xs.suspend_mutex); return; } LIST_REMOVE(watch, list); mtx_unlock(&xs.registered_watches_lock); error = xs_unwatch(watch->node, token); if (error) log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n", watch->node, error); sx_sunlock(&xs.suspend_mutex); /* Cancel pending watch events. */ mtx_lock(&xs.watch_events_lock); TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) { if (msg->u.watch.handle != watch) continue; TAILQ_REMOVE(&xs.watch_events, msg, list); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } mtx_unlock(&xs.watch_events_lock); /* Flush any currently-executing callback, unless we are it. :-) */ if (curproc->p_pid != xs.xenwatch_pid) { sx_xlock(&xs.xenwatch_mutex); sx_xunlock(&xs.xenwatch_mutex); } } diff --git a/sys/xen/xenstore/xenstore_dev.c b/sys/xen/xenstore/xenstore_dev.c index 1fa419795ed1..e1b4091447ab 100644 --- a/sys/xen/xenstore/xenstore_dev.c +++ b/sys/xen/xenstore/xenstore_dev.c @@ -1,224 +1,224 @@ /* * xenstore_dev.c * * Driver giving user-space access to the kernel's connection to the * XenStore service. * * Copyright (c) 2005, Christian Limpach * Copyright (c) 2005, Rusty Russell, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include struct xs_dev_transaction { LIST_ENTRY(xs_dev_transaction) list; struct xs_transaction handle; }; struct xs_dev_data { /* In-progress transaction. */ LIST_HEAD(xdd_list_head, xs_dev_transaction) transactions; /* Partial request. */ unsigned int len; union { struct xsd_sockmsg msg; char buffer[PAGE_SIZE]; } u; /* Response queue. 
*/ #define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1)) char read_buffer[PAGE_SIZE]; unsigned int read_cons, read_prod; }; static int xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { int error; struct xs_dev_data *u = dev->si_drv1; while (u->read_prod == u->read_cons) { error = tsleep(u, PCATCH, "xsdread", hz/10); if (error && error != EWOULDBLOCK) return (error); } while (uio->uio_resid > 0) { if (u->read_cons == u->read_prod) break; error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)], 1, uio); if (error) return (error); u->read_cons++; } return (0); } static void xs_queue_reply(struct xs_dev_data *u, char *data, unsigned int len) { int i; for (i = 0; i < len; i++, u->read_prod++) u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i]; KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer), ("xenstore reply too big")); wakeup(u); } static int xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag) { int error; struct xs_dev_data *u = dev->si_drv1; struct xs_dev_transaction *trans; void *reply; int len = uio->uio_resid; if ((len + u->len) > sizeof(u->u.buffer)) return (EINVAL); error = uiomove(u->u.buffer + u->len, len, uio); if (error) return (error); u->len += len; if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) return (0); switch (u->u.msg.type) { case XS_TRANSACTION_START: case XS_TRANSACTION_END: case XS_DIRECTORY: case XS_READ: case XS_GET_PERMS: case XS_RELEASE: case XS_GET_DOMAIN_PATH: case XS_WRITE: case XS_MKDIR: case XS_RM: case XS_SET_PERMS: error = xs_dev_request_and_reply(&u->u.msg, &reply); if (!error) { if (u->u.msg.type == XS_TRANSACTION_START) { trans = malloc(sizeof(*trans), M_XENSTORE, M_WAITOK); trans->handle.id = strtoul(reply, NULL, 0); LIST_INSERT_HEAD(&u->transactions, trans, list); } else if (u->u.msg.type == XS_TRANSACTION_END) { LIST_FOREACH(trans, &u->transactions, list) if (trans->handle.id == u->u.msg.tx_id) break; #if 0 /* XXX does this mean the list is empty? 
*/ BUG_ON(&trans->list == &u->transactions); #endif LIST_REMOVE(trans, list); free(trans, M_XENSTORE); } xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); xs_queue_reply(u, (char *)reply, u->u.msg.len); free(reply, M_XENSTORE); } break; default: error = EINVAL; break; } if (error == 0) u->len = 0; return (error); } static int xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct xs_dev_data *u; #if 0 /* XXX figure out if equiv needed */ nonseekable_open(inode, filp); #endif u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO); LIST_INIT(&u->transactions); dev->si_drv1 = u; return (0); } static int xs_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { struct xs_dev_data *u = dev->si_drv1; struct xs_dev_transaction *trans, *tmp; LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) { xs_transaction_end(trans->handle, 1); LIST_REMOVE(trans, list); free(trans, M_XENSTORE); } free(u, M_XENSTORE); return (0); } static struct cdevsw xs_dev_cdevsw = { .d_version = D_VERSION, .d_read = xs_dev_read, .d_write = xs_dev_write, .d_open = xs_dev_open, .d_close = xs_dev_close, .d_name = "xs_dev", }; void xs_dev_init() { make_dev(&xs_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, "xen/xenstore"); } diff --git a/sys/xen/xenstore/xenstorevar.h b/sys/xen/xenstore/xenstorevar.h index 4a1382d8f746..208e5bf09b8e 100644 --- a/sys/xen/xenstore/xenstorevar.h +++ b/sys/xen/xenstore/xenstorevar.h @@ -1,341 +1,341 @@ /****************************************************************************** * xenstorevar.h * * Method declarations and structures for accessing the XenStore.h * * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 XenSource Ltd. * Copyright (C) 2009,2010 Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef _XEN_XENSTORE_XENSTOREVAR_H #define _XEN_XENSTORE_XENSTOREVAR_H #include #include #include #include #include #include -#include +#include #include #include #include #include "xenbus_if.h" /* XenStore allocations including XenStore data returned to clients. */ MALLOC_DECLARE(M_XENSTORE); struct xenstore_domain_interface; struct xs_watch; extern struct xenstore_domain_interface *xen_store; typedef void (xs_watch_cb_t)(struct xs_watch *, const char **vec, unsigned int len); /* Register callback to watch subtree (node) in the XenStore. 
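The framing implemented by xs_dev_write() and xs_dev_read() above can be exercised from user space roughly as follows: the client writes a struct xsd_sockmsg header followed by the body (the driver buffers partial writes until header plus msg.len bytes have arrived), then reads back the reply header followed by its body. The fragment is hypothetical; the required user-space headers are omitted and the "domid" node is only an example.

/* Hypothetical user-space client of /dev/xen/xenstore (headers omitted). */
static void
xs_dev_read_domid(void)
{
	struct xsd_sockmsg msg;
	char buf[sizeof(msg) + 64];
	const char *path = "domid";
	int fd;

	fd = open("/dev/xen/xenstore", O_RDWR);
	msg.type = XS_READ;
	msg.req_id = 0;
	msg.tx_id = 0;				/* no transaction */
	msg.len = strlen(path) + 1;
	memcpy(buf, &msg, sizeof(msg));
	memcpy(buf + sizeof(msg), path, msg.len);
	/* xs_dev_write() buffers until header plus msg.len bytes arrive. */
	write(fd, buf, sizeof(msg) + msg.len);
	/* xs_dev_read() returns the queued reply header followed by its body. */
	read(fd, buf, sizeof(buf));
	close(fd);
}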
*/ struct xs_watch { LIST_ENTRY(xs_watch) list; /* Path being watched. */ char *node; /* Callback (executed in a process context with no locks held). */ xs_watch_cb_t *callback; /* Callback client data untouched by the XenStore watch mechanism. */ uintptr_t callback_data; }; LIST_HEAD(xs_watch_list, xs_watch); typedef int (*xs_event_handler_t)(void *); struct xs_transaction { uint32_t id; }; #define XST_NIL ((struct xs_transaction) { 0 }) /** * Fetch the contents of a directory in the XenStore. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param num The returned number of directory entries. * \param result An array of directory entry strings. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * \note The results buffer is malloced and should be free'd by the * caller with 'free(*result, M_XENSTORE)'. */ int xs_directory(struct xs_transaction t, const char *dir, const char *node, unsigned int *num, const char ***result); /** * Determine if a path exists in the XenStore. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * * \retval 1 The path exists. * \retval 0 The path does not exist or an error occurred attempting * to make that determination. */ int xs_exists(struct xs_transaction t, const char *dir, const char *node); /** * Get the contents of a single "file". Returns the contents in * *result which should be freed with free(*result, M_XENSTORE) after * use. The length of the value in bytes is returned in *len. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the file to read. * \param node The basename of the file to read. * \param len The amount of data read. * \param result The returned contents from this file. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * \note The results buffer is malloced and should be free'd by the * caller with 'free(*result, M_XENSTORE)'. */ int xs_read(struct xs_transaction t, const char *dir, const char *node, unsigned int *len, void **result); /** * Write to a single file. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the file to write. * \param node The basename of the file to write. * \param string The NUL terminated string of data to write. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_write(struct xs_transaction t, const char *dir, const char *node, const char *string); /** * Create a new directory. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the directory to create. * \param node The basename of the directory to create. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_mkdir(struct xs_transaction t, const char *dir, const char *node); /** * Remove a file or directory (directories must be empty). * * \param t The XenStore transaction covering this request. * \param dir The dirname of the directory to remove. * \param node The basename of the directory to remove. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_rm(struct xs_transaction t, const char *dir, const char *node); /** * Destroy a tree of files rooted at dir/node. * * \param t The XenStore transaction covering this request. 
* \param dir The dirname of the directory to remove. * \param node The basename of the directory to remove. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_rm_tree(struct xs_transaction t, const char *dir, const char *node); /** * Start a transaction. * * Changes by others will not be seen during the lifetime of this * transaction, and changes will not be visible to others until it * is committed (xs_transaction_end). * * \param t The returned transaction. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_transaction_start(struct xs_transaction *t); /** * End a transaction. * * \param t The transaction to end/commit. * \param abort If non-zero, the transaction is discarded * instead of committed. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_transaction_end(struct xs_transaction t, int abort); /* * Single file read and scanf parsing of the result. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param scancountp The number of input values assigned (i.e. the result * of scanf). * \param fmt Scanf format string followed by a variable number of * scanf input arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_scanf(struct xs_transaction t, const char *dir, const char *node, int *scancountp, const char *fmt, ...) __attribute__((format(scanf, 5, 6))); /** * Printf formatted write to a XenStore file. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param fmt Printf format string followed by a variable number of * printf arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. */ int xs_printf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, ...) __attribute__((format(printf, 4, 5))); /** * va_list version of xenbus_printf(). * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param fmt Printf format string. * \param ap Va_list of printf arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. */ int xs_vprintf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, va_list ap); /** * Multi-file read within a single directory and scanf parsing of * the results. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the paths to read. * \param ... A variable number of argument triples specifying * the file name, scanf-style format string, and * output variable (pointer to storage of the results). * The last triple in the call must be terminated * will a final NULL argument. A NULL format string * will cause the entire contents of the given file * to be assigned as a NUL terminated, M_XENSTORE heap * backed, string to the output parameter of that tuple. * * \return On success, 0. Otherwise an errno value indicating the * type of read failure. * * Example: * char protocol_abi[64]; * uint32_t ring_ref; * char *dev_type; * int error; * * error = xenbus_gather(XBT_NIL, xenbus_get_node(dev), * "ring-ref", "%" PRIu32, &ring_ref, * "protocol", "%63s", protocol_abi, * "device-type", NULL, &dev_type, * NULL); * * ... 
* * free(dev_type, M_XENSTORE); */ int xs_gather(struct xs_transaction t, const char *dir, ...); /** * Register a XenStore watch. * * XenStore watches allow a client to be notified via a callback (embedded * within the watch object) of changes to an object in the XenStore. * * \param watch An xs_watch struct with it's node and callback fields * properly initialized. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. EEXIST errors from the XenStore * are supressed, allowing multiple, physically different, * xenbus_watch objects, to watch the same path in the XenStore. */ int xs_register_watch(struct xs_watch *watch); /** * Unregister a XenStore watch. * * \param watch An xs_watch object previously used in a successful call * to xs_register_watch(). * * The xs_watch object's node field is not altered by this call. * It is the caller's responsibility to properly dispose of both the * watch object and the data pointed to by watch->node. */ void xs_unregister_watch(struct xs_watch *watch); /** * Allocate and return an sbuf containing the XenStore path string * /. If name is the NUL string, the returned sbuf contains * the path string . * * \param dir The NUL terminated directory prefix for new path. * \param name The NUL terminated basename for the new path. * * \return A buffer containing the joined path. */ struct sbuf *xs_join(const char *, const char *); #endif /* _XEN_XENSTORE_XENSTOREVAR_H */
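Finally, a short hypothetical consumer of the transaction interfaces declared above, using the same retry convention as xs_rm_tree() in this patch: xs_transaction_end() reports EAGAIN when the transaction raced with another writer and must be replayed. publish_ring_info() and its node names are illustrative only.

/* Hypothetical: atomically publish two nodes, retrying on EAGAIN. */
static int
publish_ring_info(const char *dir, uint32_t ring_ref, uint32_t evtchn)
{
	struct xs_transaction xbt;
	int error;

again:
	error = xs_transaction_start(&xbt);
	if (error != 0)
		return (error);
	error = xs_printf(xbt, dir, "ring-ref", "%u", ring_ref);
	if (error == 0)
		error = xs_printf(xbt, dir, "event-channel", "%u", evtchn);
	/* Abort on error; replay the whole transaction if it raced. */
	if (xs_transaction_end(xbt, /*abort*/error != 0) == EAGAIN && error == 0)
		goto again;
	return (error);
}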